From 6b9ebe95e5773933bec133f828b7a3de85fe1d02 Mon Sep 17 00:00:00 2001 From: Thomas VINCENT Date: Fri, 19 Jan 2024 09:10:23 +0100 Subject: [PATCH 1/3] Squashed 'src/c-blosc2/' changes from 5b38ffeb6..e512ce0ac e512ce0ac Getting ready for release 2.12.0 c3541c77d Getting ready for release 2.11.4 72d279eb9 Comment out test that might cause crashes 2d166040c Change reallof by realloc e47fbead2 Add funcs for getting chunk indexs of slice 733ed9b3c Add a reason in commented out test 0d95ba6fe Go back to using 2 threads 9a827fd7b Tests for CI AARCH64 12952aefc Fix handler name 87605a3e5 Register the grok codec (ID 37) 017b2780d Bump actions/upload-artifact from 3 to 4 1594bc341 Update README.rst fbe03a229 Update README.rst 9ffd8f262 Update README.rst d781240aa Post 2.11.3 release actions done 6bc96bf65 Getting ready for release 2.11.3 211e5a956 Fix mention to PR eeac9ec39 Allow typesizes > 255 in frames. 182f5571d Use typedef for blosc_timestamp_t a0716d809 Set compiler versions known to work with AVX512 dispatch da14975c2 Post 2.11.3 release actions done 3ea8b4ae2 Getting ready for release 2.11.2 0531353c0 Disable bitshuffle NEON for all ARM platforms 231a98f83 Activate unaligned access only on ARM64 on 64bit platforms 369fe109f Post 2.11.1 release actions done git-subtree-dir: src/c-blosc2 git-subtree-split: e512ce0acbc66ccd69c11fd1aff3f39f9a25b79e --- .github/workflows/fuzz.yml | 2 +- ANNOUNCE.md | 10 +- CMakeLists.txt | 9 +- README.rst | 2 +- RELEASE_NOTES.md | 39 +++++++ blosc/CMakeLists.txt | 4 +- blosc/b2nd-private.h | 40 +++++++ blosc/b2nd.c | 86 ++++++++++++++++ blosc/blosc2.c | 24 +++++ blosc/frame.c | 4 +- blosc/schunk-private.h | 40 +++++++ blosc/schunk.c | 21 ++++ include/blosc2.h | 27 +++-- include/blosc2/blosc2-common.h | 2 +- include/blosc2/codecs-registry.h | 1 + plugins/codecs/codecs-registry.c | 9 ++ tests/b2nd/test_b2nd_save.c | 4 +- tests/test_get_slice_nchunks.c | 172 +++++++++++++++++++++++++++++++ 18 files changed, 476 insertions(+), 20 
deletions(-) create mode 100644 blosc/b2nd-private.h create mode 100644 blosc/schunk-private.h create mode 100644 tests/test_get_slice_nchunks.c diff --git a/.github/workflows/fuzz.yml b/.github/workflows/fuzz.yml index 6df4526d..220a5d5b 100644 --- a/.github/workflows/fuzz.yml +++ b/.github/workflows/fuzz.yml @@ -18,7 +18,7 @@ jobs: dry-run: false - name: Upload Crash - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 if: failure() with: name: artifacts diff --git a/ANNOUNCE.md b/ANNOUNCE.md index 69458298..e3f9a65c 100644 --- a/ANNOUNCE.md +++ b/ANNOUNCE.md @@ -1,12 +1,12 @@ -# Announcing C-Blosc2 2.11.1 +# Announcing C-Blosc2 2.12.0 A fast, compressed and persistent binary data store library for C. ## What is new? -This is a maintenance release for fixing the ALTIVEC header. -Only affects to IBM POWER builds. - -Also, some other fixes and improvements are in. +Now the `grok` codec is available globally and will be loaded dynamically. See more +info about the codec in our blog post: https://www.blosc.org/posts/blosc2-grok-release/ +Furthermore, a new function has been added to get the unidimensional chunk indexes +needed to get the slice of a Blosc2 container. For more info, please see the release notes in: diff --git a/CMakeLists.txt b/CMakeLists.txt index bff2f36c..7867187d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -285,7 +285,10 @@ if(CMAKE_SYSTEM_PROCESSOR STREQUAL i386 OR else() set(COMPILER_SUPPORT_AVX2 FALSE) endif() - if(CMAKE_C_COMPILER_VERSION VERSION_GREATER 4.9 OR CMAKE_C_COMPILER_VERSION VERSION_EQUAL 4.9) + # GCC 10.3.2 (the version in manylinux_2014) seems to have issues supporting dynamic dispatching + # of AVX512. GCC 11.4 is the first minimal version that works well here. + # That means that Linux wheels will have AVX512 disabled, but that's life. 
+ if(CMAKE_C_COMPILER_VERSION VERSION_GREATER 11.4 OR CMAKE_C_COMPILER_VERSION VERSION_EQUAL 11.4) set(COMPILER_SUPPORT_AVX512 TRUE) else() set(COMPILER_SUPPORT_AVX512 FALSE) @@ -297,7 +300,9 @@ if(CMAKE_SYSTEM_PROCESSOR STREQUAL i386 OR else() set(COMPILER_SUPPORT_AVX2 FALSE) endif() - if(CMAKE_C_COMPILER_VERSION VERSION_GREATER 10.0 OR CMAKE_C_COMPILER_VERSION VERSION_EQUAL 10.0) + # Clang 13 is the minimum version that we know that works with AVX512 dynamic dispatch. + # Perhaps lesser versions work too, better to err on the safe side. + if(CMAKE_C_COMPILER_VERSION VERSION_GREATER 13.0 OR CMAKE_C_COMPILER_VERSION VERSION_EQUAL 13.0) set(COMPILER_SUPPORT_AVX512 TRUE) else() set(COMPILER_SUPPORT_AVX512 FALSE) diff --git a/README.rst b/README.rst index 64f24557..bd1bdffa 100644 --- a/README.rst +++ b/README.rst @@ -250,4 +250,4 @@ See `THANKS document `_. ---- -**Enjoy data!** +-- The Blosc Development Team. **We make compression better.** diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index c8cfa685..964a0235 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -1,6 +1,45 @@ Release notes for C-Blosc2 ========================== +Changes from 2.11.3 to 2.12.0 +============================= + +* New `blosc2_get_slice_nchunks` function for getting the unidimensional chunk indexes of a Blosc2 container slice. + +* Globally registered new codec `grok`. This will be loaded dynamically. + + +Changes from 2.11.2 to 2.11.3 +============================= + +* Frames accept now typesizes that are larger than 255 (and up to 2**31). + See https://github.com/PyTables/PyTables/issues/1086. Thanks to + @chris-allan for the report. + +* AVX512 runtime dispatching has been fixed (basically disabled) for GCC + versions <= 10. + +* Use typedef for blosc_timestamp_t. Thanks to Magnus Ulimoen. + + +Changes from 2.11.1 to 2.11.2 +============================= + +* Added support for ARMv7l platforms (Raspberry Pi). 
The NEON version + of the bitshuffle filter was not compiling there, and besides it offered + no performance advantage over the generic bitshuffle version (it is 2x to + 3x slower actually). So bitshuffle-neon.c has been disabled by default in + all ARM platforms. + +* Also, unaligned access has been disabled in all ARM non-64bits platforms. + It turned out that, at least the armv7l CPU in Raspberry Pi 4, had issues + because `__ARM_FEATURE_UNALIGNED` C macro was asserted in the compiler + (both gcc and clang), but it actually made binaries to raise a "Bus error". + +* Thanks to Ben Nuttall for providing a Raspberry Pi for tracking down these + issues. + + Changes from 2.11.0 to 2.11.1 ============================= diff --git a/blosc/CMakeLists.txt b/blosc/CMakeLists.txt index bf8527da..469b0e09 100644 --- a/blosc/CMakeLists.txt +++ b/blosc/CMakeLists.txt @@ -280,7 +280,9 @@ if(NOT CMAKE_SYSTEM_PROCESSOR STREQUAL arm64) endif() if(COMPILER_SUPPORT_NEON) message(STATUS "Adding run-time support for NEON") - list(APPEND SOURCES blosc/shuffle-neon.c blosc/bitshuffle-neon.c) + # bitshuffle-neon.c does not offer better speed than generic on arm64 (Mac M1). + # Besides, it does not compile on raspberry pi (armv7l), so disable it. + list(APPEND SOURCES blosc/shuffle-neon.c) # blosc/bitshuffle-neon.c) endif() if(COMPILER_SUPPORT_ALTIVEC) message(STATUS "Adding run-time support for ALTIVEC") diff --git a/blosc/b2nd-private.h b/blosc/b2nd-private.h new file mode 100644 index 00000000..be69acfc --- /dev/null +++ b/blosc/b2nd-private.h @@ -0,0 +1,40 @@ +/********************************************************************* + Blosc - Blocked Shuffling and Compression Library + + Copyright (c) 2021 The Blosc Development Team + https://blosc.org + License: BSD 3-Clause (see LICENSE.txt) + + See LICENSE.txt for details about copyright and rights to use. 
+**********************************************************************/ + + +#ifndef BLOSC_B2ND_PRIVATE_H +#define BLOSC_B2ND_PRIVATE_H + +#include "b2nd.h" + +#include +#include +#include + +/********************************************************************* + + Functions meant to be used internally. + +*********************************************************************/ + +/** + * @brief Get the chunk indexes needed to get the slice. + * + * @param array The b2nd array. + * @param start The coordinates where the slice will begin. + * @param stop The coordinates where the slice will end. + * @param chunks_idx The pointer to the buffer where the indexes of the chunks will be written. + * + * @return The number of chunks needed to get the slice. If some problem is + * detected, a negative code is returned instead. + */ +int b2nd_get_slice_nchunks(b2nd_array_t *array, const int64_t *start, const int64_t *stop, int64_t **chunks_idx); + +#endif /* BLOSC_B2ND_PRIVATE_H */ diff --git a/blosc/b2nd.c b/blosc/b2nd.c index ed367cc8..767a16fb 100644 --- a/blosc/b2nd.c +++ b/blosc/b2nd.c @@ -926,6 +926,92 @@ int b2nd_get_slice(b2nd_context_t *ctx, b2nd_array_t **array, const b2nd_array_t } +int b2nd_get_slice_nchunks(b2nd_array_t *array, const int64_t *start, const int64_t *stop, int64_t **chunks_idx) { + BLOSC_ERROR_NULL(array, BLOSC2_ERROR_NULL_POINTER); + BLOSC_ERROR_NULL(start, BLOSC2_ERROR_NULL_POINTER); + BLOSC_ERROR_NULL(stop, BLOSC2_ERROR_NULL_POINTER); + + int8_t ndim = array->ndim; + + // 0-dim case + if (ndim == 0) { + *chunks_idx = malloc(1 * sizeof(int64_t)); + *chunks_idx[0] = 0; + return 1; + } + + int64_t chunks_in_array[B2ND_MAX_DIM] = {0}; + for (int i = 0; i < ndim; ++i) { + chunks_in_array[i] = array->extshape[i] / array->chunkshape[i]; + } + + int64_t chunks_in_array_strides[B2ND_MAX_DIM]; + chunks_in_array_strides[ndim - 1] = 1; + for (int i = ndim - 2; i >= 0; --i) { + chunks_in_array_strides[i] = chunks_in_array_strides[i + 1] * chunks_in_array[i 
+ 1]; + } + + // Compute the number of chunks to update + int64_t update_start[B2ND_MAX_DIM]; + int64_t update_shape[B2ND_MAX_DIM]; + + int64_t update_nchunks = 1; + for (int i = 0; i < ndim; ++i) { + int64_t pos = 0; + while (pos <= start[i]) { + pos += array->chunkshape[i]; + } + update_start[i] = pos / array->chunkshape[i] - 1; + while (pos < stop[i]) { + pos += array->chunkshape[i]; + } + update_shape[i] = pos / array->chunkshape[i] - update_start[i]; + update_nchunks *= update_shape[i]; + } + + int nchunks = 0; + // Initially we do not know the number of chunks that will be affected + *chunks_idx = malloc(array->sc->nchunks * sizeof(int64_t)); + int64_t *ptr = *chunks_idx; + for (int update_nchunk = 0; update_nchunk < update_nchunks; ++update_nchunk) { + int64_t nchunk_ndim[B2ND_MAX_DIM] = {0}; + blosc2_unidim_to_multidim(ndim, update_shape, update_nchunk, nchunk_ndim); + for (int i = 0; i < ndim; ++i) { + nchunk_ndim[i] += update_start[i]; + } + int64_t nchunk; + blosc2_multidim_to_unidim(nchunk_ndim, ndim, chunks_in_array_strides, &nchunk); + + // Check if the chunk is inside the slice domain + int64_t chunk_start[B2ND_MAX_DIM] = {0}; + int64_t chunk_stop[B2ND_MAX_DIM] = {0}; + for (int i = 0; i < ndim; ++i) { + chunk_start[i] = nchunk_ndim[i] * array->chunkshape[i]; + chunk_stop[i] = chunk_start[i] + array->chunkshape[i]; + if (chunk_stop[i] > array->shape[i]) { + chunk_stop[i] = array->shape[i]; + } + } + bool chunk_empty = false; + for (int i = 0; i < ndim; ++i) { + chunk_empty |= (chunk_stop[i] <= start[i] || chunk_start[i] >= stop[i]); + } + if (chunk_empty) { + continue; + } + + ptr[nchunks] = nchunk; + nchunks++; + } + + if (nchunks < array->sc->nchunks) { + *chunks_idx = realloc(ptr, nchunks * sizeof(int64_t)); + } + + return nchunks; +} + + int b2nd_squeeze(b2nd_array_t *array) { BLOSC_ERROR_NULL(array, BLOSC2_ERROR_NULL_POINTER); diff --git a/blosc/blosc2.c b/blosc/blosc2.c index 5dea6bc7..9734c466 100644 --- a/blosc/blosc2.c +++ b/blosc/blosc2.c 
@@ -13,6 +13,8 @@ #include "blosc-private.h" #include "../plugins/codecs/zfp/blosc2-zfp.h" #include "frame.h" +#include "b2nd-private.h" +#include "schunk-private.h" #if defined(USING_CMAKE) #include "config.h" @@ -4656,3 +4658,25 @@ void blosc2_multidim_to_unidim(const int64_t *index, int8_t ndim, const int64_t *i += index[j] * strides[j]; } } + +int blosc2_get_slice_nchunks(blosc2_schunk* schunk, int64_t *start, int64_t *stop, int64_t **chunks_idx) { + BLOSC_ERROR_NULL(schunk, BLOSC2_ERROR_NULL_POINTER); + if (blosc2_meta_exists(schunk, "b2nd") < 0) { + // Try with a caterva metalayer; we are meant to be backward compatible with it + if (blosc2_meta_exists(schunk, "caterva") < 0) { + return schunk_get_slice_nchunks(schunk, *start, *stop, chunks_idx); + } + } + + b2nd_array_t *array; + int rc = b2nd_from_schunk(schunk, &array); + if (rc < 0) { + BLOSC_TRACE_ERROR("Could not get b2nd array from schunk."); + return rc; + } + rc = b2nd_get_slice_nchunks(array, start, stop, chunks_idx); + array->sc = NULL; // Free only array struct + b2nd_free(array); + + return rc; +} diff --git a/blosc/frame.c b/blosc/frame.c index 4808a6cc..1c5290b3 100644 --- a/blosc/frame.c +++ b/blosc/frame.c @@ -428,8 +428,8 @@ int get_header_info(blosc2_frame_s *frame, int32_t *header_len, int64_t *frame_l } if (typesize != NULL) { from_big(typesize, framep + FRAME_TYPESIZE, sizeof(*typesize)); - if (*typesize <= 0 || *typesize > BLOSC_MAX_TYPESIZE) { - BLOSC_TRACE_ERROR("`typesize` is zero or greater than max allowed."); + if (*typesize <= 0) { + BLOSC_TRACE_ERROR("`typesize` cannot be zero or negative."); return BLOSC2_ERROR_INVALID_HEADER; } } diff --git a/blosc/schunk-private.h b/blosc/schunk-private.h new file mode 100644 index 00000000..51a5ae74 --- /dev/null +++ b/blosc/schunk-private.h @@ -0,0 +1,40 @@ +/********************************************************************* + Blosc - Blocked Shuffling and Compression Library + + Copyright (c) 2021 The Blosc Development Team + 
https://blosc.org + License: BSD 3-Clause (see LICENSE.txt) + + See LICENSE.txt for details about copyright and rights to use. +**********************************************************************/ + + +#ifndef BLOSC_SCHUNK_PRIVATE_H +#define BLOSC_SCHUNK_PRIVATE_H + +#include "b2nd.h" + +#include +#include +#include + +/********************************************************************* + + Functions meant to be used internally. + +*********************************************************************/ + +/** + * @brief Get the chunk indexes needed to get the slice. + * + * @param schunk The super-chunk. + * @param start Index (0-based) where the slice begins. + * @param stop The first index (0-based) that is not in the selected slice. + * @param chunks_idx The pointer to the buffer where the indexes will be written. + * + * + * @return The number of chunks needed to get the slice. If some problem is + * detected, a negative code is returned instead. + */ +int schunk_get_slice_nchunks(blosc2_schunk *schunk, int64_t start, int64_t stop, int64_t **chunks_idx); +#endif /* BLOSC_SCHUNK_PRIVATE_H */ diff --git a/blosc/schunk.c b/blosc/schunk.c index 20f0bf33..0cd1444b 100644 --- a/blosc/schunk.c +++ b/blosc/schunk.c @@ -1379,6 +1379,27 @@ int blosc2_schunk_set_slice_buffer(blosc2_schunk *schunk, int64_t start, int64_t } +int schunk_get_slice_nchunks(blosc2_schunk *schunk, int64_t start, int64_t stop, int64_t **chunks_idx) { + BLOSC_ERROR_NULL(schunk, BLOSC2_ERROR_NULL_POINTER); + + int64_t byte_start = start * schunk->typesize; + int64_t byte_stop = stop * schunk->typesize; + int64_t nchunk_start = byte_start / schunk->chunksize; + int64_t nchunk_stop = byte_stop / schunk->chunksize; + if (byte_stop % schunk->chunksize != 0) { + nchunk_stop++; + } + int64_t nchunk = nchunk_start; + int nchunks = (int)(nchunk_stop - nchunk_start); + *chunks_idx = malloc(nchunks * sizeof(int64_t)); + int64_t *ptr = *chunks_idx; + for (int64_t i = 0; i < nchunks; ++i) { + ptr[i] = 
nchunk; + nchunk++; + } + return nchunks; +} + /* Reorder the chunk offsets of an existing super-chunk. */ int blosc2_schunk_reorder_offsets(blosc2_schunk *schunk, int64_t *offsets_order) { // Check that the offsets order are correct diff --git a/include/blosc2.h b/include/blosc2.h index 56d29b38..e327b2d8 100644 --- a/include/blosc2.h +++ b/include/blosc2.h @@ -82,11 +82,11 @@ extern "C" { /* Version numbers */ #define BLOSC2_VERSION_MAJOR 2 /* for major interface/format changes */ -#define BLOSC2_VERSION_MINOR 11 /* for minor interface/format changes */ -#define BLOSC2_VERSION_RELEASE 1 /* for tweaks, bug-fixes, or development */ +#define BLOSC2_VERSION_MINOR 12 /* for minor interface/format changes */ +#define BLOSC2_VERSION_RELEASE 0 /* for tweaks, bug-fixes, or development */ -#define BLOSC2_VERSION_STRING "2.11.1" /* string version. Sync with above! */ -#define BLOSC2_VERSION_DATE "$Date:: 2023-11-05 #$" /* date version */ +#define BLOSC2_VERSION_STRING "2.12.0" /* string version. Sync with above! */ +#define BLOSC2_VERSION_DATE "$Date:: 2023-12-28 #$" /* date version */ /* The maximum number of dimensions for Blosc2 NDim arrays */ @@ -2306,9 +2306,9 @@ BLOSC_EXPORT int blosc2_vlmeta_get_names(blosc2_schunk *schunk, char **names); /* The type of timestamp used on this system. */ #if defined(_WIN32) -#define blosc_timestamp_t LARGE_INTEGER +typedef LARGE_INTEGER blosc_timestamp_t; #else -#define blosc_timestamp_t struct timespec +typedef struct timespec blosc_timestamp_t; #endif /* @@ -2486,6 +2486,21 @@ BLOSC_EXPORT void blosc2_unidim_to_multidim(uint8_t ndim, int64_t *shape, int64_ */ BLOSC_EXPORT void blosc2_multidim_to_unidim(const int64_t *index, int8_t ndim, const int64_t *strides, int64_t *i); +/* + * @brief Get the unidimensional chunk indexes needed to get a slice of a schunk or a b2nd array + * + * @param schunk The super-chunk (of b2nd array or not). + * @param start Index (0-based if it is a schunk) where the slice begins. 
+ * @param stop The first index (0-based if it is a schunk) that is not in the selected slice. + * @param chunks_idx The pointer to the buffer where the indexes will be written. It is the user responsibility + * to free the buffer. + * + * + * @return The number of chunks needed to get the slice. If some problem is + * detected, a negative code is returned instead. + */ +BLOSC_EXPORT int blosc2_get_slice_nchunks(blosc2_schunk* schunk, int64_t *start, int64_t *stop, int64_t **chunks_idx); + #ifdef __cplusplus } #endif diff --git a/include/blosc2/blosc2-common.h b/include/blosc2/blosc2-common.h index e03042e7..c8a4505d 100644 --- a/include/blosc2/blosc2-common.h +++ b/include/blosc2/blosc2-common.h @@ -61,7 +61,7 @@ #undef BLOSC_STRICT_ALIGN /* Modern ARM systems (like ARM64) should support unaligned access quite efficiently. */ -#elif defined(__ARM_FEATURE_UNALIGNED) /* ARM, GNU C */ +#elif defined(__ARM_FEATURE_UNALIGNED) && defined(__ARM64_ARCH_8__) #undef BLOSC_STRICT_ALIGN #elif defined(_ARCH_PPC) || defined(__PPC__) /* Modern PowerPC systems (like POWER8) should support unaligned access diff --git a/include/blosc2/codecs-registry.h b/include/blosc2/codecs-registry.h index 058fe744..fdc25d8c 100644 --- a/include/blosc2/codecs-registry.h +++ b/include/blosc2/codecs-registry.h @@ -23,6 +23,7 @@ enum { BLOSC_CODEC_ZFP_FIXED_PRECISION = 34, BLOSC_CODEC_ZFP_FIXED_RATE = 35, BLOSC_CODEC_OPENHTJ2K = 36, + BLOSC_CODEC_GROK = 37, }; void register_codecs(void); diff --git a/plugins/codecs/codecs-registry.c b/plugins/codecs/codecs-registry.c index 7fdccba3..45e16016 100644 --- a/plugins/codecs/codecs-registry.c +++ b/plugins/codecs/codecs-registry.c @@ -56,4 +56,13 @@ void register_codecs(void) { openhtj2k.decoder = NULL; openhtj2k.compname = "openhtj2k"; register_codec_private(&openhtj2k); + + blosc2_codec grok; + grok.compcode = BLOSC_CODEC_GROK; + grok.version = 1; + grok.complib = BLOSC_CODEC_GROK; + grok.encoder = NULL; + grok.decoder = NULL; + grok.compname = "grok"; 
+ register_codec_private(&grok); } diff --git a/tests/b2nd/test_b2nd_save.c b/tests/b2nd/test_b2nd_save.c index 63048808..95b3dd71 100644 --- a/tests/b2nd/test_b2nd_save.c +++ b/tests/b2nd/test_b2nd_save.c @@ -39,7 +39,9 @@ CUTEST_TEST_SETUP(save) { {1, {10}, {7}, {2}}, // 1-idim {2, {100, 100}, {20, 20}, {10, 10}}, {3, {40, 55, 23}, {31, 5, 22}, {4, 4, 4}}, - {3, {100, 0, 12}, {31, 0, 12}, {10, 0, 12}}, + // The test below makes CI to crash. I have not been able to reproduce this + // manually on a Linux (Fedora) and ARM64 (M1), so anyways. + // {3, {100, 0, 12}, {31, 0, 12}, {10, 0, 12}}, )); CUTEST_PARAMETRIZE(backend, _test_backend, CUTEST_DATA( {true, false}, diff --git a/tests/test_get_slice_nchunks.c b/tests/test_get_slice_nchunks.c new file mode 100644 index 00000000..89a4f4d8 --- /dev/null +++ b/tests/test_get_slice_nchunks.c @@ -0,0 +1,172 @@ +/* + Copyright (c) 2021 The Blosc Development Team + https://blosc.org + License: BSD 3-Clause (see LICENSE.txt) + + See LICENSE.txt for details about copyright and rights to use. 
+*/ + +#include +#include "test_common.h" + +#define CHUNKSIZE (200 * 1000) +#define NTHREADS (2) + +/* Global vars */ +int tests_run = 0; + +typedef struct { + int nchunks; + int64_t start; + int64_t stop; + char* urlpath; + bool contiguous; + bool shorter_last_chunk; + int64_t nchunk_start; + int64_t nchunk_stop; +} test_data; + +test_data tdata; + +typedef struct { + int nchunks; + int64_t start; + int64_t stop; + bool shorter_last_chunk; + int64_t nchunk_start; + int64_t nchunk_stop; +} test_ndata; + +test_ndata tndata[] = { + {10, 0, 10 * CHUNKSIZE, false, 0, 10}, //whole schunk + {5, 3, 200, false, 0, 1}, //piece of 1 block + {33, 5, 679, false, 0, 1}, // blocks of same chunk + {12, 129 * 100, 134 * 100 * 3, false, 0, 1}, // blocks of different chunks + {2, 200 * 100, CHUNKSIZE * 2, false, 0, 2}, // 1 chunk + {5, 0, CHUNKSIZE * 5 + 200 * 100 + 300, true, 0, 6}, // last chunk shorter + {2, 10, CHUNKSIZE * 2 + 400, true, 0, 3}, // start != 0, last chunk shorter + {12, CHUNKSIZE * 1 + 300, CHUNKSIZE * 4 + 100, false, 1, 5}, // start not in first chunk +}; + +typedef struct { + bool contiguous; + char *urlpath; +} test_storage; + +test_storage tstorage[] = { + {false, NULL}, // memory - schunk + {true, NULL}, // memory - cframe + {true, "test_get_slice_nchunks.b2frame"}, // disk - cframe + {false, "test_get_slice_nchunks.b2frame"}, // disk - sframe +}; + + +static char* test_get_slice_nchunks(void) { + static int32_t data[CHUNKSIZE]; + int32_t *data_; + int32_t isize = CHUNKSIZE * sizeof(int32_t); + int rc; + blosc2_cparams cparams = BLOSC2_CPARAMS_DEFAULTS; + blosc2_dparams dparams = BLOSC2_DPARAMS_DEFAULTS; + blosc2_schunk* schunk; + + /* Initialize the Blosc compressor */ + blosc2_init(); + + /* Create a super-chunk container */ + blosc2_remove_urlpath(tdata.urlpath); + cparams.typesize = sizeof(int32_t); + cparams.clevel = 5; + cparams.nthreads = NTHREADS; + dparams.nthreads = NTHREADS; + cparams.blocksize = 0; + blosc2_storage storage = {.cparams=&cparams, 
.dparams=&dparams, + .urlpath=tdata.urlpath, .contiguous=tdata.contiguous}; + schunk = blosc2_schunk_new(&storage); + + // Feed it with data + if (!tdata.shorter_last_chunk) { + for (int nchunk = 0; nchunk < tdata.nchunks; nchunk++) { + for (int i = 0; i < CHUNKSIZE; i++) { + data[i] = i + nchunk * CHUNKSIZE; + } + int64_t nchunks_ = blosc2_schunk_append_buffer(schunk, data, isize); + mu_assert("ERROR: bad append in frame", nchunks_ > 0); + } + } + else { + data_ = malloc(sizeof(int32_t) * tdata.stop); + for (int i = 0; i < tdata.stop; i++) { + data_[i] = i; + } + for (int nchunk = 0; nchunk < tdata.nchunks; nchunk++) { + int64_t nchunks_ = blosc2_schunk_append_buffer(schunk, data_ + nchunk * CHUNKSIZE, isize); + mu_assert("ERROR: bad append in frame", nchunks_ > 0); + } + int64_t nchunks_ = blosc2_schunk_append_buffer(schunk, data_ + tdata.nchunks * CHUNKSIZE, + (tdata.stop % CHUNKSIZE) * sizeof(int32_t)); + mu_assert("ERROR: bad append in frame", nchunks_ > 0); + } + + // Get slice nchunks + int64_t *chunks_indexes; + rc = blosc2_get_slice_nchunks(schunk, &tdata.start, &tdata.stop, &chunks_indexes); + mu_assert("ERROR: cannot get slice correctly.", rc >= 0); + mu_assert("ERROR: wrong number of chunks.", rc == (tdata.nchunk_stop - tdata.nchunk_start)); + int nchunk = tdata.nchunk_start; + for (int i = 0; i < rc; ++i) { + mu_assert("ERROR: wrong nchunk index retrieved.", chunks_indexes[i] == nchunk); + nchunk++; + } + + + /* Free resources */ + free(chunks_indexes); + blosc2_schunk_free(schunk); + blosc2_remove_urlpath(tdata.urlpath); + /* Destroy the Blosc environment */ + blosc2_destroy(); + + + return EXIT_SUCCESS; +} + +static char *all_tests(void) { + for (int i = 0; i < (int) ARRAY_SIZE(tstorage); ++i) { + for (int j = 0; j < (int) ARRAY_SIZE(tndata); ++j) { + tdata.contiguous = tstorage[i].contiguous; + tdata.urlpath = tstorage[i].urlpath; + tdata.nchunks = tndata[j].nchunks; + tdata.start = tndata[j].start; + tdata.stop = tndata[j].stop; + 
tdata.shorter_last_chunk = tndata[j].shorter_last_chunk; + tdata.nchunk_start = tndata[j].nchunk_start; + tdata.nchunk_stop = tndata[j].nchunk_stop; + mu_run_test(test_get_slice_nchunks); + } + } + + return EXIT_SUCCESS; +} + + +int main(void) { + char *result; + + install_blosc_callback_test(); /* optionally install callback test */ + blosc2_init(); + + /* Run all the suite */ + result = all_tests(); + if (result != EXIT_SUCCESS) { + printf(" (%s)\n", result); + } + else { + printf(" ALL TESTS PASSED"); + } + printf("\tTests run: %d\n", tests_run); + + blosc2_destroy(); + + return result != EXIT_SUCCESS; +} From 464364b55468e5a78a3579ea05f9703011fd31bc Mon Sep 17 00:00:00 2001 From: Thomas VINCENT Date: Fri, 19 Jan 2024 09:11:16 +0100 Subject: [PATCH 2/3] Squashed 'src/zfp/' changes from e8edaced1..f40868a6a f40868a6a Release 1.0.1 cd174e0bf Bump version number 6bdada5f3 Set release version and date 913e67fa7 Add dependency on six a0bab6739 Add sphinx-fortran dependency 325bba6ca Add RTD v2 yaml file c5879e222 Mention R&D 100 award 113767ee3 Add missing resolved issues to CHANGELOG 5f3ade7e2 Fix broken links e892d713b Document that OS=mac is needed when building dylibs with gmake (resolves #170) c81062e2b Clarify that 64-bit OS is not required (resolves #194) 63eeefa32 Fix for issue #220 c7f329ab8 Merge pull request #217 from hmaarrfk/distutils d6bccbd71 Explicitly use old CMake FindPython policy b345842fb Merge pull request #204 from diegorsjv/bugfix/fortran-interface-stride-passbyvalue bcc5a2548 Update gitlabCI runner config [skip ci] f838d6cfd Merge pull request #216 from LLNL/dependabot/github_actions/actions/checkout-4 83c6e47ec Update to support Python 3.12 c61eca71d Bump actions/checkout from 3 to 4 fa1014f5e Merge pull request #188 from DimitriPapadopoulos/actions 21f4f399d Merge pull request #215 from DimitriPapadopoulos/_WIN32 4c5d622ed Fix typo found by codespell ad2f02b3e Testing _WIN32 is enough, no need to test _WIN64 920df4d96 Automatically update 
GitHub Actions in the future d846d33fa Merge pull request #211 from LLNL/markcmiller86-add-rd-100-badge 6cdff3060 Add R&D100 badge bc5e24145 update gitlab compiler versions d2a52e633 update gitlab build options 2b523d92a Removed unused functions from python test utils c1845815e Add void to functions taking no parameters (resolves #208) 300e77d12 Clean up minor issues in documentation [skip ci] a3afa49f6 Clarify intent of zfp_stream_maximum_size() [skip ci] 3a90823d9 Clarify that thread safety requires OpenMP [skip ci] fbca45407 Modified the set_stride_xd functions in zfp.f90 so that stride values are actually passed by value and not by reference as they were. This resulted in segmentation faults when using strided compression with the Fortran bindings f15d00720 Update badges and actions names b78055fa3 Organize actions and enable codecov upload 2c7cdeaa2 Add code coverage action ee10feb73 squash merge misc/actions-fixes into develop c4f175667 Create actions debug workflow for linux [skip ci] 0d7de48d7 Create actions debug workflow for macos 8a1dd0e0e Revert gitlab change ff3a33926 update gitlabCI user fc96c9158 Merge pull request #189 from LLNL/misc/build-tests 54c8706de Add BUILD_TESTING_FULL to documentation [skip ci] b50a4613e update github actions flag c578028e0 Add BUILD_TESTING_FULL option (issue #177) 28cb77742 Fix zfpy type issues (issue #171) 2ac91d785 Merge pull request #180 from DimitriPapadopoulos/actions 6c307aa70 Merge pull request #184 from seung-lab/fix_numpy_abi 4de8c1b7e CMake RPath fix (issue #181) 50dfaacdc build: use oldest numpy ABI to ensure maximum compatibility 92424bf65 Merge pull request #174 from seung-lab/wms_py_header_reader 228c7a48a Update GitHub Actions e57895219 Merge branch 'develop' into wms_py_header_reader b2ad09e55 Fix issue #176 51afffc05 fix: recommended changes c05eba980 fix: remove strides from header dict fc0a19c54 fix: cast everything to python int adfeb3584 feat(python): add full header reader for zfp streams c616bbec3 
Update CHANGELOG 6d7d2424e Prevent LTO in libm dependency check (resolves #169) fdc30ff69 Set ZFP_VERSION_DEVELOP 35155961e Merge branch 'release1.0.0' into develop f39af7264 Release 1.0.0 82a557c82 Set release version and date 7f84893b1 Clarify ZFP_VERSION_DEVELOP meaning and value 074f01092 Point badges and documentation to release1.0.0 branch 1fc3539e9 Align license.rst with LICENSE ed01090c8 Add version_tweak to zFORp 9ac63801c Document change in ZFP_VERSION mapping fff64a14a Revert to gencodec for consistency with zfpcodec 07485e1a3 Update ReadTheDocs badge link e1af8487b Bump copyright date, add copyright notice to zfp.hpp 2445a7c3c Bump zFORp_library_version 3ed74345d Ensure make clean removes libcfp 6c66c6652 Proofread and ensure documentation is up to date 9cc3a25b1 Update expected testzfp errors for large arrays 8efecf10c Fix various bugs in diffusionC 49db1371b Fix incorrect order of distance() arguments afabe40bb Add cfp references, pointer, iterators, serialization to CHANGELOG 302d0a734 Fix C89 incompatibility in iteratorC.c a9203c644 Add missing const_array::params() and const_array::set_params() c6974acec Add zfp_field_blocks() to zFORp 46deb6105 Update execution doc with planned future capabilities e4357d0e2 Cosmetic changes to compression modes doc f88a41319 Add variable-length code example 3deba5f1b Add Apple clang OpenMP instructions bc29521be Update authors, zfp homepage URL 43d29affc Update links to zfp applications c9ec980f7 Update zfp homepage URL 3c5445648 Update authors and contributors 4ce851be7 Update versions.rst to align with CHANGELOG.md 44d4f849d Remove unused variable in decoder test 9a9a22637 update doc versions and add some missing changes 962a48bde update version in license and tests c86be611d Update zfpy version number c99a41f4c Fix mismatched memory (de)allocation calls 6825ee263 Add missing space between literal and identifier 91f1823fa Update version identifiers for next release cb618438b Correct suffix for examples/array2d.h 
539db0866 Update CHANGELOG to reflect directory restructuring
1c66f76b7 Silence compiler warnings in tests
b2ea04160 Remove -I../array
1bcbf0ecb Ensure proper make clean in examples
8eecbfbb5 Document new name for zFORp module
ec96f631f Document lower bound on maxbits in expert mode
c597619ce Fix compiler warnings in tests
b1ce0c16b Merge branch 'develop' into refactor/install-directory
73aeb4644 Fix compiler warnings/errors in examples
7edcd917d Update time limit on gitlab tests
725f5cd7f remove fortran modules directory from git tracking
982ca8683 Update cfp header includes, rename main cfp header
0f530708a Silence copy constructor warning
54966d1e5 Fix gmake build issues
7c9b4d23d Merge branch 'develop' into refactor/install-directory
81dbdd02d Fix Fortran GNU builds and compiler requirements
97fa1d75b Disable thread safety checks when OpenMP is absent
6c46dccf9 Update makefiles and fix a few inconsistencies
f08994139 Clarify that low-level API ignores execution policy
736581dd4 Silence implicit conversion warnings
86ffc4f20 Rename ZFP_VERSION_DEVELOP, remove ZFP_VERSION_RELEASE
0135c037e Merge branch 'bugfix/thread-safety' into develop (resolves #154)
20a2efdcb Use zfp_field_blocks() in codec
5c424bbff Update CHANGELOG [skip ci]
11f992849 Enabled OpenMP with testviews
e89d80383 Handle empty field in zfp_field_blocks()
0447f7e56 Make codec thread-safe
b9166ad1a Add LDFLAGS to support -lomp on Mac
5190cae2d Add zfp_field_blocks()
e1ac5e4ee Merge pull request #165 from DimitriPapadopoulos/codespell
20e62c582 Fix typos found by codespell
05b3f4c56 Update fortran module name and move internal codec headers
c13a6c7b9 Update bitstream.c include in cuda [skip ci]
9e78178a4 Fix include in zfpy
3674669df Update zforp install location
5fd6de39f update zfp util
f56070f6a updated tests fixed additional missing includes
615e86f4b Update headers in examples and fix bad include
eeac3a28a Updated cfp includes, fixed build issues
c73261887 Update cmake
b2366c0e7 updated inline src and moved cfp src
4aafbc81a Update hpp include guards
afe6a01f9 Update includes for internal headers
013210b06 Update includes for all non-internal headers
e1e4840c0 Move include files
9b0d278f8 Update where gitlab cpu tests are run
26678cded Replace stdio with iostream in testviews
bb59d8457 Fix minor documentation issues
537c12052 Add compressed-array example
cc2e85008 add policy mode validation checks
938ffd2e9 change exec policy params to use void*
7335959b8 Merge branch 'refactor/stream-offsets' into develop
24bcfa28b Merge branch 'develop' into refactor/stream-offsets
2d975ce89 Correct type mismatches to silence compiler warnings
bf718a604 Update bitstream types in CUDA tests
9a1e3409f Introduce bitstream_{offset,size,count} types for 64-bit support
b40ab53dd temporarily disable actions side of cmake test
9dcf734c8 temporarily disable actions side of cmake test
72fcac571 Add cmake version checking to tests
980155a8e Merge pull request #162 from jonashaag/patch-1
cf7bd9eac Fix exception handling in compress_numpy
5d3ac203e Revert accidental change to ZFP_VERSION_RELEASE define
0335096e6 Fix version.h compile error
dd915b95a Update version files to support intermediate version numbers
15d77178c Move memory management to internal namespace
0f06782e4 Rename word -> stream_word
1d6fbe597 Silence compiler warnings in tests
f49a7fe5d Silence compiler warnings in tests
baaa782ac Remove debugging code from diffusion example
76d684ded Update CMakeLists to support OpenMP with AppleClang
19b261d6c Add macros for constructing version number and string
ad926b6a8 Add back references to execution section in docs
f6880a6a6 Add #150 to CHANGELOG
d36f54d9b Fixes #150
6bfd003ab Revert attempted fix for #150
e620e546d Undo failed attempt to fix #150
a7c6047fa Attempted fix for issue #150 (cmocka macOS build)
da4e28293 Allow setting BIT_STREAM_WORD_TYPE on make line
33065d237 Add support for generic codec in diffusion example
f4e0850c6 Clarify that rounding mode, daz supported only on CPU
2b03babb8 Add conda zfp package and zarr application to docs
4a33aab52 add missing zfp field and stream tests
38e3cbbe2 Silence const compiler warnings in ppm
aeb707c1d Add CUDA tests for unsupported 4D data
5fe5ed198 Change name of actions workflow
ba2df4bcd move gitlab cpu tests to new system
a674bf58b Remove Travis CI files
fd5731846 Move status badge from travis to actions
64e5db65e Update python cmake build scripts
7ad847694 Add python build hints to tests
50774dcfd Enable cython action temporarily disable win tests
d42fb0eea enable actions pytests and split py adv decompress test into bytes/memview versions
afac3f94f made test output more verbose
31902956b remove unneeded test code
84fb3d880 Make array::set(0) zero-initialize array
323e3e708 Move Cache and Store to internal namespace
eb6cd2873 Bug fix: reopen codec when compacting store
122918c4c Fix incorrect loop counter types in simple.c
662fc0f42 Add size_bytes() to cfp
05156f50d Replace ieeecodec with generic codec templated on scalar type
8441d7063 Fix inconsistent coding style in zfpcodec
142a11f72 Setup github actions config
5555333ae add promote/demote util tests
b91dcafb4 Added field_metadata failure tests
6aec9dc42 compilation error fix
de381394f Fix unused variable warning
d65af032c Add link to MVAPICH2-GDR
5c991daae Add/rename top-level files for E4S compliance
99e526cd5 Document CUDA limitations
22beacb09 Document ZFP_WITH_CUDA in config file
57c50e038 Add FAQ on error distributions
bbfb88db8 Clarify FAQ #17 on violated tolerances
1cd5978fb Add printf FAQ
95dc35828 Merge pull request #148 from vicentebolea/cmake-fix-include-dir
b6f344ff4 CMAKE: ZFP_INCLUDE_DIRS avail at configure time
310e12d51 Merge pull request #147 from vicentebolea/add-zfp-cuda-var
670368238 Update zfp-config.cmake.in
f5d8c2824 Add templated encode/decode tests
f65ac4434 Move cuda testing to new machine
5079b8958 Extend and include testviews in tests
2d0ba85f8 Fix view const accessors
a922f6408 Clean up .travis.yml
1a4565c6f Force gcc-4.9 on Travis CI
416e19573 Require CMake 3.9 in tests
af933bb99 Suppress CMake warning about unused ZFP_OMP_TESTS_ONLY
f718c5616 Add nullptr_t proxy pointer constructors
d83d3432c Update README build report badge
bfbaa9929 Undo attempted codecov fix
1e74a04c4 Add mask parameter to header.size_bytes()
625691784 Bug fix: overflow due to subnormals (resolves #119)
3f768fad6 Fix zfp_field bugs that confused precision and byte size
ecbbf7633 Fix missing codecov uploads
ef8b8791a Document C++ wrappers for low-level API
8e790cbf5 Update installation instructions
881644019 Complete citation record
0ff612eca Merge pull request #141 from researchapps/add/github-citation
cd14c0633 GitHub has recently added this standard file, CITATION.cff, that will render a sidebar link to easily copy paste bibtex information to cite the work. This should be useful for zfp
33eed2fb0 Fix missing size_t and declarations in tests
c3a7b3343 Fix missing size_t types and casts
191073760 Fix C89 incompatibility in tests
a85609f07 Build cmocka using prescribed build type for MSVC
149de0a54 Update cfp tests to use size_t
416d16b82 Complete size_t transition for array tests
83d572f17 Fix broken C89 support in tests
45c9e82bf Silence warnings and use size_t over uint in tests
7450b7282 Silence miscellaneous compiler warnings
ce5c5b3af Silence compiler warnings
4e49b3180 Fix gtest deprecation warnings
6443612cb Merge branch 'develop' of github.com:LLNL/zfp into develop
02872e2a0 added templated encode/decode block tests
722782608 Merge branch 'refactor/64-bit-indexing' into develop
954390e19 Silence compiler warnings in tests
2a2ff3571 Update docs with new zfp_field and function signatures
28a3858b3 Silence compiler warnings in tests
d97e589b6 Update examples and utils to use size_t array dimensions
b756eb043 Handle missing size_t/ptrdiff_t
c3672137c Update Fortran API to support 64-bit indexing
64f71dde8 Update Python API to support 64-bit indexing
5f3ad37d6 64-bit indexing: replace uint/int with size_t/ptrdiff_t
24bcaf80e Organized tests/array directory
ea8124f8d merged cpp namespace into zfp namespace
01ccb95d3 Clarify constraints on rate in docs
b47749f56 changed gtest version to fix compilation issues with non-gnu linux compilers
496a416c3 fixed macro expansion issue (commit issue fix)
1c5bc2ab7 fixed macro expansion issue
c9adc2a4a fixed macro expansion issue
1f5fb92ab Rename Travis files
62f350340 Merge branch 'feature/const-arrays' into develop
5bdd22f37 Merge branch 'develop' into feature/const-arrays
26c5eb9df Do not reset rate in zfp::index::implicit::clear()
a571d4350 Add appropriate casts around CHAR_BIT * sizeof() constructs
5c7f6ecc4 Add dispatch function for integer encoding/decoding
6cf405a27 added in const array bitstream checksum test
5f6394ee9 disable travis tests temporarily
6a7364b3c Fix docs on array serialization constraints
cea4ff6ce Merge branch 'feature/unbiased-error' into develop
8a4487e75 Document ZFP_ROUND_* macros
bd2b1545c Fix CUDA rounding issue due to incorrect NBMASK
3205d7219 Add rounding modes to CUDA implementation
4f3f1d4a7 Disabled travis linux testing due to new usage restrictions
caf14c7f1 Added index names to test reporting
a949b1c55 Added block index tests for const arrays
2968e8e0e Specialize coding for unconstrained rate
74a11efef Merge pull request #137 from jwsblokland/bugfix/cmake
2fba7f690 CMake: (fix) OpenMP dependency.
f25982add Merge branch 'misc/travis-update' into develop
eb1d4c2d3 Discontinue xcode7.3 tests
e5a55f762 Fix build errors
4f409922f Added const array checksum tests
97044629f Only call cudaGetDeviceProperties once.
fcae077d9 Add ability to specify CUDA compilation flags.
8a4db7fb0 Added in tests for multiple precision/accuracy compression mode values
fb4d7a613 Fixed uninitialized value error in test fixture
09a9f7614 Initial work on const array tests
2d92b09d9 moved version info to zfp/version.h header
6389b89d3 Implemented fixes discussed in issue #126
12772bd48 Fix incorrect math in relative-error bound FAQ
a51171f14 Document promotion/demotion of scalars
3ddbd24d5 renamed header.size() to header.size_bytes() to better clarify its functionality
6b2da922e strip out redundant CI builds
11341f46f Add new rounding mode: ZFP_ROUND_FIRST
6d24ac9f1 Fixed various CI test issues
928bbceb2 refactored GitlabCI config to be more extensible
f3736ca59 Update diffusion documentation
321ce7fcd Add codec and index documentation
68099ac68 Add const_array support for expert mode
1d64a1613 Fix off-by-one error in ZFP_MAX_BITS
aabe13f93 Documentation: Use checkmarks in tables
8ded86bd6 Add FAQ on C vs. Fortran order
ebe66d628 Add FAQ on compressed storage size
d7686bc49 Bug fix: compressed block size in reversible mode (resolves #127)
35bc789ab Rename zfp::codec derived classes
d91176121 Rename conflicting zfp_stream* codec::zfp::zfp
6b6602b6d Add zfp::codec namespace
24f9b0708 Add support for const_array in diffusion example
d2932309e Fix incorrect rate granularity in FAQ
c733c923e Document const_array rate granularity and zero initialization
1d38a7f02 Refactor and fix bugs in arrays, store, index, codec
6993e72da Add exception on cache overflow
93780bb1c Add size_bytes() to fixed-rate arrays
4abe3d5ca Fix incorrect cache size initialization
71d24c018 Add missing size_bytes() for block stores
4142c9995 Remove debug info
bb4e567a5 Add const_array documentation
9f2b09c88 Add missing size_bytes() for block caches
fee7e80eb Consolidate shape() and block_shape() functions
b2816e2ef fix compiler error during cuda testing
0b4f9a3b8 Add 1D, 2D, 4D const_arrays
a56a749fb Merge branch 'develop' into feature/const-arrays
dd174cd21 Update documentation on exceptions
0ecdc57cd Add zfp_field layout functions
4a0d1f3eb Use zfp_bool in examples
0ceec10cc Ensure diffusionC builds with cfp
ccdec24e3 Add zfp_bool documentation
53b69c4dc Document ppm example
ee3c9fe23 Refactor array/cache/store to support const_array
10612d476 Exclude redundant header inclusion
aa71d4ade Improve POD memory allocation
350867321 Add inclusion of missing headers
1de54120e Add zfp_config for const_array constructor
695886a76 Move install command to utils subdirectory
f4e559860 Change gtest build paths (resolves #108)
ab07ab956 Remove zfp utility as install target
1fa8dbbef Add zfp utility as install target (resolves #122)
a215e19c7 Merge branch 'bugfix/omp-large-arrays' into develop
67ab4c983 Avoid Win32 size_t overflow
b09d079aa Resolves #125
1051621b7 Pick up .travis.yml fix
915e04141 Squashed commit of the following:
f784bbebd Revert to OpenMP 2.0 to accommodate MSVC
e525e484e Fix OpenMP 32-bit int overflow; require OpenMP 3.0
f668af3d5 Add support for querying rate, precision, accuracy
d24b528e9 Fix bad link in file compression docs
a96940074 Bug fix: CUDA bit streams limited to 2^31 bits
8d09ee13d Silence compiler warnings
abc769647 Correct comments/code for coding partial/strided blocks
4fd729283 Rename cfparrays.*; add cfp/src/template directory
779b36653 Minor improvements to simple.c
5cefb373e Clarify zfp_read_header() documentation
7e68696db Consolidate intro and overview sections
1829523e6 added small fixes for zfp testing, new gitlab CI functionality
74364a8a4 Merge branch 'feature/cfp-references' into develop
71f13b819 Refactor cfp header API to align with accessors
ccdc3ec68 Silence PGI compiler warnings
5feae4ca4 Support no alignment via ZFP_CACHE_LINE_SIZE=0
ebb172d00 Propagate zfp_private_defs to tests
a048b3f58 Expose ZFP_CACHE_LINE_SIZE to address PGI compiler bug
e11004ae1 Add cfp serialization documentation
bf09645b6 Update Makefiles to simplify macOS builds
7eb57fa02 Clean up cfp docs
7ab1ed615 Update docs to use zfp_bool
9c6032bd8 Make cfp its own section
6f111ce21 Update cfp documentation
61f7b2caf Add zfp_bool documentation
ac22c5847 Clean up merge and :c:expr: Sphinx issues
6db9cf049 Merge branch 'feature/cfp-references' into develop
661cd2aaa Add cfp multidim iterator tests
dca15d47d Ensure consistent cfp parameter names and APIs
72e4fd1fe Remove unnecessary casts
b2e658527 Add iterators to diffusionC; ensure C89 compatibility
88906c395 Add cfp random-access iterators and const qualifiers; refactor header
03f213bab Rename and reorder accessor members
f7682e657 Refactor cfpheader to avoid circular inclusion
87b770ea1 changed booleans from int to zfp_bool type
6b8e03dd8 Added cfp header tests
b20b2bb40 added header functionality to cfp
75c2a59cf Avoid importing unused cpython array for compatibility with pypy
50aa4f193 changed uint8_t and memoryview test
29d7de129 change to void, add tests similar to ensure_ndarray
5cb7a3253 remove print cython version
89b19d3f5 remove apt-get cython3
a4d6edac1 change cython3 to cython
2739f2e7a upgrade pip first
2fc75483f change back to pip
1d9524a59 remove upgrade pip
b560ddcbf use pip3 instead pip
66d45d1c6 change cython3 in .travis.yml
0bed27e1d enable cython3 upgrade on xenial
3c2c06128 add cython version check in travis.sh
8269e09b2 check cython version in zfpy.pyx
bf05fb43d change to void *
f0f824063 add in stream_open
31d8c51c2 remove const for test
a3c572a10 Update python/zfpy.pyx
14da51149 Update python/zfpy.pyx
a75ef2615 replace bytes of compressed stream to uint8_t
1f4973df4 fixed missing semi-colon
ee58ae344 changed cfp 4d fixed rate tests to prevent future problems
a36d4063b Fixed issues with 4d checksum tests
1c3123355 Fix failing 4D cfp test due to lack of precision
29c4f5a9d Changed out of date uint references to size_t in docs
0541ec97c Added 4d cfp documentation
15187ed8b Disabled broken checksum tests
42d3c7fe8 Added cfp 4d tests
a79c49305 Merge branch 'develop' into feature/const-arrays
0817b0865 Add documentation for 4D arrays
a5959fe02 Use python3 with sphinx documentation
6f829dd50 Enable 4D array support in cfp
78c2c51f8 Merge branch 'develop' into feature/cfp-references
c6bcb0988 Remove temporary fix to private_view::get()
2fbe0084a Merge branch 'refactor/arrays' into develop
8b794fb8b Fix ppm issues with clamping; add chroma dimensionality parameter
33149742a Add support for 1D chroma blocks in ppm
1fd3a5f80 Revise zfp_bool type for C++ and update function signatures
50b635b45 Add tests for 4D compressed arrays
635db5c5b Fix minor issues with 3D array tests
915ba2159 Fix copy-paste errors uncovered by tests
bdb2623ce Fix ppm range expansion bug
5816bcde1 Add ppm example
d2c844c6d Add pointer and iterator tests for views
6a544b056 Add missing const_pointer and const_iterator friends to views
8e1d8dcd1 Fix testzfp-large tolerances for new default cache size
23bb66f67 Remove obsolete 4D deserialization test
935c38937 changed ctor name
67383cc4a Correct order of arguments in cfp {ptr,iter}.distance
bf58a4ad0 Prepare cfp for 4D arrays
02a403ee7 Add testzfp tests for 4D array classes
fdd14c9e4 Add 4D compressed-array classes
044cbe14f Add offset dereference operator tests for pointer and iterators
35d5c8cb1 Fix copy-paste error in iterator3
9761ee91c Update docs for iterators and views
cf899daf0 Fix bad typedefs in iterators
9bf999348 Refactor accessors using container template
29cbdf760 Changed cfp to use safer static casting
6bd6d7248 Change to cfp header/array typedefs to solve issue with pre gcc 4.8
fa0d09c45 add cfp_header stubs to prep for new zfp header class
5b687458d Promote all iterators to random access; prepare for view iterators
19720bf79 Clean up documentation
520596bdc Fix incorrect reads through private_view
f3e1ca9a5 Fix zfp_stream_maximum_size() estimate in reversible mode
ca97dfdc1 Extra cleanup & organization pass for cfp tests/docs
d1d185c11 fix iteratorC formatting to match new output types
97749bd02 Add missing functionality for 2d/3d cfp_iter, move cfp fully from uint/int to size_t/ptrdiff_t
3bdb7fa31 zfpy 0.5.5
8f81ae47d Update FAQ on relative error
2dafa9f81 Update cfp pointer docs
8487b618e Add missing functionality for 2d/3d cfp_ptr
d83c2c81f Fix cfp naming consistency, add missing 1d functionality
68bc279be Mention const pointer availability
b5255c5a0 Update docs to reflect 64-bit support
0df472c3c Small tweak to cfp docs
2104f25cc Add const_pointer tests
97818431d Add const iterator tests
0086df3a8 Add pointer comparison tests
a29e83a99 Const accessor constructors must accept const containers
3a33078ba zfpy 0.5.5-rc5
cf8740a1d zfp 0.5.5-rc4
94ac5c00f Add alignment, header tests
739ea17c0 zfpy 0.5.5rc3
61048ab23 zfpy rc2
37096f146 Update docs for new const accessors, serialization, etc.
9d0d1df3a Fixed iteratorC to match output with iterator example
1167e3d5c Docs changes: fix ptr/iter names, add missing functions, fix broken links
ba0d5f46e Add zfpy-wheels long description
64f1b58af Update version
8d0f76fd2 Windows friendly setup.py
33f9c6657 Add URL
0f148ecf6 Build wheel with setup.py
35b5cafd0 Workaround for conflicting container_type declarations
ad613305d Resolve conflicting container_type in private views
1c4b9d9be Replace array with container_type in views
2ecc4686f Make accessors return const_reference; add pointers to views
cc332853e cfp accessor documentation first pass
27aca9c3d Fix iterator ~3u bug
20d81aa21 Add full 64-bit support to compressed arrays
da4a71f2b Silence type conversion warnings
f7893523f Rewrite zfp codec to use zfp::cpp templates
80b7841a2 Add C++ wrappers around low-level C API
470a743d8 Add typename to please MSVC 14
6a7ee0935 Change header API to rely on constructor
e30a70a73 Replace last occurrence of stream_word_bits
45c508d00 Plug memory leak in zfp::array::construct
9548b9404 Plug memory leak in block store
abc6922ee Clean up codec and use stream_alignment()
90009fd0c Add stream_alignment() function
4e6f0315d Fix using declarations for MSVC
a42b52d6f Work around googletest iterator bug
f6a8be00f Update compressed_data() API in cfp
bc55d5e77 Fix codec constructors
4d4001e8e Remove obsolete code
81e577dd8 Refactor array serialization
24270e887 Add Boolean type
229a37df6 Respect cache size request
6f0b78e42 Move codec specializations
6e7c12b5f Use std:: prefix in memory allocation where appropriate
878b329c7 Add type traits
3539cc849 Reinstate private views
4f2525c92 Fix incorrect cache size initialization
a688d9de4 Initialize array dimensions in constructor from view
efb78225b Move codec from BlockStore to BlockCache
086a156ad Rename storage -> store
d8b099c91 Fix codec source file names
a7b5a6d52 Update tests to support new array API
d2ca8ae63 Add new block storage + cache and refactor arrays + views
ecd4fb496 Add specialized allocator for void*
3b12f7c08 Made zfp memory alignment a compile-time macro
91e702f73 Add missing cbegin/cend to 2D and 3D arrays
b98c864b4 Update 3D references, pointers, iterators, and views
a2a7a2cf0 Fix incorrect pointer2 comparisons
e8e1000c8 Update 2D references, pointers, iterators, and views
b05afe923 Fix inconsistent relational operators
47e4c7cea Replace 1D view_reference with new generic reference
90eff6506 Traverse array using new const iterator
aacc4989a Refactor 1D references, pointers, iterators and add const versions
079c409fc Fix Cython 'Assignment to non-lvalue' errors
2d7873710 Remove trailing semicolons which Cython treats as errors
a1a876c96 Fix make install clobbering existing gtest installations
fc0ecac33 Optimized cfp references and arrays based on results of testing
c4029cf48 Added cfp iterator example
08adb27db Fix typo in CXXFLAGS
0202b0be3 Check for undefined __STDC_VERSION__ to suppress warnings
bc2fe6bf9 Add reversible mode to simple.c
1bb7955a6 Minor documentation changes
5254963d9 Added iterator tests
18f6b8ad4 Added proxy iterators
bbcded585 Fix AppVeyor badge URL
74e8a145c Fix bad URLs
e1b45152d Reorganize README file
a05e2e529 Update license info per LLNL guidelines
bcea89307 Update Appveyor project URL
05ef34110 Added tests for pointers and array flat ptr/ref
26be0ff9b Added pointer shifting/arithmetic and flat array ref/ptr accessors
c8969dfa0 changed references to linear indexing
4079af78b Implemented cfp pointers
5710f2913 Removed unnecessary export_ macro.
ee7ecc233 Revert "Minor optimization of lifting transform."
92fca13de Minor optimization of lifting transform.
4cb27e2a6 Fixed linker errors.
aa6266927 Fixed LNK4217 linker warnings when using Visual Studio. Also removes .lib and .exp files created along executable file of a project using static zfp library.
b515bd112 Fixed linker error LNK1114 when using Visual Studio. The fix required a change of name of zfp command line executable.
2ab06b3d7 hotfix: Fix wrong memset params order
b1a4f8a0c set and copy added to cfp references, diffusion example updated to use new API
d62a7f9e6 added get_ref to cfp arrays and get (value) to cfp refs (with associated tests)
af30bdc24 Changed cfp_array initialization syntax to fix windows build issue
28decba39 cfp_arrayALL changed from raw pointer to struct wrapped pointer (prep for cfp refs)
e748a9c96 Clean up code
edf135f5a Add compressed block index
bcf97058c Add variable rate support
f80c5103c Add uncompressed block index
40ae50ce1 Return block size in codec
e9f4743cd Initial pass for 3D arrays
40ef12b71 Merge branch 'develop' of github.com:LLNL/zfp into develop
814c11086 Add reversible mode to simple.c
6fe05a5d6 Minor documentation changes
fa79823c7 Merge branch 'misc/checksum-gen-tool' into develop
1c6144a99 updated cython for new checksum API
b82d9e836 TODO: update cython for new test utils APIs
1a32165eb Update cpp and CFP tests to conform to new checksum API
e75a0b111 Extend zfpChecksumsLib API for original-input specific function (omits irrelevant parameters)
32b4637ad Checksums now support tuples (key1, key2, value), where new key2 encodes array dimension lengths. Encode, decode, endtoend all updated
93b602ce2 Checksum (key, value) lookup working for encode, decode, and endtoend tests. zfpChecksumsLib updated to handle (key, value) lookup
9e8a000ea Checksum generation working for encode, decode, endtoend tests, across {dimensionalities 1-4}x{all scalar types}
00b944b01 endtoend tests can print (key, value) = (descriptor, checksum) pairs when compiled with flag PRINT_CHECKSUMS
881d8418d Update links to zfp website
6da6eff20 updated cython for new checksum API
d85b6fa48 Remove redundant CUDA h2d transfers
3119f94df add short mention of third-party tools in overview
a8def9af5 adding a short note and link for h5z-zfp
068f8150f Merge pull request #61 from maddyscientist/feature/cuda_encode_unroll
110a4945e Merge pull request #67 from maddyscientist/feature/cuda_decode_unroll
4cfd5882f TODO: update cython for new test utils APIs
070e4e4b6 Update cpp and CFP tests to conform to new checksum API
cf8c779d6 Extend zfpChecksumsLib API for original-input specific function (omits irrelevant parameters)
bf577f528 Checksums now support tuples (key1, key2, value), where new key2 encodes array dimension lengths. Encode, decode, endtoend all updated
64ef0ea9f Checksum (key, value) lookup working for encode, decode, and endtoend tests. zfpChecksumsLib updated to handle (key, value) lookup
208c6bbf7 Checksum generation working for encode, decode, endtoend tests, across {dimensionalities 1-4}x{all scalar types}
680fbb941 endtoend tests can print (key, value) = (descriptor, checksum) pairs when compiled with flag PRINT_CHECKSUMS
034f37bde Fix missing CUDA test function from previous refactor commit
047da679a Merge branch 'refactor/combine-testcases' into develop
7b75e6ce6 Remove fatal fail_msg() calls from setupCompressParam() becuase it would terminate some looping tests early
ec7771247 Minor test changes: print uint64 in hex format, add "ERROR: " prefix to nonfatal print statements
cb784f8f1 Refactor compress/decompress, and hash bitstream/array to separate functions for re-use in endtoend tests
7c751efab Combine endtoend testcases for CUDA, similarly to past 2 commits. Also perform some refactoring across endtoend testcode
a608eb9b0 Combine testcases for OpenMP endtoend tests, such that one testcase now tests 1 compression mode, varying the compression parameter (3), threadcount (3), and chunk size (3).
32fa7e76a Combine testcases for serial endtoend tests, such that one test case tests a compression mode across all 3 compression parameters, covering all compress/decompress checksum comparisons, without prematurely ending at the first failure
045ca9cbb Refactor encode/decode block special value tests (10 testcases become 1)
a7e77cc1f Fix typos in CFP documentation
ede3bee7b Merge branch 'develop' into feature/cuda_decode_unroll
989e1aa6c Merge pull request #1 from LLNL/develop
b7dcdb019 Unroll optimization for CUDA encode scatter partial functions
ae26c99ce Merge pull request #65 from maddyscientist/feature/cuda_inline_constants
761127592 Let the compiler inline the permutation lookup table into registers rather than using __constant__ memory
4444fd309 Fix C++ undefined behavior with last commit. All tests now pass.
bee1d74fa Add support for unbiased errors and tighter error bounds
9cb007ff2 CUDA optimization: unrolling optimization gather_partial functions
f76320231 Fix RuntimeError using int64 in zfpy on Windows (Pull Request #60)
d488a6f69 Appveyor: build zfpy on MSVC release builds with python 2.7 and 3.5 (expect zfpy failures on MSVC)
8f1756928 Fix mixed declarations and code
c0a7acc4f Update contact email
5384b5715 Merge branch 'release0.5.5' into develop

git-subtree-dir: src/zfp
git-subtree-split: f40868a6a1c190c802e7d8b5987064f044bf7812
---
 .github/dependabot.yml | 7 +
 .github/workflows/coverage.yml | 49 +
 .github/workflows/debug-linux.yml | 29 +
 .github/workflows/debug-macos.yml | 29 +
 .github/workflows/tests.yml | 78 +
 .gitignore | 4 +
 .readthedocs.yaml | 35 +
 .travis.yml | 303 ----
 CHANGELOG.md | 432 ++++++
 CITATION.cff | 17 +
 CMakeLists.txt | 117 +-
 CONTRIBUTING.md | 13 +
 CTestConfig.cmake | 3 +
 Config | 86 +-
 LICENSE | 66 +-
 Makefile | 4 +-
 NOTICE | 21 +
 README.md | 199 ++-
 SUPPORT.md | 11 +
 VERSIONS.md | 298 ----
 appveyor.sh | 18 +-
 appveyor.yml | 78 +-
 array/zfp/header.h | 19 -
 array/zfp/headerHelpers.h | 144 --
 array/zfp/iterator1.h | 38 -
 array/zfp/iterator2.h | 42 -
 array/zfp/iterator3.h | 50 -
 array/zfp/memory.h | 145 --
 array/zfp/pointer1.h | 30 -
 array/zfp/pointer2.h | 42 -
 array/zfp/pointer3.h | 48 -
 array/zfp/reference1.h | 27 -
 array/zfp/reference2.h | 27 -
 array/zfp/reference3.h | 27 -
 array/zfp/view1.h | 291 ----
 array/zfp/view2.h | 393 -----
 array/zfp/view3.h | 445 ------
 array/zfparray.h | 286 ----
 array/zfparray1.h | 297 ----
 array/zfparray2.h | 324 -----
 array/zfparray3.h | 338 -----
 array/zfpcodec.h | 17 -
 array/zfpcodecd.h | 149 --
 array/zfpcodecf.h | 149 --
 array/zfpfactory.h | 98 --
 cfp/CMakeLists.txt | 37 +-
 cfp/{src => }/Makefile | 12 +-
 cfp/cfp.cpp | 868 +++++++++++
 cfp/cfparray1d.cpp | 21 +
 cfp/cfparray1f.cpp | 21 +
 cfp/cfparray2d.cpp | 21 +
 cfp/cfparray2f.cpp | 21 +
 cfp/cfparray3d.cpp | 21 +
 cfp/cfparray3f.cpp | 21 +
 cfp/cfparray4d.cpp | 21 +
 cfp/cfparray4f.cpp | 21 +
 cfp/cfpheader.cpp | 21 +
 cfp/include/cfparray1d.h | 37 -
 cfp/include/cfparray1f.h | 37 -
 cfp/include/cfparray2d.h | 39 -
 cfp/include/cfparray2f.h | 39 -
 cfp/include/cfparray3d.h | 40 -
 cfp/include/cfparray3f.h | 40 -
 cfp/include/cfparrays.h | 28 -
 cfp/src/CMakeLists.txt | 38 -
 cfp/src/cfparray1_source.cpp | 23 -
 cfp/src/cfparray1d.cpp | 15 -
 cfp/src/cfparray1f.cpp | 15 -
 cfp/src/cfparray2_source.cpp | 35 -
 cfp/src/cfparray2d.cpp | 15 -
 cfp/src/cfparray2f.cpp | 15 -
 cfp/src/cfparray3_source.cpp | 41 -
 cfp/src/cfparray3d.cpp | 15 -
 cfp/src/cfparray3f.cpp | 15 -
 cfp/src/cfparray_source.cpp | 106 --
 cfp/src/cfparrays.cpp | 183 ---
 cfp/template/cfparray.cpp | 136 ++
 cfp/template/cfparray1.cpp | 332 +++++
 cfp/template/cfparray2.cpp | 468 ++++++
 cfp/template/cfparray3.cpp | 522 +++++++
 cfp/template/cfparray4.cpp | 576 ++++++++
 cfp/template/cfpheader.cpp | 166 +++
 cmake/appveyor.cmake | 16 +
 cmake/travis.cmake | 87 --
 docs/Makefile | 23 +
 docs/make.bat | 36 +
 docs/requirements.txt | 4 +
 docs/source/algorithm.rst | 155 ++
 docs/source/arrays.rst | 859 +++++++++++
 docs/source/bit-stream.rst | 303 ++++
 docs/source/caching.inc | 38 +
 docs/source/cfp.rst | 983 +++++++++++++
 docs/source/codec.inc | 258 ++++
 docs/source/conf.py | 181 +++
 docs/source/contributors.rst | 24 +
 docs/source/defs.rst | 42 +
 docs/source/directions.rst | 85 ++
 docs/source/disclaimer.inc | 9 +
 docs/source/examples.rst | 205 +++
 docs/source/execution.rst | 310 ++++
 docs/source/faq.rst | 1270 +++++++++++++++++
 docs/source/high-level-api.rst | 1047 ++++++++++++++
 docs/source/index.inc | 157 ++
 docs/source/index.rst | 32 +
 docs/source/installation.rst | 440 ++++++
 docs/source/introduction.rst | 216 +++
 docs/source/issues.rst | 414 ++++++
 docs/source/iterators.inc | 238 +++
 docs/source/license.rst | 59 +
 docs/source/limitations.rst | 87 ++
 docs/source/low-level-api.rst | 437 ++++++
 docs/source/modes.rst | 261 ++++
 docs/source/pointers.inc | 187 +++
 docs/source/python.rst | 155 ++
 docs/source/references.inc | 106 ++
 docs/source/requirements.txt | 3 +
 docs/source/serialization.inc | 215 +++
 docs/source/setup.py | 3 +
 docs/source/testing.rst | 18 +
 docs/source/tutorial.rst | 620 ++++++++
 docs/source/versions.rst | 456 ++++++
 docs/source/view-indexing.pdf | Bin 0 -> 121307 bytes
 docs/source/views.inc | 863 +++++++++++
 docs/source/zforp.rst | 884 ++++++++++++
 docs/source/zfp-rounding.pdf | Bin 0 -> 48460 bytes
 docs/source/zfpcmd.rst | 259 ++++
 examples/CMakeLists.txt | 21 +-
 examples/Makefile | 47 +-
 examples/array.cpp | 42 +
 examples/array2d.h | 58 -
 examples/array2d.hpp | 72 +
 examples/diffusion.cpp | 422 ++++--
 examples/diffusionC.c | 220 +--
 examples/inplace.c | 2 +-
 examples/iterator.cpp | 10 +-
 examples/iteratorC.c | 97 ++
 examples/pgm.c | 2 +-
 examples/ppm.c | 390 +++++
 examples/simple.c | 25 +-
 examples/speed.c | 2 +-
 fortran/CMakeLists.txt | 16 +-
 fortran/Makefile | 16 +-
 fortran/{zfp.f => zfp.f90} | 360 +++--
 include/zfp.h | 509 ++++---
 include/zfp.hpp | 289 ++++
 include/zfp/array.h | 32 +
 include/zfp/array.hpp | 95 ++
 include/zfp/array1.hpp | 265 ++++
 include/zfp/array2.hpp | 301 ++++
 include/zfp/array3.hpp | 316 ++++
 include/zfp/array4.hpp | 331 +++++
 include/{ => zfp}/bitstream.h | 38 +-
 .../bitstream.c => include/zfp/bitstream.inl | 140 +-
 include/zfp/codec/gencodec.hpp | 421 ++++++
 include/zfp/codec/zfpcodec.hpp | 551 +++++++
 include/zfp/constarray1.hpp | 265 ++++
 include/zfp/constarray2.hpp | 288 ++++
 include/zfp/constarray3.hpp | 300 ++++
 include/zfp/constarray4.hpp | 312 ++++
 include/zfp/factory.hpp | 119 ++
 include/zfp/index.hpp | 537 +++++++
 .../zfp/internal/array/cache.hpp | 50 +-
 include/zfp/internal/array/cache1.hpp | 201 +++
 include/zfp/internal/array/cache2.hpp | 207 +++
 include/zfp/internal/array/cache3.hpp | 213 +++
 include/zfp/internal/array/cache4.hpp | 219 +++
 include/zfp/internal/array/exception.hpp | 18 +
 include/zfp/internal/array/handle1.hpp | 38 +
 include/zfp/internal/array/handle2.hpp | 38 +
 include/zfp/internal/array/handle3.hpp | 38 +
 include/zfp/internal/array/handle4.hpp | 38 +
 include/zfp/internal/array/header.hpp | 41 +
 include/zfp/internal/array/iterator1.hpp | 137 ++
 include/zfp/internal/array/iterator2.hpp | 230 +++
 include/zfp/internal/array/iterator3.hpp | 265 ++++
 include/zfp/internal/array/iterator4.hpp | 300 ++++
 include/zfp/internal/array/memory.hpp | 200 +++
 include/zfp/internal/array/pointer1.hpp | 118 ++
 include/zfp/internal/array/pointer2.hpp | 136 ++
 include/zfp/internal/array/pointer3.hpp | 145 ++
 include/zfp/internal/array/pointer4.hpp | 154 ++
 include/zfp/internal/array/reference1.hpp | 78 +
 include/zfp/internal/array/reference2.hpp | 80 ++
 include/zfp/internal/array/reference3.hpp | 82 ++
 include/zfp/internal/array/reference4.hpp | 84 ++
 include/zfp/internal/array/store.hpp | 255 ++++
 include/zfp/internal/array/store1.hpp | 140 ++
 include/zfp/internal/array/store2.hpp | 147 ++
 include/zfp/internal/array/store3.hpp | 154 ++
 include/zfp/internal/array/store4.hpp | 161 +++
 include/zfp/internal/array/traits.hpp | 30 +
 include/zfp/internal/array/view1.hpp | 303 ++++
 include/zfp/internal/array/view2.hpp | 498 +++++++
 include/zfp/internal/array/view3.hpp | 584 ++++++++
 include/zfp/internal/array/view4.hpp | 679 +++++++++
 include/zfp/internal/cfp/array1d.h | 141 ++
 include/zfp/internal/cfp/array1f.h | 141 ++
 include/zfp/internal/cfp/array2d.h | 144 ++
 include/zfp/internal/cfp/array2f.h | 144 ++
 include/zfp/internal/cfp/array3d.h | 146 ++
 include/zfp/internal/cfp/array3f.h | 146 ++
 include/zfp/internal/cfp/array4d.h | 148 ++
 include/zfp/internal/cfp/array4f.h | 148 ++
 include/zfp/internal/cfp/header.h | 8 +
 include/zfp/internal/codec/genheader.hpp | 76 +
 include/zfp/internal/codec/zfpheader.hpp | 129 ++
 .../zfp/internal/zfp}/inline.h | 4 +-
 include/zfp/{ => internal/zfp}/macros.h | 0
 include/zfp/{ => internal/zfp}/system.h | 24 +-
 include/zfp/{ => internal/zfp}/types.h | 29 +-
 include/zfp/version.h | 49 +
 python/CMakeLists.txt | 16 +-
 python/eyescale-cmake/FindNumPy.cmake | 41 -
 python/eyescale-cmake/LICENSE.txt | 26 -
 python/requirements.txt | 1 -
 python/scikit-build-cmake/FindCython.cmake | 19 +-
 python/scikit-build-cmake/FindNumPy.cmake | 104 ++
 .../FindPythonExtensions.cmake | 58 +-
 python/scikit-build-cmake/LICENSE | 3 -
 python/scikit-build-cmake/UseCython.cmake | 44 +-
 .../UsePythonExtensions.cmake | 320 +++++
 ...targetLinkLibrariesWithDynamicLookup.cmake | 52 +-
 python/zfpy.pxd | 80 +-
 python/zfpy.pyx | 96 +-
 setup.py | 15 +
 src/CMakeLists.txt | 17 +-
 src/Makefile | 2 +-
 src/bitstream.c | 6 +-
 src/cuda_zfp/CMakeLists.txt | 1 -
 src/cuda_zfp/constant_setup.cuh | 39 -
 src/cuda_zfp/constants.h | 6 +-
 src/cuda_zfp/cuZFP.cu | 84 +-
 src/cuda_zfp/decode.cuh | 154 +-
 src/cuda_zfp/decode1.cuh | 10 +-
 src/cuda_zfp/decode2.cuh | 18 +-
 src/cuda_zfp/decode3.cuh | 23 +-
 src/cuda_zfp/encode.cuh | 100 +-
 src/cuda_zfp/encode1.cuh | 12 +-
 src/cuda_zfp/encode2.cuh | 21 +-
 src/cuda_zfp/encode3.cuh | 31 +-
 src/cuda_zfp/shared.h | 51 +-
 src/cuda_zfp/type_info.cuh | 17 +-
 src/decode1d.c | 7 +-
 src/decode1f.c | 7 +-
 src/decode1i.c | 7 +-
 src/decode1l.c | 7 +-
 src/decode2d.c | 7 +-
 src/decode2f.c | 7 +-
 src/decode2i.c | 7 +-
 src/decode2l.c | 7 +-
 src/decode3d.c | 7 +-
 src/decode3f.c | 7 +-
 src/decode3i.c | 7 +-
 src/decode3l.c | 7 +-
 src/decode4d.c | 7 +-
 src/decode4f.c | 7 +-
 src/decode4i.c | 7 +-
 src/decode4l.c | 7 +-
 src/encode1d.c | 7 +-
 src/encode1f.c | 7 +-
 src/encode1i.c | 7 +-
 src/encode1l.c | 7 +-
 src/encode2d.c | 7 +-
 src/encode2f.c | 7 +-
 src/encode2i.c | 7 +-
 src/encode2l.c | 7 +-
 src/encode3d.c | 7 +-
 src/encode3f.c | 7 +-
 src/encode3i.c | 7 +-
 src/encode3l.c | 7 +-
 src/encode4d.c | 7 +-
 src/encode4f.c | 7 +-
 src/encode4i.c | 7 +-
 src/encode4l.c | 7 +-
 src/share/omp.c | 20 +-
 src/share/parallel.c | 68 +-
 src/template/codec.c | 6 +
 src/template/codecf.c | 8 +-
 src/template/compress.c | 54 +-
 src/template/cudacompress.c | 16 +-
 src/template/cudadecompress.c | 16 +-
 src/template/decode.c | 194 ++-
 src/template/decode1.c | 32 +-
 src/template/decode2.c | 30 +-
 src/template/decode3.c | 30 +-
 src/template/decode4.c | 30 +-
 src/template/decodef.c | 10 +-
 src/template/decodei.c | 4 +-
 src/template/decompress.c | 54 +-
 src/template/encode.c | 157 +-
 src/template/encode1.c | 34 +-
 src/template/encode2.c | 34 +-
 src/template/encode3.c | 34 +-
 src/template/encode4.c | 34 +-
 src/template/encodef.c | 26 +-
 src/template/encodei.c | 4 +-
 src/template/ompcompress.c | 150 +-
 src/template/revdecode.c | 11 +-
 src/template/revdecodef.c | 7 +-
 src/template/revencode.c | 13 +-
 src/template/revencodef.c | 6 +-
 src/traitsd.h | 1 +
 src/traitsf.h | 1 +
 src/zfp.c | 355 +++--
 tests/CMakeLists.txt | 171 ++-
 tests/CMakeLists.txt.in | 16 +
 tests/Makefile | 10 +-
 tests/array/CMakeLists.txt | 7 +
 tests/array/array/CMakeLists.txt | 74 +
 tests/array/array/testArray1Base.cpp | 194 +++
 tests/array/array/testArray1ItersBase.cpp | 395 +++++
 tests/array/array/testArray1RefsBase.cpp | 37 +
 tests/array/array/testArray1ViewsBase.cpp | 192 +++
 tests/array/array/testArray1d.cpp | 45 +
 tests/array/array/testArray1dIters.cpp | 10 +
 tests/array/array/testArray1dPtrs.cpp | 9 +
 tests/array/array/testArray1dRefs.cpp | 14 +
 tests/array/array/testArray1dViewIters.cpp | 17 +
 tests/array/array/testArray1dViewPtrs.cpp | 17 +
 tests/array/array/testArray1dViews.cpp | 18 +
 tests/array/array/testArray1f.cpp | 45 +
 tests/array/array/testArray1fIters.cpp | 10 +
 tests/array/array/testArray1fPtrs.cpp | 9 +
 tests/array/array/testArray1fRefs.cpp | 14 +
 tests/array/array/testArray1fViewIters.cpp | 17 +
 tests/array/array/testArray1fViewPtrs.cpp | 17 +
 tests/array/array/testArray1fViews.cpp | 18 +
 tests/array/array/testArray2Base.cpp | 422 ++++++
 tests/array/array/testArray2ItersBase.cpp | 73 +
 tests/array/array/testArray2PtrsBase.cpp | 23 +
 tests/array/array/testArray2RefsBase.cpp | 55 +
 tests/array/array/testArray2ViewsBase.cpp | 398 ++++++
 tests/array/array/testArray2d.cpp | 45 +
 tests/array/array/testArray2dIters.cpp |
10 + tests/array/array/testArray2dPtrs.cpp | 10 + tests/array/array/testArray2dRefs.cpp | 14 + tests/array/array/testArray2dViewIters.cpp | 17 + tests/array/array/testArray2dViewPtrs.cpp | 17 + tests/array/array/testArray2dViews.cpp | 18 + tests/array/array/testArray2f.cpp | 45 + tests/array/array/testArray2fIters.cpp | 10 + tests/array/array/testArray2fPtrs.cpp | 10 + tests/array/array/testArray2fRefs.cpp | 14 + tests/array/array/testArray2fViewIters.cpp | 17 + tests/array/array/testArray2fViewPtrs.cpp | 17 + tests/array/array/testArray2fViews.cpp | 18 + tests/array/array/testArray3Base.cpp | 571 ++++++++ tests/array/array/testArray3ItersBase.cpp | 91 ++ tests/array/array/testArray3PtrsBase.cpp | 52 + tests/array/array/testArray3RefsBase.cpp | 58 + tests/array/array/testArray3ViewsBase.cpp | 489 +++++++ tests/array/array/testArray3d.cpp | 45 + tests/array/array/testArray3dIters.cpp | 10 + tests/array/array/testArray3dPtrs.cpp | 10 + tests/array/array/testArray3dRefs.cpp | 14 + tests/array/array/testArray3dViewIters.cpp | 17 + tests/array/array/testArray3dViewPtrs.cpp | 17 + tests/array/array/testArray3dViews.cpp | 18 + tests/array/array/testArray3f.cpp | 45 + tests/array/array/testArray3fIters.cpp | 10 + tests/array/array/testArray3fPtrs.cpp | 10 + tests/array/array/testArray3fRefs.cpp | 14 + tests/array/array/testArray3fViewIters.cpp | 17 + tests/array/array/testArray3fViewPtrs.cpp | 17 + tests/array/array/testArray3fViews.cpp | 18 + tests/array/array/testArray4Base.cpp | 741 ++++++++++ tests/array/array/testArray4ItersBase.cpp | 109 ++ tests/array/array/testArray4PtrsBase.cpp | 86 ++ tests/array/array/testArray4RefsBase.cpp | 61 + tests/array/array/testArray4ViewsBase.cpp | 639 +++++++++ tests/array/array/testArray4d.cpp | 45 + tests/array/array/testArray4dIters.cpp | 10 + tests/array/array/testArray4dPtrs.cpp | 10 + tests/array/array/testArray4dRefs.cpp | 14 + tests/array/array/testArray4dViewIters.cpp | 17 + tests/array/array/testArray4dViewPtrs.cpp | 17 + 
tests/array/array/testArray4dViews.cpp | 18 + tests/array/array/testArray4f.cpp | 45 + tests/array/array/testArray4fIters.cpp | 10 + tests/array/array/testArray4fPtrs.cpp | 10 + tests/array/array/testArray4fRefs.cpp | 14 + tests/array/array/testArray4fViewIters.cpp | 17 + tests/array/array/testArray4fViewPtrs.cpp | 17 + tests/array/array/testArray4fViews.cpp | 18 + tests/array/array/testArrayBase.cpp | 911 ++++++++++++ tests/array/array/testArrayItersBase.cpp | 205 +++ tests/array/array/testArrayPtrsBase.cpp | 388 +++++ tests/array/array/testArrayRefsBase.cpp | 133 ++ tests/array/array/testArrayViewItersBase.cpp | 98 ++ tests/array/array/testArrayViewPtrsBase.cpp | 102 ++ tests/array/array/testArrayViewsBase.cpp | 598 ++++++++ tests/array/array/testConstruct.cpp | 140 ++ tests/array/constArray/CMakeLists.txt | 20 + .../array/constArray/testConstArray1Base.cpp | 1 + tests/array/constArray/testConstArray1d.cpp | 46 + tests/array/constArray/testConstArray1f.cpp | 46 + .../array/constArray/testConstArray2Base.cpp | 0 tests/array/constArray/testConstArray2d.cpp | 46 + tests/array/constArray/testConstArray2f.cpp | 46 + .../array/constArray/testConstArray3Base.cpp | 0 tests/array/constArray/testConstArray3d.cpp | 46 + tests/array/constArray/testConstArray3f.cpp | 46 + .../array/constArray/testConstArray4Base.cpp | 0 tests/array/constArray/testConstArray4d.cpp | 46 + tests/array/constArray/testConstArray4f.cpp | 46 + tests/array/constArray/testConstArrayBase.cpp | 242 ++++ tests/array/decode/CMakeLists.txt | 28 + tests/array/decode/testTemplatedDecode1d.cpp | 20 + tests/array/decode/testTemplatedDecode1f.cpp | 20 + tests/array/decode/testTemplatedDecode2d.cpp | 20 + tests/array/decode/testTemplatedDecode2f.cpp | 20 + tests/array/decode/testTemplatedDecode3d.cpp | 20 + tests/array/decode/testTemplatedDecode3f.cpp | 20 + tests/array/decode/testTemplatedDecode4d.cpp | 20 + tests/array/decode/testTemplatedDecode4f.cpp | 20 + .../array/decode/testTemplatedDecodeBase.cpp | 558 
++++++++ tests/array/encode/CMakeLists.txt | 29 + tests/array/encode/testTemplatedEncode1d.cpp | 17 + tests/array/encode/testTemplatedEncode1f.cpp | 17 + tests/array/encode/testTemplatedEncode2d.cpp | 17 + tests/array/encode/testTemplatedEncode2f.cpp | 17 + tests/array/encode/testTemplatedEncode3d.cpp | 17 + tests/array/encode/testTemplatedEncode3f.cpp | 17 + tests/array/encode/testTemplatedEncode4d.cpp | 17 + tests/array/encode/testTemplatedEncode4f.cpp | 17 + .../array/encode/testTemplatedEncodeBase.cpp | 302 ++++ tests/array/utils/commonMacros.h | 6 + tests/array/utils/gtest1dTest.h | 47 + tests/array/utils/gtest1fTest.h | 47 + tests/array/utils/gtest2dTest.h | 52 + tests/array/utils/gtest2fTest.h | 52 + tests/array/utils/gtest3dTest.h | 59 + tests/array/utils/gtest3fTest.h | 59 + tests/array/utils/gtest4dTest.h | 64 + tests/array/utils/gtest4fTest.h | 64 + tests/array/utils/gtestBaseFixture.h | 149 ++ tests/array/utils/gtestCApiTest.h | 31 + tests/array/utils/gtestDoubleEnv.h | 50 + tests/array/utils/gtestFloatEnv.h | 50 + tests/array/utils/gtestSingleFixture.h | 6 + tests/array/utils/gtestTestEnv.h | 8 + tests/array/utils/predicates.h | 59 + tests/array/zfp/CMakeLists.txt | 6 + tests/array/zfp/testAlignedMemory.cpp | 34 + tests/cfp/CMakeLists.txt | 22 + tests/cfp/testCfpArray1_source.c | 580 ++++++++ tests/cfp/testCfpArray1d.c | 101 ++ tests/cfp/testCfpArray1f.c | 100 ++ tests/cfp/testCfpArray2_source.c | 634 ++++++++ tests/cfp/testCfpArray2d.c | 101 ++ tests/cfp/testCfpArray2f.c | 101 ++ tests/cfp/testCfpArray3_source.c | 662 +++++++++ tests/cfp/testCfpArray3d.c | 101 ++ tests/cfp/testCfpArray3f.c | 101 ++ tests/cfp/testCfpArray4_source.c | 692 +++++++++ tests/cfp/testCfpArray4d.c | 98 ++ tests/cfp/testCfpArray4f.c | 98 ++ tests/cfp/testCfpArray_source.c | 643 +++++++++ tests/cfp/testCfpNamespace.c | 27 + tests/ci-utils/CMakeLists.txt | 6 + tests/constants/1dDouble.h | 4 + tests/constants/1dFloat.h | 4 + tests/constants/1dInt32.h | 4 + 
tests/constants/1dInt64.h | 4 + tests/constants/2dDouble.h | 4 + tests/constants/2dFloat.h | 4 + tests/constants/2dInt32.h | 4 + tests/constants/2dInt64.h | 4 + tests/constants/3dDouble.h | 4 + tests/constants/3dFloat.h | 4 + tests/constants/3dInt32.h | 4 + tests/constants/3dInt64.h | 4 + tests/constants/4dDouble.h | 4 + tests/constants/4dFloat.h | 4 + tests/constants/4dInt32.h | 4 + tests/constants/4dInt64.h | 4 + tests/constants/checksums/1dDouble.h | 37 + tests/constants/checksums/1dFloat.h | 37 + tests/constants/checksums/1dInt32.h | 21 + tests/constants/checksums/1dInt64.h | 21 + tests/constants/checksums/2dDouble.h | 37 + tests/constants/checksums/2dFloat.h | 37 + tests/constants/checksums/2dInt32.h | 21 + tests/constants/checksums/2dInt64.h | 21 + tests/constants/checksums/3dDouble.h | 37 + tests/constants/checksums/3dFloat.h | 37 + tests/constants/checksums/3dInt32.h | 21 + tests/constants/checksums/3dInt64.h | 21 + tests/constants/checksums/4dDouble.h | 37 + tests/constants/checksums/4dFloat.h | 37 + tests/constants/checksums/4dInt32.h | 21 + tests/constants/checksums/4dInt64.h | 21 + tests/constants/doubleConsts.h | 3 + tests/constants/floatConsts.h | 3 + tests/constants/int32Consts.h | 2 + tests/constants/int64Consts.h | 2 + tests/constants/universalConsts.h | 4 + tests/fortran/CMakeLists.txt | 18 + tests/fortran/testFortran.f | 103 ++ tests/gitlab/corona-jobs.yml | 17 + tests/gitlab/corona-templates.yml | 12 + tests/gitlab/gitlab-ci.yml | 143 ++ tests/gitlab/pascal-jobs.yml | 17 + tests/gitlab/pascal-templates.yml | 12 + tests/gitlab/quartz-jobs.yml | 64 + tests/gitlab/quartz-templates.yml | 12 + tests/python/CMakeLists.txt | 40 + tests/python/test_numpy.py | 238 +++ tests/python/test_utils.pyx | 512 +++++++ tests/src/CMakeLists.txt | 10 + tests/src/decode/CMakeLists.txt | 49 + tests/src/decode/testZfpDecodeBlock1dDouble.c | 13 + tests/src/decode/testZfpDecodeBlock1dFloat.c | 13 + tests/src/decode/testZfpDecodeBlock1dInt32.c | 13 + 
tests/src/decode/testZfpDecodeBlock1dInt64.c | 13 + tests/src/decode/testZfpDecodeBlock2dDouble.c | 13 + tests/src/decode/testZfpDecodeBlock2dFloat.c | 13 + tests/src/decode/testZfpDecodeBlock2dInt32.c | 13 + tests/src/decode/testZfpDecodeBlock2dInt64.c | 13 + tests/src/decode/testZfpDecodeBlock3dDouble.c | 13 + tests/src/decode/testZfpDecodeBlock3dFloat.c | 13 + tests/src/decode/testZfpDecodeBlock3dInt32.c | 13 + tests/src/decode/testZfpDecodeBlock3dInt64.c | 13 + tests/src/decode/testZfpDecodeBlock4dDouble.c | 13 + tests/src/decode/testZfpDecodeBlock4dFloat.c | 13 + tests/src/decode/testZfpDecodeBlock4dInt32.c | 13 + tests/src/decode/testZfpDecodeBlock4dInt64.c | 13 + .../testZfpDecodeBlockStrided1dDouble.c | 13 + .../decode/testZfpDecodeBlockStrided1dFloat.c | 13 + .../decode/testZfpDecodeBlockStrided1dInt32.c | 13 + .../decode/testZfpDecodeBlockStrided1dInt64.c | 13 + .../testZfpDecodeBlockStrided2dDouble.c | 13 + .../decode/testZfpDecodeBlockStrided2dFloat.c | 13 + .../decode/testZfpDecodeBlockStrided2dInt32.c | 13 + .../decode/testZfpDecodeBlockStrided2dInt64.c | 13 + .../testZfpDecodeBlockStrided3dDouble.c | 13 + .../decode/testZfpDecodeBlockStrided3dFloat.c | 13 + .../decode/testZfpDecodeBlockStrided3dInt32.c | 13 + .../decode/testZfpDecodeBlockStrided3dInt64.c | 13 + .../testZfpDecodeBlockStrided4dDouble.c | 13 + .../decode/testZfpDecodeBlockStrided4dFloat.c | 13 + .../decode/testZfpDecodeBlockStrided4dInt32.c | 13 + .../decode/testZfpDecodeBlockStrided4dInt64.c | 13 + tests/src/decode/testcases/block.c | 13 + tests/src/decode/testcases/blockStrided.c | 18 + tests/src/decode/zfpDecodeBlockBase.c | 278 ++++ tests/src/decode/zfpDecodeBlockStridedBase.c | 621 ++++++++ tests/src/encode/CMakeLists.txt | 49 + tests/src/encode/testZfpEncodeBlock1dDouble.c | 13 + tests/src/encode/testZfpEncodeBlock1dFloat.c | 13 + tests/src/encode/testZfpEncodeBlock1dInt32.c | 13 + tests/src/encode/testZfpEncodeBlock1dInt64.c | 13 + tests/src/encode/testZfpEncodeBlock2dDouble.c | 
13 + tests/src/encode/testZfpEncodeBlock2dFloat.c | 13 + tests/src/encode/testZfpEncodeBlock2dInt32.c | 13 + tests/src/encode/testZfpEncodeBlock2dInt64.c | 13 + tests/src/encode/testZfpEncodeBlock3dDouble.c | 13 + tests/src/encode/testZfpEncodeBlock3dFloat.c | 13 + tests/src/encode/testZfpEncodeBlock3dInt32.c | 13 + tests/src/encode/testZfpEncodeBlock3dInt64.c | 13 + tests/src/encode/testZfpEncodeBlock4dDouble.c | 13 + tests/src/encode/testZfpEncodeBlock4dFloat.c | 13 + tests/src/encode/testZfpEncodeBlock4dInt32.c | 13 + tests/src/encode/testZfpEncodeBlock4dInt64.c | 13 + .../testZfpEncodeBlockStrided1dDouble.c | 13 + .../encode/testZfpEncodeBlockStrided1dFloat.c | 13 + .../encode/testZfpEncodeBlockStrided1dInt32.c | 13 + .../encode/testZfpEncodeBlockStrided1dInt64.c | 13 + .../testZfpEncodeBlockStrided2dDouble.c | 13 + .../encode/testZfpEncodeBlockStrided2dFloat.c | 13 + .../encode/testZfpEncodeBlockStrided2dInt32.c | 13 + .../encode/testZfpEncodeBlockStrided2dInt64.c | 13 + .../testZfpEncodeBlockStrided3dDouble.c | 13 + .../encode/testZfpEncodeBlockStrided3dFloat.c | 13 + .../encode/testZfpEncodeBlockStrided3dInt32.c | 13 + .../encode/testZfpEncodeBlockStrided3dInt64.c | 13 + .../testZfpEncodeBlockStrided4dDouble.c | 13 + .../encode/testZfpEncodeBlockStrided4dFloat.c | 13 + .../encode/testZfpEncodeBlockStrided4dInt32.c | 13 + .../encode/testZfpEncodeBlockStrided4dInt64.c | 13 + tests/src/encode/testcases/block.c | 11 + tests/src/encode/testcases/blockStrided.c | 18 + tests/src/encode/zfpEncodeBlockBase.c | 266 ++++ tests/src/encode/zfpEncodeBlockStridedBase.c | 490 +++++++ tests/src/endtoend/CMakeLists.txt | 70 + tests/src/endtoend/cudaExecBase.c | 202 +++ tests/src/endtoend/ompExecBase.c | 235 +++ tests/src/endtoend/serialExecBase.c | 299 ++++ tests/src/endtoend/testZfpCuda1dDouble.c | 13 + tests/src/endtoend/testZfpCuda1dFloat.c | 13 + tests/src/endtoend/testZfpCuda1dInt32.c | 13 + tests/src/endtoend/testZfpCuda1dInt64.c | 13 + 
tests/src/endtoend/testZfpCuda2dDouble.c | 13 + tests/src/endtoend/testZfpCuda2dFloat.c | 13 + tests/src/endtoend/testZfpCuda2dInt32.c | 13 + tests/src/endtoend/testZfpCuda2dInt64.c | 13 + tests/src/endtoend/testZfpCuda3dDouble.c | 13 + tests/src/endtoend/testZfpCuda3dFloat.c | 13 + tests/src/endtoend/testZfpCuda3dInt32.c | 13 + tests/src/endtoend/testZfpCuda3dInt64.c | 13 + tests/src/endtoend/testZfpCuda4dDouble.c | 13 + tests/src/endtoend/testZfpCuda4dFloat.c | 13 + tests/src/endtoend/testZfpCuda4dInt32.c | 13 + tests/src/endtoend/testZfpCuda4dInt64.c | 13 + tests/src/endtoend/testZfpOmp1dDouble.c | 13 + tests/src/endtoend/testZfpOmp1dFloat.c | 13 + tests/src/endtoend/testZfpOmp1dInt32.c | 13 + tests/src/endtoend/testZfpOmp1dInt64.c | 13 + tests/src/endtoend/testZfpOmp2dDouble.c | 13 + tests/src/endtoend/testZfpOmp2dFloat.c | 13 + tests/src/endtoend/testZfpOmp2dInt32.c | 13 + tests/src/endtoend/testZfpOmp2dInt64.c | 13 + tests/src/endtoend/testZfpOmp3dDouble.c | 13 + tests/src/endtoend/testZfpOmp3dFloat.c | 13 + tests/src/endtoend/testZfpOmp3dInt32.c | 13 + tests/src/endtoend/testZfpOmp3dInt64.c | 13 + tests/src/endtoend/testZfpOmp4dDouble.c | 13 + tests/src/endtoend/testZfpOmp4dFloat.c | 13 + tests/src/endtoend/testZfpOmp4dInt32.c | 13 + tests/src/endtoend/testZfpOmp4dInt64.c | 13 + tests/src/endtoend/testZfpSerial1dDouble.c | 13 + tests/src/endtoend/testZfpSerial1dFloat.c | 13 + tests/src/endtoend/testZfpSerial1dInt32.c | 13 + tests/src/endtoend/testZfpSerial1dInt64.c | 13 + tests/src/endtoend/testZfpSerial2dDouble.c | 13 + tests/src/endtoend/testZfpSerial2dFloat.c | 13 + tests/src/endtoend/testZfpSerial2dInt32.c | 13 + tests/src/endtoend/testZfpSerial2dInt64.c | 13 + tests/src/endtoend/testZfpSerial3dDouble.c | 13 + tests/src/endtoend/testZfpSerial3dFloat.c | 13 + tests/src/endtoend/testZfpSerial3dInt32.c | 13 + tests/src/endtoend/testZfpSerial3dInt64.c | 13 + tests/src/endtoend/testZfpSerial4dDouble.c | 13 + tests/src/endtoend/testZfpSerial4dFloat.c | 13 + 
tests/src/endtoend/testZfpSerial4dInt32.c | 13 + tests/src/endtoend/testZfpSerial4dInt64.c | 13 + tests/src/endtoend/testcases/cuda.c | 24 + tests/src/endtoend/testcases/omp.c | 20 + tests/src/endtoend/testcases/serial.c | 31 + tests/src/endtoend/zfpEndtoendBase.c | 591 ++++++++ tests/src/execPolicy/CMakeLists.txt | 19 + tests/src/execPolicy/testCuda.c | 95 ++ tests/src/execPolicy/testOmp.c | 213 +++ tests/src/execPolicy/testOmpInternal.c | 84 ++ tests/src/inline/CMakeLists.txt | 11 + tests/src/inline/testBitstream.c | 674 +++++++++ tests/src/inline/testBitstreamSmallWsize.c | 119 ++ tests/src/inline/testBitstreamStrided.c | 95 ++ tests/src/misc/CMakeLists.txt | 48 + tests/src/misc/testZfpField1d.c | 24 + tests/src/misc/testZfpField1f.c | 24 + tests/src/misc/testZfpField2d.c | 28 + tests/src/misc/testZfpField2f.c | 28 + tests/src/misc/testZfpField3d.c | 32 + tests/src/misc/testZfpField3f.c | 32 + tests/src/misc/testZfpField4d.c | 36 + tests/src/misc/testZfpField4f.c | 36 + tests/src/misc/testZfpHeader.c | 497 +++++++ tests/src/misc/testZfpPromote.c | 120 ++ tests/src/misc/testZfpStream.c | 934 ++++++++++++ tests/src/misc/zfpFieldBase.c | 256 ++++ tests/testviews.cpp | 241 ++++ tests/testzfp.cpp | 126 +- tests/utils/CMakeLists.txt | 43 + tests/utils/fixedpoint96.c | 296 ++++ tests/utils/fixedpoint96.h | 33 + tests/utils/genChecksums.sh | 51 + tests/utils/genSmoothRandNums.c | 918 ++++++++++++ tests/utils/genSmoothRandNums.h | 33 + tests/utils/rand32.c | 43 + tests/utils/rand32.h | 22 + tests/utils/rand64.c | 52 + tests/utils/rand64.h | 26 + tests/utils/stridedOperations.c | 133 ++ tests/utils/stridedOperations.h | 37 + tests/utils/testMacros.h | 39 + tests/utils/zfpChecksums.c | 171 +++ tests/utils/zfpChecksums.h | 35 + tests/utils/zfpCompressionParams.c | 20 + tests/utils/zfpCompressionParams.h | 15 + tests/utils/zfpHash.c | 126 ++ tests/utils/zfpHash.h | 26 + tests/utils/zfpTimer.c | 56 + tests/utils/zfpTimer.h | 24 + travis.sh | 60 - utils/CMakeLists.txt | 12 +- 
utils/Makefile | 4 +- utils/zfp.c | 66 +- zfp-config-version.cmake.in | 2 + zfp-config.cmake.in | 28 +- 690 files changed, 59905 insertions(+), 7087 deletions(-) create mode 100644 .github/dependabot.yml create mode 100644 .github/workflows/coverage.yml create mode 100644 .github/workflows/debug-linux.yml create mode 100644 .github/workflows/debug-macos.yml create mode 100644 .github/workflows/tests.yml create mode 100644 .readthedocs.yaml delete mode 100644 .travis.yml create mode 100644 CHANGELOG.md create mode 100644 CITATION.cff create mode 100644 CONTRIBUTING.md create mode 100644 NOTICE create mode 100644 SUPPORT.md delete mode 100644 VERSIONS.md delete mode 100644 array/zfp/header.h delete mode 100644 array/zfp/headerHelpers.h delete mode 100644 array/zfp/iterator1.h delete mode 100644 array/zfp/iterator2.h delete mode 100644 array/zfp/iterator3.h delete mode 100644 array/zfp/memory.h delete mode 100644 array/zfp/pointer1.h delete mode 100644 array/zfp/pointer2.h delete mode 100644 array/zfp/pointer3.h delete mode 100644 array/zfp/reference1.h delete mode 100644 array/zfp/reference2.h delete mode 100644 array/zfp/reference3.h delete mode 100644 array/zfp/view1.h delete mode 100644 array/zfp/view2.h delete mode 100644 array/zfp/view3.h delete mode 100644 array/zfparray.h delete mode 100644 array/zfparray1.h delete mode 100644 array/zfparray2.h delete mode 100644 array/zfparray3.h delete mode 100644 array/zfpcodec.h delete mode 100644 array/zfpcodecd.h delete mode 100644 array/zfpcodecf.h delete mode 100644 array/zfpfactory.h rename cfp/{src => }/Makefile (59%) create mode 100644 cfp/cfp.cpp create mode 100644 cfp/cfparray1d.cpp create mode 100644 cfp/cfparray1f.cpp create mode 100644 cfp/cfparray2d.cpp create mode 100644 cfp/cfparray2f.cpp create mode 100644 cfp/cfparray3d.cpp create mode 100644 cfp/cfparray3f.cpp create mode 100644 cfp/cfparray4d.cpp create mode 100644 cfp/cfparray4f.cpp create mode 100644 cfp/cfpheader.cpp delete mode 100644 
cfp/include/cfparray1d.h delete mode 100644 cfp/include/cfparray1f.h delete mode 100644 cfp/include/cfparray2d.h delete mode 100644 cfp/include/cfparray2f.h delete mode 100644 cfp/include/cfparray3d.h delete mode 100644 cfp/include/cfparray3f.h delete mode 100644 cfp/include/cfparrays.h delete mode 100644 cfp/src/CMakeLists.txt delete mode 100644 cfp/src/cfparray1_source.cpp delete mode 100644 cfp/src/cfparray1d.cpp delete mode 100644 cfp/src/cfparray1f.cpp delete mode 100644 cfp/src/cfparray2_source.cpp delete mode 100644 cfp/src/cfparray2d.cpp delete mode 100644 cfp/src/cfparray2f.cpp delete mode 100644 cfp/src/cfparray3_source.cpp delete mode 100644 cfp/src/cfparray3d.cpp delete mode 100644 cfp/src/cfparray3f.cpp delete mode 100644 cfp/src/cfparray_source.cpp delete mode 100644 cfp/src/cfparrays.cpp create mode 100644 cfp/template/cfparray.cpp create mode 100644 cfp/template/cfparray1.cpp create mode 100644 cfp/template/cfparray2.cpp create mode 100644 cfp/template/cfparray3.cpp create mode 100644 cfp/template/cfparray4.cpp create mode 100644 cfp/template/cfpheader.cpp delete mode 100644 cmake/travis.cmake create mode 100644 docs/Makefile create mode 100644 docs/make.bat create mode 100644 docs/requirements.txt create mode 100644 docs/source/algorithm.rst create mode 100644 docs/source/arrays.rst create mode 100644 docs/source/bit-stream.rst create mode 100644 docs/source/caching.inc create mode 100644 docs/source/cfp.rst create mode 100644 docs/source/codec.inc create mode 100644 docs/source/conf.py create mode 100644 docs/source/contributors.rst create mode 100644 docs/source/defs.rst create mode 100644 docs/source/directions.rst create mode 100644 docs/source/disclaimer.inc create mode 100644 docs/source/examples.rst create mode 100644 docs/source/execution.rst create mode 100644 docs/source/faq.rst create mode 100644 docs/source/high-level-api.rst create mode 100644 docs/source/index.inc create mode 100644 docs/source/index.rst create mode 100644 
docs/source/installation.rst create mode 100644 docs/source/introduction.rst create mode 100644 docs/source/issues.rst create mode 100644 docs/source/iterators.inc create mode 100644 docs/source/license.rst create mode 100644 docs/source/limitations.rst create mode 100644 docs/source/low-level-api.rst create mode 100644 docs/source/modes.rst create mode 100644 docs/source/pointers.inc create mode 100644 docs/source/python.rst create mode 100644 docs/source/references.inc create mode 100644 docs/source/requirements.txt create mode 100644 docs/source/serialization.inc create mode 100644 docs/source/setup.py create mode 100644 docs/source/testing.rst create mode 100644 docs/source/tutorial.rst create mode 100644 docs/source/versions.rst create mode 100644 docs/source/view-indexing.pdf create mode 100644 docs/source/views.inc create mode 100644 docs/source/zforp.rst create mode 100644 docs/source/zfp-rounding.pdf create mode 100644 docs/source/zfpcmd.rst create mode 100644 examples/array.cpp delete mode 100644 examples/array2d.h create mode 100644 examples/array2d.hpp create mode 100644 examples/iteratorC.c create mode 100644 examples/ppm.c rename fortran/{zfp.f => zfp.f90} (78%) create mode 100644 include/zfp.hpp create mode 100644 include/zfp/array.h create mode 100644 include/zfp/array.hpp create mode 100644 include/zfp/array1.hpp create mode 100644 include/zfp/array2.hpp create mode 100644 include/zfp/array3.hpp create mode 100644 include/zfp/array4.hpp rename include/{ => zfp}/bitstream.h (64%) rename src/inline/bitstream.c => include/zfp/bitstream.inl (73%) create mode 100644 include/zfp/codec/gencodec.hpp create mode 100644 include/zfp/codec/zfpcodec.hpp create mode 100644 include/zfp/constarray1.hpp create mode 100644 include/zfp/constarray2.hpp create mode 100644 include/zfp/constarray3.hpp create mode 100644 include/zfp/constarray4.hpp create mode 100644 include/zfp/factory.hpp create mode 100644 include/zfp/index.hpp rename array/zfp/cache.h => 
include/zfp/internal/array/cache.hpp (82%) create mode 100644 include/zfp/internal/array/cache1.hpp create mode 100644 include/zfp/internal/array/cache2.hpp create mode 100644 include/zfp/internal/array/cache3.hpp create mode 100644 include/zfp/internal/array/cache4.hpp create mode 100644 include/zfp/internal/array/exception.hpp create mode 100644 include/zfp/internal/array/handle1.hpp create mode 100644 include/zfp/internal/array/handle2.hpp create mode 100644 include/zfp/internal/array/handle3.hpp create mode 100644 include/zfp/internal/array/handle4.hpp create mode 100644 include/zfp/internal/array/header.hpp create mode 100644 include/zfp/internal/array/iterator1.hpp create mode 100644 include/zfp/internal/array/iterator2.hpp create mode 100644 include/zfp/internal/array/iterator3.hpp create mode 100644 include/zfp/internal/array/iterator4.hpp create mode 100644 include/zfp/internal/array/memory.hpp create mode 100644 include/zfp/internal/array/pointer1.hpp create mode 100644 include/zfp/internal/array/pointer2.hpp create mode 100644 include/zfp/internal/array/pointer3.hpp create mode 100644 include/zfp/internal/array/pointer4.hpp create mode 100644 include/zfp/internal/array/reference1.hpp create mode 100644 include/zfp/internal/array/reference2.hpp create mode 100644 include/zfp/internal/array/reference3.hpp create mode 100644 include/zfp/internal/array/reference4.hpp create mode 100644 include/zfp/internal/array/store.hpp create mode 100644 include/zfp/internal/array/store1.hpp create mode 100644 include/zfp/internal/array/store2.hpp create mode 100644 include/zfp/internal/array/store3.hpp create mode 100644 include/zfp/internal/array/store4.hpp create mode 100644 include/zfp/internal/array/traits.hpp create mode 100644 include/zfp/internal/array/view1.hpp create mode 100644 include/zfp/internal/array/view2.hpp create mode 100644 include/zfp/internal/array/view3.hpp create mode 100644 include/zfp/internal/array/view4.hpp create mode 100644 
include/zfp/internal/cfp/array1d.h create mode 100644 include/zfp/internal/cfp/array1f.h create mode 100644 include/zfp/internal/cfp/array2d.h create mode 100644 include/zfp/internal/cfp/array2f.h create mode 100644 include/zfp/internal/cfp/array3d.h create mode 100644 include/zfp/internal/cfp/array3f.h create mode 100644 include/zfp/internal/cfp/array4d.h create mode 100644 include/zfp/internal/cfp/array4f.h create mode 100644 include/zfp/internal/cfp/header.h create mode 100644 include/zfp/internal/codec/genheader.hpp create mode 100644 include/zfp/internal/codec/zfpheader.hpp rename {src/inline => include/zfp/internal/zfp}/inline.h (77%) rename include/zfp/{ => internal/zfp}/macros.h (100%) rename include/zfp/{ => internal/zfp}/system.h (59%) rename include/zfp/{ => internal/zfp}/types.h (77%) create mode 100644 include/zfp/version.h delete mode 100644 python/eyescale-cmake/FindNumPy.cmake delete mode 100644 python/eyescale-cmake/LICENSE.txt create mode 100644 python/scikit-build-cmake/FindNumPy.cmake create mode 100644 python/scikit-build-cmake/UsePythonExtensions.cmake create mode 100644 setup.py delete mode 100644 src/cuda_zfp/constant_setup.cuh create mode 100644 src/template/codec.c create mode 100644 tests/CMakeLists.txt.in create mode 100644 tests/array/CMakeLists.txt create mode 100644 tests/array/array/CMakeLists.txt create mode 100644 tests/array/array/testArray1Base.cpp create mode 100644 tests/array/array/testArray1ItersBase.cpp create mode 100644 tests/array/array/testArray1RefsBase.cpp create mode 100644 tests/array/array/testArray1ViewsBase.cpp create mode 100644 tests/array/array/testArray1d.cpp create mode 100644 tests/array/array/testArray1dIters.cpp create mode 100644 tests/array/array/testArray1dPtrs.cpp create mode 100644 tests/array/array/testArray1dRefs.cpp create mode 100644 tests/array/array/testArray1dViewIters.cpp create mode 100644 tests/array/array/testArray1dViewPtrs.cpp create mode 100644 tests/array/array/testArray1dViews.cpp 
create mode 100644 tests/array/array/testArray1f.cpp create mode 100644 tests/array/array/testArray1fIters.cpp create mode 100644 tests/array/array/testArray1fPtrs.cpp create mode 100644 tests/array/array/testArray1fRefs.cpp create mode 100644 tests/array/array/testArray1fViewIters.cpp create mode 100644 tests/array/array/testArray1fViewPtrs.cpp create mode 100644 tests/array/array/testArray1fViews.cpp create mode 100644 tests/array/array/testArray2Base.cpp create mode 100644 tests/array/array/testArray2ItersBase.cpp create mode 100644 tests/array/array/testArray2PtrsBase.cpp create mode 100644 tests/array/array/testArray2RefsBase.cpp create mode 100644 tests/array/array/testArray2ViewsBase.cpp create mode 100644 tests/array/array/testArray2d.cpp create mode 100644 tests/array/array/testArray2dIters.cpp create mode 100644 tests/array/array/testArray2dPtrs.cpp create mode 100644 tests/array/array/testArray2dRefs.cpp create mode 100644 tests/array/array/testArray2dViewIters.cpp create mode 100644 tests/array/array/testArray2dViewPtrs.cpp create mode 100644 tests/array/array/testArray2dViews.cpp create mode 100644 tests/array/array/testArray2f.cpp create mode 100644 tests/array/array/testArray2fIters.cpp create mode 100644 tests/array/array/testArray2fPtrs.cpp create mode 100644 tests/array/array/testArray2fRefs.cpp create mode 100644 tests/array/array/testArray2fViewIters.cpp create mode 100644 tests/array/array/testArray2fViewPtrs.cpp create mode 100644 tests/array/array/testArray2fViews.cpp create mode 100644 tests/array/array/testArray3Base.cpp create mode 100644 tests/array/array/testArray3ItersBase.cpp create mode 100644 tests/array/array/testArray3PtrsBase.cpp create mode 100644 tests/array/array/testArray3RefsBase.cpp create mode 100644 tests/array/array/testArray3ViewsBase.cpp create mode 100644 tests/array/array/testArray3d.cpp create mode 100644 tests/array/array/testArray3dIters.cpp create mode 100644 tests/array/array/testArray3dPtrs.cpp create mode 
100644 tests/array/array/testArray3dRefs.cpp create mode 100644 tests/array/array/testArray3dViewIters.cpp create mode 100644 tests/array/array/testArray3dViewPtrs.cpp create mode 100644 tests/array/array/testArray3dViews.cpp create mode 100644 tests/array/array/testArray3f.cpp create mode 100644 tests/array/array/testArray3fIters.cpp create mode 100644 tests/array/array/testArray3fPtrs.cpp create mode 100644 tests/array/array/testArray3fRefs.cpp create mode 100644 tests/array/array/testArray3fViewIters.cpp create mode 100644 tests/array/array/testArray3fViewPtrs.cpp create mode 100644 tests/array/array/testArray3fViews.cpp create mode 100644 tests/array/array/testArray4Base.cpp create mode 100644 tests/array/array/testArray4ItersBase.cpp create mode 100644 tests/array/array/testArray4PtrsBase.cpp create mode 100644 tests/array/array/testArray4RefsBase.cpp create mode 100644 tests/array/array/testArray4ViewsBase.cpp create mode 100644 tests/array/array/testArray4d.cpp create mode 100644 tests/array/array/testArray4dIters.cpp create mode 100644 tests/array/array/testArray4dPtrs.cpp create mode 100644 tests/array/array/testArray4dRefs.cpp create mode 100644 tests/array/array/testArray4dViewIters.cpp create mode 100644 tests/array/array/testArray4dViewPtrs.cpp create mode 100644 tests/array/array/testArray4dViews.cpp create mode 100644 tests/array/array/testArray4f.cpp create mode 100644 tests/array/array/testArray4fIters.cpp create mode 100644 tests/array/array/testArray4fPtrs.cpp create mode 100644 tests/array/array/testArray4fRefs.cpp create mode 100644 tests/array/array/testArray4fViewIters.cpp create mode 100644 tests/array/array/testArray4fViewPtrs.cpp create mode 100644 tests/array/array/testArray4fViews.cpp create mode 100644 tests/array/array/testArrayBase.cpp create mode 100644 tests/array/array/testArrayItersBase.cpp create mode 100644 tests/array/array/testArrayPtrsBase.cpp create mode 100644 tests/array/array/testArrayRefsBase.cpp create mode 100644 
tests/array/array/testArrayViewItersBase.cpp create mode 100644 tests/array/array/testArrayViewPtrsBase.cpp create mode 100644 tests/array/array/testArrayViewsBase.cpp create mode 100644 tests/array/array/testConstruct.cpp create mode 100644 tests/array/constArray/CMakeLists.txt create mode 100644 tests/array/constArray/testConstArray1Base.cpp create mode 100644 tests/array/constArray/testConstArray1d.cpp create mode 100644 tests/array/constArray/testConstArray1f.cpp create mode 100644 tests/array/constArray/testConstArray2Base.cpp create mode 100644 tests/array/constArray/testConstArray2d.cpp create mode 100644 tests/array/constArray/testConstArray2f.cpp create mode 100644 tests/array/constArray/testConstArray3Base.cpp create mode 100644 tests/array/constArray/testConstArray3d.cpp create mode 100644 tests/array/constArray/testConstArray3f.cpp create mode 100644 tests/array/constArray/testConstArray4Base.cpp create mode 100644 tests/array/constArray/testConstArray4d.cpp create mode 100644 tests/array/constArray/testConstArray4f.cpp create mode 100644 tests/array/constArray/testConstArrayBase.cpp create mode 100644 tests/array/decode/CMakeLists.txt create mode 100644 tests/array/decode/testTemplatedDecode1d.cpp create mode 100644 tests/array/decode/testTemplatedDecode1f.cpp create mode 100644 tests/array/decode/testTemplatedDecode2d.cpp create mode 100644 tests/array/decode/testTemplatedDecode2f.cpp create mode 100644 tests/array/decode/testTemplatedDecode3d.cpp create mode 100644 tests/array/decode/testTemplatedDecode3f.cpp create mode 100644 tests/array/decode/testTemplatedDecode4d.cpp create mode 100644 tests/array/decode/testTemplatedDecode4f.cpp create mode 100644 tests/array/decode/testTemplatedDecodeBase.cpp create mode 100644 tests/array/encode/CMakeLists.txt create mode 100644 tests/array/encode/testTemplatedEncode1d.cpp create mode 100644 tests/array/encode/testTemplatedEncode1f.cpp create mode 100644 tests/array/encode/testTemplatedEncode2d.cpp create 
mode 100644 tests/array/encode/testTemplatedEncode2f.cpp create mode 100644 tests/array/encode/testTemplatedEncode3d.cpp create mode 100644 tests/array/encode/testTemplatedEncode3f.cpp create mode 100644 tests/array/encode/testTemplatedEncode4d.cpp create mode 100644 tests/array/encode/testTemplatedEncode4f.cpp create mode 100644 tests/array/encode/testTemplatedEncodeBase.cpp create mode 100644 tests/array/utils/commonMacros.h create mode 100644 tests/array/utils/gtest1dTest.h create mode 100644 tests/array/utils/gtest1fTest.h create mode 100644 tests/array/utils/gtest2dTest.h create mode 100644 tests/array/utils/gtest2fTest.h create mode 100644 tests/array/utils/gtest3dTest.h create mode 100644 tests/array/utils/gtest3fTest.h create mode 100644 tests/array/utils/gtest4dTest.h create mode 100644 tests/array/utils/gtest4fTest.h create mode 100644 tests/array/utils/gtestBaseFixture.h create mode 100644 tests/array/utils/gtestCApiTest.h create mode 100644 tests/array/utils/gtestDoubleEnv.h create mode 100644 tests/array/utils/gtestFloatEnv.h create mode 100644 tests/array/utils/gtestSingleFixture.h create mode 100644 tests/array/utils/gtestTestEnv.h create mode 100644 tests/array/utils/predicates.h create mode 100644 tests/array/zfp/CMakeLists.txt create mode 100644 tests/array/zfp/testAlignedMemory.cpp create mode 100644 tests/cfp/CMakeLists.txt create mode 100644 tests/cfp/testCfpArray1_source.c create mode 100644 tests/cfp/testCfpArray1d.c create mode 100644 tests/cfp/testCfpArray1f.c create mode 100644 tests/cfp/testCfpArray2_source.c create mode 100644 tests/cfp/testCfpArray2d.c create mode 100644 tests/cfp/testCfpArray2f.c create mode 100644 tests/cfp/testCfpArray3_source.c create mode 100644 tests/cfp/testCfpArray3d.c create mode 100644 tests/cfp/testCfpArray3f.c create mode 100644 tests/cfp/testCfpArray4_source.c create mode 100644 tests/cfp/testCfpArray4d.c create mode 100644 tests/cfp/testCfpArray4f.c create mode 100644 tests/cfp/testCfpArray_source.c create 
mode 100644 tests/cfp/testCfpNamespace.c create mode 100644 tests/ci-utils/CMakeLists.txt create mode 100644 tests/constants/1dDouble.h create mode 100644 tests/constants/1dFloat.h create mode 100644 tests/constants/1dInt32.h create mode 100644 tests/constants/1dInt64.h create mode 100644 tests/constants/2dDouble.h create mode 100644 tests/constants/2dFloat.h create mode 100644 tests/constants/2dInt32.h create mode 100644 tests/constants/2dInt64.h create mode 100644 tests/constants/3dDouble.h create mode 100644 tests/constants/3dFloat.h create mode 100644 tests/constants/3dInt32.h create mode 100644 tests/constants/3dInt64.h create mode 100644 tests/constants/4dDouble.h create mode 100644 tests/constants/4dFloat.h create mode 100644 tests/constants/4dInt32.h create mode 100644 tests/constants/4dInt64.h create mode 100644 tests/constants/checksums/1dDouble.h create mode 100644 tests/constants/checksums/1dFloat.h create mode 100644 tests/constants/checksums/1dInt32.h create mode 100644 tests/constants/checksums/1dInt64.h create mode 100644 tests/constants/checksums/2dDouble.h create mode 100644 tests/constants/checksums/2dFloat.h create mode 100644 tests/constants/checksums/2dInt32.h create mode 100644 tests/constants/checksums/2dInt64.h create mode 100644 tests/constants/checksums/3dDouble.h create mode 100644 tests/constants/checksums/3dFloat.h create mode 100644 tests/constants/checksums/3dInt32.h create mode 100644 tests/constants/checksums/3dInt64.h create mode 100644 tests/constants/checksums/4dDouble.h create mode 100644 tests/constants/checksums/4dFloat.h create mode 100644 tests/constants/checksums/4dInt32.h create mode 100644 tests/constants/checksums/4dInt64.h create mode 100644 tests/constants/doubleConsts.h create mode 100644 tests/constants/floatConsts.h create mode 100644 tests/constants/int32Consts.h create mode 100644 tests/constants/int64Consts.h create mode 100644 tests/constants/universalConsts.h create mode 100644 tests/fortran/CMakeLists.txt 
create mode 100644 tests/fortran/testFortran.f create mode 100644 tests/gitlab/corona-jobs.yml create mode 100644 tests/gitlab/corona-templates.yml create mode 100644 tests/gitlab/gitlab-ci.yml create mode 100644 tests/gitlab/pascal-jobs.yml create mode 100644 tests/gitlab/pascal-templates.yml create mode 100644 tests/gitlab/quartz-jobs.yml create mode 100644 tests/gitlab/quartz-templates.yml create mode 100644 tests/python/CMakeLists.txt create mode 100644 tests/python/test_numpy.py create mode 100644 tests/python/test_utils.pyx create mode 100644 tests/src/CMakeLists.txt create mode 100644 tests/src/decode/CMakeLists.txt create mode 100644 tests/src/decode/testZfpDecodeBlock1dDouble.c create mode 100644 tests/src/decode/testZfpDecodeBlock1dFloat.c create mode 100644 tests/src/decode/testZfpDecodeBlock1dInt32.c create mode 100644 tests/src/decode/testZfpDecodeBlock1dInt64.c create mode 100644 tests/src/decode/testZfpDecodeBlock2dDouble.c create mode 100644 tests/src/decode/testZfpDecodeBlock2dFloat.c create mode 100644 tests/src/decode/testZfpDecodeBlock2dInt32.c create mode 100644 tests/src/decode/testZfpDecodeBlock2dInt64.c create mode 100644 tests/src/decode/testZfpDecodeBlock3dDouble.c create mode 100644 tests/src/decode/testZfpDecodeBlock3dFloat.c create mode 100644 tests/src/decode/testZfpDecodeBlock3dInt32.c create mode 100644 tests/src/decode/testZfpDecodeBlock3dInt64.c create mode 100644 tests/src/decode/testZfpDecodeBlock4dDouble.c create mode 100644 tests/src/decode/testZfpDecodeBlock4dFloat.c create mode 100644 tests/src/decode/testZfpDecodeBlock4dInt32.c create mode 100644 tests/src/decode/testZfpDecodeBlock4dInt64.c create mode 100644 tests/src/decode/testZfpDecodeBlockStrided1dDouble.c create mode 100644 tests/src/decode/testZfpDecodeBlockStrided1dFloat.c create mode 100644 tests/src/decode/testZfpDecodeBlockStrided1dInt32.c create mode 100644 tests/src/decode/testZfpDecodeBlockStrided1dInt64.c create mode 100644 
tests/src/decode/testZfpDecodeBlockStrided2dDouble.c create mode 100644 tests/src/decode/testZfpDecodeBlockStrided2dFloat.c create mode 100644 tests/src/decode/testZfpDecodeBlockStrided2dInt32.c create mode 100644 tests/src/decode/testZfpDecodeBlockStrided2dInt64.c create mode 100644 tests/src/decode/testZfpDecodeBlockStrided3dDouble.c create mode 100644 tests/src/decode/testZfpDecodeBlockStrided3dFloat.c create mode 100644 tests/src/decode/testZfpDecodeBlockStrided3dInt32.c create mode 100644 tests/src/decode/testZfpDecodeBlockStrided3dInt64.c create mode 100644 tests/src/decode/testZfpDecodeBlockStrided4dDouble.c create mode 100644 tests/src/decode/testZfpDecodeBlockStrided4dFloat.c create mode 100644 tests/src/decode/testZfpDecodeBlockStrided4dInt32.c create mode 100644 tests/src/decode/testZfpDecodeBlockStrided4dInt64.c create mode 100644 tests/src/decode/testcases/block.c create mode 100644 tests/src/decode/testcases/blockStrided.c create mode 100644 tests/src/decode/zfpDecodeBlockBase.c create mode 100644 tests/src/decode/zfpDecodeBlockStridedBase.c create mode 100644 tests/src/encode/CMakeLists.txt create mode 100644 tests/src/encode/testZfpEncodeBlock1dDouble.c create mode 100644 tests/src/encode/testZfpEncodeBlock1dFloat.c create mode 100644 tests/src/encode/testZfpEncodeBlock1dInt32.c create mode 100644 tests/src/encode/testZfpEncodeBlock1dInt64.c create mode 100644 tests/src/encode/testZfpEncodeBlock2dDouble.c create mode 100644 tests/src/encode/testZfpEncodeBlock2dFloat.c create mode 100644 tests/src/encode/testZfpEncodeBlock2dInt32.c create mode 100644 tests/src/encode/testZfpEncodeBlock2dInt64.c create mode 100644 tests/src/encode/testZfpEncodeBlock3dDouble.c create mode 100644 tests/src/encode/testZfpEncodeBlock3dFloat.c create mode 100644 tests/src/encode/testZfpEncodeBlock3dInt32.c create mode 100644 tests/src/encode/testZfpEncodeBlock3dInt64.c create mode 100644 tests/src/encode/testZfpEncodeBlock4dDouble.c create mode 100644 
tests/src/encode/testZfpEncodeBlock4dFloat.c create mode 100644 tests/src/encode/testZfpEncodeBlock4dInt32.c create mode 100644 tests/src/encode/testZfpEncodeBlock4dInt64.c create mode 100644 tests/src/encode/testZfpEncodeBlockStrided1dDouble.c create mode 100644 tests/src/encode/testZfpEncodeBlockStrided1dFloat.c create mode 100644 tests/src/encode/testZfpEncodeBlockStrided1dInt32.c create mode 100644 tests/src/encode/testZfpEncodeBlockStrided1dInt64.c create mode 100644 tests/src/encode/testZfpEncodeBlockStrided2dDouble.c create mode 100644 tests/src/encode/testZfpEncodeBlockStrided2dFloat.c create mode 100644 tests/src/encode/testZfpEncodeBlockStrided2dInt32.c create mode 100644 tests/src/encode/testZfpEncodeBlockStrided2dInt64.c create mode 100644 tests/src/encode/testZfpEncodeBlockStrided3dDouble.c create mode 100644 tests/src/encode/testZfpEncodeBlockStrided3dFloat.c create mode 100644 tests/src/encode/testZfpEncodeBlockStrided3dInt32.c create mode 100644 tests/src/encode/testZfpEncodeBlockStrided3dInt64.c create mode 100644 tests/src/encode/testZfpEncodeBlockStrided4dDouble.c create mode 100644 tests/src/encode/testZfpEncodeBlockStrided4dFloat.c create mode 100644 tests/src/encode/testZfpEncodeBlockStrided4dInt32.c create mode 100644 tests/src/encode/testZfpEncodeBlockStrided4dInt64.c create mode 100644 tests/src/encode/testcases/block.c create mode 100644 tests/src/encode/testcases/blockStrided.c create mode 100644 tests/src/encode/zfpEncodeBlockBase.c create mode 100644 tests/src/encode/zfpEncodeBlockStridedBase.c create mode 100644 tests/src/endtoend/CMakeLists.txt create mode 100644 tests/src/endtoend/cudaExecBase.c create mode 100644 tests/src/endtoend/ompExecBase.c create mode 100644 tests/src/endtoend/serialExecBase.c create mode 100644 tests/src/endtoend/testZfpCuda1dDouble.c create mode 100644 tests/src/endtoend/testZfpCuda1dFloat.c create mode 100644 tests/src/endtoend/testZfpCuda1dInt32.c create mode 100644 tests/src/endtoend/testZfpCuda1dInt64.c 
create mode 100644 tests/src/endtoend/testZfpCuda2dDouble.c create mode 100644 tests/src/endtoend/testZfpCuda2dFloat.c create mode 100644 tests/src/endtoend/testZfpCuda2dInt32.c create mode 100644 tests/src/endtoend/testZfpCuda2dInt64.c create mode 100644 tests/src/endtoend/testZfpCuda3dDouble.c create mode 100644 tests/src/endtoend/testZfpCuda3dFloat.c create mode 100644 tests/src/endtoend/testZfpCuda3dInt32.c create mode 100644 tests/src/endtoend/testZfpCuda3dInt64.c create mode 100644 tests/src/endtoend/testZfpCuda4dDouble.c create mode 100644 tests/src/endtoend/testZfpCuda4dFloat.c create mode 100644 tests/src/endtoend/testZfpCuda4dInt32.c create mode 100644 tests/src/endtoend/testZfpCuda4dInt64.c create mode 100644 tests/src/endtoend/testZfpOmp1dDouble.c create mode 100644 tests/src/endtoend/testZfpOmp1dFloat.c create mode 100644 tests/src/endtoend/testZfpOmp1dInt32.c create mode 100644 tests/src/endtoend/testZfpOmp1dInt64.c create mode 100644 tests/src/endtoend/testZfpOmp2dDouble.c create mode 100644 tests/src/endtoend/testZfpOmp2dFloat.c create mode 100644 tests/src/endtoend/testZfpOmp2dInt32.c create mode 100644 tests/src/endtoend/testZfpOmp2dInt64.c create mode 100644 tests/src/endtoend/testZfpOmp3dDouble.c create mode 100644 tests/src/endtoend/testZfpOmp3dFloat.c create mode 100644 tests/src/endtoend/testZfpOmp3dInt32.c create mode 100644 tests/src/endtoend/testZfpOmp3dInt64.c create mode 100644 tests/src/endtoend/testZfpOmp4dDouble.c create mode 100644 tests/src/endtoend/testZfpOmp4dFloat.c create mode 100644 tests/src/endtoend/testZfpOmp4dInt32.c create mode 100644 tests/src/endtoend/testZfpOmp4dInt64.c create mode 100644 tests/src/endtoend/testZfpSerial1dDouble.c create mode 100644 tests/src/endtoend/testZfpSerial1dFloat.c create mode 100644 tests/src/endtoend/testZfpSerial1dInt32.c create mode 100644 tests/src/endtoend/testZfpSerial1dInt64.c create mode 100644 tests/src/endtoend/testZfpSerial2dDouble.c create mode 100644 
tests/src/endtoend/testZfpSerial2dFloat.c create mode 100644 tests/src/endtoend/testZfpSerial2dInt32.c create mode 100644 tests/src/endtoend/testZfpSerial2dInt64.c create mode 100644 tests/src/endtoend/testZfpSerial3dDouble.c create mode 100644 tests/src/endtoend/testZfpSerial3dFloat.c create mode 100644 tests/src/endtoend/testZfpSerial3dInt32.c create mode 100644 tests/src/endtoend/testZfpSerial3dInt64.c create mode 100644 tests/src/endtoend/testZfpSerial4dDouble.c create mode 100644 tests/src/endtoend/testZfpSerial4dFloat.c create mode 100644 tests/src/endtoend/testZfpSerial4dInt32.c create mode 100644 tests/src/endtoend/testZfpSerial4dInt64.c create mode 100644 tests/src/endtoend/testcases/cuda.c create mode 100644 tests/src/endtoend/testcases/omp.c create mode 100644 tests/src/endtoend/testcases/serial.c create mode 100644 tests/src/endtoend/zfpEndtoendBase.c create mode 100644 tests/src/execPolicy/CMakeLists.txt create mode 100644 tests/src/execPolicy/testCuda.c create mode 100644 tests/src/execPolicy/testOmp.c create mode 100644 tests/src/execPolicy/testOmpInternal.c create mode 100644 tests/src/inline/CMakeLists.txt create mode 100644 tests/src/inline/testBitstream.c create mode 100644 tests/src/inline/testBitstreamSmallWsize.c create mode 100644 tests/src/inline/testBitstreamStrided.c create mode 100644 tests/src/misc/CMakeLists.txt create mode 100644 tests/src/misc/testZfpField1d.c create mode 100644 tests/src/misc/testZfpField1f.c create mode 100644 tests/src/misc/testZfpField2d.c create mode 100644 tests/src/misc/testZfpField2f.c create mode 100644 tests/src/misc/testZfpField3d.c create mode 100644 tests/src/misc/testZfpField3f.c create mode 100644 tests/src/misc/testZfpField4d.c create mode 100644 tests/src/misc/testZfpField4f.c create mode 100644 tests/src/misc/testZfpHeader.c create mode 100644 tests/src/misc/testZfpPromote.c create mode 100644 tests/src/misc/testZfpStream.c create mode 100644 tests/src/misc/zfpFieldBase.c create mode 100644 
tests/testviews.cpp
 create mode 100644 tests/utils/CMakeLists.txt
 create mode 100644 tests/utils/fixedpoint96.c
 create mode 100644 tests/utils/fixedpoint96.h
 create mode 100755 tests/utils/genChecksums.sh
 create mode 100644 tests/utils/genSmoothRandNums.c
 create mode 100644 tests/utils/genSmoothRandNums.h
 create mode 100644 tests/utils/rand32.c
 create mode 100644 tests/utils/rand32.h
 create mode 100644 tests/utils/rand64.c
 create mode 100644 tests/utils/rand64.h
 create mode 100644 tests/utils/stridedOperations.c
 create mode 100644 tests/utils/stridedOperations.h
 create mode 100644 tests/utils/testMacros.h
 create mode 100644 tests/utils/zfpChecksums.c
 create mode 100644 tests/utils/zfpChecksums.h
 create mode 100644 tests/utils/zfpCompressionParams.c
 create mode 100644 tests/utils/zfpCompressionParams.h
 create mode 100644 tests/utils/zfpHash.c
 create mode 100644 tests/utils/zfpHash.h
 create mode 100644 tests/utils/zfpTimer.c
 create mode 100644 tests/utils/zfpTimer.h
 delete mode 100755 travis.sh
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
new file mode 100644
index 00000000..7bb4cf76
--- /dev/null
+++ b/.github/dependabot.yml
@@ -0,0 +1,7 @@
+# https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
+version: 2
+updates:
+  - package-ecosystem: "github-actions"
+    directory: "/"
+    schedule:
+      interval: "weekly"
diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml
new file mode 100644
index 00000000..33c5b512
--- /dev/null
+++ b/.github/workflows/coverage.yml
@@ -0,0 +1,49 @@
+name: Coverage Report
+
+on:
+  workflow_run:
+    workflows: [Tests]
+    types: [completed]
+
+jobs:
+  coverage:
+    runs-on: ubuntu-latest
+    if: ${{ github.event.workflow_run.conclusion == 'success' }}
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Setup Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.x'
+          architecture: x64
+
+      - name: Install Dependencies
+        run: |-
+          sudo apt install lcov
+          python -m pip install lcov_cobertura
+
+      - name: Run CMake
+        run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_COMPILER=g++-11 -DCMAKE_C_COMPILER=gcc-11 -DCMAKE_CXX_FLAGS="-fprofile-arcs -ftest-coverage" -DCMAKE_C_FLAGS="-fprofile-arcs -ftest-coverage" -DBUILD_TESTING_FULL=ON -DBUILD_CFP=ON -DZFP_WITH_OPENMP=ON
+
+      - name: Build
+        run: cmake --build ${{github.workspace}}/build
+
+      - name: Run Tests
+        working-directory: ${{github.workspace}}/build
+        run: ctest -j 8
+
+      - name: Generate Coverage Report
+        working-directory: ${{github.workspace}}/build
+        run: |-
+          lcov -c --directory ${{github.workspace}}/build --output-file coverage.info
+          lcov --remove coverage.info '${{github.workspace}}/build/tests/*' --remove coverage.info '${{github.workspace}}/tests/*' --remove coverage.info '/usr/include/*' -o coverage.info
+          lcov_cobertura ${{github.workspace}}/build/coverage.info -d -o ${{github.workspace}}/build/coverage.xml
+
+      - name: Upload Report to Codecov
+        uses: codecov/codecov-action@v3
+        with:
+          files: ${{github.workspace}}/build/coverage.xml
+          env_vars: Actions
+          fail_ci_if_error: true
diff --git a/.github/workflows/debug-linux.yml b/.github/workflows/debug-linux.yml
new file mode 100644
index 00000000..62268a50
--- /dev/null
+++ b/.github/workflows/debug-linux.yml
@@ -0,0 +1,29 @@
+name: Debug (Linux)
+
+on: [workflow_dispatch]
+
+jobs:
+  debug:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout Zfp
+        uses: actions/checkout@v4
+
+      - name: Setup Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.x'
+          architecture: x64
+
+      - name: Install Zfpy Dependencies
+        run: |
+          python -m pip install cython
+          python -m pip install oldest-supported-numpy
+          python -m pip install setuptools
+
+      - name: Install OpenMP
+        run: |
+          sudo apt-get update; sudo apt-get install -y libomp5 libomp-dev
+
+      - name: Setup Tmate Session
+        uses: mxschmitt/action-tmate@v3
diff --git a/.github/workflows/debug-macos.yml b/.github/workflows/debug-macos.yml
new file mode 100644
index 00000000..36fbb976
--- /dev/null
+++ b/.github/workflows/debug-macos.yml
@@ -0,0 +1,29 @@
+name: Debug (MacOS)
+
+on: [workflow_dispatch]
+
+jobs:
+  debug:
+    runs-on: macos-latest
+    steps:
+      - name: Checkout Zfp
+        uses: actions/checkout@v4
+
+      - name: Setup Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.x'
+          architecture: x64
+
+      - name: Install Zfpy Dependencies
+        run: |
+          python -m pip install cython
+          python -m pip install oldest-supported-numpy
+          python -m pip install setuptools
+
+      - name: Install OpenMP
+        run: |
+          brew install libomp
+
+      - name: Setup Tmate Session
+        uses: mxschmitt/action-tmate@v3
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
new file mode 100644
index 00000000..51c56e9a
--- /dev/null
+++ b/.github/workflows/tests.yml
@@ -0,0 +1,78 @@
+name: Tests
+
+on: push
+
+env:
+  BUILD_TYPE: Release
+
+jobs:
+  build:
+    runs-on: ${{matrix.os}}
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - os: ubuntu-latest
+            cxx_compiler: g++-10
+            c_compiler: gcc-10
+            omp: ON
+            target: all
+
+          - os: ubuntu-latest
+            cxx_compiler: clang++
+            c_compiler: clang
+            omp: ON
+            target: all
+
+          - os: macos-latest
+            cxx_compiler: g++-11
+            c_compiler: gcc-11
+            omp: ON
+            target: all
+
+          - os: macos-latest
+            cxx_compiler: clang++
+            c_compiler: clang
+            omp: OFF
+            target: all
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Setup Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.x'
+          architecture: x64
+
+      - name: Install zfpy dependencies
+        run: |
+          python -m pip install cython
+          python -m pip install oldest-supported-numpy
+          python -m pip install setuptools
+
+      - name: Setup OpenMP (Linux)
+        if: ${{matrix.os == 'ubuntu-latest' && matrix.cxx_compiler == 'clang++'}}
+        run: sudo apt-get update; sudo apt-get install -y libomp5 libomp-dev
+
+      - name: Setup OpenMP (MacOS)
+        if: ${{matrix.os == 'macos-latest'}}
+        run: |
+          brew install libomp
+          echo "CC=$(brew --prefix llvm)/bin/clang" >> $GITHUB_ENV
+          echo "CXX=$(brew --prefix llvm)/bin/clang++" >> $GITHUB_ENV
+          echo "LDFLAGS=\"-L$(brew --prefix llvm)/lib\"" >> $GITHUB_ENV
+          echo "CPPFLAGS=\"-I$(brew --prefix llvm)/include\"" >> $GITHUB_ENV
+
+      - name: Run CMake
+        id: cmake
+        run: cmake -B ${{github.workspace}}/build ${{matrix.generator}} -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_CXX_COMPILER=${{matrix.cxx_compiler}} -DCMAKE_C_COMPILER=${{matrix.c_compiler}} -DBUILD_TESTING_FULL=ON -DZFP_WITH_OPENMP=${{matrix.omp}} -DBUILD_ZFPY=ON -DPYTHON_INCLUDE_DIR=$(python -c "from distutils.sysconfig import get_python_inc; print(get_python_inc())") -DPYTHON_LIBRARY=$(python -c "import distutils.sysconfig as sysconfig; print(sysconfig.get_config_var('LIBDIR'))")
+
+      - name: Build
+        id: build
+        run: cmake --build ${{github.workspace}}/build --target ${{matrix.target}} --config ${{env.BUILD_TYPE}}
+
+      - name: Run Tests
+        id: test
+        working-directory: ${{github.workspace}}/build
+        run: ctest -C ${{env.BUILD_TYPE}} -VV
diff --git a/.gitignore b/.gitignore
index 581f2abe..66f13148 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,7 @@
 bin
 build
 lib
+dist
+wheelhouse
+zfpy.egg-info
+modules
diff --git a/.readthedocs.yaml b/.readthedocs.yaml
new file mode 100644
index 00000000..b6421ddd
--- /dev/null
+++ b/.readthedocs.yaml
@@ -0,0 +1,35 @@
+# Read the Docs configuration file for Sphinx projects
+# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
+
+# Required
+version: 2
+
+# Set the OS, Python version and other tools you might need
+build:
+  os: ubuntu-22.04
+  tools:
+    python: "3.12"
+  # You can also specify other tool versions:
+  # nodejs: "20"
+  # rust: "1.70"
+  # golang: "1.20"
+
+# Build documentation in the "docs/" directory with Sphinx
+sphinx:
+  configuration: docs/source/conf.py
+  # You can configure Sphinx to use a different builder, for instance use the dirhtml builder for simpler URLs
+  # builder:
"dirhtml" + # Fail on all warnings to avoid broken references + # fail_on_warning: true + +# Optionally build your docs in additional formats such as PDF and ePub +formats: + - pdf +# - epub + +# Optional but recommended, declare the Python requirements required +# to build your documentation +# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html +python: + install: + - requirements: docs/requirements.txt diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 40287688..00000000 --- a/.travis.yml +++ /dev/null @@ -1,303 +0,0 @@ -language: - - generic - -matrix: - include: - - os: linux - dist: xenial - compiler: gcc-6 - addons: - apt: - sources: ubuntu-toolchain-r-test - packages: - - gcc-6 - - g++-6 - - gfortran-6 - - libpython3.5-dev - - cython3 - - python3-numpy - env: CC='gcc-6' CXX='g++-6' FC='gfortran-6' FORTRAN_STANDARD='2003' PYTHON_VERSION='3.5' COVERAGE='ON' - - - os: linux - dist: xenial - compiler: clang-3.6 - addons: &clang36 - apt: - sources: ['ubuntu-toolchain-r-test', 'llvm-toolchain-trusty'] - packages: - - clang-3.6 - - g++-7 - - gfortran-6 - - libpython3.5-dev - - cython3 - - python3-numpy - env: CC='clang-3.6' CXX='clang++-3.6' FC='gfortran-6' FORTRAN_STANDARD='2003' PYTHON_VERSION='3.5' - - - os: linux - dist: xenial - compiler: clang-4.0 - before_install: - - export LD_LIBRARY_PATH=/usr/local/clang/lib:$LD_LIBRARY_PATH - addons: &clang40 - apt: - sources: ['ubuntu-toolchain-r-test', 'llvm-toolchain-trusty-4.0'] - packages: - - clang-4.0 - - g++-7 - - gfortran-6 - - libpython3.5-dev - - cython3 - - python3-numpy - env: CC='clang-4.0' CXX='clang++-4.0' FC='gfortran-6' FORTRAN_STANDARD='2003' PYTHON_VERSION='3.5' - - - os: linux - dist: xenial - compiler: gcc-4.4 - addons: - apt: - sources: ubuntu-toolchain-r-test - packages: - - gcc-4.4 - - g++-4.4 - - gfortran-4.4 - - libpython3.5-dev - - cython3 - - python3-numpy - env: CC='gcc-4.4' CXX='g++-4.4' FC='gfortran-4.4' FORTRAN_STANDARD='2003' PYTHON_VERSION='3.5' 
- - - os: linux - dist: xenial - compiler: gcc-4.7 - addons: - apt: - sources: ubuntu-toolchain-r-test - packages: - - gcc-4.7 - - g++-4.7 - - gfortran-4.7 - - libpython3.5-dev - - cython3 - - python3-numpy - env: CC='gcc-4.7' CXX='g++-4.7' FC='gfortran-4.7' FORTRAN_STANDARD='2003' PYTHON_VERSION='3.5' - - - os: linux - dist: xenial - compiler: gcc-4.8 - addons: - apt: - sources: ubuntu-toolchain-r-test - packages: - - gcc-4.8 - - g++-4.8 - - gfortran-4.8 - - libpython3.5-dev - - cython3 - - python3-numpy - env: CC='gcc-4.8' CXX='g++-4.8' FC='gfortran-4.8' FORTRAN_STANDARD='2003' PYTHON_VERSION='3.5' - - - os: linux - dist: xenial - compiler: gcc-4.9 - addons: - apt: - sources: ubuntu-toolchain-r-test - packages: - - gcc-4.9 - - g++-4.9 - - gfortran-4.9 - - libpython3.5-dev - - cython3 - - python3-numpy - env: CC='gcc-4.9' CXX='g++-4.9' FC='gfortran-4.9' FORTRAN_STANDARD='2003' PYTHON_VERSION='3.5' - - - os: linux - dist: trusty - compiler: gcc-5 - addons: - apt: - sources: ubuntu-toolchain-r-test - packages: - - gcc-5 - - g++-5 - - gfortran-5 - - libpython2.7 - - python-pip - env: CC='gcc-5' CXX='g++-5' FC='gfortran-5' FORTRAN_STANDARD='2003' PYTHON_VERSION='2.7' - - - os: linux - dist: trusty - compiler: gcc-6 - addons: - apt: - sources: ubuntu-toolchain-r-test - packages: - - gcc-6 - - g++-6 - - gfortran-6 - - libpython2.7 - - python-pip - env: CC='gcc-6' CXX='g++-6' FC='gfortran-6' FORTRAN_STANDARD='2003' PYTHON_VERSION='2.7' - - - os: linux - dist: xenial - compiler: gcc-6 - addons: - apt: - sources: ubuntu-toolchain-r-test - packages: - - gcc-6 - - g++-6 - - gfortran-6 - - libpython3.5-dev - - cython3 - - python3-numpy - env: CC='gcc-6' CXX='g++-6' FC='gfortran-6' FORTRAN_STANDARD='2003' PYTHON_VERSION='3.5' C_STANDARD='90' - - - os: linux - dist: xenial - compiler: gcc-6 - addons: - apt: - sources: ubuntu-toolchain-r-test - packages: - - gcc-6 - - g++-6 - - gfortran-6 - - libpython3.5-dev - - cython3 - - python3-numpy - env: CC='gcc-6' CXX='g++-6' 
FC='gfortran-6' FORTRAN_STANDARD='2003' PYTHON_VERSION='3.5' C_STANDARD='11' - - - os: linux - dist: xenial - compiler: gcc-6 - addons: - apt: - sources: ubuntu-toolchain-r-test - packages: - - gcc-6 - - g++-6 - - gfortran-6 - - libpython3.5-dev - - cython3 - - python3-numpy - env: CC='gcc-6' CXX='g++-6' FC='gfortran-6' FORTRAN_STANDARD='2003' PYTHON_VERSION='3.5' CXX_STANDARD='11' - - - os: linux - dist: xenial - compiler: gcc-6 - addons: - apt: - sources: ubuntu-toolchain-r-test - packages: - - gcc-6 - - g++-6 - - gfortran-6 - - libpython3.5 - - cython3 - - python3-numpy - env: CC='gcc-6' CXX='g++-6' FC='gfortran-6' FORTRAN_STANDARD='2003' PYTHON_VERSION='3.5' CXX_STANDARD='14' - - - os: linux - dist: xenial - compiler: gcc-6 - addons: - apt: - sources: ubuntu-toolchain-r-test - packages: - - gcc-6 - - g++-6 - - gfortran-6 - - libpython3.5 - - cython3 - - python3-numpy - env: CC='gcc-6' CXX='g++-6' FC='gfortran-6' FORTRAN_STANDARD='2008' PYTHON_VERSION='3.5' - - - os: linux - dist: xenial - compiler: gcc-7 - addons: - apt: - sources: ubuntu-toolchain-r-test - packages: - - gcc-7 - - g++-7 - - gfortran-7 - - libpython3.5 - - cython3 - - python3-numpy - env: CC='gcc-7' CXX='g++-7' FC='gfortran-7' FORTRAN_STANDARD='2008' PYTHON_VERSION='3.5' - - - os: osx - osx_image: xcode7.3 - compiler: gcc - env: CC='gcc' CXX='g++' PYTHON_VERSION='3.5' - - - os: osx - osx_image: xcode8.3 - compiler: gcc - env: CC='gcc' CXX='g++' PYTHON_VERSION='2.7' - - - os: osx - osx_image: xcode7.3 - compiler: clang - env: CC='clang' CXX='clang++' PYTHON_VERSION='3.5' - - - os: osx - osx_image: xcode8.3 - compiler: clang - env: CC='clang' CXX='clang++' PYTHON_VERSION='2.7' - -script: - - if [ "$TRAVIS_OS_NAME" == "osx" ]; then pyenv root; fi - - | - if [ "$TRAVIS_OS_NAME" = "osx" ] && [ "$PYTHON_VERSION" = "2.7" ]; then - pyenv install 2.7.12; - export PYTHON_INCLUDE_DIR=$(pyenv root)/versions/2.7.12/include/python2.7; - export PYTHON_LIBRARY=$(pyenv 
root)/versions/2.7.12/lib/libpython2.7.dylib; - export PYTHON_EXECUTABLE=$(pyenv root)/versions/2.7.12/bin/python2.7; - fi - - | - if [ "$TRAVIS_OS_NAME" = "osx" ] && [ "$PYTHON_VERSION" = "3.5" ]; then - pyenv install 3.5.0; - export PYTHON_INCLUDE_DIR=$(pyenv root)/versions/3.5.0/include/python3.5m; - export PYTHON_LIBRARY=$(pyenv root)/versions/3.5.0/lib/libpython3.5m.a; - export PYTHON_EXECUTABLE=$(pyenv root)/versions/3.5.0/bin/python3.5m; - fi - - | - if [ "$TRAVIS_OS_NAME" == "osx" ]; then - $PYTHON_EXECUTABLE -m pip install --upgrade pip; - $PYTHON_EXECUTABLE -m pip install -r ${TRAVIS_BUILD_DIR}/python/requirements.txt; - fi - - - | - if [ "$TRAVIS_OS_NAME" = "linux" ]; then - export PYTHON_EXECUTABLE=/usr/bin/python$PYTHON_VERSION; - source /etc/lsb-release; - fi - - | - if [ "$TRAVIS_OS_NAME" = "linux" ] && [ "$PYTHON_VERSION" = "2.7" ]; then - export PYTHON_INCLUDE_DIR=/usr/include/python2.7; - export PYTHON_LIBRARY=/usr/lib/x86_64-linux-gnu/libpython2.7.so; - fi - - | - if [ "$TRAVIS_OS_NAME" = "linux" ] && [ "$PYTHON_VERSION" = "3.5" ]; then - export PYTHON_INCLUDE_DIR=/usr/include/python3.5m; - export PYTHON_LIBRARY=/usr/lib/x86_64-linux-gnu/libpython3.5m.so; - fi - - | - if [ "$TRAVIS_OS_NAME" = "linux" ] && [ "$DISTRIB_CODENAME" = "trusty" ] && [ "$PYTHON_VERSION" = "2.7" ]; then - sudo $PYTHON_EXECUTABLE -m pip install --upgrade pip; - sudo $PYTHON_EXECUTABLE -m pip install -r ${TRAVIS_BUILD_DIR}/python/requirements.txt; - fi - - | - if [ "$TRAVIS_OS_NAME" = "linux" ] && [ "$DISTRIB_CODENAME" = "trusty" ] && [ "$PYTHON_VERSION" = "3.5" ]; then - echo "Python 3.5 not supported on Ubuntu Trusty"; - exit 1; - fi - - - printenv | grep PYTHON - - ./travis.sh - -after_success: - - if [[ -n "${COVERAGE}" ]]; then bash <(curl -s https://codecov.io/bash); fi diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 00000000..96aa9216 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,432 @@ +Change Log +========== + +--- + +## 1.0.1 (2023-12-15) + +This 
patch release primarily addresses minor bug fixes and is needed to update
+the zfpy Python wheels.
+
+### Added
+
+- A new build macro, `BUILD_TESTING_FULL`, specifies that all unit tests be
+  built; `BUILD_TESTING` produces a smaller subset of tests. Full tests and
+  documentation are now included in releases.
+
+### Fixed
+
+- #169: `libm` dependency is not always correctly detected.
+- #171: `ptrdiff_t` is not always imported in Cython.
+- #176: cfp API is not exposed via CMake configuration file.
+- #177: Full test suite is not included in release.
+- #181: `rpath` is not set correctly in executables.
+- #204: Array strides are not passed by value in zFORp.
+- #220: Errors reported with scikit-build when building zfpy.
+
+---
+
+## 1.0.0 (2022-08-01)
+
+This release is not ABI compatible with prior releases due to numerous changes
+to function signatures and data structures like `zfp_field`. However, few of
+the API changes, other than to the cfp C API for compressed arrays, should
+impact existing code. Note that numerous header files have been renamed or
+moved relative to prior versions.
+
+### Added
+
+- `zfp::const_array`: read-only variable-rate array that supports
+  fixed-precision, fixed-accuracy, and reversible modes.
+- Compressed-array classes for 4D data.
+- `const` versions of array references, pointers, and iterators.
+- A more complete API for pointers and iterators.
+- cfp support for proxy references and pointers, iterators, and
+  (de)serialization.
+- Support for pointers and iterators into array views.
+- `zfp::array::size_bytes()` allows querying the size of different components
+  of an array object (e.g., payload, cache, index, metadata, ...).
+- Templated C++ wrappers around the low-level C API.
+- A generic codec for storing blocks of uncompressed scalars in zfp's
+  C++ arrays.
+- Additional functions for querying `zfp_field` and `zfp_stream` structs.
+- `zfp_config`: struct that encapsulates compression mode and parameters.
+- Rounding modes for reducing bias in compression errors.
+- New examples: `array`, `iteratorC`, and `ppm`.
+
+### Changed
+
+- Headers from `array/`, `cfp/include/`, and `include/` have been renamed
+  and reorganized into a common `include/` directory.
+  - The libzfp API is now confined to `zfp.h`, `zfp.hpp`, and `zfp.mod`
+    for C, C++, and Fortran bindings, respectively. These all appear in
+    the top-level `include/` directory upon installation.
+  - C++ headers now use a `.hpp` suffix; C headers use a `.h` suffix.
+  - C++ headers like `array/zfparray.h` have been renamed `zfp/array.hpp`.
+  - C headers like `cfp/include/cfparrays.h` have been renamed `zfp/array.h`.
+- `size_t` and `ptrdiff_t` replace `uint` and `int` for array sizes and
+  strides in the array classes and C/Fortran APIs.
+- `zfp_bool` replaces `int` as Boolean type in the C API.
+- `bitstream_offset` and `bitstream_size` replace `size_t` to ensure support
+  for 64-bit offsets into and lengths of bit streams. Consequently, the
+  `bitstream` API has changed accordingly.
+- All array and view iterators are now random-access iterators.
+- Array inspectors now return `const_reference` rather than a scalar
+  type like `float` to allow obtaining a `const_pointer` to an element
+  of an immutable array.
+- `zfp::array::compressed_data()` now returns `void*` instead of `uchar*`.
+- The array (de)serialization API has been revised, resulting in new
+  `zfp::array::header` and `zfp::exception` classes with new exception
+  messages.
+- The array `codec` class is now responsible for all details regarding
+  compression.
+- The compressed-array C++ implementation has been completely refactored to
+  make it more modular, extensible, and reusable across array types.
+- Array block shapes are now computed on the fly rather than stored.
+- The cfp C API now wraps array objects in structs.
+- The zfpy Python API now supports the more general `memoryview` over
+  `bytes` objects for decompression.
+- The zFORp Fortran module name is now `zfp` instead of `zforp_module`.
+- Some command-line options for the `diffusion` example have changed.
+- CMake 3.9 or later is now required for CMake builds.
+
+### Removed
+
+- `zfp::array::get_header()` has been replaced with a `zfp::array::header`
+  constructor that accepts an array object.
+- `ZFP_VERSION_RELEASE` is no longer defined (use `ZFP_VERSION_PATCH`).
+
+### Fixed
+
+- #66: `make install` overwrites googletest.
+- #84: Incorrect order of parameters in CUDA `memset()`.
+- #86: C++ compiler warns when `__STDC_VERSION__` is undefined.
+- #87: `CXXFLAGS` is misspelled in `cfp/src/Makefile`.
+- #98: `zfp_stream_maximum_size()` underestimates size in reversible mode.
+- #99: Incorrect `private_view` reads due to missing writeback.
+- #109: Unused CPython array is incompatible with PyPy.
+- #112: PGI compiler bug causes issues with memory alignment.
+- #119: All-subnormal blocks may cause floating-point overflow.
+- #121: CUDA bit offsets are limited to 32 bits.
+- #122: `make install` does not install zfp command-line utility.
+- #125: OpenMP bit offsets are limited to 32 bits.
+- #126: `make install` does not install Fortran module.
+- #127: Reversible mode reports incorrect compressed block size.
+- #150: cmocka tests do not build on macOS.
+- #154: Thread safety is broken in `private_view` and `private_const_view`.
+- `ZFP_MAX_BITS` is off by one.
+- `diffusionC`, `iteratorC` are not being built with `gmake`.
+
+---
+
+## 0.5.5 (2019-05-05)
+
+### Added
+
+- Support for reversible (lossless) compression of floating-point and
+  integer data.
+- Methods for serializing and deserializing zfp's compressed arrays.
+- Python bindings for compressing NumPy arrays.
+- Fortran bindings to zfp's high-level C API.
+
+### Changed
+
+- The default compressed-array cache size is now a function of the total
+  number of array elements, irrespective of array shape.
+ +### Fixed + +- Incorrect handling of execution policy in zfp utility. +- Incorrect handling of decompression via header in zfp utility. +- Incorrect cleanup of device memory in CUDA decompress. +- Missing tests for failing mallocs. +- CMake does not install CFP when built. +- `zfp_write_header()` and `zfp_field_metadata()` succeed even if array + dimensions are too large to fit in header. + +--- + +## 0.5.4 (2018-10-01) + +### Added + +- Support for CUDA fixed-rate compression and decompression. +- Views into compressed arrays for thread safety, nested array indexing, + slicing, and array subsetting. +- C language bindings for compressed arrays. +- Support for compressing and decompressing 4D data. + +### Changed + +- Execution policy now applies to both compression and decompression. +- Compressed array accessors now return Scalar type instead of + `const Scalar&` to avoid stale references to evicted cache lines. + +### Fixed + +- Incorrect handling of negative strides. +- Incorrect handling of arrays with more than 2^32 elements in zfp command-line + tool. +- `bitstream` is not C++ compatible. +- Minimum cache size request is not respected. + +--- + +## 0.5.3 (2018-03-28) + +### Added + +- Support for OpenMP multithreaded compression (but not decompression). +- Options for OpenMP execution in zfp command-line tool. +- Compressed-array support for copy construction and assignment via deep + copies. +- Virtual destructors to enable inheritance from zfp arrays. + +### Changed + +- `zfp_decompress()` now returns the number of compressed bytes processed so + far, i.e., the same value returned by `zfp_compress()`. + +--- + +## 0.5.2 (2017-09-28) + +### Added + +- Iterators and proxy objects for pointers and references. +- Example illustrating how to use iterators and pointers. + +### Changed + +- Diffusion example now optionally uses iterators. +- Moved internal headers under array to `array/zfp`. 
+- Modified 64-bit integer typedefs to avoid the C89 non-compliant `long long` + and allow for user-supplied types and literal suffixes. +- Renamed compile-time macros that did not have a `ZFP` prefix. +- Rewrote documentation in reStructuredText and added complete documentation + of all public functions, classes, types, and macros. + +### Fixed + +- Issue with setting stream word type via CMake. + +--- + +## 0.5.1 (2017-03-28) + +This release primarily fixes a few minor issues but also includes changes in +anticipation of a large number of planned future additions to the library. +No changes have been made to the compressed format, which is backwards +compatible with version 0.5.0. + +### Added + +- High-level API support for integer types. +- Example that illustrates in-place compression. +- Support for CMake builds. +- Documentation that discusses common issues with using zfp. + +### Changed + +- Separated library version from CODEC version and added version string. +- Corrected inconsistent naming of `BIT_STREAM` macros in code and + documentation. +- Renamed some of the header bit mask macros. +- `stream_skip()` and `stream_flush()` now return the number of bits skipped + or output. +- Renamed `stream_block()` and `stream_delta()` to make it clear that they + refer to strided streams. Added missing definition of + `stream_stride_block()`. +- Changed `int` and `uint` types in places to use `ptrdiff_t` and `size_t` + where appropriate. +- Changed API for `zfp_set_precision()` and `zfp_set_accuracy()` to not + require the scalar type. +- Added missing `static` keyword in `decode_block()`. +- Changed `testzfp` to allow specifying which tests to perform on the + command line. +- Modified directory structure. + +### Fixed + +- Bug that prevented defining uninitialized arrays. +- Incorrect computation of array sizes in `zfp_field_size()`. +- Minor issues that prevented code from compiling on Windows. 
+- Issue with fixed-accuracy headers that caused unnecessary storage. + +--- + +## 0.5.0 (2016-02-29) + +This version introduces backwards incompatible changes to the CODEC. + +### Added + +- Modified CODEC to more efficiently encode blocks whose values are all + zero or are smaller in magnitude than the absolute error tolerance. + This allows representing "empty" blocks using only one bit each. +- Added functions for compactly encoding the compression parameters + and field meta data, e.g., for producing self-contained compressed + streams. Also added functions for reading and writing a header + containing these parameters. + +### Changed + +- Changed behavior of `zfp_compress()` and `zfp_decompress()` to not + automatically rewind the bit stream. This makes it easier to concatenate + multiple compressed bit streams, e.g., when compressing vector fields or + multiple scalars together. +- Changed the zfp example program interface to allow reading and writing + compressed streams, optionally with a header. The zfp tool can now be + used to compress and decompress files as a stand alone utility. + +--- + +## 0.4.1 (2015-12-28) + +### Added + +- Added `simple.c` as a minimal example of how to call the compressor. + +### Changed + +- Changed compilation of diffusion example to output two executables: + one with and one without compression. + +### Fixed + +- Bug that caused segmentation fault when compressing 3D arrays whose + dimensions are not multiples of four. Specifically, arrays of dimensions + nx * ny * nz, with ny not a multiple of four, were not handled correctly. +- Modified `examples/fields.h` to ensure standard compliance. Previously, + C99 support was needed to handle the hex float constants, which are + not supported in C++98. + +--- + +## 0.4.0 (2015-12-05) + +This version contains substantial changes to the compression algorithm that +improve PSNR by about 6 dB and speed by a factor of 2-3. 
These changes are +not backward compatible with previous versions of zfp. + +### Added + +- Support for 31-bit and 63-bit integer data, as well as shorter integer types. +- New examples for evaluating the throughput of the (de)compressor and for + compressing grayscale images in the pgm format. +- Frequently asked questions. + +### Changed + +- Rewrote compression codec entirely in C to make linking and calling + easier from other programming languages, and to expose the low-level + interface through C instead of C++. This necessitated significant + changes to the API as well. +- Minor changes to the C++ compressed array API, as well as major + implementation changes to support the C library. The namespace and + public types are now all in lower case. + +### Removed + +- Support for general fixed-point decorrelating transforms. + +--- + +## 0.3.2 (2015-12-03) + +### Fixed + +- Bug in `Array::get()` that caused the wrong cached block to be looked up, + thus occasionally copying incorrect values back to parts of the array. + +--- + +## 0.3.1 (2015-05-06) + +### Fixed + +- Rare bug caused by exponent underflow in blocks with no normal and some + subnormal numbers. + +--- + +## 0.3.0 (2015-03-03) + +This version modifies the default decorrelating transform to one that uses +only additions and bit shifts. This new transform, in addition to being +faster, also has some theoretical optimality properties and tends to improve +rate distortion. This change is not backwards compatible. + +### Added + +- Compile-time support for parameterized transforms, e.g., to support other + popular transforms like DCT, HCT, and Walsh-Hadamard. +- Floating-point traits to reduce the number of template parameters. It is + now possible to declare a 3D array as `Array3`, for example. +- Functions for setting the array scalar type and dimensions. +- `testzfp` for regression testing. + +### Changed + +- Made forward transform range preserving: (-1, 1) is mapped to (-1, 1). 
+ Consequently Q1.62 fixed point can be used throughout. +- Changed the order in which bits are emitted within each bit plane to be more + intelligent. Group tests are now deferred until they are needed, i.e., just + before the value bits for the group being tested. This improves the quality + of fixed-rate encodings, but has no impact on compressed size. +- Made several optimizations to improve performance. +- Consolidated several header files. + +--- + +## 0.2.1 (2014-12-12) + +### Added + +- Win64 support via Microsoft Visual Studio compiler. +- Documentation of the expected output for the diffusion example. + +### Changed + +- Made several minor changes to suppress compiler warnings. + +### Fixed + +- Broken support for IBM's `xlc` compiler. + +--- + +## 0.2.0 (2014-12-02) + +The compression interface from `zfpcompress` was relocated to a separate +library, called `libzfp`, and modified to be callable from C. This API now +uses a parameter object (`zfp_params`) to specify array type and dimensions +as well as compression parameters. + +### Added + +- Several utility functions were added to simplify `libzfp` usage: + * Functions for setting the rate, precision, and accuracy. + Corresponding functions were also added to the `Codec` class. + * A function for estimating the buffer size needed for compression. +- The `Array` class functionality was expanded: + * Support for accessing the compressed bit stream stored with an array, + e.g., for offline compressed storage and for initializing an already + compressed array. + * Functions for dynamically specifying the cache size. + * The default cache is now direct-mapped instead of two-way associative. + +### Fixed + +- Corrected the value of the lowest possible bit plane to account for both + the smallest exponent and the number of bits in the significand. +- Corrected inconsistent use of rate and precision. 
The rate refers to the + number of compressed bits per floating-point value, while the precision + refers to the number of uncompressed bits. The `Array` API was changed + accordingly. + +--- + +## 0.1.0 (2014-11-12) + +Initial beta release. diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 00000000..6e36e7cb --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,17 @@ +cff-version: 1.1.0 +message: "If you use this software, please cite it as below." +authors: + - family-names: Lindstrom + given-names: Peter + orcid: https://orcid.org/0000-0003-3817-4199 +title: "Fixed-Rate Compressed Floating-Point Arrays" +journal: "IEEE Transactions on Visualization and Computer Graphics" +volume: 20 +number: 12 +start: 2674 +end: 2683 +year: 2014 +month: 12 +version: develop +doi: 10.1109/TVCG.2014.2346458 +date-released: 2014-11-05 diff --git a/CMakeLists.txt b/CMakeLists.txt index 47179fd8..52ae1584 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,8 +1,7 @@ -if(WIN32) - cmake_minimum_required(VERSION 3.4) -else() - cmake_minimum_required(VERSION 3.1) -endif() +cmake_minimum_required(VERSION 3.9) + +# Enable MACOSX_RPATH by default +cmake_policy(SET CMP0042 NEW) # Fail immediately if not using an out-of-source build if(CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_CURRENT_BINARY_DIR) @@ -14,15 +13,23 @@ endif() #------------------------------------------------------------------------------# # Parse version number from zfp.h #------------------------------------------------------------------------------# -file(READ ${CMAKE_CURRENT_SOURCE_DIR}/include/zfp.h _zfp_h_contents) +file(READ ${CMAKE_CURRENT_SOURCE_DIR}/include/zfp/version.h _zfp_h_contents) string(REGEX REPLACE ".*#define[ \t]+ZFP_VERSION_MAJOR[ \t]+([0-9]+).*" "\\1" ZFP_VERSION_MAJOR ${_zfp_h_contents}) string(REGEX REPLACE ".*#define[ \t]+ZFP_VERSION_MINOR[ \t]+([0-9]+).*" "\\1" ZFP_VERSION_MINOR ${_zfp_h_contents}) string(REGEX REPLACE ".*#define[ \t]+ZFP_VERSION_PATCH[ \t]+([0-9]+).*" "\\1" ZFP_VERSION_PATCH 
${_zfp_h_contents}) -set(ZFP_VERSION - "${ZFP_VERSION_MAJOR}.${ZFP_VERSION_MINOR}.${ZFP_VERSION_PATCH}") +string(REGEX REPLACE ".*#define[ \t]+ZFP_VERSION_TWEAK[ \t]+([0-9]+).*" + "\\1" ZFP_VERSION_TWEAK ${_zfp_h_contents}) + +if(${ZFP_VERSION_TWEAK} EQUAL 0) + set(ZFP_VERSION + "${ZFP_VERSION_MAJOR}.${ZFP_VERSION_MINOR}.${ZFP_VERSION_PATCH}") +else() + set(ZFP_VERSION + "${ZFP_VERSION_MAJOR}.${ZFP_VERSION_MINOR}.${ZFP_VERSION_PATCH}.${ZFP_VERSION_TWEAK}") +endif() project(ZFP VERSION ${ZFP_VERSION}) @@ -44,6 +51,9 @@ if(NOT CMAKE_ARCHIVE_OUTPUT_DIRECTORY) set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${ZFP_BINARY_DIR}/${CMAKE_INSTALL_LIBDIR}) endif() +# Setup RPath +set(CMAKE_INSTALL_RPATH ${CMAKE_INSTALL_FULL_LIBDIR}) + #------------------------------------------------------------------------------# # Top level options #------------------------------------------------------------------------------# @@ -54,7 +64,9 @@ if(MSVC) set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON) # Silence extraneous Visual Studio specific warnings - add_definitions(-D_CRT_SECURE_NO_WARNINGS -D_SCL_SECURE_NO_WARNINGS /wd4146 /wd4305) + add_definitions(-D_CRT_SECURE_NO_WARNINGS -D_SCL_SECURE_NO_WARNINGS) + add_compile_options(/wd4146) + add_compile_options(/wd4305) endif() # Suggest C99 @@ -108,19 +120,40 @@ set(ZFP_BIT_STREAM_WORD_SIZE 64 CACHE STRING "Use smaller bit stream word type for finer rate granularity") set_property(CACHE ZFP_BIT_STREAM_WORD_SIZE PROPERTY STRINGS "8;16;32;64") +if(CMAKE_C_COMPILER_ID MATCHES "PGI|NVHPC") + # Use default alignment to address PGI compiler bug. 
+ set(ZFP_CACHE_LINE_SIZE 0 CACHE STRING "Cache line alignment in bytes") + mark_as_advanced(ZFP_CACHE_LINE_SIZE) +endif() + +set(PPM_CHROMA 2 CACHE STRING "Chroma block dimensionality for ppm example") +set_property(CACHE PPM_CHROMA PROPERTY STRINGS "1;2") + +set(ZFP_ROUNDING_MODE ZFP_ROUND_NEVER CACHE STRING + "Rounding mode for reducing bias") +set_property(CACHE ZFP_ROUNDING_MODE PROPERTY STRINGS "ZFP_ROUND_NEVER;ZFP_ROUND_FIRST;ZFP_ROUND_LAST") + +option(ZFP_WITH_DAZ "Treat subnormals as zero to avoid overflow" OFF) + option(ZFP_WITH_CUDA "Enable CUDA parallel compression" OFF) -option(ZFP_WITH_BIT_STREAM_STRIDED - "Enable strided access for progressive zfp streams" OFF) +option(ZFP_WITH_BIT_STREAM_STRIDED "Enable strided access for progressive zfp streams" OFF) +mark_as_advanced(ZFP_WITH_BIT_STREAM_STRIDED) + +option(ZFP_WITH_TIGHT_ERROR "Reduce slack in absolute errors" OFF) option(ZFP_WITH_ALIGNED_ALLOC "Enable aligned memory allocation" OFF) +mark_as_advanced(ZFP_WITH_ALIGNED_ALLOC) option(ZFP_WITH_CACHE_TWOWAY "Use two-way skew-associative cache" OFF) +mark_as_advanced(ZFP_WITH_CACHE_TWOWAY) option(ZFP_WITH_CACHE_FAST_HASH "Use a faster but more collision prone hash function" OFF) +mark_as_advanced(ZFP_WITH_CACHE_FAST_HASH) option(ZFP_WITH_CACHE_PROFILE "Count cache misses" OFF) +mark_as_advanced(ZFP_WITH_CACHE_PROFILE) # Handle compile-time macros @@ -134,19 +167,30 @@ if((DEFINED ZFP_UINT64) AND (DEFINED ZFP_UINT64_SUFFIX)) list(APPEND zfp_public_defs ZFP_UINT64_SUFFIX=${ZFP_UINT64_SUFFIX}) endif() -# This odd cmake pattern here let's the OpenMP feature be either auto-detected, +# This odd cmake pattern here lets the OpenMP feature be either auto-detected, # explicitly enabled, or explicitly disabled, instead of just on or off. 
if(DEFINED ZFP_WITH_OPENMP) option(ZFP_WITH_OPENMP "Enable OpenMP parallel compression" ${ZFP_WITH_OPENMP}) if(ZFP_WITH_OPENMP) - find_package(OpenMP COMPONENTS C REQUIRED) + if(BUILD_EXAMPLES) + find_package(OpenMP COMPONENTS C CXX REQUIRED) + else() + find_package(OpenMP COMPONENTS C REQUIRED) + endif() endif() else() - find_package(OpenMP COMPONENTS C) + if(BUILD_EXAMPLES) + find_package(OpenMP COMPONENTS C CXX) + else() + find_package(OpenMP COMPONENTS C) + endif() option(ZFP_WITH_OPENMP "Enable OpenMP parallel compression" ${OPENMP_FOUND}) endif() +# Suppress CMake warning about unused variable in this file +set(TOUCH_UNUSED_VARIABLE ${ZFP_OMP_TESTS_ONLY}) + # Some compilers don't use explicit libraries on the link line for OpenMP but # instead need to treat the OpenMP C flags as both compile and link flags # i.e. -fopenmp for compiling and -lgomp for linking, use -fomp for both @@ -163,7 +207,7 @@ if(ZFP_WITH_CUDA) message(FATAL_ERROR "ZFP_WITH_CUDA is enabled, but a CUDA installation was not found.") endif() if(${CUDA_VERSION_MAJOR} LESS 7) - message(FATAL_ERROR "zfp requires at least CUDA 7.0.") + message(FATAL_ERROR "zfp requires at least CUDA 7.0.") endif() endif() @@ -171,10 +215,31 @@ if(NOT (ZFP_BIT_STREAM_WORD_SIZE EQUAL 64)) list(APPEND zfp_private_defs BIT_STREAM_WORD_TYPE=uint${ZFP_BIT_STREAM_WORD_SIZE}) endif() +if(DEFINED ZFP_CACHE_LINE_SIZE) + # Add to zfp_public_defs since many tests currently include files from src. 
+# list(APPEND zfp_public_defs ZFP_CACHE_LINE_SIZE=${ZFP_CACHE_LINE_SIZE}) + list(APPEND zfp_private_defs ZFP_CACHE_LINE_SIZE=${ZFP_CACHE_LINE_SIZE}) +endif() + if(ZFP_WITH_BIT_STREAM_STRIDED) list(APPEND zfp_public_defs BIT_STREAM_STRIDED) endif() +if(NOT (ZFP_ROUNDING_MODE EQUAL ZFP_ROUND_NEVER)) + list(APPEND zfp_private_defs ZFP_ROUNDING_MODE=${ZFP_ROUNDING_MODE}) +endif() + +if(ZFP_WITH_TIGHT_ERROR) + if((ZFP_ROUNDING_MODE EQUAL 0) OR (ZFP_ROUNDING_MODE STREQUAL ZFP_ROUND_NEVER)) + message(FATAL_ERROR "ZFP_WITH_TIGHT_ERROR requires ZFP_ROUND_FIRST or ZFP_ROUND_LAST rounding mode") + endif() + list(APPEND zfp_private_defs ZFP_WITH_TIGHT_ERROR) +endif() + +if(ZFP_WITH_DAZ) + list(APPEND zfp_private_defs ZFP_WITH_DAZ) +endif() + if(ZFP_WITH_ALIGNED_ALLOC) list(APPEND zfp_compressed_array_defs ZFP_WITH_ALIGNED_ALLOC) endif() @@ -191,12 +256,14 @@ if(ZFP_WITH_CACHE_PROFILE) list(APPEND zfp_compressed_array_defs ZFP_WITH_CACHE_PROFILE) endif() +list(APPEND ppm_private_defs PPM_CHROMA=${PPM_CHROMA}) + # Link libm only if necessary include(CheckCSourceCompiles) -check_c_source_compiles("#include <math.h>\nfloat f; int main(){sqrt(f);return 0;}" HAVE_MATH) +check_c_source_compiles("#include <math.h>\nint main(int n,char*v[]){return sqrt(n);}" HAVE_MATH) if(NOT HAVE_MATH) set(CMAKE_REQUIRED_LIBRARIES m) - check_c_source_compiles("#include <math.h>\nfloat f; int main(){sqrt(f);return 0;}" HAVE_LIBM_MATH) + check_c_source_compiles("#include <math.h>\nint main(int n,char*v[]){return sqrt(n);}" HAVE_LIBM_MATH) unset(CMAKE_REQUIRED_LIBRARIES) if(NOT HAVE_LIBM_MATH) message(FATAL_ERROR "Unable to use C math library functions (with or without -lm)") @@ -207,7 +274,7 @@ endif() # Add source code #------------------------------------------------------------------------------# include(CTest) -if(BUILD_TESTING) +if(BUILD_TESTING OR BUILD_TESTING_FULL) enable_testing() endif() @@ -251,18 +318,22 @@ if(BUILD_EXAMPLES) add_subdirectory(examples) endif() -if(BUILD_TESTING) +if(BUILD_TESTING OR BUILD_TESTING_FULL) + # 
Disable gtest install to prevent clobbering existing installations + option(INSTALL_GMOCK "Install Googlemock" OFF) + option(INSTALL_GTEST "Install Googletest" OFF) + add_subdirectory(tests) endif() #------------------------------------------------------------------------------# # Header install #------------------------------------------------------------------------------# -install(DIRECTORY include/ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) -install(DIRECTORY array/ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) - if(BUILD_CFP) - install(DIRECTORY cfp/include/ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) + install(DIRECTORY include/ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) +else() + install(DIRECTORY include/ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} + PATTERN "cfp" EXCLUDE) endif() #------------------------------------------------------------------------------# # Build type: one of None, Debug, Release, RelWithDebInfo, MinSizeRel diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 00000000..9bc8fa83 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,13 @@ +Contributing +============ + +The zfp project uses the +[Gitflow](https://nvie.com/posts/a-successful-git-branching-model/) +development model. Contributions should be made as pull requests on the +`develop` branch. Although this branch is under continuous development, +it should be robust enough to pass all regression tests. For contributions +that are not production ready, please [contact us](mailto:zfp@llnl.gov) to +have a separate branch created. The `master` branch is updated with each +release and reflects the most recent official release of zfp. See the +[Releases Page](https://github.com/LLNL/zfp/releases) for a history +of releases.
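As a reference for the build options wired up in this patch (full test suite, rounding modes, tight error bounds), a typical out-of-source CMake configure might look as follows; the option values are illustrative choices, not defaults:

```sh
# zfp fails immediately for in-source builds, so configure out of source
mkdir build && cd build

# BUILD_TESTING_FULL builds all unit tests (BUILD_TESTING builds a subset);
# ZFP_WITH_TIGHT_ERROR requires ZFP_ROUND_FIRST or ZFP_ROUND_LAST
cmake .. \
  -DBUILD_TESTING_FULL=ON \
  -DZFP_WITH_OPENMP=ON \
  -DZFP_ROUNDING_MODE=ZFP_ROUND_FIRST \
  -DZFP_WITH_TIGHT_ERROR=ON
cmake --build .
ctest
```

Leaving `ZFP_WITH_OPENMP` unset instead auto-detects OpenMP, per the configure logic above.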
diff --git a/CTestConfig.cmake b/CTestConfig.cmake index cbb9abcc..6e1eb54f 100644 --- a/CTestConfig.cmake +++ b/CTestConfig.cmake @@ -11,3 +11,6 @@ set(CTEST_DROP_METHOD "https") set(CTEST_DROP_SITE "open.cdash.org") set(CTEST_DROP_LOCATION "/submit.php?project=zfp") set(CTEST_DROP_SITE_CDASH TRUE) + +# Test Options +set(MEMORYCHECK_COMMAND_OPTIONS "--show-reachable=no") diff --git a/Config b/Config index 834bf252..cf0df65d 100644 --- a/Config +++ b/Config @@ -6,38 +6,54 @@ FC = gfortran # language standard ----------------------------------------------------------- -# CSTD = -std=c89 -Wno-unused-function +# CSTD = -std=c89 CSTD = -std=c99 CXXSTD = -std=c++98 # CXXSTD = -std=c++11 - FSTD = -std=f2003 -ffree-form -Wno-c-binding-type + FSTD = -std=f2018 -ffree-form -Wno-c-binding-type # common compiler options ----------------------------------------------------- -FLAGS = -O3 -fPIC -Wall -Wextra -pedantic -I../include +OPTFLAGS = -O3 +FLAGS = $(OPTFLAGS) -fPIC -pedantic -Wall -Wextra +LDFLAGS = SOFLAGS = -# macOS compiler options (uncomment on macOS) --------------------------------- - -# SOFLAGS += -undefined dynamic_lookup - # OpenMP compiler options ----------------------------------------------------- -# do not uncomment; use "make ZFP_WITH_OPENMP=0" to disable OpenMP +# do not comment out; use "make ZFP_WITH_OPENMP=0" to disable OpenMP OMPFLAGS = -fopenmp -# optional compiler macros ---------------------------------------------------- +# Apple clang OpenMP options +# OMPFLAGS = -Xclang -fopenmp -# use long long for 64-bit types -# DEFS += -DZFP_INT64='long long' -DZFP_INT64_SUFFIX='ll' -# DEFS += -DZFP_UINT64='unsigned long long' -DZFP_UINT64_SUFFIX='ull' +# optional compiler macros ---------------------------------------------------- -# use smaller bit stream word type for finer rate granularity +# use smaller bit stream word type for finer rate granularity; +# can be set on command line, e.g., "make BIT_STREAM_WORD_TYPE=uint8" # DEFS +=
-DBIT_STREAM_WORD_TYPE=uint8 # DEFS += -DBIT_STREAM_WORD_TYPE=uint16 # DEFS += -DBIT_STREAM_WORD_TYPE=uint32 # DEFS += -DBIT_STREAM_WORD_TYPE=uint64 +# reduce bias and slack in errors; can be set on command line, e.g., +# "make ZFP_ROUNDING_MODE=ZFP_ROUND_FIRST" +# DEFS += -DZFP_ROUNDING_MODE=ZFP_ROUND_NEVER +# DEFS += -DZFP_ROUNDING_MODE=ZFP_ROUND_FIRST +# DEFS += -DZFP_ROUNDING_MODE=ZFP_ROUND_LAST +# DEFS += -DZFP_WITH_TIGHT_ERROR + +# treat subnormals as zero to avoid overflow; can be set on command line, e.g., +# "make ZFP_WITH_DAZ=1" +# DEFS += -DZFP_WITH_DAZ + +# use long long for 64-bit types +# DEFS += -DZFP_INT64='long long' -DZFP_INT64_SUFFIX='ll' +# DEFS += -DZFP_UINT64='unsigned long long' -DZFP_UINT64_SUFFIX='ull' + +# cache alignment +# DEFS += -DZFP_CACHE_LINE_SIZE=256 + # enable strided access for progressive zfp streams # DEFS += -DBIT_STREAM_STRIDED @@ -85,7 +101,24 @@ else LIBCFP = libcfp.a endif -# conditionals ---------------------------------------------------------------- +# operating system and compiler dependent flags ------------------------------- + +# macOS configuration; compile with "make OS=mac" +ifeq ($(OS),mac) + SOFLAGS += -undefined dynamic_lookup +endif + +# suppress unused function warnings when compiling C89 +ifeq ($(CSTD),-std=c89) + FLAGS += -Wno-unused-function +endif + +# process macros set on the command line -------------------------------------- + +# bit stream word type +ifdef BIT_STREAM_WORD_TYPE + DEFS += -DBIT_STREAM_WORD_TYPE=$(BIT_STREAM_WORD_TYPE) +endif # enable OpenMP? 
ifdef ZFP_WITH_OPENMP @@ -96,6 +129,31 @@ ifdef ZFP_WITH_OPENMP endif endif +# treat subnormals as zero to avoid overflow +ifdef ZFP_WITH_DAZ + ifneq ($(ZFP_WITH_DAZ),0) + FLAGS += -DZFP_WITH_DAZ + endif +endif + +# rounding mode and slack in error +ifdef ZFP_ROUNDING_MODE + FLAGS += -DZFP_ROUNDING_MODE=$(ZFP_ROUNDING_MODE) + ifneq ($(ZFP_ROUNDING_MODE),0) + # tight error bound requires round-first or round-last mode + ifdef ZFP_WITH_TIGHT_ERROR + ifneq ($(ZFP_WITH_TIGHT_ERROR),0) + FLAGS += -DZFP_WITH_TIGHT_ERROR + endif + endif + endif +endif + +# chroma mode for ppm example +ifdef PPM_CHROMA + PPM_FLAGS += -DPPM_CHROMA=$(PPM_CHROMA) +endif + # compiler options ------------------------------------------------------------ CFLAGS = $(CSTD) $(FLAGS) $(DEFS) diff --git a/LICENSE b/LICENSE index 093449a3..3c726d29 100644 --- a/LICENSE +++ b/LICENSE @@ -1,57 +1,29 @@ -Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC. -Produced at the Lawrence Livermore National Laboratory. -Written by Peter Lindstrom, Markus Salasoo, Matt Larsen, and Stephen Herbein. -LLNL-CODE-663824. -All rights reserved. +BSD 3-Clause License -This file is part of the zfp library. -For details, see http://computation.llnl.gov/casc/zfp/. +Copyright (c) 2014-2023, Lawrence Livermore National Security, LLC +All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: -1. Redistributions of source code must retain the above copyright notice, -this list of conditions and the disclaimer below. +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. -2. Redistributions in binary form must reproduce the above copyright notice, -this list of conditions and the disclaimer (as noted below) in the -documentation and/or other materials provided with the distribution. 
+* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. -3. Neither the name of the LLNS/LLNL nor the names of its contributors may -be used to endorse or promote products derived from this software without -specific prior written permission. +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, -LLC, THE U.S. DEPARTMENT OF ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, -INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - -Additional BSD Notice - -1. This notice is required to be provided under our contract with the U.S. -Department of Energy (DOE). This work was produced at Lawrence Livermore -National Laboratory under Contract No. DE-AC52-07NA27344 with the DOE. - -2. 
Neither the United States Government nor Lawrence Livermore National -Security, LLC nor any of their employees, makes any warranty, express or -implied, or assumes any liability or responsibility for the accuracy, -completeness, or usefulness of any information, apparatus, product, or -process disclosed, or represents that its use would not infringe -privately-owned rights. - -3. Also, reference herein to any specific commercial products, process, or -services by trade name, trademark, manufacturer or otherwise does not -necessarily constitute or imply its endorsement, recommendation, or -favoring by the United States Government or Lawrence Livermore National -Security, LLC. The views and opinions of authors expressed herein do not -necessarily state or reflect those of the United States Government or -Lawrence Livermore National Security, LLC, and shall not be used for -advertising or product endorsement purposes. +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/Makefile b/Makefile
index bddc72ae..aacf7789 100644
--- a/Makefile
+++ b/Makefile
@@ -9,7 +9,7 @@ all:
 	@echo $(LIBRARY)
 	@cd src; $(MAKE) clean $(LIBRARY)
 ifneq ($(BUILD_CFP),0)
-	@cd cfp/src; $(MAKE) clean $(LIBRARY)
+	@cd cfp; $(MAKE) clean $(LIBRARY)
 endif
 ifneq ($(BUILD_ZFORP),0)
 	@cd fortran; $(MAKE) clean $(LIBRARY)
@@ -33,7 +33,7 @@ test:
 # clean all
 clean:
 	@cd src; $(MAKE) clean
-	@cd cfp/src; $(MAKE) clean
+	@cd cfp; $(MAKE) clean
 	@cd fortran; $(MAKE) clean
 	@cd utils; $(MAKE) clean
 	@cd tests; $(MAKE) clean
diff --git a/NOTICE b/NOTICE
new file mode 100644
index 00000000..3737d5a8
--- /dev/null
+++ b/NOTICE
@@ -0,0 +1,21 @@
+This work was produced under the auspices of the U.S. Department of
+Energy by Lawrence Livermore National Laboratory under Contract
+DE-AC52-07NA27344.
+
+This work was prepared as an account of work sponsored by an agency of
+the United States Government. Neither the United States Government nor
+Lawrence Livermore National Security, LLC, nor any of their employees
+makes any warranty, expressed or implied, or assumes any legal liability
+or responsibility for the accuracy, completeness, or usefulness of any
+information, apparatus, product, or process disclosed, or represents that
+its use would not infringe privately owned rights.
+
+Reference herein to any specific commercial product, process, or service
+by trade name, trademark, manufacturer, or otherwise does not necessarily
+constitute or imply its endorsement, recommendation, or favoring by the
+United States Government or Lawrence Livermore National Security, LLC.
+
+The views and opinions of authors expressed herein do not necessarily
+state or reflect those of the United States Government or Lawrence
+Livermore National Security, LLC, and shall not be used for advertising
+or product endorsement purposes.
diff --git a/README.md b/README.md index 2fd5fa32..5ee0ef63 100644 --- a/README.md +++ b/README.md @@ -1,142 +1,117 @@ ZFP === -[![Travis CI Build Status](https://travis-ci.org/LLNL/zfp.svg?branch=develop)](https://travis-ci.org/LLNL/zfp) -[![Appveyor Build Status](https://ci.appveyor.com/api/projects/status/github/LLNL/zfp?branch=develop&svg=true)](https://ci.appveyor.com/project/salasoom/zfp) -[![Documentation Status](https://readthedocs.org/projects/zfp/badge/?version=release0.5.5)](https://zfp.readthedocs.io/en/release0.5.5/?badge=release0.5.5) -[![Codecov](https://codecov.io/gh/LLNL/zfp/branch/develop/graph/badge.svg)](https://codecov.io/gh/LLNL/zfp) - -INTRODUCTION ------------- - -zfp is an open source C/C++ library for compressed numerical arrays that -support high throughput read and write random access. zfp also supports -streaming compression of integer and floating-point data, e.g., for -applications that read and write large data sets to and from disk. -zfp is primarily written in C and C++ but also includes Python and -Fortran bindings. - -zfp was developed at Lawrence Livermore National Laboratory and is loosely -based on the algorithm described in the following paper: - - Peter Lindstrom - "Fixed-Rate Compressed Floating-Point Arrays" - IEEE Transactions on Visualization and Computer Graphics - 20(12):2674-2683, December 2014 - doi:10.1109/TVCG.2014.2346458 - -zfp was originally designed for floating-point arrays only, but has been -extended to also support integer data and could for instance be used to -compress images and quantized volumetric data. To achieve high compression -ratios, zfp generally uses lossy but optionally error-bounded compression. -Bit-for-bit lossless compression is also possible through one of zfp's -compression modes. - -zfp works best for 2D and 3D arrays that exhibit spatial correlation, such as -continuous fields from physics simulations, images, regularly sampled terrain -surfaces, etc. 
Although zfp also provides a 1D array class that can be used -for 1D signals such as audio, or even unstructured floating-point streams, -the compression scheme has not been well optimized for this use case, and -rate and quality may not be competitive with floating-point compressors -designed specifically for 1D streams. zfp also supports compression of -4D arrays. - -zfp is freely available as open source under a BSD license, as outlined in -the file 'LICENSE'. For more information on zfp and comparisons with other -compressors, please see the -[zfp website](https://computation.llnl.gov/projects/floating-point-compression). -For bug reports, please consult the -[GitHub issue tracker](https://github.com/LLNL/zfp/issues). -For questions, comments, and requests, please contact -[Peter Lindstrom](mailto:pl@llnl.gov). - - -DOCUMENTATION -------------- +[![Github Actions Build Status](https://github.com/LLNL/zfp/workflows/Run%20Tests/badge.svg)](https://github.com/LLNL/zfp/actions/workflows/tests.yml) +[![Appveyor Build Status](https://ci.appveyor.com/api/projects/status/qb3ld7j11segy52k/branch/develop?svg=true)](https://ci.appveyor.com/project/lindstro/zfp) +[![Documentation Status](https://readthedocs.org/projects/zfp/badge/?version=release1.0.1)](https://zfp.readthedocs.io/en/release1.0.1/) +[![codecov](https://codecov.io/gh/LLNL/zfp/branch/develop/graph/badge.svg?token=jqvMVvgRQ9)](https://codecov.io/gh/LLNL/zfp) +[![R&D100 - Winner](https://img.shields.io/badge/R%26D100-Winner-gold)](https://www.rdworldonline.com/rd-100-winners-for-2023-are-announced-2/) -Full -[documentation](http://zfp.readthedocs.io/en/release0.5.5/) -is available online via Read the Docs. A -[PDF](http://readthedocs.org/projects/zfp/downloads/pdf/release0.5.5/) -version is also available. +zfp is a compressed format for representing multidimensional floating-point +and integer arrays. 
zfp provides compressed-array classes that support high +throughput read and write random access to individual array elements. zfp +also supports serial and parallel (OpenMP and CUDA) compression of whole +arrays, e.g., for applications that read and write large data sets to and +from disk. +zfp uses lossy but optionally error-bounded compression to achieve high +compression ratios. Bit-for-bit lossless compression is also possible +through one of zfp's compression modes. zfp works best for 2D, 3D, and 4D +arrays that exhibit spatial correlation, such as continuous fields from +physics simulations, natural images, regularly sampled terrain surfaces, etc. +zfp compression of 1D arrays is possible but generally discouraged. -INSTALLATION ------------- +zfp is freely available as open source and is distributed under a BSD license. +zfp is primarily written in C and C++ but also includes Python and Fortran +bindings. zfp conforms to various language standards, including C89, C99, +C11, C++98, C++11, and C++14, and is supported on Linux, macOS, and Windows. -zfp consists of three distinct parts: a compression library written in C; -a set of C++ header files with C wrappers that implement compressed arrays; -and a set of C and C++ examples. The main compression codec is written in -C and should conform to both the ISO C89 and C99 standards. The C++ array -classes are implemented entirely in header files and can be included as is, -but since they call the compression library, applications must link with -libzfp. -On Linux, macOS, and MinGW, zfp is easiest compiled using gcc and gmake. -CMake support is also available, e.g., for Windows builds. See below for -instructions on GNU and CMake builds. 
+Quick Start
+-----------
 
-zfp has successfully been built and tested using these compilers:
+To download zfp, type:
 
-    gcc versions 4.4.7, 4.9.4, 5.5.0, 6.1.0, 6.4.0, 7.1.0, 7.3.0, 8.1.0
-    icc versions 15.0.6, 16.0.4, 17.0.2, 18.0.2, 19.0.0
-    clang versions 3.9.1, 4.0.0, 5.0.0, 6.0.0
-    MinGW version 5.3.0
-    Visual Studio versions 14 (2015), 15 (2017)
+    git clone https://github.com/LLNL/zfp.git
 
-zfp conforms to various language standards, including C89, C99, C11,
-C++98, C++11, and C++14.
+zfp may be built using either [CMake](https://cmake.org/) or
+[GNU make](https://www.gnu.org/software/make/). To use CMake, type:
 
-NOTE: zfp requires 64-bit compiler and operating system support.
+    cd zfp
+    mkdir build
+    cd build
+    cmake ..
+    cmake --build . --config Release
+    ctest
 
-## GNU builds
+This builds the zfp library in the `build/lib` directory and the zfp
+command-line executable in the `build/bin` directory. It then runs
+the regression tests. The full test suite may be run by enabling the
+`BUILD_TESTING_FULL` CMake option during the build step.
 
-To build zfp using gcc, type
+zfp may also be built using GNU make:
 
+    cd zfp
     make
+    make test
 
-from this directory. This builds libzfp as a static library as well as
-utilities and example programs. See documentation for complete build
-instructions.
+Note: GNU builds are less flexible and do not support all available features,
+e.g., CUDA support.
 
-## CMake builds
+For further configuration and build instructions, please consult the
+[documentation](https://zfp.readthedocs.io/en/release1.0.1/installation.html).
+For examples of how to call the C library and use the C++ array classes,
+see the [examples](https://zfp.readthedocs.io/en/release1.0.1/examples.html)
+section.
 
-To build zfp using CMake on Linux or macOS, start a Unix shell and type
 
-    mkdir build
-    cd build
-    cmake ..
-    make
+Documentation
+-------------
 
-To also build the examples, replace the cmake line with
+Full HTML [documentation](http://zfp.readthedocs.io/en/release1.0.1) is
+available online.
+A [PDF](http://readthedocs.org/projects/zfp/downloads/pdf/release1.0.1/)
+version is also available.
 
-    cmake -DBUILD_EXAMPLES=ON ..
+Further information on the zfp software is included in these files:
 
-To build zfp using Visual Studio on Windows, start a DOS shell, cd to the
-top-level zfp directory, and type
+- Change log: see [CHANGELOG.md](./CHANGELOG.md).
+- Support and additional resources: see [SUPPORT.md](./SUPPORT.md).
+- Code contributions: see [CONTRIBUTING.md](./CONTRIBUTING.md).
 
-    mkdir build
-    cd build
-    cmake ..
-    cmake --build . --config Release
 
-This builds zfp in release mode. Replace 'Release' with 'Debug' to build
-zfp in debug mode. See the instructions for Linux on how to change the
-cmake line to also build the example programs.
+Authors
+-------
 
-## Testing
+zfp was originally developed by [Peter Lindstrom](https://people.llnl.gov/pl)
+at [Lawrence Livermore National Laboratory](https://www.llnl.gov/). Please
+see the [Contributors Page](https://github.com/LLNL/zfp/graphs/contributors)
+for a full list of contributors.
 
-To test that zfp is working properly, type
+### Citing zfp
 
-    make test
+If you use zfp for scholarly research, please cite this paper:
 
-or using CMake
+* Peter Lindstrom.
+  [Fixed-Rate Compressed Floating-Point Arrays](https://www.researchgate.net/publication/264417607_Fixed-Rate_Compressed_Floating-Point_Arrays).
+  IEEE Transactions on Visualization and Computer Graphics, 20(12):2674-2683, December 2014.
+  [doi:10.1109/TVCG.2014.2346458](http://doi.org/10.1109/TVCG.2014.2346458).
- ctest +The algorithm implemented in the current version of zfp is described in the +[documentation](https://zfp.readthedocs.io/en/latest/algorithm.html) and in +the following paper: + +* James Diffenderfer, Alyson Fox, Jeffrey Hittinger, Geoffrey Sanders, Peter Lindstrom. + [Error Analysis of ZFP Compression for Floating-Point Data](https://www.researchgate.net/publication/324908266_Error_Analysis_of_ZFP_Compression_for_Floating-Point_Data). + SIAM Journal on Scientific Computing, 41(3):A1867-A1898, June 2019. + [doi:10.1137/18M1168832](http://doi.org/10.1137/18M1168832). + + +License +------- + +zfp is distributed under the terms of the BSD 3-Clause license. See +[LICENSE](./LICENSE) and [NOTICE](./NOTICE) for details. + +SPDX-License-Identifier: BSD-3-Clause -If the compilation or regression tests fail, it is possible that some of the -macros in the file 'Config' have to be adjusted. Also, the tests may fail -due to minute differences in the computed floating-point fields being -compressed, which will be indicated by checksum errors. If most tests -succeed and the failures result in byte sizes and error values reasonably -close to the expected values, then it is likely that the compressor is -working correctly. +LLNL-CODE-663824 diff --git a/SUPPORT.md b/SUPPORT.md new file mode 100644 index 00000000..83a97931 --- /dev/null +++ b/SUPPORT.md @@ -0,0 +1,11 @@ +Support +======= + +For more information on zfp, please see the +[zfp website](https://zfp.llnl.gov). +For bug reports and feature requests, please consult the +[GitHub issue tracker](https://github.com/LLNL/zfp/issues/). +For questions and comments not answered here or in the +[documentation](http://zfp.readthedocs.io), +please contact us by email at +[zfp@llnl.gov](mailto:zfp@llnl.gov). 
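[Editor's note, not part of the patch: the rewritten README directs readers to the examples section for how to call the C library. For orientation, a minimal fixed-accuracy compression call against zfp's public C API, modeled on the project's own `examples/simple.c`, looks roughly like the sketch below. The array dimensions and the 1e-6 tolerance are arbitrary illustrative values, and the program must be linked against libzfp.]

```c
#include <stdio.h>
#include <stdlib.h>
#include "zfp.h"

int main(void)
{
  size_t nx = 64, ny = 64, nz = 64;
  double* data = malloc(nx * ny * nz * sizeof(double));
  for (size_t i = 0; i < nx * ny * nz; i++)
    data[i] = (double)i / (double)(nx * ny * nz);  /* smooth, compressible field */

  /* describe the uncompressed array and choose an absolute error tolerance */
  zfp_field* field = zfp_field_3d(data, zfp_type_double, nx, ny, nz);
  zfp_stream* zfp = zfp_stream_open(NULL);
  zfp_stream_set_accuracy(zfp, 1e-6);

  /* allocate a worst-case buffer and attach a bit stream to it */
  size_t bufsize = zfp_stream_maximum_size(zfp, field);
  void* buffer = malloc(bufsize);
  bitstream* stream = stream_open(buffer, bufsize);
  zfp_stream_set_bit_stream(zfp, stream);
  zfp_stream_rewind(zfp);

  /* zfp_compress returns the compressed size in bytes, or 0 on failure */
  size_t zfpsize = zfp_compress(zfp, field);
  if (zfpsize)
    printf("compressed %zu bytes to %zu bytes\n",
           nx * ny * nz * sizeof(double), zfpsize);

  zfp_field_free(field);
  zfp_stream_close(zfp);
  stream_close(stream);
  free(buffer);
  free(data);
  return zfpsize ? 0 : 1;
}
```

Decompression follows the same shape: rewind the stream and call `zfp_decompress` with a field describing the destination array.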
diff --git a/VERSIONS.md b/VERSIONS.md deleted file mode 100644 index 2b7e0e72..00000000 --- a/VERSIONS.md +++ /dev/null @@ -1,298 +0,0 @@ -# zfp Release Notes - -## 0.5.5 (May 5, 2019) - -- Added support for reversible (lossless) compression of floating-point and - integer data. - -- Added methods for serializing and deserializing zfp's compressed arrays. - -- Added Python bindings for compressing NumPy arrays. - -- Added Fortran bindings to zfp's high-level C API. - -- Change: - - The default compressed-array cache size is now a function of the total - number of array elements, irrespective of array shape. - -- Bug fixes: - - Incorrect handling of execution policy in zfp utility. - - Incorrect handling of decompression via header in zfp utility. - - Incorrect cleanup of device memory in CUDA decompress. - - Tests for failing mallocs. - - CMake installation of CFP when built. - - zfp\_write\_header and zfp\_field\_metadata now fail if array dimensions - are too large to fit in header. - - -## 0.5.4 (October 1, 2018) - -- Added support for CUDA fixed-rate compression and decompression. - -- Added views into compressed arrays for thread safety, nested array - indexing, slicing, and array subsetting. - -- Added C language bindings for compressed arrays. - -- Added support for compressing and decompressing 4D data. - -- Changes: - - Execution policy now applies to both compression and decompression. - - Compressed array accessors now return Scalar type instead of - const Scalar& to avoid stale references to evicted cache lines. - -- Bug fixes: - - Handling of negative strides. - - Command line tool handling of arrays with more than 2^32 elements. - - bitstream C++ compatibility. - - Respect minimum cache size request. - - -## 0.5.3 (March 28, 2018) - -- Added support for OpenMP multithreaded compression (but not decompression). - -- Added options for OpenMP execution to zfp command-line tool. 
- -- Changed return value of zfp\_decompress to indicate the number of compressed - bytes processed so far (now returns same value as zfp\_compress on success). - -- Added compressed array support for copy construction and assignment via - deep copies. - -- Added virtual destructors to enable inheritance from zfp arrays. - - -## 0.5.2 (September 28, 2017) - -- Added iterators and proxy objects for pointers and references. - -- Added example illustrating how to use iterators and pointers. - -- Modified diffusion example to optionally use iterators. - -- Moved internal headers under array to array/zfp. - -- Modified 64-bit integer typedefs to avoid the C89 non-compliant long long - and allow for user-supplied types and literal suffixes. - -- Renamed compile-time macros that did not have a ZFP prefix. - -- Fixed issue with setting stream word type via CMake. - -- Rewrote documentation in reStructuredText and added complete - documentation of all public functions, classes, types, and macros. - Removed ASCII documentation. - - -## 0.5.1 (March 28, 2017) - -- This release primarily fixes a few minor issues but also includes - changes in anticipation of a large number of planned future additions - to the library. No changes have been made to the compressed format, - which is backwards compatible with version 0.5.0. - -- Added high-level API support for integer types. - -- Separated library version from CODEC version and added version string. - -- Added example that illustrates in-place compression. - -- Added support for CMake builds. - -- Corrected inconsistent naming of BIT\_STREAM macros in code and - documentation. - -- Renamed some of the header bit mask macros. - -- Added return values to stream\_skip and stream\_flush to indicate the - number of bits skipped or output. - -- Renamed stream\_block and stream\_delta to make it clear that they refer - to strided streams. Added missing definition of stream\_stride\_block. 
- -- Changed int/uint types in places to use ptrdiff\_t/size\_t where - appropriate. - -- Changed API for zfp\_set\_precision and zfp\_set\_accuracy to not require - the scalar type. - -- Added missing static keyword in decode\_block. - -- Changed testzfp to allow specifying which tests to perform on the - command line. - -- Fixed bug that prevented defining uninitialized arrays. - -- Fixed incorrect computation of array sizes in zfp\_field\_size. - -- Fixed minor issues that prevented code from compiling on Windows. - -- Fixed issue with fixed-accuracy headers that caused unnecessary storage. - -- Modified directory structure. - -- Added documentation that discusses common issues with using zfp. - - -## 0.5.0 (February 29, 2016) - -- Modified CODEC to more efficiently encode blocks whose values are all - zero or are smaller in magnitude than the absolute error tolerance. - This allows representing "empty" blocks using only one bit each. This - version is not backwards compatible with prior zfp versions. - -- Changed behavior of zfp\_compress and zfp\_decompress to not automatically - rewind the bit stream. This makes it easier to concatenate multiple - compressed bit streams, e.g., when compressing vector fields or multiple - scalars together. - -- Added functions for compactly encoding the compression parameters - and field meta data, e.g., for producing self-contained compressed - streams. Also added functions for reading and writing a header - containing these parameters. - -- Changed the zfp example program interface to allow reading and writing - compressed streams, optionally with a header. The zfp tool can now be - used to compress and decompress files as a stand alone utility. - - -## 0.4.1 (December 28, 2015) - -- Fixed bug that caused segmentation fault when compressing 3D arrays - whose dimensions are not multiples of four. Specifically, arrays of - dimensions nx * ny * nz, with ny not a multiple of four, were not - handled correctly. 
- -- Modified examples/fields.h to ensure standard compliance. Previously, - C99 support was needed to handle the hex float constants, which are - not supported in C++98. - -- Added simple.c as a minimal example of how to call the compressor. - -- Changed compilation of diffusion example to output two executables: - one with and one without compression. - - -## 0.4.0 (December 5, 2015) - -- Substantial changes to the compression algorithm that improve PSNR - by about 6 dB and speed by a factor of 2-3. These changes are not - backward compatible with previous versions of zfp. - -- Added support for 31-bit and 63-bit integer data, as well as shorter - integer types. - -- Rewrote compression codec entirely in C to make linking and calling - easier from other programming languages, and to expose the low-level - interface through C instead of C++. This necessitated significant - changes to the API as well. - -- Minor changes to the C++ compressed array API, as well as major - implementation changes to support the C library. The namespace and - public types are now all in lower case. - -- Deprecated support for general fixed-point decorrelating transforms - and slimmed down implementation. - -- Added new examples for evaluating the throughput of the (de)compressor - and for compressing grayscale images in the pgm format. - -- Added FAQ. - - -## 0.3.2 (December 3, 2015) - -- Fixed bug in Array::get() that caused the wrong cached block to be - looked up, thus occasionally copying incorrect values back to parts - of the array. - - -## 0.3.1 (May 6, 2015) - -- Fixed rare bug caused by exponent underflow in blocks with no normal - and some denormal numbers. - - -## 0.3.0 (March 3, 2015) - -- Modified the default decorrelating transform to one that uses only - additions and bit shifts. This new transform, in addition to being - faster, also has some theoretical optimality properties and tends to - improve rate distortion. 
- -- Added compile-time support for parameterized transforms, e.g., to - support other popular transforms like DCT, HCT, and Walsh-Hadamard. - -- Made forward transform range preserving: (-1, 1) is mapped to (-1, 1). - Consequently Q1.62 fixed point can be used throughout. - -- Changed the order in which bits are emitted within each bit plane - to be more intelligent. Group tests are now deferred until they - are needed, i.e., just before the value bits for the group being - tested. This improves the quality of fixed-rate encodings, but - has no impact on compressed size. - -- Made several optimizations to improve performance. - -- Added floating-point traits to reduce the number of template - parameters. It is now possible to declare a 3D array as - Array3, for example. - -- Added functions for setting the array scalar type and dimensions. - -- Consolidated several header files. - -- Added testzfp for regression testing. - - -## 0.2.1 (December 12, 2014) - -- Added Win64 support via Microsoft Visual Studio compiler. - -- Fixed broken support for IBM's xlc compiler. - -- Made several minor changes to suppress compiler warnings. - -- Documented expected output for the diffusion example. - - -## 0.2.0 (December 2, 2014) - -- The compression interface from zfpcompress was relocated to a - separate library, called libzfp, and modified to be callable from C. - This API now uses a parameter object (zfp\_params) to specify array - type and dimensions as well as compression parameters. - -- Several utility functions were added to simplify libzfp usage: - - * Functions for setting the rate, precision, and accuracy. - Corresponding functions were also added to the Codec class. - - * A function for estimating the buffer size needed for compression. - -- The Array class functionality was expanded: - - * Support for accessing the compressed bit stream stored with an - array, e.g., for offline compressed storage and for initializing - an already compressed array. 
- - * Functions for dynamically specifying the cache size. - - * The default cache is now direct-mapped instead of two-way - associative. - -- Minor bug fixes: - - * Corrected the value of the lowest possible bit plane to account for - both the smallest exponent and the number of bits in the significand. - - * Corrected inconsistent use of rate and precision. The rate refers - to the number of compressed bits per floating-point value, while - the precision refers to the number of uncompressed bits. The Array - API was changed accordingly. - - -## 0.1.0 (November 12, 2014) - -- Initial beta release. diff --git a/appveyor.sh b/appveyor.sh index 31d7194a..94ec4e33 100644 --- a/appveyor.sh +++ b/appveyor.sh @@ -18,7 +18,23 @@ BUILD_FLAGS="$BUILD_FLAGS -DBUILD_UTILITIES=ON" BUILD_FLAGS="$BUILD_FLAGS -DBUILD_EXAMPLES=ON" BUILD_FLAGS="$BUILD_FLAGS -DBUILD_CFP=ON" BUILD_FLAGS="$BUILD_FLAGS -DCFP_NAMESPACE=cfp2" -BUILD_FLAGS="$BUILD_FLAGS -DZFP_WITH_ALIGNED_ALLOC=ON" + +# zfpy only built for MSVC, Release builds +if [ $COMPILER == "msvc" ] && [ $BUILD_TYPE == "Release" ]; then + # verify active python version matches what was specified in appveyor.yml + + # fetch python version X.Y (single digits only) + ACTIVE_PY_VERSION=$(python -c 'import platform; print(platform.python_version())' | cut -c1-3) + # $PYTHON_VERSION comes from appveyor.yml and has form XY (no dot separating major and minor versions) + ACTIVE_PY_VERSION=${ACTIVE_PY_VERSION:0:1}${ACTIVE_PY_VERSION:2:1} + + if [ $ACTIVE_PY_VERSION != $PYTHON_VERSION ]; then + exit 1 + fi + + BUILD_FLAGS="$BUILD_FLAGS -DBUILD_ZFPY=ON" +fi + BUILD_FLAGS="$BUILD_FLAGS -DBUILD_OPENMP=OFF" BUILD_FLAGS="$BUILD_FLAGS -DBUILD_CUDA=OFF" diff --git a/appveyor.yml b/appveyor.yml index deea4b3c..8a1e4688 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -1,74 +1,70 @@ -version: 0.5.5-{build} +version: 1.0.1-{build} environment: + # zfpy only build for Release builds (otherwise need debug python libs python27_d.lib) matrix: - - COMPILER: 
mingw - GENERATOR: MinGW Makefiles - PLATFORM: Win32 - BUILD_TYPE: Debug - - - COMPILER: mingw - GENERATOR: MinGW Makefiles - PLATFORM: Win32 - BUILD_TYPE: Release - - - COMPILER: mingw-w64 - GENERATOR: MinGW Makefiles - PLATFORM: x64 - BUILD_TYPE: Debug - - - COMPILER: mingw-w64 - GENERATOR: MinGW Makefiles - PLATFORM: x64 - BUILD_TYPE: Release - - - COMPILER: msvc - GENERATOR: Visual Studio 15 2017 Win64 - APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017 - PLATFORM: x64 - BUILD_TYPE: Debug - - COMPILER: msvc GENERATOR: Visual Studio 15 2017 Win64 APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017 PLATFORM: x64 BUILD_TYPE: Release - - - COMPILER: msvc - GENERATOR: Visual Studio 15 2017 - APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017 - PLATFORM: Win32 - BUILD_TYPE: Debug + PYTHON_VERSION: 38 - COMPILER: msvc GENERATOR: Visual Studio 15 2017 APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017 PLATFORM: Win32 BUILD_TYPE: Release - - - COMPILER: msvc - GENERATOR: Visual Studio 14 2015 Win64 - PLATFORM: x64 - BUILD_TYPE: Debug + PYTHON_VERSION: 38 - COMPILER: msvc GENERATOR: Visual Studio 14 2015 Win64 PLATFORM: x64 BUILD_TYPE: Release + PYTHON_VERSION: 38 - COMPILER: msvc GENERATOR: Visual Studio 14 2015 PLATFORM: Win32 - BUILD_TYPE: Debug + BUILD_TYPE: Release + PYTHON_VERSION: 38 - - COMPILER: msvc - GENERATOR: Visual Studio 14 2015 + - COMPILER: mingw + GENERATOR: MinGW Makefiles PLATFORM: Win32 BUILD_TYPE: Release + - COMPILER: mingw-w64 + GENERATOR: MinGW Makefiles + PLATFORM: x64 + BUILD_TYPE: Release + install: - if "%COMPILER%"=="mingw" set PATH=C:\MinGW\bin;%PATH% - if "%COMPILER%"=="mingw-w64" set PATH=C:\MinGW\bin;%PATH% + # set env vars for Python system dir (assumed to always be MSVC) + - ps: | + if ($env:PYTHON_VERSION) { + $env:PYTHON_DIR = "C:\Python$env:PYTHON_VERSION" + if ($env:PLATFORM -eq "x64") { + $env:PYTHON_DIR = "$env:PYTHON_DIR-x64" + } + + $env:PYTHON_LIB_PATH = "$env:PYTHON_DIR\libs\python$env:PYTHON_VERSION.lib" + } + + # placing these behind a 
conditional for some reason prevents CMake from picking up the virtualenv + - if "%COMPILER%"=="msvc" if "%BUILD_TYPE%"=="Release" set PATH=%PYTHON_DIR%;%PYTHON_DIR%\Scripts;%PATH% + - if "%COMPILER%"=="msvc" if "%BUILD_TYPE%"=="Release" pip install virtualenv + - if "%COMPILER%"=="msvc" if "%BUILD_TYPE%"=="Release" set VIRTUALENV_NAME=pyVirtualEnv + - if "%COMPILER%"=="msvc" if "%BUILD_TYPE%"=="Release" virtualenv %VIRTUALENV_NAME% + - if "%COMPILER%"=="msvc" if "%BUILD_TYPE%"=="Release" "%VIRTUALENV_NAME%\\Scripts\\activate.bat" + - if "%COMPILER%"=="msvc" if "%BUILD_TYPE%"=="Release" pip install -r python\requirements.txt + - if "%COMPILER%"=="msvc" if "%BUILD_TYPE%"=="Release" python --version + + build_script: - sh appveyor.sh + # uncomment to enable interactive remote desktop mode + #- ps: $blockRdp = $true; iex ((new-object net.webclient).DownloadString('https://raw.githubusercontent.com/appveyor/ci/master/scripts/enable-rdp.ps1')) diff --git a/array/zfp/header.h b/array/zfp/header.h deleted file mode 100644 index ad6433cf..00000000 --- a/array/zfp/header.h +++ /dev/null @@ -1,19 +0,0 @@ -class header { -public: - class exception : public std::runtime_error { - public: - exception(const std::string& msg) : runtime_error(msg) {} - - virtual ~exception() throw (){} - }; - - static void concat_sentence(std::string& s, const std::string& msg) - { - if (!s.empty()) - s += " "; - s += msg; - } - - uchar buffer[BITS_TO_BYTES(ZFP_HEADER_SIZE_BITS)]; -}; - diff --git a/array/zfp/headerHelpers.h b/array/zfp/headerHelpers.h deleted file mode 100644 index ed33816a..00000000 --- a/array/zfp/headerHelpers.h +++ /dev/null @@ -1,144 +0,0 @@ -// "Handle" classes useful when throwing exceptions - -// buffer holds aligned memory for header, suitable for bitstream r/w (word-aligned) -class AlignedBufferHandle { - public: - size_t buffer_size_bytes; - // uint64 alignment guarantees bitstream alignment - uint64* buffer; - - // can copy a header into aligned buffer - 
AlignedBufferHandle(const zfp::array::header* h = 0) { - size_t num_64bit_entries = DIV_ROUND_UP(ZFP_HEADER_SIZE_BITS, CHAR_BIT * sizeof(uint64)); - buffer = new uint64[num_64bit_entries]; - buffer_size_bytes = num_64bit_entries * sizeof(uint64); - - if (h) - memcpy(buffer, h->buffer, BITS_TO_BYTES(ZFP_HEADER_SIZE_BITS)); - } - - ~AlignedBufferHandle() { - delete[] buffer; - } - - void copy_to_header(zfp::array::header* h) { - memcpy(h, buffer, BITS_TO_BYTES(ZFP_HEADER_SIZE_BITS)); - } -}; - -// redirect zfp_stream->bitstream to header while object remains in scope -class DualBitstreamHandle { - public: - bitstream* old_bs; - bitstream* new_bs; - zfp_stream* zfp; - - DualBitstreamHandle(zfp_stream* zfp, AlignedBufferHandle& abh) : - zfp(zfp) - { - old_bs = zfp_stream_bit_stream(zfp); - new_bs = stream_open(abh.buffer, abh.buffer_size_bytes); - - stream_rewind(new_bs); - zfp_stream_set_bit_stream(zfp, new_bs); - } - - ~DualBitstreamHandle() { - zfp_stream_set_bit_stream(zfp, old_bs); - stream_close(new_bs); - } -}; - -class ZfpFieldHandle { - public: - zfp_field* field; - - ZfpFieldHandle() { - field = zfp_field_alloc(); - } - - ZfpFieldHandle(zfp_type type, int nx, int ny, int nz) { - field = zfp_field_3d(0, type, nx, ny, nz); - } - - ~ZfpFieldHandle() { - zfp_field_free(field); - } -}; - -class ZfpStreamHandle { - public: - bitstream* bs; - zfp_stream* stream; - - ZfpStreamHandle(AlignedBufferHandle& abh) { - bs = stream_open(abh.buffer, abh.buffer_size_bytes); - stream = zfp_stream_open(bs); - } - - ~ZfpStreamHandle() { - zfp_stream_close(stream); - stream_close(bs); - } -}; - -// verify buffer is large enough, with what header describes -static bool is_valid_buffer_size(const zfp_stream* stream, uint nx, uint ny, uint nz, size_t expected_buffer_size_bytes) -{ - uint mx = ((std::max(nx, 1u)) + 3) / 4; - uint my = ((std::max(ny, 1u)) + 3) / 4; - uint mz = ((std::max(nz, 1u)) + 3) / 4; - size_t blocks = (size_t)mx * (size_t)my * (size_t)mz; - // no rounding because 
fixed-rate wra implies rate is multiple of word size - size_t described_buffer_size_bytes = blocks * stream->maxbits / CHAR_BIT; - - return expected_buffer_size_bytes >= described_buffer_size_bytes; -} - -static void read_header_contents(const zfp::array::header& header, size_t expected_buffer_size_bytes, uint& dims, zfp_type& type, double& rate, uint n[4]) -{ - // create zfp_stream and zfp_field structs to call C API zfp_read_header() - AlignedBufferHandle abh; - memcpy(abh.buffer, header.buffer, BITS_TO_BYTES(ZFP_HEADER_SIZE_BITS)); - - ZfpStreamHandle zsh(abh); - ZfpFieldHandle zfh; - - if (!zfp_read_header(zsh.stream, zfh.field, ZFP_HEADER_FULL)) - throw zfp::array::header::exception("Invalid ZFP header."); - - // gather metadata - dims = zfp_field_dimensionality(zfh.field); - type = zfp_field_type(zfh.field); - - uint num_block_entries = 1u << (2 * dims); - rate = (double)zsh.stream->maxbits / num_block_entries; - - zfp_field_size(zfh.field, n); - - // validate header - std::string err_msg = ""; - verify_header_contents(zsh.stream, zfh.field, err_msg); - - if (!err_msg.empty()) - throw zfp::array::header::exception(err_msg); - - if (expected_buffer_size_bytes && !is_valid_buffer_size(zsh.stream, zfh.field->nx, zfh.field->ny, zfh.field->nz, expected_buffer_size_bytes)) - throw zfp::array::header::exception("ZFP header expects a longer buffer than what was passed in."); -} - -// verifies metadata on zfp_stream and zfp_field describe a valid compressed array -static void verify_header_contents(const zfp_stream* stream, const zfp_field* field, std::string& err_msg) -{ - // verify read-header contents - zfp_type type = zfp_field_type(field); - if (type != zfp_type_float && type != zfp_type_double) - zfp::array::header::concat_sentence(err_msg, "ZFP compressed arrays do not yet support scalar types beyond floats and doubles."); - - uint dims = zfp_field_dimensionality(field); - if (dims < 1 || dims > 3) - zfp::array::header::concat_sentence(err_msg, "ZFP compressed 
arrays do not yet support dimensionalities beyond 1, 2, and 3."); - - if (zfp_stream_compression_mode(stream) != zfp_mode_fixed_rate) - zfp::array::header::concat_sentence(err_msg, "ZFP header specified a non fixed-rate mode, unsupported by this object."); -} diff --git a/array/zfp/iterator1.h b/array/zfp/iterator1.h deleted file mode 100644 index 310e8e2d..00000000 --- a/array/zfp/iterator1.h +++ /dev/null @@ -1,38 +0,0 @@ -// random access iterator that visits 1D array block by block; this class is nested within zfp::array1 -class iterator { -public: - // typedefs for STL compatibility - typedef Scalar value_type; - typedef ptrdiff_t difference_type; - typedef typename array1::reference reference; - typedef typename array1::pointer pointer; - typedef std::random_access_iterator_tag iterator_category; - - iterator() : ref(0, 0) {} - iterator operator=(const iterator& it) { ref.array = it.ref.array; ref.i = it.ref.i; return *this; } - reference operator*() const { return ref; } - reference operator[](difference_type d) const { return *operator+(d); } - iterator& operator++() { increment(); return *this; } - iterator& operator--() { decrement(); return *this; } - iterator operator++(int) { iterator it = *this; increment(); return it; } - iterator operator--(int) { iterator it = *this; decrement(); return it; } - iterator operator+=(difference_type d) { ref.i += d; return *this; } - iterator operator-=(difference_type d) { ref.i -= d; return *this; } - iterator operator+(difference_type d) const { return iterator(ref.array, ref.i + d); } - iterator operator-(difference_type d) const { return iterator(ref.array, ref.i - d); } - difference_type operator-(const iterator& it) const { return static_cast(ref.i) - static_cast(it.ref.i); } - bool operator==(const iterator& it) const { return ref.array == it.ref.array && ref.i == it.ref.i; } - bool operator!=(const iterator& it) const { return !operator==(it); } - bool operator<=(const iterator& it) const { return ref.array 
== it.ref.array && ref.i <= it.ref.i; } - bool operator>=(const iterator& it) const { return ref.array == it.ref.array && ref.i >= it.ref.i; } - bool operator<(const iterator& it) const { return !operator>=(it); } - bool operator>(const iterator& it) const { return !operator<=(it); } - uint i() const { return ref.i; } - -protected: - friend class array1; - explicit iterator(array1* array, uint i) : ref(array, i) {} - void increment() { ref.i++; } - void decrement() { ref.i--; } - reference ref; -}; diff --git a/array/zfp/iterator2.h b/array/zfp/iterator2.h deleted file mode 100644 index 03052c4e..00000000 --- a/array/zfp/iterator2.h +++ /dev/null @@ -1,42 +0,0 @@ -// forward iterator that visits 2D array block by block; this class is nested within zfp::array2 -class iterator { -public: - // typedefs for STL compatibility - typedef Scalar value_type; - typedef ptrdiff_t difference_type; - typedef typename array2::reference reference; - typedef typename array2::pointer pointer; - typedef std::forward_iterator_tag iterator_category; - - iterator() : ref(0, 0, 0) {} - iterator operator=(const iterator& it) { ref.array = it.ref.array; ref.i = it.ref.i; ref.j = it.ref.j; return *this; } - reference operator*() const { return ref; } - iterator& operator++() { increment(); return *this; } - iterator operator++(int) { iterator it = *this; increment(); return it; } - bool operator==(const iterator& it) const { return ref.array == it.ref.array && ref.i == it.ref.i && ref.j == it.ref.j; } - bool operator!=(const iterator& it) const { return !operator==(it); } - uint i() const { return ref.i; } - uint j() const { return ref.j; } - -protected: - friend class array2; - explicit iterator(array2* array, uint i, uint j) : ref(array, i, j) {} - void increment() - { - ref.i++; - if (!(ref.i & 3u) || ref.i == ref.array->nx) { - ref.i = (ref.i - 1) & ~3u; - ref.j++; - if (!(ref.j & 3u) || ref.j == ref.array->ny) { - ref.j = (ref.j - 1) & ~3u; - // done with block; advance to next - if 
((ref.i += 4) >= ref.array->nx) { - ref.i = 0; - if ((ref.j += 4) >= ref.array->ny) - ref.j = ref.array->ny; - } - } - } - } - reference ref; -}; diff --git a/array/zfp/iterator3.h b/array/zfp/iterator3.h deleted file mode 100644 index 3889fc1c..00000000 --- a/array/zfp/iterator3.h +++ /dev/null @@ -1,50 +0,0 @@ -// forward iterator that visits 3D array block by block; this class is nested within zfp::array3 -class iterator { -public: - // typedefs for STL compatibility - typedef Scalar value_type; - typedef ptrdiff_t difference_type; - typedef typename array3::reference reference; - typedef typename array3::pointer pointer; - typedef std::forward_iterator_tag iterator_category; - - iterator() : ref(0, 0, 0, 0) {} - iterator operator=(const iterator& it) { ref.array = it.ref.array; ref.i = it.ref.i; ref.j = it.ref.j; ref.k = it.ref.k; return *this; } - reference operator*() const { return ref; } - iterator& operator++() { increment(); return *this; } - iterator operator++(int) { iterator it = *this; increment(); return it; } - bool operator==(const iterator& it) const { return ref.array == it.ref.array && ref.i == it.ref.i && ref.j == it.ref.j && ref.k == it.ref.k; } - bool operator!=(const iterator& it) const { return !operator==(it); } - uint i() const { return ref.i; } - uint j() const { return ref.j; } - uint k() const { return ref.k; } - -protected: - friend class array3; - explicit iterator(array3* array, uint i, uint j, uint k) : ref(array, i, j, k) {} - void increment() - { - ref.i++; - if (!(ref.i & 3u) || ref.i == ref.array->nx) { - ref.i = (ref.i - 1) & ~3u; - ref.j++; - if (!(ref.j & 3u) || ref.j == ref.array->ny) { - ref.j = (ref.j - 1) & ~3u; - ref.k++; - if (!(ref.k & 3u) || ref.k == ref.array->nz) { - ref.k = (ref.k - 1) & ~3u; - // done with block; advance to next - if ((ref.i += 4) >= ref.array->nx) { - ref.i = 0; - if ((ref.j += 4) >= ref.array->ny) { - ref.j = 0; - if ((ref.k += 4) >= ref.array->nz) - ref.k = ref.array->nz; - } - } - } - } - } - 
} - reference ref; -}; diff --git a/array/zfp/memory.h b/array/zfp/memory.h deleted file mode 100644 index 852559da..00000000 --- a/array/zfp/memory.h +++ /dev/null @@ -1,145 +0,0 @@ -#ifndef ZFP_MEMORY_H -#define ZFP_MEMORY_H - -#ifdef _WIN32 -extern "C" { - #ifdef __MINGW32__ - #include - #endif - - #include -} -#endif - -#include -#include -#include "zfp/types.h" - -#define unused_(x) ((void)(x)) - -namespace zfp { - -// allocate size bytes -inline void* -allocate(size_t size) -{ - return new uchar[size]; -} - -// allocate size bytes with alignment -inline void* -allocate_aligned(size_t size, size_t alignment) -{ - void* ptr; - bool is_mem_failed = false; - -#ifdef ZFP_WITH_ALIGNED_ALLOC - #ifdef __INTEL_COMPILER - ptr = _mm_malloc(size, alignment); - - #elif defined(__MINGW32__) - ptr = __mingw_aligned_malloc(size, alignment); - - #elif defined(_WIN32) - ptr = _aligned_malloc(size, alignment); - - #elif (_POSIX_C_SOURCE >= 200112L) || (_XOPEN_SOURCE >= 600) || defined(__MACH__) - is_mem_failed = posix_memalign(&ptr, alignment, size); - - #else - unused_(alignment); - ptr = malloc(size); - - #endif - -#else - unused_(alignment); - ptr = malloc(size); - -#endif - - if (is_mem_failed || (ptr == NULL)) - throw std::bad_alloc(); - - return ptr; -} - -// deallocate memory pointed to by ptr -template -inline void -deallocate(T* ptr) -{ - delete[] ptr; -} - -template -inline void -deallocate_aligned(T* ptr) -{ -#ifdef ZFP_WITH_ALIGNED_ALLOC - if (ptr) - #ifdef __INTEL_COMPILER - _mm_free(ptr); - #elif defined(__MINGW32__) - __mingw_aligned_free(ptr); - #elif defined(_WIN32) - _aligned_free(ptr); - #else - free(ptr); - #endif - -#else - if (ptr) - free(ptr); -#endif -} - -// reallocate size bytes -template -inline void -reallocate(T*& ptr, size_t size) -{ - zfp::deallocate(ptr); - ptr = static_cast(zfp::allocate(size)); -} - -template -inline void -reallocate_aligned(T*& ptr, size_t size, size_t alignment) -{ - zfp::deallocate_aligned(ptr); - ptr = 
static_cast(zfp::allocate_aligned(size, alignment)); -} - -// clone array 'T src[count]' -template -inline void -clone(T*& dst, const T* src, size_t count) -{ - zfp::deallocate(dst); - if (src) { - dst = static_cast(zfp::allocate(count * sizeof(T))); - std::copy(src, src + count, dst); - } - else - dst = 0; -} - -template -inline void -clone_aligned(T*& dst, const T* src, size_t count, size_t alignment) -{ - zfp::deallocate_aligned(dst); - if (src) { - dst = static_cast(zfp::allocate_aligned(count * sizeof(T), alignment)); - std::copy(src, src + count, dst); - } - else - dst = 0; -} - -} - -#undef unused_ - -#endif diff --git a/array/zfp/pointer1.h b/array/zfp/pointer1.h deleted file mode 100644 index f58557c0..00000000 --- a/array/zfp/pointer1.h +++ /dev/null @@ -1,30 +0,0 @@ -// pointer to a 1D array element; this class is nested within zfp::array1 -class pointer { -public: - pointer() : ref(0, 0) {} - pointer operator=(const pointer& p) { ref.array = p.ref.array; ref.i = p.ref.i; return *this; } - reference operator*() const { return ref; } - reference operator[](ptrdiff_t d) const { return *operator+(d); } - pointer& operator++() { increment(); return *this; } - pointer& operator--() { decrement(); return *this; } - pointer operator++(int) { pointer p = *this; increment(); return p; } - pointer operator--(int) { pointer p = *this; decrement(); return p; } - pointer operator+=(ptrdiff_t d) { ref.i += d; return *this; } - pointer operator-=(ptrdiff_t d) { ref.i -= d; return *this; } - pointer operator+(ptrdiff_t d) const { pointer p = *this; p += d; return p; } - pointer operator-(ptrdiff_t d) const { pointer p = *this; p -= d; return p; } - ptrdiff_t operator-(const pointer& p) const { return index() - p.index(); } - bool operator==(const pointer& p) const { return ref.array == p.ref.array && ref.i == p.ref.i; } - bool operator!=(const pointer& p) const { return !operator==(p); } - -protected: - friend class array1; - friend class reference; - explicit 
pointer(reference r) : ref(r) {} - explicit pointer(array1* array, uint i) : ref(array, i) {} - ptrdiff_t index() const { return ref.i; } - void set(ptrdiff_t index) { ref.i = index; } - void increment() { ref.i++; } - void decrement() { ref.i--; } - reference ref; -}; diff --git a/array/zfp/pointer2.h b/array/zfp/pointer2.h deleted file mode 100644 index dcdb518f..00000000 --- a/array/zfp/pointer2.h +++ /dev/null @@ -1,42 +0,0 @@ -// pointer to a 2D array element; this class is nested within zfp::array2 -class pointer { -public: - pointer() : ref(0, 0, 0) {} - pointer operator=(const pointer& p) { ref.array = p.ref.array; ref.i = p.ref.i; ref.j = p.ref.j; return *this; } - reference operator*() const { return ref; } - reference operator[](ptrdiff_t d) const { return *operator+(d); } - pointer& operator++() { increment(); return *this; } - pointer& operator--() { decrement(); return *this; } - pointer operator++(int) { pointer p = *this; increment(); return p; } - pointer operator--(int) { pointer p = *this; decrement(); return p; } - pointer operator+=(ptrdiff_t d) { set(index() + d); return *this; } - pointer operator-=(ptrdiff_t d) { set(index() - d); return *this; } - pointer operator+(ptrdiff_t d) const { pointer p = *this; p += d; return p; } - pointer operator-(ptrdiff_t d) const { pointer p = *this; p -= d; return p; } - ptrdiff_t operator-(const pointer& p) const { return index() - p.index(); } - bool operator==(const pointer& p) const { return ref.array == p.ref.array && ref.i == p.ref.i && ref.j == p.ref.j; } - bool operator!=(const pointer& p) const { return !operator==(p); } - -protected: - friend class array2; - friend class reference; - explicit pointer(reference r) : ref(r) {} - explicit pointer(array2* array, uint i, uint j) : ref(array, i, j) {} - ptrdiff_t index() const { return ref.i + ref.array->nx * ref.j; } - void set(ptrdiff_t index) { ref.array->ij(ref.i, ref.j, index); } - void increment() - { - if (++ref.i == ref.array->nx) { - ref.i = 0; 
- ref.j++; - } - } - void decrement() - { - if (!ref.i--) { - ref.i = ref.array->nx - 1; - ref.j--; - } - } - reference ref; -}; diff --git a/array/zfp/pointer3.h b/array/zfp/pointer3.h deleted file mode 100644 index 091af604..00000000 --- a/array/zfp/pointer3.h +++ /dev/null @@ -1,48 +0,0 @@ -// pointer to a 3D array element; this class is nested within zfp::array3 -class pointer { -public: - pointer() : ref(0, 0, 0, 0) {} - pointer operator=(const pointer& p) { ref.array = p.ref.array; ref.i = p.ref.i; ref.j = p.ref.j; ref.k = p.ref.k; return *this; } - reference operator*() const { return ref; } - reference operator[](ptrdiff_t d) const { return *operator+(d); } - pointer& operator++() { increment(); return *this; } - pointer& operator--() { decrement(); return *this; } - pointer operator++(int) { pointer p = *this; increment(); return p; } - pointer operator--(int) { pointer p = *this; decrement(); return p; } - pointer operator+=(ptrdiff_t d) { set(index() + d); return *this; } - pointer operator-=(ptrdiff_t d) { set(index() - d); return *this; } - pointer operator+(ptrdiff_t d) const { pointer p = *this; p += d; return p; } - pointer operator-(ptrdiff_t d) const { pointer p = *this; p -= d; return p; } - ptrdiff_t operator-(const pointer& p) const { return index() - p.index(); } - bool operator==(const pointer& p) const { return ref.array == p.ref.array && ref.i == p.ref.i && ref.j == p.ref.j && ref.k == p.ref.k; } - bool operator!=(const pointer& p) const { return !operator==(p); } - -protected: - friend class array3; - friend class reference; - explicit pointer(reference r) : ref(r) {} - explicit pointer(array3* array, uint i, uint j, uint k) : ref(array, i, j, k) {} - ptrdiff_t index() const { return ref.i + ref.array->nx * (ref.j + ref.array->ny * ref.k); } - void set(ptrdiff_t index) { ref.array->ijk(ref.i, ref.j, ref.k, index); } - void increment() - { - if (++ref.i == ref.array->nx) { - ref.i = 0; - if (++ref.j == ref.array->ny) { - ref.j = 0; - 
ref.k++; - } - } - } - void decrement() - { - if (!ref.i--) { - ref.i = ref.array->nx - 1; - if (!ref.j--) { - ref.j = ref.array->ny - 1; - ref.k--; - } - } - } - reference ref; -}; diff --git a/array/zfp/reference1.h b/array/zfp/reference1.h deleted file mode 100644 index 99f2e6a6..00000000 --- a/array/zfp/reference1.h +++ /dev/null @@ -1,27 +0,0 @@ -// reference to a 1D array element; this class is nested within zfp::array1 -class reference { -public: - operator Scalar() const { return array->get(i); } - reference operator=(const reference& r) { array->set(i, r.operator Scalar()); return *this; } - reference operator=(Scalar val) { array->set(i, val); return *this; } - reference operator+=(Scalar val) { array->add(i, val); return *this; } - reference operator-=(Scalar val) { array->sub(i, val); return *this; } - reference operator*=(Scalar val) { array->mul(i, val); return *this; } - reference operator/=(Scalar val) { array->div(i, val); return *this; } - pointer operator&() const { return pointer(*this); } - // swap two array elements via proxy references - friend void swap(reference a, reference b) - { - Scalar x = a.operator Scalar(); - Scalar y = b.operator Scalar(); - b.operator=(x); - a.operator=(y); - } - -protected: - friend class array1; - friend class iterator; - explicit reference(array1* array, uint i) : array(array), i(i) {} - array1* array; - uint i; -}; diff --git a/array/zfp/reference2.h b/array/zfp/reference2.h deleted file mode 100644 index 76a0bd3b..00000000 --- a/array/zfp/reference2.h +++ /dev/null @@ -1,27 +0,0 @@ -// reference to a 2D array element; this class is nested within zfp::array2 -class reference { -public: - operator Scalar() const { return array->get(i, j); } - reference operator=(const reference& r) { array->set(i, j, r.operator Scalar()); return *this; } - reference operator=(Scalar val) { array->set(i, j, val); return *this; } - reference operator+=(Scalar val) { array->add(i, j, val); return *this; } - reference 
operator-=(Scalar val) { array->sub(i, j, val); return *this; } - reference operator*=(Scalar val) { array->mul(i, j, val); return *this; } - reference operator/=(Scalar val) { array->div(i, j, val); return *this; } - pointer operator&() const { return pointer(*this); } - // swap two array elements via proxy references - friend void swap(reference a, reference b) - { - Scalar x = a.operator Scalar(); - Scalar y = b.operator Scalar(); - b.operator=(x); - a.operator=(y); - } - -protected: - friend class array2; - friend class iterator; - explicit reference(array2* array, uint i, uint j) : array(array), i(i), j(j) {} - array2* array; - uint i, j; -}; diff --git a/array/zfp/reference3.h b/array/zfp/reference3.h deleted file mode 100644 index 91175e18..00000000 --- a/array/zfp/reference3.h +++ /dev/null @@ -1,27 +0,0 @@ -// reference to a 3D array element; this class is nested within zfp::array3 -class reference { -public: - operator Scalar() const { return array->get(i, j, k); } - reference operator=(const reference& r) { array->set(i, j, k, r.operator Scalar()); return *this; } - reference operator=(Scalar val) { array->set(i, j, k, val); return *this; } - reference operator+=(Scalar val) { array->add(i, j, k, val); return *this; } - reference operator-=(Scalar val) { array->sub(i, j, k, val); return *this; } - reference operator*=(Scalar val) { array->mul(i, j, k, val); return *this; } - reference operator/=(Scalar val) { array->div(i, j, k, val); return *this; } - pointer operator&() const { return pointer(*this); } - // swap two array elements via proxy references - friend void swap(reference a, reference b) - { - Scalar x = a.operator Scalar(); - Scalar y = b.operator Scalar(); - b.operator=(x); - a.operator=(y); - } - -protected: - friend class array3; - friend class iterator; - explicit reference(array3* array, uint i, uint j, uint k) : array(array), i(i), j(j), k(k) {} - array3* array; - uint i, j, k; -}; diff --git a/array/zfp/view1.h b/array/zfp/view1.h 
deleted file mode 100644 index 6129ae5e..00000000 --- a/array/zfp/view1.h +++ /dev/null @@ -1,291 +0,0 @@ -// 1D array views; these classes are nested within zfp::array1 - -// abstract view of 1D array (base class) -class preview { -public: - // rate in bits per value - double rate() const { return array->rate(); } - - // dimensions of (sub)array - size_t size() const { return size_t(nx); } - - // local to global array index - uint global_x(uint i) const { return x + i; } - -protected: - // construction and assignment--perform shallow copy of (sub)array - explicit preview(array1* array) : array(array), x(0), nx(array->nx) {} - explicit preview(array1* array, uint x, uint nx) : array(array), x(x), nx(nx) {} - preview& operator=(array1* a) - { - array = a; - x = 0; - nx = a->nx; - return *this; - } - - array1* array; // underlying container - uint x; // offset into array - uint nx; // dimensions of subarray -}; - -// generic read-only view into a rectangular subset of a 1D array -class const_view : public preview { -protected: - using preview::array; - using preview::x; - using preview::nx; -public: - // construction--perform shallow copy of (sub)array - const_view(array1* array) : preview(array) {} - const_view(array1* array, uint x, uint nx) : preview(array, x, nx) {} - - // dimensions of (sub)array - uint size_x() const { return nx; } - - // [i] accessor - Scalar operator[](uint index) const { return array->get(x + index); } - - // (i) accessor - Scalar operator()(uint i) const { return array->get(x + i); } -}; - -// generic read-write view into a rectangular subset of a 1D array -class view : public const_view { -protected: - using preview::array; - using preview::x; - using preview::nx; -public: - // construction--perform shallow copy of (sub)array - view(array1* array) : const_view(array) {} - view(array1* array, uint x, uint nx) : const_view(array, x, nx) {} - - // [i] accessor from base class - using const_view::operator[]; - - // (i) accessor from base class 
- using const_view::operator(); - - // [i] mutator - reference operator[](uint index) { return reference(array, x + index); } - - // (i) mutator - reference operator()(uint i) { return reference(array, x + i); } -}; - -// thread-safe read-only view of 1D (sub)array with private cache -class private_const_view : public preview { -protected: - using preview::array; - using preview::x; - using preview::nx; -public: - // construction--perform shallow copy of (sub)array - private_const_view(array1* array) : - preview(array), - cache(array->cache.size()) - { - init(); - } - private_const_view(array1* array, uint x, uint nx) : - preview(array, x, nx), - cache(array->cache.size()) - { - init(); - } - - // destructor - ~private_const_view() - { - stream_close(zfp->stream); - zfp_stream_close(zfp); - } - - // dimensions of (sub)array - uint size_x() const { return nx; } - - // cache size in number of bytes - size_t cache_size() const { return cache.size() * sizeof(CacheLine); } - - // set minimum cache size in bytes (array dimensions must be known) - void set_cache_size(size_t csize) - { - cache.resize(array->lines(csize, nx)); - } - - // empty cache without compressing modified cached blocks - void clear_cache() const { cache.clear(); } - - // (i) accessor - Scalar operator()(uint i) const { return get(x + i); } - -protected: - // cache line representing one block of decompressed values - class CacheLine { - public: - const Scalar& operator()(uint i) const { return a[index(i)]; } - Scalar& operator()(uint i) { return a[index(i)]; } - const Scalar* data() const { return a; } - Scalar* data() { return a; } - protected: - static uint index(uint i) { return i & 3u; } - Scalar a[4]; - }; - - // copy private data - void init() - { - // copy compressed stream - zfp = zfp_stream_open(0); - *zfp = *array->zfp; - // copy bit stream - zfp->stream = stream_clone(array->zfp->stream); - } - - // inspector - const Scalar& get(uint i) const - { - const CacheLine* p = line(i); - return 
(*p)(i); - } - - // return cache line for i; may require write-back and fetch - CacheLine* line(uint i) const - { - CacheLine* p = 0; - uint b = array->block(i); - typename Cache::Tag t = cache.access(p, b + 1, false); - uint c = t.index() - 1; - // fetch cache line; no writeback possible since view is read-only - if (c != b) - decode(b, p->data()); - return p; - } - - // decode block with given index - void decode(uint index, Scalar* block) const - { - stream_rseek(zfp->stream, index * array->blkbits); - Codec::decode_block_1(zfp, block, array->shape ? array->shape[index] : 0); - } - - zfp_stream* zfp; // stream of compressed blocks - mutable Cache cache; // cache of decompressed blocks -}; - -// thread-safe read-write view of private 1D (sub)array -class private_view : public private_const_view { -protected: - using preview::array; - using preview::x; - using preview::nx; - using private_const_view::zfp; - using private_const_view::cache; - using private_const_view::init; - using private_const_view::decode; - class view_reference; - typedef typename private_const_view::CacheLine CacheLine; -public: - // construction--perform shallow copy of (sub)array - private_view(array1* array) : private_const_view(array) {} - private_view(array1* array, uint x, uint nx) : private_const_view(array, x, nx) {} - - // partition view into count block-aligned pieces, with 0 <= index < count - void partition(uint index, uint count) - { - partition(x, nx, index, count); - } - - // flush cache by compressing all modified cached blocks - void flush_cache() const - { - for (typename Cache::const_iterator p = cache.first(); p; p++) { - if (p->tag.dirty()) { - uint b = p->tag.index() - 1; - encode(b, p->line->data()); - } - cache.flush(p->line); - } - } - - // (i) accessor from base class - using private_const_view::operator(); - - // (i) mutator - view_reference operator()(uint i) { return view_reference(this, x + i); } - -protected: - class view_reference { - public: - operator Scalar() 
const { return view->get(i); } - view_reference operator=(const view_reference& r) { view->set(i, r.operator Scalar()); return *this; } - view_reference operator=(Scalar val) { view->set(i, val); return *this; } - view_reference operator+=(Scalar val) { view->add(i, val); return *this; } - view_reference operator-=(Scalar val) { view->sub(i, val); return *this; } - view_reference operator*=(Scalar val) { view->mul(i, val); return *this; } - view_reference operator/=(Scalar val) { view->div(i, val); return *this; } - // swap two array elements via proxy references - friend void swap(view_reference a, view_reference b) - { - Scalar x = a.operator Scalar(); - Scalar y = b.operator Scalar(); - b.operator=(x); - a.operator=(y); - } - - protected: - friend class private_view; - explicit view_reference(private_view* view, uint i) : view(view), i(i) {} - private_view* view; - uint i; - }; - - // block-aligned partition of [offset, offset + size): index out of count - static void partition(uint& offset, uint& size, uint index, uint count) - { - uint bmin = offset / 4; - uint bmax = (offset + size + 3) / 4; - uint xmin = std::max(offset + 0, 4 * (bmin + (bmax - bmin) * (index + 0) / count)); - uint xmax = std::min(offset + size, 4 * (bmin + (bmax - bmin) * (index + 1) / count)); - offset = xmin; - size = xmax - xmin; - } - - // mutator - void set(uint i, Scalar val) - { - CacheLine* p = line(i, true); - (*p)(i) = val; - } - - // in-place updates - void add(uint i, Scalar val) { (*line(i, true))(i) += val; } - void sub(uint i, Scalar val) { (*line(i, true))(i) -= val; } - void mul(uint i, Scalar val) { (*line(i, true))(i) *= val; } - void div(uint i, Scalar val) { (*line(i, true))(i) /= val; } - - // return cache line for i; may require write-back and fetch - CacheLine* line(uint i, bool write) const - { - CacheLine* p = 0; - uint b = array->block(i); - typename Cache::Tag t = cache.access(p, b + 1, write); - uint c = t.index() - 1; - if (c != b) { - // write back occupied 
cache line if it is dirty - if (t.dirty()) - encode(c, p->data()); - decode(b, p->data()); - } - return p; - } - - // encode block with given index - void encode(uint index, const Scalar* block) const - { - stream_wseek(zfp->stream, index * array->blkbits); - Codec::encode_block_1(zfp, block, array->shape ? array->shape[index] : 0); - stream_flush(zfp->stream); - } -}; diff --git a/array/zfp/view2.h b/array/zfp/view2.h deleted file mode 100644 index fcfdf8ca..00000000 --- a/array/zfp/view2.h +++ /dev/null @@ -1,393 +0,0 @@ -// 2D array views; these classes are nested within zfp::array2 - -// abstract view of 2D array (base class) -class preview { -public: - // rate in bits per value - double rate() const { return array->rate(); } - - // dimensions of (sub)array - size_t size() const { return size_t(nx) * size_t(ny); } - - // local to global array indices - uint global_x(uint i) const { return x + i; } - uint global_y(uint j) const { return y + j; } - -protected: - // construction and assignment--perform shallow copy of (sub)array - explicit preview(array2* array) : array(array), x(0), y(0), nx(array->nx), ny(array->ny) {} - explicit preview(array2* array, uint x, uint y, uint nx, uint ny) : array(array), x(x), y(y), nx(nx), ny(ny) {} - preview& operator=(array2* a) - { - array = a; - x = y = 0; - nx = a->nx; - ny = a->ny; - return *this; - } - - array2* array; // underlying container - uint x, y; // offset into array - uint nx, ny; // dimensions of subarray -}; - -// generic read-only view into a rectangular subset of a 2D array -class const_view : public preview { -protected: - using preview::array; - using preview::x; - using preview::y; - using preview::nx; - using preview::ny; -public: - // construction--perform shallow copy of (sub)array - const_view(array2* array) : preview(array) {} - const_view(array2* array, uint x, uint y, uint nx, uint ny) : preview(array, x, y, nx, ny) {} - - // dimensions of (sub)array - uint size_x() const { return nx; } - uint 
size_y() const { return ny; } - - // (i, j) accessor - Scalar operator()(uint i, uint j) const { return array->get(x + i, y + j); } -}; - -// generic read-write view into a rectangular subset of a 2D array -class view : public const_view { -protected: - using preview::array; - using preview::x; - using preview::y; - using preview::nx; - using preview::ny; -public: - // construction--perform shallow copy of (sub)array - view(array2* array) : const_view(array) {} - view(array2* array, uint x, uint y, uint nx, uint ny) : const_view(array, x, y, nx, ny) {} - - // (i, j) accessor from base class - using const_view::operator(); - - // (i, j) mutator - reference operator()(uint i, uint j) { return reference(array, x + i, y + j); } -}; - -// flat view of 2D array (operator[] returns scalar) -class flat_view : public view { -protected: - using preview::array; - using preview::x; - using preview::y; - using preview::nx; - using preview::ny; -public: - // construction--perform shallow copy of (sub)array - flat_view(array2* array) : view(array) {} - flat_view(array2* array, uint x, uint y, uint nx, uint ny) : view(array, x, y, nx, ny) {} - - // convert (i, j) index to flat index - uint index(uint i, uint j) const { return i + nx * j; } - - // convert flat index to (i, j) index - void ij(uint& i, uint& j, uint index) const - { - i = index % nx; index /= nx; - j = index; - } - - // flat index accessors - Scalar operator[](uint index) const - { - uint i, j; - ij(i, j, index); - return array->get(x + i, y + j); - } - reference operator[](uint index) - { - uint i, j; - ij(i, j, index); - return reference(array, x + i, y + j); - } -}; - -// forward declaration of friends -class nested_view1; -class nested_view2; - -// nested view into a 1D rectangular subset of a 2D array -class nested_view1 : public preview { -protected: - using preview::array; - using preview::x; - using preview::y; - using preview::nx; - using preview::ny; -public: - // dimensions of (sub)array - uint size_x() 
const { return nx; } - - // [i] accessor and mutator - Scalar operator[](uint index) const { return array->get(x + index, y); } - reference operator[](uint index) { return reference(array, x + index, y); } - - // (i) accessor and mutator - Scalar operator()(uint i) const { return array->get(x + i, y); } - reference operator()(uint i) { return reference(array, x + i, y); } - -protected: - // construction--perform shallow copy of (sub)array - friend class nested_view2; - explicit nested_view1(array2* array) : preview(array) {} - explicit nested_view1(array2* array, uint x, uint y, uint nx, uint ny) : preview(array, x, y, nx, ny) {} -}; - -// nested view into a 2D rectangular subset of a 2D array -class nested_view2 : public preview { -protected: - using preview::array; - using preview::x; - using preview::y; - using preview::nx; - using preview::ny; -public: - // construction--perform shallow copy of (sub)array - nested_view2(array2* array) : preview(array) {} - nested_view2(array2* array, uint x, uint y, uint nx, uint ny) : preview(array, x, y, nx, ny) {} - - // dimensions of (sub)array - uint size_x() const { return nx; } - uint size_y() const { return ny; } - - // 1D view - nested_view1 operator[](uint index) const { return nested_view1(array, x, y + index, nx, 1); } - - // (i, j) accessor and mutator - Scalar operator()(uint i, uint j) const { return array->get(x + i, y + j); } - reference operator()(uint i, uint j) { return reference(array, x + i, y + j); } -}; - -typedef nested_view2 nested_view; - -// thread-safe read-only view of 2D (sub)array with private cache -class private_const_view : public preview { -protected: - using preview::array; - using preview::x; - using preview::y; - using preview::nx; - using preview::ny; -public: - // construction--perform shallow copy of (sub)array - private_const_view(array2* array) : - preview(array), - cache(array->cache.size()) - { - init(); - } - private_const_view(array2* array, uint x, uint y, uint nx, uint ny) : - 
preview(array, x, y, nx, ny), - cache(array->cache.size()) - { - init(); - } - - // destructor - ~private_const_view() - { - stream_close(zfp->stream); - zfp_stream_close(zfp); - } - - // dimensions of (sub)array - uint size_x() const { return nx; } - uint size_y() const { return ny; } - - // cache size in number of bytes - size_t cache_size() const { return cache.size() * sizeof(CacheLine); } - - // set minimum cache size in bytes (array dimensions must be known) - void set_cache_size(size_t csize) - { - cache.resize(array->lines(csize, nx, ny)); - } - - // empty cache without compressing modified cached blocks - void clear_cache() const { cache.clear(); } - - // (i, j) accessor - Scalar operator()(uint i, uint j) const { return get(x + i, y + j); } - -protected: - // cache line representing one block of decompressed values - class CacheLine { - public: - const Scalar& operator()(uint i, uint j) const { return a[index(i, j)]; } - Scalar& operator()(uint i, uint j) { return a[index(i, j)]; } - const Scalar* data() const { return a; } - Scalar* data() { return a; } - protected: - static uint index(uint i, uint j) { return (i & 3u) + 4 * (j & 3u); } - Scalar a[16]; - }; - - // copy private data - void init() - { - // copy compressed stream - zfp = zfp_stream_open(0); - *zfp = *array->zfp; - // copy bit stream - zfp->stream = stream_clone(array->zfp->stream); - } - - // inspector - const Scalar& get(uint i, uint j) const - { - const CacheLine* p = line(i, j); - return (*p)(i, j); - } - - // return cache line for (i, j); may require write-back and fetch - CacheLine* line(uint i, uint j) const - { - CacheLine* p = 0; - uint b = array->block(i, j); - typename Cache::Tag t = cache.access(p, b + 1, false); - uint c = t.index() - 1; - // fetch cache line; no writeback possible since view is read-only - if (c != b) - decode(b, p->data()); - return p; - } - - // decode block with given index - void decode(uint index, Scalar* block) const - { - stream_rseek(zfp->stream, index 
* array->blkbits); - Codec::decode_block_2(zfp, block, array->shape ? array->shape[index] : 0); - } - - zfp_stream* zfp; // stream of compressed blocks - mutable Cache cache; // cache of decompressed blocks -}; - -// thread-safe read-write view of private 2D (sub)array -class private_view : public private_const_view { -protected: - using preview::array; - using preview::x; - using preview::y; - using preview::nx; - using preview::ny; - using private_const_view::zfp; - using private_const_view::cache; - using private_const_view::init; - using private_const_view::decode; - class view_reference; - typedef typename private_const_view::CacheLine CacheLine; -public: - // construction--perform shallow copy of (sub)array - private_view(array2* array) : private_const_view(array) {} - private_view(array2* array, uint x, uint y, uint nx, uint ny) : private_const_view(array, x, y, nx, ny) {} - - // partition view into count block-aligned pieces, with 0 <= index < count - void partition(uint index, uint count) - { - if (nx > ny) - partition(x, nx, index, count); - else - partition(y, ny, index, count); - } - - // flush cache by compressing all modified cached blocks - void flush_cache() const - { - for (typename Cache::const_iterator p = cache.first(); p; p++) { - if (p->tag.dirty()) { - uint b = p->tag.index() - 1; - encode(b, p->line->data()); - } - cache.flush(p->line); - } - } - - // (i, j) accessor from base class - using private_const_view::operator(); - - // (i, j) mutator - view_reference operator()(uint i, uint j) { return view_reference(this, x + i, y + j); } - -protected: - class view_reference { - public: - operator Scalar() const { return view->get(i, j); } - view_reference operator=(const view_reference& r) { view->set(i, j, r.operator Scalar()); return *this; } - view_reference operator=(Scalar val) { view->set(i, j, val); return *this; } - view_reference operator+=(Scalar val) { view->add(i, j, val); return *this; } - view_reference operator-=(Scalar val) { 
view->sub(i, j, val); return *this; } - view_reference operator*=(Scalar val) { view->mul(i, j, val); return *this; } - view_reference operator/=(Scalar val) { view->div(i, j, val); return *this; } - // swap two array elements via proxy references - friend void swap(view_reference a, view_reference b) - { - Scalar x = a.operator Scalar(); - Scalar y = b.operator Scalar(); - b.operator=(x); - a.operator=(y); - } - - protected: - friend class private_view; - explicit view_reference(private_view* view, uint i, uint j) : view(view), i(i), j(j) {} - private_view* view; - uint i, j; - }; - - // block-aligned partition of [offset, offset + size): index out of count - static void partition(uint& offset, uint& size, uint index, uint count) - { - uint bmin = offset / 4; - uint bmax = (offset + size + 3) / 4; - uint xmin = std::max(offset + 0, 4 * (bmin + (bmax - bmin) * (index + 0) / count)); - uint xmax = std::min(offset + size, 4 * (bmin + (bmax - bmin) * (index + 1) / count)); - offset = xmin; - size = xmax - xmin; - } - - // mutator - void set(uint i, uint j, Scalar val) - { - CacheLine* p = line(i, j, true); - (*p)(i, j) = val; - } - - // in-place updates - void add(uint i, uint j, Scalar val) { (*line(i, j, true))(i, j) += val; } - void sub(uint i, uint j, Scalar val) { (*line(i, j, true))(i, j) -= val; } - void mul(uint i, uint j, Scalar val) { (*line(i, j, true))(i, j) *= val; } - void div(uint i, uint j, Scalar val) { (*line(i, j, true))(i, j) /= val; } - - // return cache line for (i, j); may require write-back and fetch - CacheLine* line(uint i, uint j, bool write) const - { - CacheLine* p = 0; - uint b = array->block(i, j); - typename Cache::Tag t = cache.access(p, b + 1, write); - uint c = t.index() - 1; - if (c != b) { - // write back occupied cache line if it is dirty - if (t.dirty()) - encode(c, p->data()); - decode(b, p->data()); - } - return p; - } - - // encode block with given index - void encode(uint index, const Scalar* block) const - { - 
stream_wseek(zfp->stream, index * array->blkbits); - Codec::encode_block_2(zfp, block, array->shape ? array->shape[index] : 0); - stream_flush(zfp->stream); - } -}; diff --git a/array/zfp/view3.h b/array/zfp/view3.h deleted file mode 100644 index b1bf457f..00000000 --- a/array/zfp/view3.h +++ /dev/null @@ -1,445 +0,0 @@ -// 3D array views; these classes are nested within zfp::array3 - -// abstract view of 3D array (base class) -class preview { -public: - // rate in bits per value - double rate() const { return array->rate(); } - - // dimensions of (sub)array - size_t size() const { return size_t(nx) * size_t(ny) * size_t(nz); } - - // local to global array indices - uint global_x(uint i) const { return x + i; } - uint global_y(uint j) const { return y + j; } - uint global_z(uint k) const { return z + k; } - -protected: - // construction and assignment--perform shallow copy of (sub)array - explicit preview(array3* array) : array(array), x(0), y(0), z(0), nx(array->nx), ny(array->ny), nz(array->nz) {} - explicit preview(array3* array, uint x, uint y, uint z, uint nx, uint ny, uint nz) : array(array), x(x), y(y), z(z), nx(nx), ny(ny), nz(nz) {} - preview& operator=(array3* a) - { - array = a; - x = y = z = 0; - nx = a->nx; - ny = a->ny; - nz = a->nz; - return *this; - } - - array3* array; // underlying container - uint x, y, z; // offset into array - uint nx, ny, nz; // dimensions of subarray -}; - -// generic read-only view into a rectangular subset of a 3D array -class const_view : public preview { -protected: - using preview::array; - using preview::x; - using preview::y; - using preview::z; - using preview::nx; - using preview::ny; - using preview::nz; -public: - // construction--perform shallow copy of (sub)array - const_view(array3* array) : preview(array) {} - const_view(array3* array, uint x, uint y, uint z, uint nx, uint ny, uint nz) : preview(array, x, y, z, nx, ny, nz) {} - - // dimensions of (sub)array - uint size_x() const { return nx; } - uint size_y() 
const { return ny; } - uint size_z() const { return nz; } - - // (i, j, k) accessor - Scalar operator()(uint i, uint j, uint k) const { return array->get(x + i, y + j, z + k); } -}; - -// generic read-write view into a rectangular subset of a 3D array -class view : public const_view { -protected: - using preview::array; - using preview::x; - using preview::y; - using preview::z; - using preview::nx; - using preview::ny; - using preview::nz; -public: - // construction--perform shallow copy of (sub)array - view(array3* array) : const_view(array) {} - view(array3* array, uint x, uint y, uint z, uint nx, uint ny, uint nz) : const_view(array, x, y, z, nx, ny, nz) {} - - // (i, j, k) accessor from base class - using const_view::operator(); - - // (i, j, k) mutator - reference operator()(uint i, uint j, uint k) { return reference(array, x + i, y + j, z + k); } -}; - -// flat view of 3D array (operator[] returns scalar) -class flat_view : public view { -protected: - using preview::array; - using preview::x; - using preview::y; - using preview::z; - using preview::nx; - using preview::ny; - using preview::nz; -public: - // construction--perform shallow copy of (sub)array - flat_view(array3* array) : view(array) {} - flat_view(array3* array, uint x, uint y, uint z, uint nx, uint ny, uint nz) : view(array, x, y, z, nx, ny, nz) {} - - // convert (i, j, k) index to flat index - uint index(uint i, uint j, uint k) const { return i + nx * (j + ny * k); } - - // convert flat index to (i, j, k) index - void ijk(uint& i, uint& j, uint& k, uint index) const - { - i = index % nx; index /= nx; - j = index % ny; index /= ny; - k = index; - } - - // flat index accessors - Scalar operator[](uint index) const - { - uint i, j, k; - ijk(i, j, k, index); - return array->get(x + i, y + j, z + k); - } - reference operator[](uint index) - { - uint i, j, k; - ijk(i, j, k, index); - return reference(array, x + i, y + j, z + k); - } -}; - -// forward declaration of friends -class nested_view1; 
-class nested_view2; -class nested_view3; - -// nested view into a 1D rectangular subset of a 3D array -class nested_view1 : public preview { -protected: - using preview::array; - using preview::x; - using preview::y; - using preview::z; - using preview::nx; - using preview::ny; - using preview::nz; -public: - // dimensions of (sub)array - uint size_x() const { return nx; } - - // [i] accessor and mutator - Scalar operator[](uint index) const { return array->get(x + index, y, z); } - reference operator[](uint index) { return reference(array, x + index, y, z); } - - // (i) accessor and mutator - Scalar operator()(uint i) const { return array->get(x + i, y, z); } - reference operator()(uint i) { return reference(array, x + i, y, z); } - -protected: - // construction--perform shallow copy of (sub)array - friend class nested_view2; - explicit nested_view1(array3* array) : preview(array) {} - explicit nested_view1(array3* array, uint x, uint y, uint z, uint nx, uint ny, uint nz) : preview(array, x, y, z, nx, ny, nz) {} -}; - -// nested view into a 2D rectangular subset of a 3D array -class nested_view2 : public preview { -protected: - using preview::array; - using preview::x; - using preview::y; - using preview::z; - using preview::nx; - using preview::ny; - using preview::nz; -public: - // dimensions of (sub)array - uint size_x() const { return nx; } - uint size_y() const { return ny; } - - // 1D view - nested_view1 operator[](uint index) const { return nested_view1(array, x, y + index, z, nx, 1, 1); } - - // (i, j) accessor and mutator - Scalar operator()(uint i, uint j) const { return array->get(x + i, y + j, z); } - reference operator()(uint i, uint j) { return reference(array, x + i, y + j, z); } - -protected: - // construction--perform shallow copy of (sub)array - friend class nested_view3; - explicit nested_view2(array3* array) : preview(array) {} - explicit nested_view2(array3* array, uint x, uint y, uint z, uint nx, uint ny, uint nz) : preview(array, x, y, z, 
nx, ny, nz) {} -}; - -// nested view into a 3D rectangular subset of a 3D array -class nested_view3 : public preview { -protected: - using preview::array; - using preview::x; - using preview::y; - using preview::z; - using preview::nx; - using preview::ny; - using preview::nz; -public: - // construction--perform shallow copy of (sub)array - nested_view3(array3* array) : preview(array) {} - nested_view3(array3* array, uint x, uint y, uint z, uint nx, uint ny, uint nz) : preview(array, x, y, z, nx, ny, nz) {} - - // dimensions of (sub)array - uint size_x() const { return nx; } - uint size_y() const { return ny; } - uint size_z() const { return nz; } - - // 2D view - nested_view2 operator[](uint index) const { return nested_view2(array, x, y, z + index, nx, ny, 1); } - - // (i, j, k) accessor and mutator - Scalar operator()(uint i, uint j, uint k) const { return array->get(x + i, y + j, z + k); } - reference operator()(uint i, uint j, uint k) { return reference(array, x + i, y + j, z + k); } -}; - -typedef nested_view3 nested_view; - -// thread-safe read-only view of 3D (sub)array with private cache -class private_const_view : public preview { -protected: - using preview::array; - using preview::x; - using preview::y; - using preview::z; - using preview::nx; - using preview::ny; - using preview::nz; -public: - // construction--perform shallow copy of (sub)array - private_const_view(array3* array) : - preview(array), - cache(array->cache.size()) - { - init(); - } - private_const_view(array3* array, uint x, uint y, uint z, uint nx, uint ny, uint nz) : - preview(array, x, y, z, nx, ny, nz), - cache(array->cache.size()) - { - init(); - } - - // destructor - ~private_const_view() - { - stream_close(zfp->stream); - zfp_stream_close(zfp); - } - - // dimensions of (sub)array - uint size_x() const { return nx; } - uint size_y() const { return ny; } - uint size_z() const { return nz; } - - // cache size in number of bytes - size_t cache_size() const { return cache.size() * 
sizeof(CacheLine); } - - // set minimum cache size in bytes (array dimensions must be known) - void set_cache_size(size_t csize) - { - cache.resize(array->lines(csize, nx, ny, nz)); - } - - // empty cache without compressing modified cached blocks - void clear_cache() const { cache.clear(); } - - // (i, j, k) accessor - Scalar operator()(uint i, uint j, uint k) const { return get(x + i, y + j, z + k); } - -protected: - // cache line representing one block of decompressed values - class CacheLine { - public: - const Scalar& operator()(uint i, uint j, uint k) const { return a[index(i, j, k)]; } - Scalar& operator()(uint i, uint j, uint k) { return a[index(i, j, k)]; } - const Scalar* data() const { return a; } - Scalar* data() { return a; } - protected: - static uint index(uint i, uint j, uint k) { return (i & 3u) + 4 * ((j & 3u) + 4 * (k & 3u)); } - Scalar a[64]; - }; - - // copy private data - void init() - { - // copy compressed stream - zfp = zfp_stream_open(0); - *zfp = *array->zfp; - // copy bit stream - zfp->stream = stream_clone(array->zfp->stream); - } - - // inspector - const Scalar& get(uint i, uint j, uint k) const - { - const CacheLine* p = line(i, j, k); - return (*p)(i, j, k); - } - - // return cache line for (i, j, k); may require write-back and fetch - CacheLine* line(uint i, uint j, uint k) const - { - CacheLine* p = 0; - uint b = array->block(i, j, k); - typename Cache::Tag t = cache.access(p, b + 1, false); - uint c = t.index() - 1; - // fetch cache line; no writeback possible since view is read-only - if (c != b) - decode(b, p->data()); - return p; - } - - // decode block with given index - void decode(uint index, Scalar* block) const - { - stream_rseek(zfp->stream, index * array->blkbits); - Codec::decode_block_3(zfp, block, array->shape ? 
array->shape[index] : 0); - } - - zfp_stream* zfp; // stream of compressed blocks - mutable Cache cache; // cache of decompressed blocks -}; - -// thread-safe read-write view of private 3D (sub)array -class private_view : public private_const_view { -protected: - using preview::array; - using preview::x; - using preview::y; - using preview::z; - using preview::nx; - using preview::ny; - using preview::nz; - using private_const_view::zfp; - using private_const_view::cache; - using private_const_view::init; - using private_const_view::decode; - class view_reference; - typedef typename private_const_view::CacheLine CacheLine; -public: - // construction--perform shallow copy of (sub)array - private_view(array3* array) : private_const_view(array) {} - private_view(array3* array, uint x, uint y, uint z, uint nx, uint ny, uint nz) : private_const_view(array, x, y, z, nx, ny, nz) {} - - // partition view into count block-aligned pieces, with 0 <= index < count - void partition(uint index, uint count) - { - if (nx > std::max(ny, nz)) - partition(x, nx, index, count); - else if (ny > std::max(nx, nz)) - partition(y, ny, index, count); - else - partition(z, nz, index, count); - } - - // flush cache by compressing all modified cached blocks - void flush_cache() const - { - for (typename Cache::const_iterator p = cache.first(); p; p++) { - if (p->tag.dirty()) { - uint b = p->tag.index() - 1; - encode(b, p->line->data()); - } - cache.flush(p->line); - } - } - - // (i, j, k) accessor from base class - using private_const_view::operator(); - - // (i, j, k) mutator - view_reference operator()(uint i, uint j, uint k) { return view_reference(this, x + i, y + j, z + k); } - -protected: - class view_reference { - public: - operator Scalar() const { return view->get(i, j, k); } - view_reference operator=(const view_reference& r) { view->set(i, j, k, r.operator Scalar()); return *this; } - view_reference operator=(Scalar val) { view->set(i, j, k, val); return *this; } - view_reference 
operator+=(Scalar val) { view->add(i, j, k, val); return *this; } - view_reference operator-=(Scalar val) { view->sub(i, j, k, val); return *this; } - view_reference operator*=(Scalar val) { view->mul(i, j, k, val); return *this; } - view_reference operator/=(Scalar val) { view->div(i, j, k, val); return *this; } - // swap two array elements via proxy references - friend void swap(view_reference a, view_reference b) - { - Scalar x = a.operator Scalar(); - Scalar y = b.operator Scalar(); - b.operator=(x); - a.operator=(y); - } - - protected: - friend class private_view; - explicit view_reference(private_view* view, uint i, uint j, uint k) : view(view), i(i), j(j), k(k) {} - private_view* view; - uint i, j, k; - }; - - // block-aligned partition of [offset, offset + size): index out of count - static void partition(uint& offset, uint& size, uint index, uint count) - { - uint bmin = offset / 4; - uint bmax = (offset + size + 3) / 4; - uint xmin = std::max(offset + 0, 4 * (bmin + (bmax - bmin) * (index + 0) / count)); - uint xmax = std::min(offset + size, 4 * (bmin + (bmax - bmin) * (index + 1) / count)); - offset = xmin; - size = xmax - xmin; - } - - // mutator - void set(uint i, uint j, uint k, Scalar val) - { - CacheLine* p = line(i, j, k, true); - (*p)(i, j, k) = val; - } - - // in-place updates - void add(uint i, uint j, uint k, Scalar val) { (*line(i, j, k, true))(i, j, k) += val; } - void sub(uint i, uint j, uint k, Scalar val) { (*line(i, j, k, true))(i, j, k) -= val; } - void mul(uint i, uint j, uint k, Scalar val) { (*line(i, j, k, true))(i, j, k) *= val; } - void div(uint i, uint j, uint k, Scalar val) { (*line(i, j, k, true))(i, j, k) /= val; } - - // return cache line for (i, j, k); may require write-back and fetch - CacheLine* line(uint i, uint j, uint k, bool write) const - { - CacheLine* p = 0; - uint b = array->block(i, j, k); - typename Cache::Tag t = cache.access(p, b + 1, write); - uint c = t.index() - 1; - if (c != b) { - // write back occupied 
cache line if it is dirty - if (t.dirty()) - encode(c, p->data()); - decode(b, p->data()); - } - return p; - } - - // encode block with given index - void encode(uint index, const Scalar* block) const - { - stream_wseek(zfp->stream, index * array->blkbits); - Codec::encode_block_3(zfp, block, array->shape ? array->shape[index] : 0); - stream_flush(zfp->stream); - } -}; diff --git a/array/zfparray.h b/array/zfparray.h deleted file mode 100644 index 2ddbde42..00000000 --- a/array/zfparray.h +++ /dev/null @@ -1,286 +0,0 @@ -#ifndef ZFP_ARRAY_H -#define ZFP_ARRAY_H - -#include -#include -#include -#include -#include - -#include "zfp.h" -#include "zfp/memory.h" - -// all undefined at end -#define DIV_ROUND_UP(x, y) (((x) + (y) - 1) / (y)) -#define BITS_TO_BYTES(x) DIV_ROUND_UP(x, CHAR_BIT) - -#define ZFP_HEADER_SIZE_BITS (ZFP_MAGIC_BITS + ZFP_META_BITS + ZFP_MODE_SHORT_BITS) - -namespace zfp { - -// abstract base class for compressed array of scalars -class array { -public: - #include "zfp/header.h" - - static zfp::array* construct(const zfp::array::header& header, const uchar* buffer = 0, size_t buffer_size_bytes = 0); - -protected: - // default constructor - array() : - dims(0), type(zfp_type_none), - nx(0), ny(0), nz(0), - bx(0), by(0), bz(0), - blocks(0), blkbits(0), - bytes(0), data(0), - zfp(0), - shape(0) - {} - - // generic array with 'dims' dimensions and scalar type 'type' - array(uint dims, zfp_type type) : - dims(dims), type(type), - nx(0), ny(0), nz(0), - bx(0), by(0), bz(0), - blocks(0), blkbits(0), - bytes(0), data(0), - zfp(zfp_stream_open(0)), - shape(0) - {} - - // constructor, from previously-serialized compressed array - array(uint dims, zfp_type type, const zfp::array::header& h, size_t expected_buffer_size_bytes) : - dims(dims), type(type), - nx(0), ny(0), nz(0), - bx(0), by(0), bz(0), - blocks(0), blkbits(0), - bytes(0), data(0), - zfp(zfp_stream_open(0)), - shape(0) - { - // read header to populate member variables associated with zfp_stream - 
try { - read_from_header(h); - } catch (zfp::array::header::exception const &) { - zfp_stream_close(zfp); - throw; - } - - if (expected_buffer_size_bytes && !is_valid_buffer_size(zfp, nx, ny, nz, expected_buffer_size_bytes)) { - zfp_stream_close(zfp); - throw zfp::array::header::exception("ZFP header expects a longer buffer than what was passed in."); - } - } - - // copy constructor--performs a deep copy - array(const array& a) : - data(0), - zfp(0), - shape(0) - { - deep_copy(a); - } - - // assignment operator--performs a deep copy - array& operator=(const array& a) - { - deep_copy(a); - return *this; - } - -public: - // public virtual destructor (can delete array through base class pointer) - virtual ~array() - { - free(); - zfp_stream_close(zfp); - } - - // rate in bits per value - double rate() const { return double(blkbits) / block_size(); } - - // set compression rate in bits per value - double set_rate(double rate) - { - rate = zfp_stream_set_rate(zfp, rate, type, dims, 1); - blkbits = zfp->maxbits; - alloc(); - return rate; - } - - // empty cache without compressing modified cached blocks - virtual void clear_cache() const = 0; - - // flush cache by compressing all modified cached blocks - virtual void flush_cache() const = 0; - - // number of bytes of compressed data - size_t compressed_size() const { return bytes; } - - // pointer to compressed data for read or write access - uchar* compressed_data() const - { - // first write back any modified cached data - flush_cache(); - return data; - } - - // dimensionality - uint dimensionality() const { return dims; } - - // underlying scalar type - zfp_type scalar_type() const { return type; } - - // write header with latest metadata - zfp::array::header get_header() const - { - // intermediate buffer needed (bitstream accesses multiples of wordsize) - AlignedBufferHandle abh; - DualBitstreamHandle dbh(zfp, abh); - - ZfpFieldHandle zfh(type, nx, ny, nz); - - // avoid long header (alignment issue) - if 
(zfp_stream_mode(zfp) > ZFP_MODE_SHORT_MAX) - throw zfp::array::header::exception("ZFP compressed arrays only support short headers at this time."); - - if (!zfp_write_header(zfp, zfh.field, ZFP_HEADER_FULL)) - throw zfp::array::header::exception("ZFP could not write a header to buffer."); - stream_flush(zfp->stream); - - zfp::array::header h; - abh.copy_to_header(&h); - - return h; - } - -private: - // private members used when reading/writing headers - #include "zfp/headerHelpers.h" - -protected: - // number of values per block - uint block_size() const { return 1u << (2 * dims); } - - // allocate memory for compressed data - void alloc(bool clear = true) - { - bytes = blocks * blkbits / CHAR_BIT; - zfp::reallocate_aligned(data, bytes, 0x100u); - if (clear) - std::fill(data, data + bytes, 0); - stream_close(zfp->stream); - zfp_stream_set_bit_stream(zfp, stream_open(data, bytes)); - clear_cache(); - } - - // free memory associated with compressed data - void free() - { - nx = ny = nz = 0; - bx = by = bz = 0; - blocks = 0; - stream_close(zfp->stream); - zfp_stream_set_bit_stream(zfp, 0); - bytes = 0; - zfp::deallocate_aligned(data); - data = 0; - zfp::deallocate(shape); - shape = 0; - } - - // perform a deep copy - void deep_copy(const array& a) - { - // copy metadata - dims = a.dims; - type = a.type; - nx = a.nx; - ny = a.ny; - nz = a.nz; - bx = a.bx; - by = a.by; - bz = a.bz; - blocks = a.blocks; - blkbits = a.blkbits; - bytes = a.bytes; - - // copy dynamically allocated data - zfp::clone_aligned(data, a.data, bytes, 0x100u); - if (zfp) { - if (zfp->stream) - stream_close(zfp->stream); - zfp_stream_close(zfp); - } - zfp = zfp_stream_open(0); - *zfp = *a.zfp; - zfp_stream_set_bit_stream(zfp, stream_open(data, bytes)); - zfp::clone(shape, a.shape, blocks); - } - - // attempt reading header from zfp::array::header - // and verify header contents (throws exceptions upon failure) - void read_from_header(const zfp::array::header& h) - { - // copy header into aligned 
buffer - AlignedBufferHandle abh(&h); - DualBitstreamHandle dbh(zfp, abh); - ZfpFieldHandle zfh; - - // read header to populate member variables associated with zfp_stream - size_t readbits = zfp_read_header(zfp, zfh.field, ZFP_HEADER_FULL); - if (!readbits) - throw zfp::array::header::exception("Invalid ZFP header."); - else if (readbits != ZFP_HEADER_SIZE_BITS) - throw zfp::array::header::exception("ZFP compressed arrays only support short headers at this time."); - - // verify metadata on zfp_field match that for this object - std::string err_msg = ""; - if (type != zfp_field_type(zfh.field)) - zfp::array::header::concat_sentence(err_msg, "ZFP header specified an underlying scalar type different than that for this object."); - - if (dims != zfp_field_dimensionality(zfh.field)) - zfp::array::header::concat_sentence(err_msg, "ZFP header specified a dimensionality different than that for this object."); - - verify_header_contents(zfp, zfh.field, err_msg); - - if (!err_msg.empty()) - throw zfp::array::header::exception(err_msg); - - // set class variables - nx = zfh.field->nx; - ny = zfh.field->ny; - nz = zfh.field->nz; - type = zfh.field->type; - blkbits = zfp->maxbits; - } - - // default number of cache lines for array with n blocks - static uint lines(size_t n) - { - // compute m = O(sqrt(n)) - size_t m; - for (m = 1; m * m < n; m *= 2); - return static_cast(m); - } - - uint dims; // array dimensionality (1, 2, or 3) - zfp_type type; // scalar type - uint nx, ny, nz; // array dimensions - uint bx, by, bz; // array dimensions in number of blocks - uint blocks; // number of blocks - size_t blkbits; // number of bits per compressed block - size_t bytes; // total bytes of compressed data - mutable uchar* data; // pointer to compressed data - zfp_stream* zfp; // compressed stream of blocks - uchar* shape; // precomputed block dimensions (or null if uniform) -}; - -#undef DIV_ROUND_UP -#undef BITS_TO_BYTES - -#undef ZFP_HEADER_SIZE_BITS - -} - -#endif diff --git 
a/array/zfparray1.h b/array/zfparray1.h deleted file mode 100644 index f95b430d..00000000 --- a/array/zfparray1.h +++ /dev/null @@ -1,297 +0,0 @@ -#ifndef ZFP_ARRAY1_H -#define ZFP_ARRAY1_H - -#include -#include -#include -#include "zfparray.h" -#include "zfpcodec.h" -#include "zfp/cache.h" - -namespace zfp { - -// compressed 1D array of scalars -template < typename Scalar, class Codec = zfp::codec > -class array1 : public array { -public: - // forward declarations - class reference; - class pointer; - class iterator; - class view; - #include "zfp/reference1.h" - #include "zfp/pointer1.h" - #include "zfp/iterator1.h" - #include "zfp/view1.h" - - // default constructor - array1() : array(1, Codec::type) {} - - // constructor of n-sample array using rate bits per value, at least - // csize bytes of cache, and optionally initialized from flat array p - array1(uint n, double rate, const Scalar* p = 0, size_t csize = 0) : - array(1, Codec::type), - cache(lines(csize, n)) - { - set_rate(rate); - resize(n, p == 0); - if (p) - set(p); - } - - // constructor, from previously-serialized compressed array - array1(const zfp::array::header& h, const uchar* buffer = 0, size_t buffer_size_bytes = 0) : - array(1, Codec::type, h, buffer_size_bytes) - { - resize(nx, false); - if (buffer) - memcpy(data, buffer, bytes); - } - - // copy constructor--performs a deep copy - array1(const array1& a) : - array() - { - deep_copy(a); - } - - // construction from view--perform deep copy of (sub)array - template - array1(const View& v) : - array(1, Codec::type), - cache(lines(0, v.size_x())) - { - set_rate(v.rate()); - resize(v.size_x(), true); - // initialize array in its preferred order - for (iterator it = begin(); it != end(); ++it) - *it = v(it.i()); - } - - // virtual destructor - virtual ~array1() {} - - // assignment operator--performs a deep copy - array1& operator=(const array1& a) - { - if (this != &a) - deep_copy(a); - return *this; - } - - // total number of elements in array - 
size_t size() const { return size_t(nx); } - - // array dimensions - uint size_x() const { return nx; } - - // resize the array (all previously stored data will be lost) - void resize(uint n, bool clear = true) - { - if (n == 0) - free(); - else { - nx = n; - bx = (nx + 3) / 4; - blocks = bx; - alloc(clear); - - // precompute block dimensions - zfp::deallocate(shape); - if (nx & 3u) { - shape = (uchar*)zfp::allocate(blocks); - uchar* p = shape; - for (uint i = 0; i < bx; i++) - *p++ = (i == bx - 1 ? -nx & 3u : 0); - } - else - shape = 0; - } - } - - // cache size in number of bytes - size_t cache_size() const { return cache.size() * sizeof(CacheLine); } - - // set minimum cache size in bytes (array dimensions must be known) - void set_cache_size(size_t csize) - { - flush_cache(); - cache.resize(lines(csize, nx)); - } - - // empty cache without compressing modified cached blocks - void clear_cache() const { cache.clear(); } - - // flush cache by compressing all modified cached blocks - void flush_cache() const - { - for (typename zfp::Cache::const_iterator p = cache.first(); p; p++) { - if (p->tag.dirty()) { - uint b = p->tag.index() - 1; - encode(b, p->line->data()); - } - cache.flush(p->line); - } - } - - // decompress array and store at p - void get(Scalar* p) const - { - uint b = 0; - for (uint i = 0; i < bx; i++, p += 4, b++) { - const CacheLine* line = cache.lookup(b + 1); - if (line) - line->get(p, 1, shape ? 
shape[b] : 0); - else - decode(b, p, 1); - } - } - - // initialize array by copying and compressing data stored at p - void set(const Scalar* p) - { - uint b = 0; - for (uint i = 0; i < bx; i++, b++, p += 4) - encode(b, p, 1); - cache.clear(); - } - - // (i) accessors - Scalar operator()(uint i) const { return get(i); } - reference operator()(uint i) { return reference(this, i); } - - // flat index accessors - Scalar operator[](uint index) const { return get(index); } - reference operator[](uint index) { return reference(this, index); } - - // random access iterators - iterator begin() { return iterator(this, 0); } - iterator end() { return iterator(this, nx); } - -protected: - // cache line representing one block of decompressed values - class CacheLine { - public: - Scalar operator()(uint i) const { return a[index(i)]; } - Scalar& operator()(uint i) { return a[index(i)]; } - const Scalar* data() const { return a; } - Scalar* data() { return a; } - // copy cache line - void get(Scalar* p, int sx) const - { - const Scalar* q = a; - for (uint x = 0; x < 4; x++, p += sx, q++) - *p = *q; - } - void get(Scalar* p, int sx, uint shape) const - { - if (!shape) - get(p, sx); - else { - // determine block dimensions - uint nx = 4 - (shape & 3u); shape >>= 2; - const Scalar* q = a; - for (uint x = 0; x < nx; x++, p += sx, q++) - *p = *q; - } - } - protected: - static uint index(uint i) { return i & 3u; } - Scalar a[4]; - }; - - // perform a deep copy - void deep_copy(const array1& a) - { - // copy base class members - array::deep_copy(a); - // copy cache - cache = a.cache; - } - - // inspector - Scalar get(uint i) const - { - const CacheLine* p = line(i, false); - return (*p)(i); - } - - // mutator - void set(uint i, Scalar val) - { - CacheLine* p = line(i, true); - (*p)(i) = val; - } - - // in-place updates - void add(uint i, Scalar val) { (*line(i, true))(i) += val; } - void sub(uint i, Scalar val) { (*line(i, true))(i) -= val; } - void mul(uint i, Scalar val) { (*line(i, 
true))(i) *= val; } - void div(uint i, Scalar val) { (*line(i, true))(i) /= val; } - - // return cache line for i; may require write-back and fetch - CacheLine* line(uint i, bool write) const - { - CacheLine* p = 0; - uint b = block(i); - typename zfp::Cache::Tag t = cache.access(p, b + 1, write); - uint c = t.index() - 1; - if (c != b) { - // write back occupied cache line if it is dirty - if (t.dirty()) - encode(c, p->data()); - // fetch cache line - decode(b, p->data()); - } - return p; - } - - // encode block with given index - void encode(uint index, const Scalar* block) const - { - stream_wseek(zfp->stream, index * blkbits); - Codec::encode_block_1(zfp, block, shape ? shape[index] : 0); - stream_flush(zfp->stream); - } - - // encode block with given index from strided array - void encode(uint index, const Scalar* p, int sx) const - { - stream_wseek(zfp->stream, index * blkbits); - Codec::encode_block_strided_1(zfp, p, shape ? shape[index] : 0, sx); - stream_flush(zfp->stream); - } - - // decode block with given index - void decode(uint index, Scalar* block) const - { - stream_rseek(zfp->stream, index * blkbits); - Codec::decode_block_1(zfp, block, shape ? shape[index] : 0); - } - - // decode block with given index to strided array - void decode(uint index, Scalar* p, int sx) const - { - stream_rseek(zfp->stream, index * blkbits); - Codec::decode_block_strided_1(zfp, p, shape ? shape[index] : 0, sx); - } - - // block index for i - static uint block(uint i) { return i / 4; } - - // number of cache lines corresponding to size (or suggested size if zero) - static uint lines(size_t size, uint n) - { - n = size ? 
(size + sizeof(CacheLine) - 1) / sizeof(CacheLine) : array::lines(size_t((n + 3) / 4)); - return std::max(n, 1u); - } - - mutable zfp::Cache<CacheLine> cache; // cache of decompressed blocks -}; - -typedef array1<float> array1f; -typedef array1<double> array1d; - -} - -#endif diff --git a/array/zfparray2.h b/array/zfparray2.h deleted file mode 100644 index 73dfaa8d..00000000 --- a/array/zfparray2.h +++ /dev/null @@ -1,324 +0,0 @@ -#ifndef ZFP_ARRAY2_H -#define ZFP_ARRAY2_H - -#include -#include -#include -#include "zfparray.h" -#include "zfpcodec.h" -#include "zfp/cache.h" - -namespace zfp { - -// compressed 2D array of scalars -template < typename Scalar, class Codec = zfp::codec<Scalar> > -class array2 : public array {public: - // forward declarations - class reference; - class pointer; - class iterator; - class view; - #include "zfp/reference2.h" - #include "zfp/pointer2.h" - #include "zfp/iterator2.h" - #include "zfp/view2.h" - - // default constructor - array2() : array(2, Codec::type) {} - - // constructor of nx * ny array using rate bits per value, at least - // csize bytes of cache, and optionally initialized from flat array p - array2(uint nx, uint ny, double rate, const Scalar* p = 0, size_t csize = 0) : - array(2, Codec::type), - cache(lines(csize, nx, ny)) - { - set_rate(rate); - resize(nx, ny, p == 0); - if (p) - set(p); - } - - // constructor, from previously-serialized compressed array - array2(const zfp::array::header& h, const uchar* buffer = 0, size_t buffer_size_bytes = 0) : - array(2, Codec::type, h, buffer_size_bytes) - { - resize(nx, ny, false); - if (buffer) - memcpy(data, buffer, bytes); - } - - // copy constructor--performs a deep copy - array2(const array2& a) : - array() - { - deep_copy(a); - } - - // construction from view--perform deep copy of (sub)array - template <class View> - array2(const View& v) : - array(2, Codec::type), - cache(lines(0, v.size_x(), v.size_y())) - { - set_rate(v.rate()); - resize(v.size_x(), v.size_y(), true); - // initialize array in its preferred order - 
for (iterator it = begin(); it != end(); ++it) - *it = v(it.i(), it.j()); - } - - // virtual destructor - virtual ~array2() {} - - // assignment operator--performs a deep copy - array2& operator=(const array2& a) - { - if (this != &a) - deep_copy(a); - return *this; - } - - // total number of elements in array - size_t size() const { return size_t(nx) * size_t(ny); } - - // array dimensions - uint size_x() const { return nx; } - uint size_y() const { return ny; } - - // resize the array (all previously stored data will be lost) - void resize(uint nx, uint ny, bool clear = true) - { - if (nx == 0 || ny == 0) - free(); - else { - this->nx = nx; - this->ny = ny; - bx = (nx + 3) / 4; - by = (ny + 3) / 4; - blocks = bx * by; - alloc(clear); - - // precompute block dimensions - zfp::deallocate(shape); - if ((nx | ny) & 3u) { - shape = (uchar*)zfp::allocate(blocks); - uchar* p = shape; - for (uint j = 0; j < by; j++) - for (uint i = 0; i < bx; i++) - *p++ = (i == bx - 1 ? -nx & 3u : 0) + 4 * (j == by - 1 ? 
-ny & 3u : 0); - } - else - shape = 0; - } - } - - // cache size in number of bytes - size_t cache_size() const { return cache.size() * sizeof(CacheLine); } - - // set minimum cache size in bytes (array dimensions must be known) - void set_cache_size(size_t csize) - { - flush_cache(); - cache.resize(lines(csize, nx, ny)); - } - - // empty cache without compressing modified cached blocks - void clear_cache() const { cache.clear(); } - - // flush cache by compressing all modified cached blocks - void flush_cache() const - { - for (typename zfp::Cache::const_iterator p = cache.first(); p; p++) { - if (p->tag.dirty()) { - uint b = p->tag.index() - 1; - encode(b, p->line->data()); - } - cache.flush(p->line); - } - } - - // decompress array and store at p - void get(Scalar* p) const - { - uint b = 0; - for (uint j = 0; j < by; j++, p += 4 * (nx - bx)) - for (uint i = 0; i < bx; i++, p += 4, b++) { - const CacheLine* line = cache.lookup(b + 1); - if (line) - line->get(p, 1, nx, shape ? shape[b] : 0); - else - decode(b, p, 1, nx); - } - } - - // initialize array by copying and compressing data stored at p - void set(const Scalar* p) - { - uint b = 0; - for (uint j = 0; j < by; j++, p += 4 * (nx - bx)) - for (uint i = 0; i < bx; i++, p += 4, b++) - encode(b, p, 1, nx); - cache.clear(); - } - - // (i, j) accessors - Scalar operator()(uint i, uint j) const { return get(i, j); } - reference operator()(uint i, uint j) { return reference(this, i, j); } - - // flat index accessors - Scalar operator[](uint index) const - { - uint i, j; - ij(i, j, index); - return get(i, j); - } - reference operator[](uint index) - { - uint i, j; - ij(i, j, index); - return reference(this, i, j); - } - - // sequential iterators - iterator begin() { return iterator(this, 0, 0); } - iterator end() { return iterator(this, 0, ny); } - -protected: - // cache line representing one block of decompressed values - class CacheLine { - public: - Scalar operator()(uint i, uint j) const { return a[index(i, j)]; 
} - Scalar& operator()(uint i, uint j) { return a[index(i, j)]; } - const Scalar* data() const { return a; } - Scalar* data() { return a; } - // copy cache line - void get(Scalar* p, int sx, int sy) const - { - const Scalar* q = a; - for (uint y = 0; y < 4; y++, p += sy - 4 * sx) - for (uint x = 0; x < 4; x++, p += sx, q++) - *p = *q; - } - void get(Scalar* p, int sx, int sy, uint shape) const - { - if (!shape) - get(p, sx, sy); - else { - // determine block dimensions - uint nx = 4 - (shape & 3u); shape >>= 2; - uint ny = 4 - (shape & 3u); shape >>= 2; - const Scalar* q = a; - for (uint y = 0; y < ny; y++, p += sy - (ptrdiff_t)nx * sx, q += 4 - nx) - for (uint x = 0; x < nx; x++, p += sx, q++) - *p = *q; - } - } - protected: - static uint index(uint i, uint j) { return (i & 3u) + 4 * (j & 3u); } - Scalar a[16]; - }; - - // perform a deep copy - void deep_copy(const array2& a) - { - // copy base class members - array::deep_copy(a); - // copy cache - cache = a.cache; - } - - // inspector - Scalar get(uint i, uint j) const - { - const CacheLine* p = line(i, j, false); - return (*p)(i, j); - } - - // mutator - void set(uint i, uint j, Scalar val) - { - CacheLine* p = line(i, j, true); - (*p)(i, j) = val; - } - - // in-place updates - void add(uint i, uint j, Scalar val) { (*line(i, j, true))(i, j) += val; } - void sub(uint i, uint j, Scalar val) { (*line(i, j, true))(i, j) -= val; } - void mul(uint i, uint j, Scalar val) { (*line(i, j, true))(i, j) *= val; } - void div(uint i, uint j, Scalar val) { (*line(i, j, true))(i, j) /= val; } - - // return cache line for (i, j); may require write-back and fetch - CacheLine* line(uint i, uint j, bool write) const - { - CacheLine* p = 0; - uint b = block(i, j); - typename zfp::Cache::Tag t = cache.access(p, b + 1, write); - uint c = t.index() - 1; - if (c != b) { - // write back occupied cache line if it is dirty - if (t.dirty()) - encode(c, p->data()); - // fetch cache line - decode(b, p->data()); - } - return p; - } - - // 
encode block with given index - void encode(uint index, const Scalar* block) const - { - stream_wseek(zfp->stream, index * blkbits); - Codec::encode_block_2(zfp, block, shape ? shape[index] : 0); - stream_flush(zfp->stream); - } - - // encode block with given index from strided array - void encode(uint index, const Scalar* p, int sx, int sy) const - { - stream_wseek(zfp->stream, index * blkbits); - Codec::encode_block_strided_2(zfp, p, shape ? shape[index] : 0, sx, sy); - stream_flush(zfp->stream); - } - - // decode block with given index - void decode(uint index, Scalar* block) const - { - stream_rseek(zfp->stream, index * blkbits); - Codec::decode_block_2(zfp, block, shape ? shape[index] : 0); - } - - // decode block with given index to strided array - void decode(uint index, Scalar* p, int sx, int sy) const - { - stream_rseek(zfp->stream, index * blkbits); - Codec::decode_block_strided_2(zfp, p, shape ? shape[index] : 0, sx, sy); - } - - // block index for (i, j) - uint block(uint i, uint j) const { return (i / 4) + bx * (j / 4); } - - // convert flat index to (i, j) - void ij(uint& i, uint& j, uint index) const - { - i = index % nx; - index /= nx; - j = index; - } - - // number of cache lines corresponding to size (or suggested size if zero) - static uint lines(size_t size, uint nx, uint ny) - { - uint n = size ? 
uint((size + sizeof(CacheLine) - 1) / sizeof(CacheLine)) : array::lines(size_t((nx + 3) / 4) * size_t((ny + 3) / 4)); - return std::max(n, 1u); - } - - mutable zfp::Cache<CacheLine> cache; // cache of decompressed blocks -}; - -typedef array2<float> array2f; -typedef array2<double> array2d; - -} - -#endif diff --git a/array/zfparray3.h b/array/zfparray3.h deleted file mode 100644 index f0f42e88..00000000 --- a/array/zfparray3.h +++ /dev/null @@ -1,338 +0,0 @@ -#ifndef ZFP_ARRAY3_H -#define ZFP_ARRAY3_H - -#include -#include -#include -#include "zfparray.h" -#include "zfpcodec.h" -#include "zfp/cache.h" - -namespace zfp { - -// compressed 3D array of scalars -template < typename Scalar, class Codec = zfp::codec<Scalar> > -class array3 : public array {public: - // forward declarations - class reference; - class pointer; - class iterator; - class view; - #include "zfp/reference3.h" - #include "zfp/pointer3.h" - #include "zfp/iterator3.h" - #include "zfp/view3.h" - - // default constructor - array3() : array(3, Codec::type) {} - - // constructor of nx * ny * nz array using rate bits per value, at least - // csize bytes of cache, and optionally initialized from flat array p - array3(uint nx, uint ny, uint nz, double rate, const Scalar* p = 0, size_t csize = 0) : - array(3, Codec::type), - cache(lines(csize, nx, ny, nz)) - { - set_rate(rate); - resize(nx, ny, nz, p == 0); - if (p) - set(p); - } - - // constructor, from previously-serialized compressed array - array3(const zfp::array::header& h, const uchar* buffer = 0, size_t buffer_size_bytes = 0) : - array(3, Codec::type, h, buffer_size_bytes) - { - resize(nx, ny, nz, false); - if (buffer) - memcpy(data, buffer, bytes); - } - - // copy constructor--performs a deep copy - array3(const array3& a) : - array() - { - deep_copy(a); - } - - // construction from view--perform deep copy of (sub)array - template <class View> - array3(const View& v) : - array(3, Codec::type), - cache(lines(0, v.size_x(), v.size_y(), v.size_z())) - { - set_rate(v.rate()); - 
resize(v.size_x(), v.size_y(), v.size_z(), true); - // initialize array in its preferred order - for (iterator it = begin(); it != end(); ++it) - *it = v(it.i(), it.j(), it.k()); - } - - // virtual destructor - virtual ~array3() {} - - // assignment operator--performs a deep copy - array3& operator=(const array3& a) - { - if (this != &a) - deep_copy(a); - return *this; - } - - // total number of elements in array - size_t size() const { return size_t(nx) * size_t(ny) * size_t(nz); } - - // array dimensions - uint size_x() const { return nx; } - uint size_y() const { return ny; } - uint size_z() const { return nz; } - - // resize the array (all previously stored data will be lost) - void resize(uint nx, uint ny, uint nz, bool clear = true) - { - if (nx == 0 || ny == 0 || nz == 0) - free(); - else { - this->nx = nx; - this->ny = ny; - this->nz = nz; - bx = (nx + 3) / 4; - by = (ny + 3) / 4; - bz = (nz + 3) / 4; - blocks = bx * by * bz; - alloc(clear); - - // precompute block dimensions - zfp::deallocate(shape); - if ((nx | ny | nz) & 3u) { - shape = (uchar*)zfp::allocate(blocks); - uchar* p = shape; - for (uint k = 0; k < bz; k++) - for (uint j = 0; j < by; j++) - for (uint i = 0; i < bx; i++) - *p++ = (i == bx - 1 ? -nx & 3u : 0) + 4 * ((j == by - 1 ? -ny & 3u : 0) + 4 * (k == bz - 1 ? 
-nz & 3u : 0)); - } - else - shape = 0; - } - } - - // cache size in number of bytes - size_t cache_size() const { return cache.size() * sizeof(CacheLine); } - - // set minimum cache size in bytes (array dimensions must be known) - void set_cache_size(size_t csize) - { - flush_cache(); - cache.resize(lines(csize, nx, ny, nz)); - } - - // empty cache without compressing modified cached blocks - void clear_cache() const { cache.clear(); } - - // flush cache by compressing all modified cached blocks - void flush_cache() const - { - for (typename zfp::Cache::const_iterator p = cache.first(); p; p++) { - if (p->tag.dirty()) { - uint b = p->tag.index() - 1; - encode(b, p->line->data()); - } - cache.flush(p->line); - } - } - - // decompress array and store at p - void get(Scalar* p) const - { - uint b = 0; - for (uint k = 0; k < bz; k++, p += 4 * nx * (ny - by)) - for (uint j = 0; j < by; j++, p += 4 * (nx - bx)) - for (uint i = 0; i < bx; i++, p += 4, b++) { - const CacheLine* line = cache.lookup(b + 1); - if (line) - line->get(p, 1, nx, nx * ny, shape ? 
shape[b] : 0); - else - decode(b, p, 1, nx, nx * ny); - } - } - - // initialize array by copying and compressing data stored at p - void set(const Scalar* p) - { - uint b = 0; - for (uint k = 0; k < bz; k++, p += 4 * nx * (ny - by)) - for (uint j = 0; j < by; j++, p += 4 * (nx - bx)) - for (uint i = 0; i < bx; i++, p += 4, b++) - encode(b, p, 1, nx, nx * ny); - cache.clear(); - } - - // (i, j, k) accessors - Scalar operator()(uint i, uint j, uint k) const { return get(i, j, k); } - reference operator()(uint i, uint j, uint k) { return reference(this, i, j, k); } - - // flat index corresponding to (i, j, k) - uint index(uint i, uint j, uint k) const { return i + nx * (j + ny * k); } - - // flat index accessors - Scalar operator[](uint index) const - { - uint i, j, k; - ijk(i, j, k, index); - return get(i, j, k); - } - reference operator[](uint index) - { - uint i, j, k; - ijk(i, j, k, index); - return reference(this, i, j, k); - } - - // sequential iterators - iterator begin() { return iterator(this, 0, 0, 0); } - iterator end() { return iterator(this, 0, 0, nz); } - -protected: - // cache line representing one block of decompressed values - class CacheLine { - public: - Scalar operator()(uint i, uint j, uint k) const { return a[index(i, j, k)]; } - Scalar& operator()(uint i, uint j, uint k) { return a[index(i, j, k)]; } - const Scalar* data() const { return a; } - Scalar* data() { return a; } - // copy cache line - void get(Scalar* p, int sx, int sy, int sz) const - { - const Scalar* q = a; - for (uint z = 0; z < 4; z++, p += sz - 4 * sy) - for (uint y = 0; y < 4; y++, p += sy - 4 * sx) - for (uint x = 0; x < 4; x++, p += sx, q++) - *p = *q; - } - void get(Scalar* p, int sx, int sy, int sz, uint shape) const - { - if (!shape) - get(p, sx, sy, sz); - else { - // determine block dimensions - uint nx = 4 - (shape & 3u); shape >>= 2; - uint ny = 4 - (shape & 3u); shape >>= 2; - uint nz = 4 - (shape & 3u); shape >>= 2; - const Scalar* q = a; - for (uint z = 0; z < nz; 
z++, p += sz - (ptrdiff_t)ny * sy, q += 16 - 4 * ny) - for (uint y = 0; y < ny; y++, p += sy - (ptrdiff_t)nx * sx, q += 4 - nx) - for (uint x = 0; x < nx; x++, p += sx, q++) - *p = *q; - } - } - protected: - static uint index(uint i, uint j, uint k) { return (i & 3u) + 4 * ((j & 3u) + 4 * (k & 3u)); } - Scalar a[64]; - }; - - // perform a deep copy - void deep_copy(const array3& a) - { - // copy base class members - array::deep_copy(a); - // copy cache - cache = a.cache; - } - - // inspector - Scalar get(uint i, uint j, uint k) const - { - const CacheLine* p = line(i, j, k, false); - return (*p)(i, j, k); - } - - // mutator - void set(uint i, uint j, uint k, Scalar val) - { - CacheLine* p = line(i, j, k, true); - (*p)(i, j, k) = val; - } - - // in-place updates - void add(uint i, uint j, uint k, Scalar val) { (*line(i, j, k, true))(i, j, k) += val; } - void sub(uint i, uint j, uint k, Scalar val) { (*line(i, j, k, true))(i, j, k) -= val; } - void mul(uint i, uint j, uint k, Scalar val) { (*line(i, j, k, true))(i, j, k) *= val; } - void div(uint i, uint j, uint k, Scalar val) { (*line(i, j, k, true))(i, j, k) /= val; } - - // return cache line for (i, j, k); may require write-back and fetch - CacheLine* line(uint i, uint j, uint k, bool write) const - { - CacheLine* p = 0; - uint b = block(i, j, k); - typename zfp::Cache::Tag t = cache.access(p, b + 1, write); - uint c = t.index() - 1; - if (c != b) { - // write back occupied cache line if it is dirty - if (t.dirty()) - encode(c, p->data()); - // fetch cache line - decode(b, p->data()); - } - return p; - } - - // encode block with given index - void encode(uint index, const Scalar* block) const - { - stream_wseek(zfp->stream, index * blkbits); - Codec::encode_block_3(zfp, block, shape ? 
shape[index] : 0); - stream_flush(zfp->stream); - } - - // encode block with given index from strided array - void encode(uint index, const Scalar* p, int sx, int sy, int sz) const - { - stream_wseek(zfp->stream, index * blkbits); - Codec::encode_block_strided_3(zfp, p, shape ? shape[index] : 0, sx, sy, sz); - stream_flush(zfp->stream); - } - - // decode block with given index - void decode(uint index, Scalar* block) const - { - stream_rseek(zfp->stream, index * blkbits); - Codec::decode_block_3(zfp, block, shape ? shape[index] : 0); - } - - // decode block with given index to strided array - void decode(uint index, Scalar* p, int sx, int sy, int sz) const - { - stream_rseek(zfp->stream, index * blkbits); - Codec::decode_block_strided_3(zfp, p, shape ? shape[index] : 0, sx, sy, sz); - } - - // block index for (i, j, k) - uint block(uint i, uint j, uint k) const { return (i / 4) + bx * ((j / 4) + by * (k / 4)); } - - // convert flat index to (i, j, k) - void ijk(uint& i, uint& j, uint& k, uint index) const - { - i = index % nx; - index /= nx; - j = index % ny; - index /= ny; - k = index; - } - - // number of cache lines corresponding to size (or suggested size if zero) - static uint lines(size_t size, uint nx, uint ny, uint nz) - { - uint n = size ? 
(size + sizeof(CacheLine) - 1) / sizeof(CacheLine) : array::lines(size_t((nx + 3) / 4) * size_t((ny + 3) / 4) * size_t((nz + 3) / 4)); - return std::max(n, 1u); - } - - mutable zfp::Cache cache; // cache of decompressed blocks -}; - -typedef array3 array3f; -typedef array3 array3d; - -} - -#endif diff --git a/array/zfpcodec.h b/array/zfpcodec.h deleted file mode 100644 index 2d467444..00000000 --- a/array/zfpcodec.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef ZFP_CODEC_H -#define ZFP_CODEC_H - -#include "zfp.h" - -namespace zfp { - -// C++ wrappers around libzfp C functions -template -struct codec {}; - -#include "zfpcodecf.h" -#include "zfpcodecd.h" - -} - -#endif diff --git a/array/zfpcodecd.h b/array/zfpcodecd.h deleted file mode 100644 index 9e7d8932..00000000 --- a/array/zfpcodecd.h +++ /dev/null @@ -1,149 +0,0 @@ -// double-precision codec -template <> -struct codec { - // encode contiguous 1D block - static void encode_block_1(zfp_stream* zfp, const double* block, uint shape) - { - if (shape) { - uint nx = 4 - (shape & 3u); shape >>= 2; - zfp_encode_partial_block_strided_double_1(zfp, block, nx, 1); - } - else - zfp_encode_block_double_1(zfp, block); - } - - // encode 1D block from strided storage - static void encode_block_strided_1(zfp_stream* zfp, const double* p, uint shape, int sx) - { - if (shape) { - uint nx = 4 - (shape & 3u); shape >>= 2; - zfp_encode_partial_block_strided_double_1(zfp, p, nx, sx); - } - else - zfp_encode_block_strided_double_1(zfp, p, sx); - } - - // encode contiguous 2D block - static void encode_block_2(zfp_stream* zfp, const double* block, uint shape) - { - if (shape) { - uint nx = 4 - (shape & 3u); shape >>= 2; - uint ny = 4 - (shape & 3u); shape >>= 2; - zfp_encode_partial_block_strided_double_2(zfp, block, nx, ny, 1, 4); - } - else - zfp_encode_block_double_2(zfp, block); - } - - // encode 2D block from strided storage - static void encode_block_strided_2(zfp_stream* zfp, const double* p, uint shape, int sx, int sy) - { - if 
(shape) { - uint nx = 4 - (shape & 3u); shape >>= 2; - uint ny = 4 - (shape & 3u); shape >>= 2; - zfp_encode_partial_block_strided_double_2(zfp, p, nx, ny, sx, sy); - } - else - zfp_encode_block_strided_double_2(zfp, p, sx, sy); - } - - // encode contiguous 3D block - static void encode_block_3(zfp_stream* zfp, const double* block, uint shape) - { - if (shape) { - uint nx = 4 - (shape & 3u); shape >>= 2; - uint ny = 4 - (shape & 3u); shape >>= 2; - uint nz = 4 - (shape & 3u); shape >>= 2; - zfp_encode_partial_block_strided_double_3(zfp, block, nx, ny, nz, 1, 4, 16); - } - else - zfp_encode_block_double_3(zfp, block); - } - - // encode 3D block from strided storage - static void encode_block_strided_3(zfp_stream* zfp, const double* p, uint shape, int sx, int sy, int sz) - { - if (shape) { - uint nx = 4 - (shape & 3u); shape >>= 2; - uint ny = 4 - (shape & 3u); shape >>= 2; - uint nz = 4 - (shape & 3u); shape >>= 2; - zfp_encode_partial_block_strided_double_3(zfp, p, nx, ny, nz, sx, sy, sz); - } - else - zfp_encode_block_strided_double_3(zfp, p, sx, sy, sz); - } - - // decode contiguous 1D block - static void decode_block_1(zfp_stream* zfp, double* block, uint shape) - { - if (shape) { - uint nx = 4 - (shape & 3u); shape >>= 2; - zfp_decode_partial_block_strided_double_1(zfp, block, nx, 1); - } - else - zfp_decode_block_double_1(zfp, block); - } - - // decode 1D block to strided storage - static void decode_block_strided_1(zfp_stream* zfp, double* p, uint shape, int sx) - { - if (shape) { - uint nx = 4 - (shape & 3u); shape >>= 2; - zfp_decode_partial_block_strided_double_1(zfp, p, nx, sx); - } - else - zfp_decode_block_strided_double_1(zfp, p, sx); - } - - // decode contiguous 2D block - static void decode_block_2(zfp_stream* zfp, double* block, uint shape) - { - if (shape) { - uint nx = 4 - (shape & 3u); shape >>= 2; - uint ny = 4 - (shape & 3u); shape >>= 2; - zfp_decode_partial_block_strided_double_2(zfp, block, nx, ny, 1, 4); - } - else - 
zfp_decode_block_double_2(zfp, block); - } - - // decode 2D block to strided storage - static void decode_block_strided_2(zfp_stream* zfp, double* p, uint shape, int sx, int sy) - { - if (shape) { - uint nx = 4 - (shape & 3u); shape >>= 2; - uint ny = 4 - (shape & 3u); shape >>= 2; - zfp_decode_partial_block_strided_double_2(zfp, p, nx, ny, sx, sy); - } - else - zfp_decode_block_strided_double_2(zfp, p, sx, sy); - } - - // decode contiguous 3D block - static void decode_block_3(zfp_stream* zfp, double* block, uint shape) - { - if (shape) { - uint nx = 4 - (shape & 3u); shape >>= 2; - uint ny = 4 - (shape & 3u); shape >>= 2; - uint nz = 4 - (shape & 3u); shape >>= 2; - zfp_decode_partial_block_strided_double_3(zfp, block, nx, ny, nz, 1, 4, 16); - } - else - zfp_decode_block_double_3(zfp, block); - } - - // decode 3D block to strided storage - static void decode_block_strided_3(zfp_stream* zfp, double* p, uint shape, int sx, int sy, int sz) - { - if (shape) { - uint nx = 4 - (shape & 3u); shape >>= 2; - uint ny = 4 - (shape & 3u); shape >>= 2; - uint nz = 4 - (shape & 3u); shape >>= 2; - zfp_decode_partial_block_strided_double_3(zfp, p, nx, ny, nz, sx, sy, sz); - } - else - zfp_decode_block_strided_double_3(zfp, p, sx, sy, sz); - } - - static const zfp_type type = zfp_type_double; -}; diff --git a/array/zfpcodecf.h b/array/zfpcodecf.h deleted file mode 100644 index 1ec74a60..00000000 --- a/array/zfpcodecf.h +++ /dev/null @@ -1,149 +0,0 @@ -// single-precision codec -template <> -struct codec { - // encode contiguous 1D block - static void encode_block_1(zfp_stream* zfp, const float* block, uint shape) - { - if (shape) { - uint nx = 4 - (shape & 3u); shape >>= 2; - zfp_encode_partial_block_strided_float_1(zfp, block, nx, 1); - } - else - zfp_encode_block_float_1(zfp, block); - } - - // encode 1D block from strided storage - static void encode_block_strided_1(zfp_stream* zfp, const float* p, uint shape, int sx) - { - if (shape) { - uint nx = 4 - (shape & 3u); shape >>= 
2; - zfp_encode_partial_block_strided_float_1(zfp, p, nx, sx); - } - else - zfp_encode_block_strided_float_1(zfp, p, sx); - } - - // encode contiguous 2D block - static void encode_block_2(zfp_stream* zfp, const float* block, uint shape) - { - if (shape) { - uint nx = 4 - (shape & 3u); shape >>= 2; - uint ny = 4 - (shape & 3u); shape >>= 2; - zfp_encode_partial_block_strided_float_2(zfp, block, nx, ny, 1, 4); - } - else - zfp_encode_block_float_2(zfp, block); - } - - // encode 2D block from strided storage - static void encode_block_strided_2(zfp_stream* zfp, const float* p, uint shape, int sx, int sy) - { - if (shape) { - uint nx = 4 - (shape & 3u); shape >>= 2; - uint ny = 4 - (shape & 3u); shape >>= 2; - zfp_encode_partial_block_strided_float_2(zfp, p, nx, ny, sx, sy); - } - else - zfp_encode_block_strided_float_2(zfp, p, sx, sy); - } - - // encode contiguous 3D block - static void encode_block_3(zfp_stream* zfp, const float* block, uint shape) - { - if (shape) { - uint nx = 4 - (shape & 3u); shape >>= 2; - uint ny = 4 - (shape & 3u); shape >>= 2; - uint nz = 4 - (shape & 3u); shape >>= 2; - zfp_encode_partial_block_strided_float_3(zfp, block, nx, ny, nz, 1, 4, 16); - } - else - zfp_encode_block_float_3(zfp, block); - } - - // encode 3D block from strided storage - static void encode_block_strided_3(zfp_stream* zfp, const float* p, uint shape, int sx, int sy, int sz) - { - if (shape) { - uint nx = 4 - (shape & 3u); shape >>= 2; - uint ny = 4 - (shape & 3u); shape >>= 2; - uint nz = 4 - (shape & 3u); shape >>= 2; - zfp_encode_partial_block_strided_float_3(zfp, p, nx, ny, nz, sx, sy, sz); - } - else - zfp_encode_block_strided_float_3(zfp, p, sx, sy, sz); - } - - // decode contiguous 1D block - static void decode_block_1(zfp_stream* zfp, float* block, uint shape) - { - if (shape) { - uint nx = 4 - (shape & 3u); shape >>= 2; - zfp_decode_partial_block_strided_float_1(zfp, block, nx, 1); - } - else - zfp_decode_block_float_1(zfp, block); - } - - // decode 1D block to 
strided storage - static void decode_block_strided_1(zfp_stream* zfp, float* p, uint shape, int sx) - { - if (shape) { - uint nx = 4 - (shape & 3u); shape >>= 2; - zfp_decode_partial_block_strided_float_1(zfp, p, nx, sx); - } - else - zfp_decode_block_strided_float_1(zfp, p, sx); - } - - // decode contiguous 2D block - static void decode_block_2(zfp_stream* zfp, float* block, uint shape) - { - if (shape) { - uint nx = 4 - (shape & 3u); shape >>= 2; - uint ny = 4 - (shape & 3u); shape >>= 2; - zfp_decode_partial_block_strided_float_2(zfp, block, nx, ny, 1, 4); - } - else - zfp_decode_block_float_2(zfp, block); - } - - // decode 2D block to strided storage - static void decode_block_strided_2(zfp_stream* zfp, float* p, uint shape, int sx, int sy) - { - if (shape) { - uint nx = 4 - (shape & 3u); shape >>= 2; - uint ny = 4 - (shape & 3u); shape >>= 2; - zfp_decode_partial_block_strided_float_2(zfp, p, nx, ny, sx, sy); - } - else - zfp_decode_block_strided_float_2(zfp, p, sx, sy); - } - - // decode contiguous 3D block - static void decode_block_3(zfp_stream* zfp, float* block, uint shape) - { - if (shape) { - uint nx = 4 - (shape & 3u); shape >>= 2; - uint ny = 4 - (shape & 3u); shape >>= 2; - uint nz = 4 - (shape & 3u); shape >>= 2; - zfp_decode_partial_block_strided_float_3(zfp, block, nx, ny, nz, 1, 4, 16); - } - else - zfp_decode_block_float_3(zfp, block); - } - - // decode 3D block to strided storage - static void decode_block_strided_3(zfp_stream* zfp, float* p, uint shape, int sx, int sy, int sz) - { - if (shape) { - uint nx = 4 - (shape & 3u); shape >>= 2; - uint ny = 4 - (shape & 3u); shape >>= 2; - uint nz = 4 - (shape & 3u); shape >>= 2; - zfp_decode_partial_block_strided_float_3(zfp, p, nx, ny, nz, sx, sy, sz); - } - else - zfp_decode_block_strided_float_3(zfp, p, sx, sy, sz); - } - - static const zfp_type type = zfp_type_float; -}; diff --git a/array/zfpfactory.h b/array/zfpfactory.h deleted file mode 100644 index 44910bd2..00000000 --- a/array/zfpfactory.h 
+++ /dev/null @@ -1,98 +0,0 @@ -#ifndef ZFP_FACTORY_H -#define ZFP_FACTORY_H - -// (assumes zfparray.h already included) - -zfp::array* zfp::array::construct(const zfp::array::header& header, const uchar* buffer, size_t buffer_size_bytes) -{ - // gather array metadata via C API, then construct with metadata - uint dims = 0; - zfp_type type = zfp_type_none; - double rate = 0; - uint n[4] = {0}; - - // read once (will throw if reads a noncompatible header) - zfp::array::read_header_contents(header, buffer_size_bytes, dims, type, rate, n); - - // construct once (passing zfp::array::header will read it again) - zfp::array* arr = 0; - std::string err_msg = ""; - switch (dims) { - case 3: -#ifdef ZFP_ARRAY3_H - switch (type) { - case zfp_type_double: - arr = new zfp::array3d(n[0], n[1], n[2], rate); - break; - - case zfp_type_float: - arr = new zfp::array3f(n[0], n[1], n[2], rate); - break; - - default: - /* NOTREACHED */ - err_msg = "Unexpected ZFP type."; - break; - } -#else - err_msg = "Header files for 3 dimensional ZFP compressed arrays were not included."; -#endif - break; - - case 2: -#ifdef ZFP_ARRAY2_H - switch (type) { - case zfp_type_double: - arr = new zfp::array2d(n[0], n[1], rate); - break; - - case zfp_type_float: - arr = new zfp::array2f(n[0], n[1], rate); - break; - - default: - /* NOTREACHED */ - err_msg = "Unexpected ZFP type."; - break; - } -#else - err_msg = "Header files for 2 dimensional ZFP compressed arrays were not included."; -#endif - break; - - case 1: -#ifdef ZFP_ARRAY1_H - switch (type) { - case zfp_type_double: - arr = new zfp::array1d(n[0], rate); - break; - - case zfp_type_float: - arr = new zfp::array1f(n[0], rate); - break; - - default: - /* NOTREACHED */ - err_msg = "Unexpected ZFP type."; - break; - } -#else - err_msg = "Header files for 1 dimensional ZFP compressed arrays were not included."; -#endif - break; - - default: - err_msg = "ZFP compressed arrays do not yet support dimensionalities beyond 1, 2, and 3."; - break; - } - - if 
(!err_msg.empty()) - throw zfp::array::header::exception(err_msg); - - if (buffer) - memcpy(arr->compressed_data(), buffer, arr->compressed_size()); - - return arr; -} - -#endif diff --git a/cfp/CMakeLists.txt b/cfp/CMakeLists.txt index febd4f0a..3d8af6ec 100644 --- a/cfp/CMakeLists.txt +++ b/cfp/CMakeLists.txt @@ -1 +1,36 @@ -add_subdirectory(src) +add_library(cfp cfp.cpp) + +if(DEFINED CFP_NAMESPACE) + list(APPEND cfp_public_defs "CFP_NAMESPACE=${CFP_NAMESPACE}") +endif() + +list(APPEND cfp_private_defs ${zfp_compressed_array_defs}) + +if(WIN32 AND BUILD_SHARED_LIBS) + # define ZFP_SOURCE when compiling libcfp to export symbols to Windows DLL + list(APPEND cfp_public_defs ZFP_SHARED_LIBS) + list(APPEND cfp_private_defs ZFP_SOURCE) +endif() + +target_compile_definitions(cfp + PUBLIC ${cfp_public_defs} + PRIVATE ${cfp_private_defs}) + +target_include_directories(cfp + PUBLIC + $ + $ + PRIVATE + ${ZFP_SOURCE_DIR}/src +) + +target_link_libraries(cfp zfp) + +set_property(TARGET cfp PROPERTY VERSION ${ZFP_VERSION}) +set_property(TARGET cfp PROPERTY SOVERSION ${ZFP_VERSION_MAJOR}) +set_property(TARGET cfp PROPERTY OUTPUT_NAME ${ZFP_LIBRARY_PREFIX}cfp) + +install(TARGETS cfp EXPORT cfp-targets + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) diff --git a/cfp/src/Makefile b/cfp/Makefile similarity index 59% rename from cfp/src/Makefile rename to cfp/Makefile index eef12ffc..37881a76 100644 --- a/cfp/src/Makefile +++ b/cfp/Makefile @@ -1,9 +1,9 @@ -include ../../Config +include ../Config -CXXFLAGS += -I../../include -I../../src -I../../array -LIBDIR = ../../lib +LIBDIR = ../lib TARGETS = $(LIBDIR)/libcfp.a $(LIBDIR)/libcfp.so -OBJECTS = cfparrays.o +OBJECTS = cfp.o +INCS = -I../include -I../src static: $(LIBDIR)/libcfp.a @@ -19,7 +19,7 @@ $(LIBDIR)/libcfp.a: $(OBJECTS) $(LIBDIR)/libcfp.so: $(OBJECTS) mkdir -p $(LIBDIR) - $(CXX) $(CXXLAGS) -shared $(SOFLAGS) $^ -o $@ + $(CXX) 
$(CXXFLAGS) -shared $(SOFLAGS) $^ -o $@ .cpp.o: - $(CXX) $(CXXFLAGS) -c $< + $(CXX) $(CXXFLAGS) $(INCS) -c $< diff --git a/cfp/cfp.cpp b/cfp/cfp.cpp new file mode 100644 index 00000000..b360760f --- /dev/null +++ b/cfp/cfp.cpp @@ -0,0 +1,868 @@ +#include "cfpheader.cpp" +#include "zfp/array.h" + +#include "cfparray1f.cpp" +#include "cfparray1d.cpp" +#include "cfparray2f.cpp" +#include "cfparray2d.cpp" +#include "cfparray3f.cpp" +#include "cfparray3d.cpp" +#include "cfparray4f.cpp" +#include "cfparray4d.cpp" + +const cfp_api CFP_NAMESPACE = { + // array1f + { + cfp_array1f_ctor_default, + cfp_array1f_ctor, + cfp_array1f_ctor_copy, + cfp_array1f_ctor_header, + cfp_array1f_dtor, + + cfp_array1f_deep_copy, + + cfp_array1f_rate, + cfp_array1f_set_rate, + cfp_array1f_cache_size, + cfp_array1f_set_cache_size, + cfp_array1f_clear_cache, + cfp_array1f_flush_cache, + cfp_array1f_size_bytes, + cfp_array1f_compressed_size, + cfp_array1f_compressed_data, + cfp_array1f_size, + cfp_array1f_resize, + + cfp_array1f_get_array, + cfp_array1f_set_array, + cfp_array1f_get_flat, + cfp_array1f_set_flat, + cfp_array1f_get, + cfp_array1f_set, + + cfp_array1f_ref, + cfp_array1f_ref_flat, + + cfp_array1f_ptr, + cfp_array1f_ptr_flat, + + cfp_array1f_begin, + cfp_array1f_end, + + { + cfp_array1f_cfp_ref1f_get, + cfp_array1f_cfp_ref1f_set, + cfp_array1f_cfp_ref1f_ptr, + cfp_array1f_cfp_ref1f_copy, + }, + + { + cfp_array1f_cfp_ptr1f_get, + cfp_array1f_cfp_ptr1f_get_at, + cfp_array1f_cfp_ptr1f_set, + cfp_array1f_cfp_ptr1f_set_at, + cfp_array1f_cfp_ptr1f_ref, + cfp_array1f_cfp_ptr1f_ref_at, + cfp_array1f_cfp_ptr1f_lt, + cfp_array1f_cfp_ptr1f_gt, + cfp_array1f_cfp_ptr1f_leq, + cfp_array1f_cfp_ptr1f_geq, + cfp_array1f_cfp_ptr1f_eq, + cfp_array1f_cfp_ptr1f_neq, + cfp_array1f_cfp_ptr1f_distance, + cfp_array1f_cfp_ptr1f_next, + cfp_array1f_cfp_ptr1f_prev, + cfp_array1f_cfp_ptr1f_inc, + cfp_array1f_cfp_ptr1f_dec, + }, + + { + cfp_array1f_cfp_iter1f_get, + cfp_array1f_cfp_iter1f_get_at, + 
cfp_array1f_cfp_iter1f_set, + cfp_array1f_cfp_iter1f_set_at, + cfp_array1f_cfp_iter1f_ref, + cfp_array1f_cfp_iter1f_ref_at, + cfp_array1f_cfp_iter1f_ptr, + cfp_array1f_cfp_iter1f_ptr_at, + cfp_array1f_cfp_iter1f_i, + cfp_array1f_cfp_iter1f_lt, + cfp_array1f_cfp_iter1f_gt, + cfp_array1f_cfp_iter1f_leq, + cfp_array1f_cfp_iter1f_geq, + cfp_array1f_cfp_iter1f_eq, + cfp_array1f_cfp_iter1f_neq, + cfp_array1f_cfp_iter1f_distance, + cfp_array1f_cfp_iter1f_next, + cfp_array1f_cfp_iter1f_prev, + cfp_array1f_cfp_iter1f_inc, + cfp_array1f_cfp_iter1f_dec, + }, + + { + cfp_header_ctor_array1f, + cfp_header_ctor_buffer, + cfp_header_dtor, + cfp_header_scalar_type, + cfp_header_dimensionality, + cfp_header_size_x, + cfp_header_size_y, + cfp_header_size_z, + cfp_header_size_w, + cfp_header_rate, + cfp_header_data, + cfp_header_size_bytes, + }, + }, + // array1d + { + cfp_array1d_ctor_default, + cfp_array1d_ctor, + cfp_array1d_ctor_copy, + cfp_array1d_ctor_header, + cfp_array1d_dtor, + + cfp_array1d_deep_copy, + + cfp_array1d_rate, + cfp_array1d_set_rate, + cfp_array1d_cache_size, + cfp_array1d_set_cache_size, + cfp_array1d_clear_cache, + cfp_array1d_flush_cache, + cfp_array1d_size_bytes, + cfp_array1d_compressed_size, + cfp_array1d_compressed_data, + cfp_array1d_size, + cfp_array1d_resize, + + cfp_array1d_get_array, + cfp_array1d_set_array, + cfp_array1d_get_flat, + cfp_array1d_set_flat, + cfp_array1d_get, + cfp_array1d_set, + + cfp_array1d_ref, + cfp_array1d_ref_flat, + + cfp_array1d_ptr, + cfp_array1d_ptr_flat, + + cfp_array1d_begin, + cfp_array1d_end, + + { + cfp_array1d_cfp_ref1d_get, + cfp_array1d_cfp_ref1d_set, + cfp_array1d_cfp_ref1d_ptr, + cfp_array1d_cfp_ref1d_copy, + }, + + { + cfp_array1d_cfp_ptr1d_get, + cfp_array1d_cfp_ptr1d_get_at, + cfp_array1d_cfp_ptr1d_set, + cfp_array1d_cfp_ptr1d_set_at, + cfp_array1d_cfp_ptr1d_ref, + cfp_array1d_cfp_ptr1d_ref_at, + cfp_array1d_cfp_ptr1d_lt, + cfp_array1d_cfp_ptr1d_gt, + cfp_array1d_cfp_ptr1d_leq, + cfp_array1d_cfp_ptr1d_geq, + 
cfp_array1d_cfp_ptr1d_eq, + cfp_array1d_cfp_ptr1d_neq, + cfp_array1d_cfp_ptr1d_distance, + cfp_array1d_cfp_ptr1d_next, + cfp_array1d_cfp_ptr1d_prev, + cfp_array1d_cfp_ptr1d_inc, + cfp_array1d_cfp_ptr1d_dec, + }, + + { + cfp_array1d_cfp_iter1d_get, + cfp_array1d_cfp_iter1d_get_at, + cfp_array1d_cfp_iter1d_set, + cfp_array1d_cfp_iter1d_set_at, + cfp_array1d_cfp_iter1d_ref, + cfp_array1d_cfp_iter1d_ref_at, + cfp_array1d_cfp_iter1d_ptr, + cfp_array1d_cfp_iter1d_ptr_at, + cfp_array1d_cfp_iter1d_i, + cfp_array1d_cfp_iter1d_lt, + cfp_array1d_cfp_iter1d_gt, + cfp_array1d_cfp_iter1d_leq, + cfp_array1d_cfp_iter1d_geq, + cfp_array1d_cfp_iter1d_eq, + cfp_array1d_cfp_iter1d_neq, + cfp_array1d_cfp_iter1d_distance, + cfp_array1d_cfp_iter1d_next, + cfp_array1d_cfp_iter1d_prev, + cfp_array1d_cfp_iter1d_inc, + cfp_array1d_cfp_iter1d_dec, + }, + + { + cfp_header_ctor_array1d, + cfp_header_ctor_buffer, + cfp_header_dtor, + cfp_header_scalar_type, + cfp_header_dimensionality, + cfp_header_size_x, + cfp_header_size_y, + cfp_header_size_z, + cfp_header_size_w, + cfp_header_rate, + cfp_header_data, + cfp_header_size_bytes, + }, + }, + // array2f + { + cfp_array2f_ctor_default, + cfp_array2f_ctor, + cfp_array2f_ctor_copy, + cfp_array2f_ctor_header, + cfp_array2f_dtor, + + cfp_array2f_deep_copy, + + cfp_array2f_rate, + cfp_array2f_set_rate, + cfp_array2f_cache_size, + cfp_array2f_set_cache_size, + cfp_array2f_clear_cache, + cfp_array2f_flush_cache, + cfp_array2f_size_bytes, + cfp_array2f_compressed_size, + cfp_array2f_compressed_data, + cfp_array2f_size, + cfp_array2f_size_x, + cfp_array2f_size_y, + cfp_array2f_resize, + + cfp_array2f_get_array, + cfp_array2f_set_array, + cfp_array2f_get_flat, + cfp_array2f_set_flat, + cfp_array2f_get, + cfp_array2f_set, + + cfp_array2f_ref, + cfp_array2f_ref_flat, + + cfp_array2f_ptr, + cfp_array2f_ptr_flat, + + cfp_array2f_begin, + cfp_array2f_end, + + { + cfp_array2f_cfp_ref2f_get, + cfp_array2f_cfp_ref2f_set, + cfp_array2f_cfp_ref2f_ptr, + 
cfp_array2f_cfp_ref2f_copy, + }, + + { + cfp_array2f_cfp_ptr2f_get, + cfp_array2f_cfp_ptr2f_get_at, + cfp_array2f_cfp_ptr2f_set, + cfp_array2f_cfp_ptr2f_set_at, + cfp_array2f_cfp_ptr2f_ref, + cfp_array2f_cfp_ptr2f_ref_at, + cfp_array2f_cfp_ptr2f_lt, + cfp_array2f_cfp_ptr2f_gt, + cfp_array2f_cfp_ptr2f_leq, + cfp_array2f_cfp_ptr2f_geq, + cfp_array2f_cfp_ptr2f_eq, + cfp_array2f_cfp_ptr2f_neq, + cfp_array2f_cfp_ptr2f_distance, + cfp_array2f_cfp_ptr2f_next, + cfp_array2f_cfp_ptr2f_prev, + cfp_array2f_cfp_ptr2f_inc, + cfp_array2f_cfp_ptr2f_dec, + }, + + { + cfp_array2f_cfp_iter2f_get, + cfp_array2f_cfp_iter2f_get_at, + cfp_array2f_cfp_iter2f_set, + cfp_array2f_cfp_iter2f_set_at, + cfp_array2f_cfp_iter2f_ref, + cfp_array2f_cfp_iter2f_ref_at, + cfp_array2f_cfp_iter2f_ptr, + cfp_array2f_cfp_iter2f_ptr_at, + cfp_array2f_cfp_iter2f_i, + cfp_array2f_cfp_iter2f_j, + cfp_array2f_cfp_iter2f_lt, + cfp_array2f_cfp_iter2f_gt, + cfp_array2f_cfp_iter2f_leq, + cfp_array2f_cfp_iter2f_geq, + cfp_array2f_cfp_iter2f_eq, + cfp_array2f_cfp_iter2f_neq, + cfp_array2f_cfp_iter2f_distance, + cfp_array2f_cfp_iter2f_next, + cfp_array2f_cfp_iter2f_prev, + cfp_array2f_cfp_iter2f_inc, + cfp_array2f_cfp_iter2f_dec, + }, + + { + cfp_header_ctor_array2f, + cfp_header_ctor_buffer, + cfp_header_dtor, + cfp_header_scalar_type, + cfp_header_dimensionality, + cfp_header_size_x, + cfp_header_size_y, + cfp_header_size_z, + cfp_header_size_w, + cfp_header_rate, + cfp_header_data, + cfp_header_size_bytes, + }, + }, + // array2d + { + cfp_array2d_ctor_default, + cfp_array2d_ctor, + cfp_array2d_ctor_copy, + cfp_array2d_ctor_header, + cfp_array2d_dtor, + + cfp_array2d_deep_copy, + + cfp_array2d_rate, + cfp_array2d_set_rate, + cfp_array2d_cache_size, + cfp_array2d_set_cache_size, + cfp_array2d_clear_cache, + cfp_array2d_flush_cache, + cfp_array2d_size_bytes, + cfp_array2d_compressed_size, + cfp_array2d_compressed_data, + cfp_array2d_size, + cfp_array2d_size_x, + cfp_array2d_size_y, + cfp_array2d_resize, + + 
cfp_array2d_get_array, + cfp_array2d_set_array, + cfp_array2d_get_flat, + cfp_array2d_set_flat, + cfp_array2d_get, + cfp_array2d_set, + + cfp_array2d_ref, + cfp_array2d_ref_flat, + + cfp_array2d_ptr, + cfp_array2d_ptr_flat, + + cfp_array2d_begin, + cfp_array2d_end, + + { + cfp_array2d_cfp_ref2d_get, + cfp_array2d_cfp_ref2d_set, + cfp_array2d_cfp_ref2d_ptr, + cfp_array2d_cfp_ref2d_copy, + }, + + { + cfp_array2d_cfp_ptr2d_get, + cfp_array2d_cfp_ptr2d_get_at, + cfp_array2d_cfp_ptr2d_set, + cfp_array2d_cfp_ptr2d_set_at, + cfp_array2d_cfp_ptr2d_ref, + cfp_array2d_cfp_ptr2d_ref_at, + cfp_array2d_cfp_ptr2d_lt, + cfp_array2d_cfp_ptr2d_gt, + cfp_array2d_cfp_ptr2d_leq, + cfp_array2d_cfp_ptr2d_geq, + cfp_array2d_cfp_ptr2d_eq, + cfp_array2d_cfp_ptr2d_neq, + cfp_array2d_cfp_ptr2d_distance, + cfp_array2d_cfp_ptr2d_next, + cfp_array2d_cfp_ptr2d_prev, + cfp_array2d_cfp_ptr2d_inc, + cfp_array2d_cfp_ptr2d_dec, + }, + + { + cfp_array2d_cfp_iter2d_get, + cfp_array2d_cfp_iter2d_get_at, + cfp_array2d_cfp_iter2d_set, + cfp_array2d_cfp_iter2d_set_at, + cfp_array2d_cfp_iter2d_ref, + cfp_array2d_cfp_iter2d_ref_at, + cfp_array2d_cfp_iter2d_ptr, + cfp_array2d_cfp_iter2d_ptr_at, + cfp_array2d_cfp_iter2d_i, + cfp_array2d_cfp_iter2d_j, + cfp_array2d_cfp_iter2d_lt, + cfp_array2d_cfp_iter2d_gt, + cfp_array2d_cfp_iter2d_leq, + cfp_array2d_cfp_iter2d_geq, + cfp_array2d_cfp_iter2d_eq, + cfp_array2d_cfp_iter2d_neq, + cfp_array2d_cfp_iter2d_distance, + cfp_array2d_cfp_iter2d_next, + cfp_array2d_cfp_iter2d_prev, + cfp_array2d_cfp_iter2d_inc, + cfp_array2d_cfp_iter2d_dec, + }, + + { + cfp_header_ctor_array2d, + cfp_header_ctor_buffer, + cfp_header_dtor, + cfp_header_scalar_type, + cfp_header_dimensionality, + cfp_header_size_x, + cfp_header_size_y, + cfp_header_size_z, + cfp_header_size_w, + cfp_header_rate, + cfp_header_data, + cfp_header_size_bytes, + }, + }, + // array3f + { + cfp_array3f_ctor_default, + cfp_array3f_ctor, + cfp_array3f_ctor_copy, + cfp_array3f_ctor_header, + cfp_array3f_dtor, + + 
cfp_array3f_deep_copy, + + cfp_array3f_rate, + cfp_array3f_set_rate, + cfp_array3f_cache_size, + cfp_array3f_set_cache_size, + cfp_array3f_clear_cache, + cfp_array3f_flush_cache, + cfp_array3f_size_bytes, + cfp_array3f_compressed_size, + cfp_array3f_compressed_data, + cfp_array3f_size, + cfp_array3f_size_x, + cfp_array3f_size_y, + cfp_array3f_size_z, + cfp_array3f_resize, + + cfp_array3f_get_array, + cfp_array3f_set_array, + cfp_array3f_get_flat, + cfp_array3f_set_flat, + cfp_array3f_get, + cfp_array3f_set, + + cfp_array3f_ref, + cfp_array3f_ref_flat, + + cfp_array3f_ptr, + cfp_array3f_ptr_flat, + + cfp_array3f_begin, + cfp_array3f_end, + + { + cfp_array3f_cfp_ref3f_get, + cfp_array3f_cfp_ref3f_set, + cfp_array3f_cfp_ref3f_ptr, + cfp_array3f_cfp_ref3f_copy, + }, + + { + cfp_array3f_cfp_ptr3f_get, + cfp_array3f_cfp_ptr3f_get_at, + cfp_array3f_cfp_ptr3f_set, + cfp_array3f_cfp_ptr3f_set_at, + cfp_array3f_cfp_ptr3f_ref, + cfp_array3f_cfp_ptr3f_ref_at, + cfp_array3f_cfp_ptr3f_lt, + cfp_array3f_cfp_ptr3f_gt, + cfp_array3f_cfp_ptr3f_leq, + cfp_array3f_cfp_ptr3f_geq, + cfp_array3f_cfp_ptr3f_eq, + cfp_array3f_cfp_ptr3f_neq, + cfp_array3f_cfp_ptr3f_distance, + cfp_array3f_cfp_ptr3f_next, + cfp_array3f_cfp_ptr3f_prev, + cfp_array3f_cfp_ptr3f_inc, + cfp_array3f_cfp_ptr3f_dec, + }, + + { + cfp_array3f_cfp_iter3f_get, + cfp_array3f_cfp_iter3f_get_at, + cfp_array3f_cfp_iter3f_set, + cfp_array3f_cfp_iter3f_set_at, + cfp_array3f_cfp_iter3f_ref, + cfp_array3f_cfp_iter3f_ref_at, + cfp_array3f_cfp_iter3f_ptr, + cfp_array3f_cfp_iter3f_ptr_at, + cfp_array3f_cfp_iter3f_i, + cfp_array3f_cfp_iter3f_j, + cfp_array3f_cfp_iter3f_k, + cfp_array3f_cfp_iter3f_lt, + cfp_array3f_cfp_iter3f_gt, + cfp_array3f_cfp_iter3f_leq, + cfp_array3f_cfp_iter3f_geq, + cfp_array3f_cfp_iter3f_eq, + cfp_array3f_cfp_iter3f_neq, + cfp_array3f_cfp_iter3f_distance, + cfp_array3f_cfp_iter3f_next, + cfp_array3f_cfp_iter3f_prev, + cfp_array3f_cfp_iter3f_inc, + cfp_array3f_cfp_iter3f_dec, + }, + + { + 
cfp_header_ctor_array3f, + cfp_header_ctor_buffer, + cfp_header_dtor, + cfp_header_scalar_type, + cfp_header_dimensionality, + cfp_header_size_x, + cfp_header_size_y, + cfp_header_size_z, + cfp_header_size_w, + cfp_header_rate, + cfp_header_data, + cfp_header_size_bytes, + }, + }, + // array3d + { + cfp_array3d_ctor_default, + cfp_array3d_ctor, + cfp_array3d_ctor_copy, + cfp_array3d_ctor_header, + cfp_array3d_dtor, + + cfp_array3d_deep_copy, + + cfp_array3d_rate, + cfp_array3d_set_rate, + cfp_array3d_cache_size, + cfp_array3d_set_cache_size, + cfp_array3d_clear_cache, + cfp_array3d_flush_cache, + cfp_array3d_size_bytes, + cfp_array3d_compressed_size, + cfp_array3d_compressed_data, + cfp_array3d_size, + cfp_array3d_size_x, + cfp_array3d_size_y, + cfp_array3d_size_z, + cfp_array3d_resize, + + cfp_array3d_get_array, + cfp_array3d_set_array, + cfp_array3d_get_flat, + cfp_array3d_set_flat, + cfp_array3d_get, + cfp_array3d_set, + + cfp_array3d_ref, + cfp_array3d_ref_flat, + + cfp_array3d_ptr, + cfp_array3d_ptr_flat, + + cfp_array3d_begin, + cfp_array3d_end, + + { + cfp_array3d_cfp_ref3d_get, + cfp_array3d_cfp_ref3d_set, + cfp_array3d_cfp_ref3d_ptr, + cfp_array3d_cfp_ref3d_copy, + }, + + { + cfp_array3d_cfp_ptr3d_get, + cfp_array3d_cfp_ptr3d_get_at, + cfp_array3d_cfp_ptr3d_set, + cfp_array3d_cfp_ptr3d_set_at, + cfp_array3d_cfp_ptr3d_ref, + cfp_array3d_cfp_ptr3d_ref_at, + cfp_array3d_cfp_ptr3d_lt, + cfp_array3d_cfp_ptr3d_gt, + cfp_array3d_cfp_ptr3d_leq, + cfp_array3d_cfp_ptr3d_geq, + cfp_array3d_cfp_ptr3d_eq, + cfp_array3d_cfp_ptr3d_neq, + cfp_array3d_cfp_ptr3d_distance, + cfp_array3d_cfp_ptr3d_next, + cfp_array3d_cfp_ptr3d_prev, + cfp_array3d_cfp_ptr3d_inc, + cfp_array3d_cfp_ptr3d_dec, + }, + + { + cfp_array3d_cfp_iter3d_get, + cfp_array3d_cfp_iter3d_get_at, + cfp_array3d_cfp_iter3d_set, + cfp_array3d_cfp_iter3d_set_at, + cfp_array3d_cfp_iter3d_ref, + cfp_array3d_cfp_iter3d_ref_at, + cfp_array3d_cfp_iter3d_ptr, + cfp_array3d_cfp_iter3d_ptr_at, + cfp_array3d_cfp_iter3d_i, 
+ cfp_array3d_cfp_iter3d_j, + cfp_array3d_cfp_iter3d_k, + cfp_array3d_cfp_iter3d_lt, + cfp_array3d_cfp_iter3d_gt, + cfp_array3d_cfp_iter3d_leq, + cfp_array3d_cfp_iter3d_geq, + cfp_array3d_cfp_iter3d_eq, + cfp_array3d_cfp_iter3d_neq, + cfp_array3d_cfp_iter3d_distance, + cfp_array3d_cfp_iter3d_next, + cfp_array3d_cfp_iter3d_prev, + cfp_array3d_cfp_iter3d_inc, + cfp_array3d_cfp_iter3d_dec, + }, + + { + cfp_header_ctor_array3d, + cfp_header_ctor_buffer, + cfp_header_dtor, + cfp_header_scalar_type, + cfp_header_dimensionality, + cfp_header_size_x, + cfp_header_size_y, + cfp_header_size_z, + cfp_header_size_w, + cfp_header_rate, + cfp_header_data, + cfp_header_size_bytes, + }, + }, + // array4f + { + cfp_array4f_ctor_default, + cfp_array4f_ctor, + cfp_array4f_ctor_copy, + cfp_array4f_ctor_header, + cfp_array4f_dtor, + + cfp_array4f_deep_copy, + + cfp_array4f_rate, + cfp_array4f_set_rate, + cfp_array4f_cache_size, + cfp_array4f_set_cache_size, + cfp_array4f_clear_cache, + cfp_array4f_flush_cache, + cfp_array4f_size_bytes, + cfp_array4f_compressed_size, + cfp_array4f_compressed_data, + cfp_array4f_size, + cfp_array4f_size_x, + cfp_array4f_size_y, + cfp_array4f_size_z, + cfp_array4f_size_w, + cfp_array4f_resize, + + cfp_array4f_get_array, + cfp_array4f_set_array, + cfp_array4f_get_flat, + cfp_array4f_set_flat, + cfp_array4f_get, + cfp_array4f_set, + + cfp_array4f_ref, + cfp_array4f_ref_flat, + + cfp_array4f_ptr, + cfp_array4f_ptr_flat, + + cfp_array4f_begin, + cfp_array4f_end, + + { + cfp_array4f_cfp_ref4f_get, + cfp_array4f_cfp_ref4f_set, + cfp_array4f_cfp_ref4f_ptr, + cfp_array4f_cfp_ref4f_copy, + }, + + { + cfp_array4f_cfp_ptr4f_get, + cfp_array4f_cfp_ptr4f_get_at, + cfp_array4f_cfp_ptr4f_set, + cfp_array4f_cfp_ptr4f_set_at, + cfp_array4f_cfp_ptr4f_ref, + cfp_array4f_cfp_ptr4f_ref_at, + cfp_array4f_cfp_ptr4f_lt, + cfp_array4f_cfp_ptr4f_gt, + cfp_array4f_cfp_ptr4f_leq, + cfp_array4f_cfp_ptr4f_geq, + cfp_array4f_cfp_ptr4f_eq, + cfp_array4f_cfp_ptr4f_neq, + 
cfp_array4f_cfp_ptr4f_distance, + cfp_array4f_cfp_ptr4f_next, + cfp_array4f_cfp_ptr4f_prev, + cfp_array4f_cfp_ptr4f_inc, + cfp_array4f_cfp_ptr4f_dec, + }, + + { + cfp_array4f_cfp_iter4f_get, + cfp_array4f_cfp_iter4f_get_at, + cfp_array4f_cfp_iter4f_set, + cfp_array4f_cfp_iter4f_set_at, + cfp_array4f_cfp_iter4f_ref, + cfp_array4f_cfp_iter4f_ref_at, + cfp_array4f_cfp_iter4f_ptr, + cfp_array4f_cfp_iter4f_ptr_at, + cfp_array4f_cfp_iter4f_i, + cfp_array4f_cfp_iter4f_j, + cfp_array4f_cfp_iter4f_k, + cfp_array4f_cfp_iter4f_l, + cfp_array4f_cfp_iter4f_lt, + cfp_array4f_cfp_iter4f_gt, + cfp_array4f_cfp_iter4f_leq, + cfp_array4f_cfp_iter4f_geq, + cfp_array4f_cfp_iter4f_eq, + cfp_array4f_cfp_iter4f_neq, + cfp_array4f_cfp_iter4f_distance, + cfp_array4f_cfp_iter4f_next, + cfp_array4f_cfp_iter4f_prev, + cfp_array4f_cfp_iter4f_inc, + cfp_array4f_cfp_iter4f_dec, + }, + + { + cfp_header_ctor_array4f, + cfp_header_ctor_buffer, + cfp_header_dtor, + cfp_header_scalar_type, + cfp_header_dimensionality, + cfp_header_size_x, + cfp_header_size_y, + cfp_header_size_z, + cfp_header_size_w, + cfp_header_rate, + cfp_header_data, + cfp_header_size_bytes, + }, + }, + // array4d + { + cfp_array4d_ctor_default, + cfp_array4d_ctor, + cfp_array4d_ctor_copy, + cfp_array4d_ctor_header, + cfp_array4d_dtor, + + cfp_array4d_deep_copy, + + cfp_array4d_rate, + cfp_array4d_set_rate, + cfp_array4d_cache_size, + cfp_array4d_set_cache_size, + cfp_array4d_clear_cache, + cfp_array4d_flush_cache, + cfp_array4d_size_bytes, + cfp_array4d_compressed_size, + cfp_array4d_compressed_data, + cfp_array4d_size, + cfp_array4d_size_x, + cfp_array4d_size_y, + cfp_array4d_size_z, + cfp_array4d_size_w, + cfp_array4d_resize, + + cfp_array4d_get_array, + cfp_array4d_set_array, + cfp_array4d_get_flat, + cfp_array4d_set_flat, + cfp_array4d_get, + cfp_array4d_set, + + cfp_array4d_ref, + cfp_array4d_ref_flat, + + cfp_array4d_ptr, + cfp_array4d_ptr_flat, + + cfp_array4d_begin, + cfp_array4d_end, + + { + cfp_array4d_cfp_ref4d_get, + 
cfp_array4d_cfp_ref4d_set, + cfp_array4d_cfp_ref4d_ptr, + cfp_array4d_cfp_ref4d_copy, + }, + + { + cfp_array4d_cfp_ptr4d_get, + cfp_array4d_cfp_ptr4d_get_at, + cfp_array4d_cfp_ptr4d_set, + cfp_array4d_cfp_ptr4d_set_at, + cfp_array4d_cfp_ptr4d_ref, + cfp_array4d_cfp_ptr4d_ref_at, + cfp_array4d_cfp_ptr4d_lt, + cfp_array4d_cfp_ptr4d_gt, + cfp_array4d_cfp_ptr4d_leq, + cfp_array4d_cfp_ptr4d_geq, + cfp_array4d_cfp_ptr4d_eq, + cfp_array4d_cfp_ptr4d_neq, + cfp_array4d_cfp_ptr4d_distance, + cfp_array4d_cfp_ptr4d_next, + cfp_array4d_cfp_ptr4d_prev, + cfp_array4d_cfp_ptr4d_inc, + cfp_array4d_cfp_ptr4d_dec, + }, + + { + cfp_array4d_cfp_iter4d_get, + cfp_array4d_cfp_iter4d_get_at, + cfp_array4d_cfp_iter4d_set, + cfp_array4d_cfp_iter4d_set_at, + cfp_array4d_cfp_iter4d_ref, + cfp_array4d_cfp_iter4d_ref_at, + cfp_array4d_cfp_iter4d_ptr, + cfp_array4d_cfp_iter4d_ptr_at, + cfp_array4d_cfp_iter4d_i, + cfp_array4d_cfp_iter4d_j, + cfp_array4d_cfp_iter4d_k, + cfp_array4d_cfp_iter4d_l, + cfp_array4d_cfp_iter4d_lt, + cfp_array4d_cfp_iter4d_gt, + cfp_array4d_cfp_iter4d_leq, + cfp_array4d_cfp_iter4d_geq, + cfp_array4d_cfp_iter4d_eq, + cfp_array4d_cfp_iter4d_neq, + cfp_array4d_cfp_iter4d_distance, + cfp_array4d_cfp_iter4d_next, + cfp_array4d_cfp_iter4d_prev, + cfp_array4d_cfp_iter4d_inc, + cfp_array4d_cfp_iter4d_dec, + }, + + { + cfp_header_ctor_array4d, + cfp_header_ctor_buffer, + cfp_header_dtor, + cfp_header_scalar_type, + cfp_header_dimensionality, + cfp_header_size_x, + cfp_header_size_y, + cfp_header_size_z, + cfp_header_size_w, + cfp_header_rate, + cfp_header_data, + cfp_header_size_bytes, + }, + }, +}; diff --git a/cfp/cfparray1d.cpp b/cfp/cfparray1d.cpp new file mode 100644 index 00000000..3a76b65f --- /dev/null +++ b/cfp/cfparray1d.cpp @@ -0,0 +1,21 @@ +#include "zfp/internal/cfp/array1d.h" +#include "zfp/array1.hpp" + +#include "template/template.h" + +#define CFP_ARRAY_TYPE cfp_array1d +#define CFP_REF_TYPE cfp_ref1d +#define CFP_PTR_TYPE cfp_ptr1d +#define CFP_ITER_TYPE 
cfp_iter1d +#define ZFP_ARRAY_TYPE zfp::array1d +#define ZFP_SCALAR_TYPE double + +#include "template/cfparray.cpp" +#include "template/cfparray1.cpp" + +#undef CFP_ARRAY_TYPE +#undef CFP_REF_TYPE +#undef CFP_PTR_TYPE +#undef CFP_ITER_TYPE +#undef ZFP_ARRAY_TYPE +#undef ZFP_SCALAR_TYPE diff --git a/cfp/cfparray1f.cpp b/cfp/cfparray1f.cpp new file mode 100644 index 00000000..2df70530 --- /dev/null +++ b/cfp/cfparray1f.cpp @@ -0,0 +1,21 @@ +#include "zfp/internal/cfp/array1f.h" +#include "zfp/array1.hpp" + +#include "template/template.h" + +#define CFP_ARRAY_TYPE cfp_array1f +#define CFP_REF_TYPE cfp_ref1f +#define CFP_PTR_TYPE cfp_ptr1f +#define CFP_ITER_TYPE cfp_iter1f +#define ZFP_ARRAY_TYPE zfp::array1f +#define ZFP_SCALAR_TYPE float + +#include "template/cfparray.cpp" +#include "template/cfparray1.cpp" + +#undef CFP_ARRAY_TYPE +#undef CFP_REF_TYPE +#undef CFP_PTR_TYPE +#undef CFP_ITER_TYPE +#undef ZFP_ARRAY_TYPE +#undef ZFP_SCALAR_TYPE diff --git a/cfp/cfparray2d.cpp b/cfp/cfparray2d.cpp new file mode 100644 index 00000000..fa3051b1 --- /dev/null +++ b/cfp/cfparray2d.cpp @@ -0,0 +1,21 @@ +#include "zfp/internal/cfp/array2d.h" +#include "zfp/array2.hpp" + +#include "template/template.h" + +#define CFP_ARRAY_TYPE cfp_array2d +#define CFP_REF_TYPE cfp_ref2d +#define CFP_PTR_TYPE cfp_ptr2d +#define CFP_ITER_TYPE cfp_iter2d +#define ZFP_ARRAY_TYPE zfp::array2d +#define ZFP_SCALAR_TYPE double + +#include "template/cfparray.cpp" +#include "template/cfparray2.cpp" + +#undef CFP_ARRAY_TYPE +#undef CFP_REF_TYPE +#undef CFP_PTR_TYPE +#undef CFP_ITER_TYPE +#undef ZFP_ARRAY_TYPE +#undef ZFP_SCALAR_TYPE diff --git a/cfp/cfparray2f.cpp b/cfp/cfparray2f.cpp new file mode 100644 index 00000000..ebfd1d9d --- /dev/null +++ b/cfp/cfparray2f.cpp @@ -0,0 +1,21 @@ +#include "zfp/internal/cfp/array2f.h" +#include "zfp/array2.hpp" + +#include "template/template.h" + +#define CFP_ARRAY_TYPE cfp_array2f +#define CFP_REF_TYPE cfp_ref2f +#define CFP_PTR_TYPE cfp_ptr2f +#define CFP_ITER_TYPE 
cfp_iter2f +#define ZFP_ARRAY_TYPE zfp::array2f +#define ZFP_SCALAR_TYPE float + +#include "template/cfparray.cpp" +#include "template/cfparray2.cpp" + +#undef CFP_ARRAY_TYPE +#undef CFP_REF_TYPE +#undef CFP_PTR_TYPE +#undef CFP_ITER_TYPE +#undef ZFP_ARRAY_TYPE +#undef ZFP_SCALAR_TYPE diff --git a/cfp/cfparray3d.cpp b/cfp/cfparray3d.cpp new file mode 100644 index 00000000..100d639a --- /dev/null +++ b/cfp/cfparray3d.cpp @@ -0,0 +1,21 @@ +#include "zfp/internal/cfp/array3d.h" +#include "zfp/array3.hpp" + +#include "template/template.h" + +#define CFP_ARRAY_TYPE cfp_array3d +#define CFP_REF_TYPE cfp_ref3d +#define CFP_PTR_TYPE cfp_ptr3d +#define CFP_ITER_TYPE cfp_iter3d +#define ZFP_ARRAY_TYPE zfp::array3d +#define ZFP_SCALAR_TYPE double + +#include "template/cfparray.cpp" +#include "template/cfparray3.cpp" + +#undef CFP_ARRAY_TYPE +#undef CFP_REF_TYPE +#undef CFP_PTR_TYPE +#undef CFP_ITER_TYPE +#undef ZFP_ARRAY_TYPE +#undef ZFP_SCALAR_TYPE diff --git a/cfp/cfparray3f.cpp b/cfp/cfparray3f.cpp new file mode 100644 index 00000000..b5cafb71 --- /dev/null +++ b/cfp/cfparray3f.cpp @@ -0,0 +1,21 @@ +#include "zfp/internal/cfp/array3f.h" +#include "zfp/array3.hpp" + +#include "template/template.h" + +#define CFP_ARRAY_TYPE cfp_array3f +#define CFP_REF_TYPE cfp_ref3f +#define CFP_PTR_TYPE cfp_ptr3f +#define CFP_ITER_TYPE cfp_iter3f +#define ZFP_ARRAY_TYPE zfp::array3f +#define ZFP_SCALAR_TYPE float + +#include "template/cfparray.cpp" +#include "template/cfparray3.cpp" + +#undef CFP_ARRAY_TYPE +#undef CFP_REF_TYPE +#undef CFP_PTR_TYPE +#undef CFP_ITER_TYPE +#undef ZFP_ARRAY_TYPE +#undef ZFP_SCALAR_TYPE diff --git a/cfp/cfparray4d.cpp b/cfp/cfparray4d.cpp new file mode 100644 index 00000000..bf1a2b06 --- /dev/null +++ b/cfp/cfparray4d.cpp @@ -0,0 +1,21 @@ +#include "zfp/internal/cfp/array4d.h" +#include "zfp/array4.hpp" + +#include "template/template.h" + +#define CFP_ARRAY_TYPE cfp_array4d +#define CFP_REF_TYPE cfp_ref4d +#define CFP_PTR_TYPE cfp_ptr4d +#define CFP_ITER_TYPE 
cfp_iter4d +#define ZFP_ARRAY_TYPE zfp::array4d +#define ZFP_SCALAR_TYPE double + +#include "template/cfparray.cpp" +#include "template/cfparray4.cpp" + +#undef CFP_ARRAY_TYPE +#undef CFP_REF_TYPE +#undef CFP_PTR_TYPE +#undef CFP_ITER_TYPE +#undef ZFP_ARRAY_TYPE +#undef ZFP_SCALAR_TYPE diff --git a/cfp/cfparray4f.cpp b/cfp/cfparray4f.cpp new file mode 100644 index 00000000..ca6bf0dd --- /dev/null +++ b/cfp/cfparray4f.cpp @@ -0,0 +1,21 @@ +#include "zfp/internal/cfp/array4f.h" +#include "zfp/array4.hpp" + +#include "template/template.h" + +#define CFP_ARRAY_TYPE cfp_array4f +#define CFP_REF_TYPE cfp_ref4f +#define CFP_PTR_TYPE cfp_ptr4f +#define CFP_ITER_TYPE cfp_iter4f +#define ZFP_ARRAY_TYPE zfp::array4f +#define ZFP_SCALAR_TYPE float + +#include "template/cfparray.cpp" +#include "template/cfparray4.cpp" + +#undef CFP_ARRAY_TYPE +#undef CFP_REF_TYPE +#undef CFP_PTR_TYPE +#undef CFP_ITER_TYPE +#undef ZFP_ARRAY_TYPE +#undef ZFP_SCALAR_TYPE diff --git a/cfp/cfpheader.cpp b/cfp/cfpheader.cpp new file mode 100644 index 00000000..b4b66e09 --- /dev/null +++ b/cfp/cfpheader.cpp @@ -0,0 +1,21 @@ +#include "zfp/array1.hpp" +#include "zfp/array2.hpp" +#include "zfp/array3.hpp" +#include "zfp/array4.hpp" +#include "zfp/internal/codec/zfpheader.hpp" +#include "zfp/internal/cfp/header.h" +#include "zfp/internal/cfp/array1f.h" +#include "zfp/internal/cfp/array1d.h" +#include "zfp/internal/cfp/array2f.h" +#include "zfp/internal/cfp/array2d.h" +#include "zfp/internal/cfp/array3f.h" +#include "zfp/internal/cfp/array3d.h" +#include "zfp/internal/cfp/array4f.h" +#include "zfp/internal/cfp/array4d.h" + +#include "template/template.h" + +#define CFP_HEADER_TYPE cfp_header +#define ZFP_HEADER_TYPE zfp::array::header + +#include "template/cfpheader.cpp" diff --git a/cfp/include/cfparray1d.h b/cfp/include/cfparray1d.h deleted file mode 100644 index 1be27295..00000000 --- a/cfp/include/cfparray1d.h +++ /dev/null @@ -1,37 +0,0 @@ -#ifndef CFP_ARRAY_1D -#define CFP_ARRAY_1D - -#include 
-#include "zfp/types.h" - -struct cfp_array1d; -typedef struct cfp_array1d cfp_array1d; - -typedef struct { - cfp_array1d* (*ctor_default)(); - cfp_array1d* (*ctor)(uint n, double rate, const double* p, size_t csize); - cfp_array1d* (*ctor_copy)(const cfp_array1d* src); - void (*dtor)(cfp_array1d* self); - - void (*deep_copy)(cfp_array1d* self, const cfp_array1d* src); - - double (*rate)(const cfp_array1d* self); - double (*set_rate)(cfp_array1d* self, double rate); - size_t (*cache_size)(const cfp_array1d* self); - void (*set_cache_size)(cfp_array1d* self, size_t csize); - void (*clear_cache)(const cfp_array1d* self); - void (*flush_cache)(const cfp_array1d* self); - size_t (*compressed_size)(const cfp_array1d* self); - uchar* (*compressed_data)(const cfp_array1d* self); - size_t (*size)(const cfp_array1d* self); - void (*resize)(cfp_array1d* self, uint n, int clear); - - void (*get_array)(const cfp_array1d* self, double* p); - void (*set_array)(cfp_array1d* self, const double* p); - double (*get_flat)(const cfp_array1d* self, uint i); - void (*set_flat)(cfp_array1d* self, uint i, double val); - double (*get)(const cfp_array1d* self, uint i); - void (*set)(cfp_array1d* self, uint i, double val); -} cfp_array1d_api; - -#endif diff --git a/cfp/include/cfparray1f.h b/cfp/include/cfparray1f.h deleted file mode 100644 index 90d52391..00000000 --- a/cfp/include/cfparray1f.h +++ /dev/null @@ -1,37 +0,0 @@ -#ifndef CFP_ARRAY_1F -#define CFP_ARRAY_1F - -#include -#include "zfp/types.h" - -struct cfp_array1f; -typedef struct cfp_array1f cfp_array1f; - -typedef struct { - cfp_array1f* (*ctor_default)(); - cfp_array1f* (*ctor)(uint n, double rate, const float* p, size_t csize); - cfp_array1f* (*ctor_copy)(const cfp_array1f* src); - void (*dtor)(cfp_array1f* self); - - void (*deep_copy)(cfp_array1f* self, const cfp_array1f* src); - - double (*rate)(const cfp_array1f* self); - double (*set_rate)(cfp_array1f* self, double rate); - size_t (*cache_size)(const cfp_array1f* self); - 
void (*set_cache_size)(cfp_array1f* self, size_t csize); - void (*clear_cache)(const cfp_array1f* self); - void (*flush_cache)(const cfp_array1f* self); - size_t (*compressed_size)(const cfp_array1f* self); - uchar* (*compressed_data)(const cfp_array1f* self); - size_t (*size)(const cfp_array1f* self); - void (*resize)(cfp_array1f* self, uint n, int clear); - - void (*get_array)(const cfp_array1f* self, float* p); - void (*set_array)(cfp_array1f* self, const float* p); - float (*get_flat)(const cfp_array1f* self, uint i); - void (*set_flat)(cfp_array1f* self, uint i, float val); - float (*get)(const cfp_array1f* self, uint i); - void (*set)(cfp_array1f* self, uint i, float val); -} cfp_array1f_api; - -#endif diff --git a/cfp/include/cfparray2d.h b/cfp/include/cfparray2d.h deleted file mode 100644 index b8d4c2a8..00000000 --- a/cfp/include/cfparray2d.h +++ /dev/null @@ -1,39 +0,0 @@ -#ifndef CFP_ARRAY_2D -#define CFP_ARRAY_2D - -#include -#include "zfp/types.h" - -struct cfp_array2d; -typedef struct cfp_array2d cfp_array2d; - -typedef struct { - cfp_array2d* (*ctor_default)(); - cfp_array2d* (*ctor)(uint nx, uint ny, double rate, const double* p, size_t csize); - cfp_array2d* (*ctor_copy)(const cfp_array2d* src); - void (*dtor)(cfp_array2d* self); - - void (*deep_copy)(cfp_array2d* self, const cfp_array2d* src); - - double (*rate)(const cfp_array2d* self); - double (*set_rate)(cfp_array2d* self, double rate); - size_t (*cache_size)(const cfp_array2d* self); - void (*set_cache_size)(cfp_array2d* self, size_t csize); - void (*clear_cache)(const cfp_array2d* self); - void (*flush_cache)(const cfp_array2d* self); - size_t (*compressed_size)(const cfp_array2d* self); - uchar* (*compressed_data)(const cfp_array2d* self); - size_t (*size)(const cfp_array2d* self); - uint (*size_x)(const cfp_array2d* self); - uint (*size_y)(const cfp_array2d* self); - void (*resize)(cfp_array2d* self, uint nx, uint ny, int clear); - - void (*get_array)(const cfp_array2d* self, double* p); - 
void (*set_array)(cfp_array2d* self, const double* p); - double (*get_flat)(const cfp_array2d* self, uint i); - void (*set_flat)(cfp_array2d* self, uint i, double val); - double (*get)(const cfp_array2d* self, uint i, uint j); - void (*set)(cfp_array2d* self, uint i, uint j, double val); -} cfp_array2d_api; - -#endif diff --git a/cfp/include/cfparray2f.h b/cfp/include/cfparray2f.h deleted file mode 100644 index a531ac24..00000000 --- a/cfp/include/cfparray2f.h +++ /dev/null @@ -1,39 +0,0 @@ -#ifndef CFP_ARRAY_2F -#define CFP_ARRAY_2F - -#include -#include "zfp/types.h" - -struct cfp_array2f; -typedef struct cfp_array2f cfp_array2f; - -typedef struct { - cfp_array2f* (*ctor_default)(); - cfp_array2f* (*ctor)(uint nx, uint ny, double rate, const float* p, size_t csize); - cfp_array2f* (*ctor_copy)(const cfp_array2f* src); - void (*dtor)(cfp_array2f* self); - - void (*deep_copy)(cfp_array2f* self, const cfp_array2f* src); - - double (*rate)(const cfp_array2f* self); - double (*set_rate)(cfp_array2f* self, double rate); - size_t (*cache_size)(const cfp_array2f* self); - void (*set_cache_size)(cfp_array2f* self, size_t csize); - void (*clear_cache)(const cfp_array2f* self); - void (*flush_cache)(const cfp_array2f* self); - size_t (*compressed_size)(const cfp_array2f* self); - uchar* (*compressed_data)(const cfp_array2f* self); - size_t (*size)(const cfp_array2f* self); - uint (*size_x)(const cfp_array2f* self); - uint (*size_y)(const cfp_array2f* self); - void (*resize)(cfp_array2f* self, uint nx, uint ny, int clear); - - void (*get_array)(const cfp_array2f* self, float* p); - void (*set_array)(cfp_array2f* self, const float* p); - float (*get_flat)(const cfp_array2f* self, uint i); - void (*set_flat)(cfp_array2f* self, uint i, float val); - float (*get)(const cfp_array2f* self, uint i, uint j); - void (*set)(cfp_array2f* self, uint i, uint j, float val); -} cfp_array2f_api; - -#endif diff --git a/cfp/include/cfparray3d.h b/cfp/include/cfparray3d.h deleted file mode 
100644 index 8390a619..00000000 --- a/cfp/include/cfparray3d.h +++ /dev/null @@ -1,40 +0,0 @@ -#ifndef CFP_ARRAY_3D -#define CFP_ARRAY_3D - -#include -#include "zfp/types.h" - -struct cfp_array3d; -typedef struct cfp_array3d cfp_array3d; - -typedef struct { - cfp_array3d* (*ctor_default)(); - cfp_array3d* (*ctor)(uint nx, uint ny, uint nz, double rate, const double* p, size_t csize); - cfp_array3d* (*ctor_copy)(const cfp_array3d* src); - void (*dtor)(cfp_array3d* self); - - void (*deep_copy)(cfp_array3d* self, const cfp_array3d* src); - - double (*rate)(const cfp_array3d* self); - double (*set_rate)(cfp_array3d* self, double rate); - size_t (*cache_size)(const cfp_array3d* self); - void (*set_cache_size)(cfp_array3d* self, size_t csize); - void (*clear_cache)(const cfp_array3d* self); - void (*flush_cache)(const cfp_array3d* self); - size_t (*compressed_size)(const cfp_array3d* self); - uchar* (*compressed_data)(const cfp_array3d* self); - size_t (*size)(const cfp_array3d* self); - uint (*size_x)(const cfp_array3d* self); - uint (*size_y)(const cfp_array3d* self); - uint (*size_z)(const cfp_array3d* self); - void (*resize)(cfp_array3d* self, uint nx, uint ny, uint nz, int clear); - - void (*get_array)(const cfp_array3d* self, double* p); - void (*set_array)(cfp_array3d* self, const double* p); - double (*get_flat)(const cfp_array3d* self, uint i); - void (*set_flat)(cfp_array3d* self, uint i, double val); - double (*get)(const cfp_array3d* self, uint i, uint j, uint k); - void (*set)(cfp_array3d* self, uint i, uint j, uint k, double val); -} cfp_array3d_api; - -#endif diff --git a/cfp/include/cfparray3f.h b/cfp/include/cfparray3f.h deleted file mode 100644 index 0261df31..00000000 --- a/cfp/include/cfparray3f.h +++ /dev/null @@ -1,40 +0,0 @@ -#ifndef CFP_ARRAY_3F -#define CFP_ARRAY_3F - -#include -#include "zfp/types.h" - -struct cfp_array3f; -typedef struct cfp_array3f cfp_array3f; - -typedef struct { - cfp_array3f* (*ctor_default)(); - cfp_array3f* (*ctor)(uint 
nx, uint ny, uint nz, double rate, const float* p, size_t csize); - cfp_array3f* (*ctor_copy)(const cfp_array3f* src); - void (*dtor)(cfp_array3f* self); - - void (*deep_copy)(cfp_array3f* self, const cfp_array3f* src); - - double (*rate)(const cfp_array3f* self); - double (*set_rate)(cfp_array3f* self, double rate); - size_t (*cache_size)(const cfp_array3f* self); - void (*set_cache_size)(cfp_array3f* self, size_t csize); - void (*clear_cache)(const cfp_array3f* self); - void (*flush_cache)(const cfp_array3f* self); - size_t (*compressed_size)(const cfp_array3f* self); - uchar* (*compressed_data)(const cfp_array3f* self); - size_t (*size)(const cfp_array3f* self); - uint (*size_x)(const cfp_array3f* self); - uint (*size_y)(const cfp_array3f* self); - uint (*size_z)(const cfp_array3f* self); - void (*resize)(cfp_array3f* self, uint nx, uint ny, uint nz, int clear); - - void (*get_array)(const cfp_array3f* self, float* p); - void (*set_array)(cfp_array3f* self, const float* p); - float (*get_flat)(const cfp_array3f* self, uint i); - void (*set_flat)(cfp_array3f* self, uint i, float val); - float (*get)(const cfp_array3f* self, uint i, uint j, uint k); - void (*set)(cfp_array3f* self, uint i, uint j, uint k, float val); -} cfp_array3f_api; - -#endif diff --git a/cfp/include/cfparrays.h b/cfp/include/cfparrays.h deleted file mode 100644 index f716d828..00000000 --- a/cfp/include/cfparrays.h +++ /dev/null @@ -1,28 +0,0 @@ -#ifndef CFP_ARRAYS -#define CFP_ARRAYS - -#include "cfparray1f.h" -#include "cfparray1d.h" -#include "cfparray2f.h" -#include "cfparray2d.h" -#include "cfparray3f.h" -#include "cfparray3d.h" - -#include "zfp/system.h" - -typedef struct { - cfp_array1f_api array1f; - cfp_array1d_api array1d; - cfp_array2f_api array2f; - cfp_array2d_api array2d; - cfp_array3f_api array3f; - cfp_array3d_api array3d; -} cfp_api; - -#ifndef CFP_NAMESPACE - #define CFP_NAMESPACE cfp -#endif - -extern_ const cfp_api CFP_NAMESPACE; - -#endif diff --git 
a/cfp/src/CMakeLists.txt b/cfp/src/CMakeLists.txt deleted file mode 100644 index 386698c6..00000000 --- a/cfp/src/CMakeLists.txt +++ /dev/null @@ -1,38 +0,0 @@ -add_library(cfp cfparrays.cpp) - -if(DEFINED CFP_NAMESPACE) - list(APPEND cfp_public_defs "CFP_NAMESPACE=${CFP_NAMESPACE}") -endif() - -list(APPEND cfp_private_defs ${zfp_compressed_array_defs}) - -if(WIN32) - # define ZFP_SOURCE when compiling libcfp to export symbols to Windows DLL - list(APPEND cfp_private_defs ZFP_SOURCE) -endif() - -target_compile_definitions(cfp - PUBLIC ${cfp_public_defs} - PRIVATE ${cfp_private_defs}) - -target_include_directories(cfp - PUBLIC - $ - $ - $ - $ - PRIVATE - ${ZFP_SOURCE_DIR}/array - ${ZFP_SOURCE_DIR}/src -) - -target_link_libraries(cfp zfp) - -set_property(TARGET cfp PROPERTY VERSION ${ZFP_VERSION}) -set_property(TARGET cfp PROPERTY SOVERSION ${ZFP_VERSION_MAJOR}) -set_property(TARGET cfp PROPERTY OUTPUT_NAME ${ZFP_LIBRARY_PREFIX}cfp) - -install(TARGETS cfp EXPORT cfp-targets - RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} - LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} - ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR})
diff --git a/cfp/src/cfparray1_source.cpp b/cfp/src/cfparray1_source.cpp deleted file mode 100644 index bdab414d..00000000 --- a/cfp/src/cfparray1_source.cpp +++ /dev/null @@ -1,23 +0,0 @@ -static CFP_ARRAY_TYPE * -_t1(CFP_ARRAY_TYPE, ctor)(uint n, double rate, const ZFP_SCALAR_TYPE * p, size_t csize) -{ - return reinterpret_cast<CFP_ARRAY_TYPE*>(new ZFP_ARRAY_TYPE(n, rate, p, csize)); -} - -static void -_t1(CFP_ARRAY_TYPE, resize)(CFP_ARRAY_TYPE * self, uint n, int clear) -{ - reinterpret_cast<ZFP_ARRAY_TYPE*>(self)->resize(n, clear); -} - -static ZFP_SCALAR_TYPE -_t1(CFP_ARRAY_TYPE, get)(const CFP_ARRAY_TYPE * self, uint i) -{ - return reinterpret_cast<const ZFP_ARRAY_TYPE*>(self)->operator()(i); -} - -static void -_t1(CFP_ARRAY_TYPE, set)(CFP_ARRAY_TYPE * self, uint i, ZFP_SCALAR_TYPE val) -{ - reinterpret_cast<ZFP_ARRAY_TYPE*>(self)->operator()(i) = val; -}
diff --git a/cfp/src/cfparray1d.cpp b/cfp/src/cfparray1d.cpp deleted file
mode 100644 index 1e71b0d2..00000000 --- a/cfp/src/cfparray1d.cpp +++ /dev/null @@ -1,15 +0,0 @@ -#include "cfparray1d.h" -#include "zfparray1.h" - -#include "template/template.h" - -#define CFP_ARRAY_TYPE cfp_array1d -#define ZFP_ARRAY_TYPE zfp::array1d -#define ZFP_SCALAR_TYPE double - -#include "cfparray_source.cpp" -#include "cfparray1_source.cpp" - -#undef CFP_ARRAY_TYPE -#undef ZFP_ARRAY_TYPE -#undef ZFP_SCALAR_TYPE
diff --git a/cfp/src/cfparray1f.cpp b/cfp/src/cfparray1f.cpp deleted file mode 100644 index 56ecda58..00000000 --- a/cfp/src/cfparray1f.cpp +++ /dev/null @@ -1,15 +0,0 @@ -#include "cfparray1f.h" -#include "zfparray1.h" - -#include "template/template.h" - -#define CFP_ARRAY_TYPE cfp_array1f -#define ZFP_ARRAY_TYPE zfp::array1f -#define ZFP_SCALAR_TYPE float - -#include "cfparray_source.cpp" -#include "cfparray1_source.cpp" - -#undef CFP_ARRAY_TYPE -#undef ZFP_ARRAY_TYPE -#undef ZFP_SCALAR_TYPE
diff --git a/cfp/src/cfparray2_source.cpp b/cfp/src/cfparray2_source.cpp deleted file mode 100644 index 6135ae40..00000000 --- a/cfp/src/cfparray2_source.cpp +++ /dev/null @@ -1,35 +0,0 @@ -static CFP_ARRAY_TYPE * -_t1(CFP_ARRAY_TYPE, ctor)(uint nx, uint ny, double rate, const ZFP_SCALAR_TYPE * p, size_t csize) -{ - return reinterpret_cast<CFP_ARRAY_TYPE*>(new ZFP_ARRAY_TYPE(nx, ny, rate, p, csize)); -} - -static uint -_t1(CFP_ARRAY_TYPE, size_x)(const CFP_ARRAY_TYPE * self) -{ - return reinterpret_cast<const ZFP_ARRAY_TYPE*>(self)->size_x(); -} - -static uint -_t1(CFP_ARRAY_TYPE, size_y)(const CFP_ARRAY_TYPE * self) -{ - return reinterpret_cast<const ZFP_ARRAY_TYPE*>(self)->size_y(); -} - -static void -_t1(CFP_ARRAY_TYPE, resize)(CFP_ARRAY_TYPE * self, uint nx, uint ny, int clear) -{ - reinterpret_cast<ZFP_ARRAY_TYPE*>(self)->resize(nx, ny, clear); -} - -static ZFP_SCALAR_TYPE -_t1(CFP_ARRAY_TYPE, get)(const CFP_ARRAY_TYPE * self, uint i, uint j) -{ - return reinterpret_cast<const ZFP_ARRAY_TYPE*>(self)->operator()(i, j); -} - -static void -_t1(CFP_ARRAY_TYPE, set)(CFP_ARRAY_TYPE * self, uint i, uint j, ZFP_SCALAR_TYPE val) -{ - reinterpret_cast<ZFP_ARRAY_TYPE*>(self)->operator()(i, j) = val; -}
diff --git a/cfp/src/cfparray2d.cpp b/cfp/src/cfparray2d.cpp deleted file mode 100644 index 3debb2b8..00000000 --- a/cfp/src/cfparray2d.cpp +++ /dev/null @@ -1,15 +0,0 @@ -#include "cfparray2d.h" -#include "zfparray2.h" - -#include "template/template.h" - -#define CFP_ARRAY_TYPE cfp_array2d -#define ZFP_ARRAY_TYPE zfp::array2d -#define ZFP_SCALAR_TYPE double - -#include "cfparray_source.cpp" -#include "cfparray2_source.cpp" - -#undef CFP_ARRAY_TYPE -#undef ZFP_ARRAY_TYPE -#undef ZFP_SCALAR_TYPE
diff --git a/cfp/src/cfparray2f.cpp b/cfp/src/cfparray2f.cpp deleted file mode 100644 index 37407cc8..00000000 --- a/cfp/src/cfparray2f.cpp +++ /dev/null @@ -1,15 +0,0 @@ -#include "cfparray2f.h" -#include "zfparray2.h" - -#include "template/template.h" - -#define CFP_ARRAY_TYPE cfp_array2f -#define ZFP_ARRAY_TYPE zfp::array2f -#define ZFP_SCALAR_TYPE float - -#include "cfparray_source.cpp" -#include "cfparray2_source.cpp" - -#undef CFP_ARRAY_TYPE -#undef ZFP_ARRAY_TYPE -#undef ZFP_SCALAR_TYPE
diff --git a/cfp/src/cfparray3_source.cpp b/cfp/src/cfparray3_source.cpp deleted file mode 100644 index ae2ebf6d..00000000 --- a/cfp/src/cfparray3_source.cpp +++ /dev/null @@ -1,41 +0,0 @@ -static CFP_ARRAY_TYPE * -_t1(CFP_ARRAY_TYPE, ctor)(uint nx, uint ny, uint nz, double rate, const ZFP_SCALAR_TYPE * p, size_t csize) -{ - return reinterpret_cast<CFP_ARRAY_TYPE*>(new ZFP_ARRAY_TYPE(nx, ny, nz, rate, p, csize)); -} - -static uint -_t1(CFP_ARRAY_TYPE, size_x)(const CFP_ARRAY_TYPE * self) -{ - return reinterpret_cast<const ZFP_ARRAY_TYPE*>(self)->size_x(); -} - -static uint -_t1(CFP_ARRAY_TYPE, size_y)(const CFP_ARRAY_TYPE * self) -{ - return reinterpret_cast<const ZFP_ARRAY_TYPE*>(self)->size_y(); -} - -static uint -_t1(CFP_ARRAY_TYPE, size_z)(const CFP_ARRAY_TYPE * self) -{ - return reinterpret_cast<const ZFP_ARRAY_TYPE*>(self)->size_z(); -} - -static void -_t1(CFP_ARRAY_TYPE, resize)(CFP_ARRAY_TYPE * self, uint nx, uint ny, uint nz, int clear) -{ - reinterpret_cast<ZFP_ARRAY_TYPE*>(self)->resize(nx, ny, nz, clear); -} - -static
ZFP_SCALAR_TYPE -_t1(CFP_ARRAY_TYPE, get)(const CFP_ARRAY_TYPE * self, uint i, uint j, uint k) -{ - return reinterpret_cast<const ZFP_ARRAY_TYPE*>(self)->operator()(i, j, k); -} - -static void -_t1(CFP_ARRAY_TYPE, set)(CFP_ARRAY_TYPE * self, uint i, uint j, uint k, ZFP_SCALAR_TYPE val) -{ - reinterpret_cast<ZFP_ARRAY_TYPE*>(self)->operator()(i, j, k) = val; -}
diff --git a/cfp/src/cfparray3d.cpp b/cfp/src/cfparray3d.cpp deleted file mode 100644 index fb5cc2e2..00000000 --- a/cfp/src/cfparray3d.cpp +++ /dev/null @@ -1,15 +0,0 @@ -#include "cfparray3d.h" -#include "zfparray3.h" - -#include "template/template.h" - -#define CFP_ARRAY_TYPE cfp_array3d -#define ZFP_ARRAY_TYPE zfp::array3d -#define ZFP_SCALAR_TYPE double - -#include "cfparray_source.cpp" -#include "cfparray3_source.cpp" - -#undef CFP_ARRAY_TYPE -#undef ZFP_ARRAY_TYPE -#undef ZFP_SCALAR_TYPE
diff --git a/cfp/src/cfparray3f.cpp b/cfp/src/cfparray3f.cpp deleted file mode 100644 index 69331b1c..00000000 --- a/cfp/src/cfparray3f.cpp +++ /dev/null @@ -1,15 +0,0 @@ -#include "cfparray3f.h" -#include "zfparray3.h" - -#include "template/template.h" - -#define CFP_ARRAY_TYPE cfp_array3f -#define ZFP_ARRAY_TYPE zfp::array3f -#define ZFP_SCALAR_TYPE float - -#include "cfparray_source.cpp" -#include "cfparray3_source.cpp" - -#undef CFP_ARRAY_TYPE -#undef ZFP_ARRAY_TYPE -#undef ZFP_SCALAR_TYPE
diff --git a/cfp/src/cfparray_source.cpp b/cfp/src/cfparray_source.cpp deleted file mode 100644 index d94e1a49..00000000 --- a/cfp/src/cfparray_source.cpp +++ /dev/null @@ -1,106 +0,0 @@ -// common constructor, destructor -static CFP_ARRAY_TYPE * -_t1(CFP_ARRAY_TYPE, ctor_default)() -{ - return reinterpret_cast<CFP_ARRAY_TYPE*>(new ZFP_ARRAY_TYPE()); -} - -static CFP_ARRAY_TYPE * -_t1(CFP_ARRAY_TYPE, ctor_copy)(const CFP_ARRAY_TYPE * src) -{ - return reinterpret_cast<CFP_ARRAY_TYPE*>( - new ZFP_ARRAY_TYPE(*reinterpret_cast<const ZFP_ARRAY_TYPE*>(src)) - ); -} - -static void -_t1(CFP_ARRAY_TYPE, dtor)(CFP_ARRAY_TYPE * self) -{ - delete reinterpret_cast<ZFP_ARRAY_TYPE*>(self); -} - -// functions defined in zfparray.h (base class) -static double -_t1(CFP_ARRAY_TYPE, rate)(const CFP_ARRAY_TYPE * self) -{ - return reinterpret_cast<const ZFP_ARRAY_TYPE*>(self)->rate(); -} - -static double -_t1(CFP_ARRAY_TYPE, set_rate)(CFP_ARRAY_TYPE * self, double rate) -{ - return reinterpret_cast<ZFP_ARRAY_TYPE*>(self)->set_rate(rate); -} - -static size_t -_t1(CFP_ARRAY_TYPE, compressed_size)(const CFP_ARRAY_TYPE * self) -{ - return reinterpret_cast<const ZFP_ARRAY_TYPE*>(self)->compressed_size(); -} - -static uchar* -_t1(CFP_ARRAY_TYPE, compressed_data)(const CFP_ARRAY_TYPE * self) -{ - return reinterpret_cast<const ZFP_ARRAY_TYPE*>(self)->compressed_data(); -} - -static void -_t1(CFP_ARRAY_TYPE, deep_copy)(CFP_ARRAY_TYPE * self, const CFP_ARRAY_TYPE * src) -{ - *reinterpret_cast<ZFP_ARRAY_TYPE*>(self) = *reinterpret_cast<const ZFP_ARRAY_TYPE*>(src); -} - -// functions defined in subclasses -static size_t -_t1(CFP_ARRAY_TYPE, size)(const CFP_ARRAY_TYPE * self) -{ - return reinterpret_cast<const ZFP_ARRAY_TYPE*>(self)->size(); -} - -static size_t -_t1(CFP_ARRAY_TYPE, cache_size)(const CFP_ARRAY_TYPE * self) -{ - return reinterpret_cast<const ZFP_ARRAY_TYPE*>(self)->cache_size(); -} - -static void -_t1(CFP_ARRAY_TYPE, set_cache_size)(CFP_ARRAY_TYPE * self, size_t csize) -{ - reinterpret_cast<ZFP_ARRAY_TYPE*>(self)->set_cache_size(csize); -} - -static void -_t1(CFP_ARRAY_TYPE, clear_cache)(const CFP_ARRAY_TYPE * self) -{ - reinterpret_cast<const ZFP_ARRAY_TYPE*>(self)->clear_cache(); -} - -static void -_t1(CFP_ARRAY_TYPE, flush_cache)(const CFP_ARRAY_TYPE * self) -{ - reinterpret_cast<const ZFP_ARRAY_TYPE*>(self)->flush_cache(); -} - -static void -_t1(CFP_ARRAY_TYPE, get_array)(const CFP_ARRAY_TYPE * self, ZFP_SCALAR_TYPE * p) -{ - reinterpret_cast<const ZFP_ARRAY_TYPE*>(self)->get(p); -} - -static void -_t1(CFP_ARRAY_TYPE, set_array)(CFP_ARRAY_TYPE * self, const ZFP_SCALAR_TYPE * p) -{ - reinterpret_cast<ZFP_ARRAY_TYPE*>(self)->set(p); -} - -static ZFP_SCALAR_TYPE -_t1(CFP_ARRAY_TYPE, get_flat)(const CFP_ARRAY_TYPE * self, uint i) -{ - return reinterpret_cast<const ZFP_ARRAY_TYPE*>(self)->operator[](i); -} - -static void -_t1(CFP_ARRAY_TYPE, set_flat)(CFP_ARRAY_TYPE * self, uint i, ZFP_SCALAR_TYPE val) -{ - reinterpret_cast<ZFP_ARRAY_TYPE*>(self)->operator[](i) = val; -}
diff --git a/cfp/src/cfparrays.cpp
b/cfp/src/cfparrays.cpp deleted file mode 100644 index bcd88686..00000000 --- a/cfp/src/cfparrays.cpp +++ /dev/null @@ -1,183 +0,0 @@ -#include "cfparrays.h" - -#include "cfparray1f.cpp" -#include "cfparray1d.cpp" -#include "cfparray2f.cpp" -#include "cfparray2d.cpp" -#include "cfparray3f.cpp" -#include "cfparray3d.cpp" - -export_ const cfp_api CFP_NAMESPACE = { - // array1f - { - cfp_array1f_ctor_default, - cfp_array1f_ctor, - cfp_array1f_ctor_copy, - cfp_array1f_dtor, - - cfp_array1f_deep_copy, - - cfp_array1f_rate, - cfp_array1f_set_rate, - cfp_array1f_cache_size, - cfp_array1f_set_cache_size, - cfp_array1f_clear_cache, - cfp_array1f_flush_cache, - cfp_array1f_compressed_size, - cfp_array1f_compressed_data, - cfp_array1f_size, - cfp_array1f_resize, - - cfp_array1f_get_array, - cfp_array1f_set_array, - cfp_array1f_get_flat, - cfp_array1f_set_flat, - cfp_array1f_get, - cfp_array1f_set, - }, - // array1d - { - cfp_array1d_ctor_default, - cfp_array1d_ctor, - cfp_array1d_ctor_copy, - cfp_array1d_dtor, - - cfp_array1d_deep_copy, - - cfp_array1d_rate, - cfp_array1d_set_rate, - cfp_array1d_cache_size, - cfp_array1d_set_cache_size, - cfp_array1d_clear_cache, - cfp_array1d_flush_cache, - cfp_array1d_compressed_size, - cfp_array1d_compressed_data, - cfp_array1d_size, - cfp_array1d_resize, - - cfp_array1d_get_array, - cfp_array1d_set_array, - cfp_array1d_get_flat, - cfp_array1d_set_flat, - cfp_array1d_get, - cfp_array1d_set, - }, - // array2f - { - cfp_array2f_ctor_default, - cfp_array2f_ctor, - cfp_array2f_ctor_copy, - cfp_array2f_dtor, - - cfp_array2f_deep_copy, - - cfp_array2f_rate, - cfp_array2f_set_rate, - cfp_array2f_cache_size, - cfp_array2f_set_cache_size, - cfp_array2f_clear_cache, - cfp_array2f_flush_cache, - cfp_array2f_compressed_size, - cfp_array2f_compressed_data, - cfp_array2f_size, - cfp_array2f_size_x, - cfp_array2f_size_y, - cfp_array2f_resize, - - cfp_array2f_get_array, - cfp_array2f_set_array, - cfp_array2f_get_flat, - cfp_array2f_set_flat, - 
cfp_array2f_get, - cfp_array2f_set, - }, - // array2d - { - cfp_array2d_ctor_default, - cfp_array2d_ctor, - cfp_array2d_ctor_copy, - cfp_array2d_dtor, - - cfp_array2d_deep_copy, - - cfp_array2d_rate, - cfp_array2d_set_rate, - cfp_array2d_cache_size, - cfp_array2d_set_cache_size, - cfp_array2d_clear_cache, - cfp_array2d_flush_cache, - cfp_array2d_compressed_size, - cfp_array2d_compressed_data, - cfp_array2d_size, - cfp_array2d_size_x, - cfp_array2d_size_y, - cfp_array2d_resize, - - cfp_array2d_get_array, - cfp_array2d_set_array, - cfp_array2d_get_flat, - cfp_array2d_set_flat, - cfp_array2d_get, - cfp_array2d_set, - }, - // array3f - { - cfp_array3f_ctor_default, - cfp_array3f_ctor, - cfp_array3f_ctor_copy, - cfp_array3f_dtor, - - cfp_array3f_deep_copy, - - cfp_array3f_rate, - cfp_array3f_set_rate, - cfp_array3f_cache_size, - cfp_array3f_set_cache_size, - cfp_array3f_clear_cache, - cfp_array3f_flush_cache, - cfp_array3f_compressed_size, - cfp_array3f_compressed_data, - cfp_array3f_size, - cfp_array3f_size_x, - cfp_array3f_size_y, - cfp_array3f_size_z, - cfp_array3f_resize, - - cfp_array3f_get_array, - cfp_array3f_set_array, - cfp_array3f_get_flat, - cfp_array3f_set_flat, - cfp_array3f_get, - cfp_array3f_set, - }, - // array3d - { - cfp_array3d_ctor_default, - cfp_array3d_ctor, - cfp_array3d_ctor_copy, - cfp_array3d_dtor, - - cfp_array3d_deep_copy, - - cfp_array3d_rate, - cfp_array3d_set_rate, - cfp_array3d_cache_size, - cfp_array3d_set_cache_size, - cfp_array3d_clear_cache, - cfp_array3d_flush_cache, - cfp_array3d_compressed_size, - cfp_array3d_compressed_data, - cfp_array3d_size, - cfp_array3d_size_x, - cfp_array3d_size_y, - cfp_array3d_size_z, - cfp_array3d_resize, - - cfp_array3d_get_array, - cfp_array3d_set_array, - cfp_array3d_get_flat, - cfp_array3d_set_flat, - cfp_array3d_get, - cfp_array3d_set, - }, -}; diff --git a/cfp/template/cfparray.cpp b/cfp/template/cfparray.cpp new file mode 100644 index 00000000..70bb3c2d --- /dev/null +++ b/cfp/template/cfparray.cpp 
@@ -0,0 +1,136 @@ +// common constructor, destructor +static CFP_ARRAY_TYPE +_t1(CFP_ARRAY_TYPE, ctor_default)() +{ + CFP_ARRAY_TYPE a; + a.object = new ZFP_ARRAY_TYPE(); + return a; +} + +static CFP_ARRAY_TYPE +_t1(CFP_ARRAY_TYPE, ctor_copy)(CFP_ARRAY_TYPE src) +{ + CFP_ARRAY_TYPE a; + a.object = new ZFP_ARRAY_TYPE(*static_cast<ZFP_ARRAY_TYPE*>(src.object)); + return a; +} + +static CFP_ARRAY_TYPE +_t1(CFP_ARRAY_TYPE, ctor_header)(CFP_HEADER_TYPE h, const void* buffer, size_t buffer_size_bytes) +{ + CFP_ARRAY_TYPE a; + a.object = new ZFP_ARRAY_TYPE(*static_cast<const ZFP_HEADER_TYPE*>(h.object), buffer, buffer_size_bytes); + return a; +} + +static void +_t1(CFP_ARRAY_TYPE, dtor)(CFP_ARRAY_TYPE self) +{ + delete static_cast<ZFP_ARRAY_TYPE*>(self.object); +} + +// functions defined in zfparray.h (base class) +static double +_t1(CFP_ARRAY_TYPE, rate)(CFP_ARRAY_TYPE self) +{ + return static_cast<ZFP_ARRAY_TYPE*>(self.object)->rate(); +} + +static double +_t1(CFP_ARRAY_TYPE, set_rate)(CFP_ARRAY_TYPE self, double rate) +{ + return static_cast<ZFP_ARRAY_TYPE*>(self.object)->set_rate(rate); +} + +static size_t +_t1(CFP_ARRAY_TYPE, size_bytes)(CFP_ARRAY_TYPE self, uint mask) +{ + return static_cast<ZFP_ARRAY_TYPE*>(self.object)->size_bytes(mask); +} + +static size_t +_t1(CFP_ARRAY_TYPE, compressed_size)(CFP_ARRAY_TYPE self) +{ + return static_cast<ZFP_ARRAY_TYPE*>(self.object)->compressed_size(); +} + +static void* +_t1(CFP_ARRAY_TYPE, compressed_data)(CFP_ARRAY_TYPE self) +{ + return static_cast<ZFP_ARRAY_TYPE*>(self.object)->compressed_data(); +} + +static void +_t1(CFP_ARRAY_TYPE, deep_copy)(CFP_ARRAY_TYPE self, const CFP_ARRAY_TYPE src) +{ + *static_cast<ZFP_ARRAY_TYPE*>(self.object) = *static_cast<ZFP_ARRAY_TYPE*>(src.object); +} + +// functions defined in subclasses +static size_t +_t1(CFP_ARRAY_TYPE, size)(CFP_ARRAY_TYPE self) +{ + return static_cast<ZFP_ARRAY_TYPE*>(self.object)->size(); +} + +static size_t +_t1(CFP_ARRAY_TYPE, cache_size)(CFP_ARRAY_TYPE self) +{ + return static_cast<ZFP_ARRAY_TYPE*>(self.object)->cache_size(); +} + +static void +_t1(CFP_ARRAY_TYPE, set_cache_size)(CFP_ARRAY_TYPE self, size_t bytes) +{ + static_cast<ZFP_ARRAY_TYPE*>(self.object)->set_cache_size(bytes); +} + +static void +_t1(CFP_ARRAY_TYPE, clear_cache)(CFP_ARRAY_TYPE self) +{ + static_cast<ZFP_ARRAY_TYPE*>(self.object)->clear_cache(); +} + +static void +_t1(CFP_ARRAY_TYPE, flush_cache)(CFP_ARRAY_TYPE self) +{ + static_cast<ZFP_ARRAY_TYPE*>(self.object)->flush_cache(); +} + +static void +_t1(CFP_ARRAY_TYPE, get_array)(CFP_ARRAY_TYPE self, ZFP_SCALAR_TYPE * p) +{ + static_cast<ZFP_ARRAY_TYPE*>(self.object)->get(p); +} + +static void +_t1(CFP_ARRAY_TYPE, set_array)(CFP_ARRAY_TYPE self, const ZFP_SCALAR_TYPE * p) +{ + static_cast<ZFP_ARRAY_TYPE*>(self.object)->set(p); +} + +static ZFP_SCALAR_TYPE +_t1(CFP_ARRAY_TYPE, get_flat)(CFP_ARRAY_TYPE self, size_t i) +{ + return static_cast<ZFP_ARRAY_TYPE*>(self.object)->operator[](i); +} + +static void +_t1(CFP_ARRAY_TYPE, set_flat)(CFP_ARRAY_TYPE self, size_t i, ZFP_SCALAR_TYPE val) +{ + static_cast<ZFP_ARRAY_TYPE*>(self.object)->operator[](i) = val; +} + +static CFP_PTR_TYPE +_t2(CFP_ARRAY_TYPE, CFP_REF_TYPE, ptr)(CFP_REF_TYPE self) +{ + CFP_PTR_TYPE p; + p.reference = self; + return p; +} + +static CFP_REF_TYPE +_t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, ref)(CFP_PTR_TYPE self) +{ + return self.reference; +}
diff --git a/cfp/template/cfparray1.cpp b/cfp/template/cfparray1.cpp new file mode 100644 index 00000000..6ebc19ce --- /dev/null +++ b/cfp/template/cfparray1.cpp @@ -0,0 +1,332 @@ +static CFP_ARRAY_TYPE +_t1(CFP_ARRAY_TYPE, ctor)(size_t n, double rate, const ZFP_SCALAR_TYPE* p, size_t cache_size) +{ + CFP_ARRAY_TYPE a; + a.object = new ZFP_ARRAY_TYPE(n, rate, p, cache_size); + return a; +} + +static void +_t1(CFP_ARRAY_TYPE, resize)(CFP_ARRAY_TYPE self, size_t n, zfp_bool clear) +{ + static_cast<ZFP_ARRAY_TYPE*>(self.object)->resize(n, !!clear); +} + +static ZFP_SCALAR_TYPE +_t1(CFP_ARRAY_TYPE, get)(CFP_ARRAY_TYPE self, size_t i) +{ + return static_cast<ZFP_ARRAY_TYPE*>(self.object)->operator()(i); +} + +static void +_t1(CFP_ARRAY_TYPE, set)(CFP_ARRAY_TYPE self, size_t i, ZFP_SCALAR_TYPE val) +{ + static_cast<ZFP_ARRAY_TYPE*>(self.object)->operator()(i) = val; +} + +static CFP_REF_TYPE +_t1(CFP_ARRAY_TYPE, ref)(CFP_ARRAY_TYPE self, size_t i) +{ + CFP_REF_TYPE r; + r.array = self;
+ r.x = i; + return r; +} + +static CFP_REF_TYPE +_t1(CFP_ARRAY_TYPE, ref_flat)(CFP_ARRAY_TYPE self, size_t i) +{ + CFP_REF_TYPE r; + r.array = self; + r.x = i; + return r; +} + +static CFP_PTR_TYPE +_t1(CFP_ARRAY_TYPE, ptr)(CFP_ARRAY_TYPE self, size_t i) +{ + CFP_PTR_TYPE p; + p.reference = _t1(CFP_ARRAY_TYPE, ref)(self, i); + return p; +} + +static CFP_PTR_TYPE +_t1(CFP_ARRAY_TYPE, ptr_flat)(CFP_ARRAY_TYPE self, size_t i) +{ + CFP_PTR_TYPE p; + p.reference = _t1(CFP_ARRAY_TYPE, ref_flat)(self, i); + return p; +} + +static CFP_ITER_TYPE +_t1(CFP_ARRAY_TYPE, begin)(CFP_ARRAY_TYPE self) +{ + CFP_ITER_TYPE it; + it.array = self; + it.x = 0; + return it; +} + +static CFP_ITER_TYPE +_t1(CFP_ARRAY_TYPE, end)(CFP_ARRAY_TYPE self) +{ + CFP_ITER_TYPE it; + it.array = self; + it.x = static_cast<ZFP_ARRAY_TYPE*>(self.object)->size_x(); + return it; +} + +static ZFP_SCALAR_TYPE +_t2(CFP_ARRAY_TYPE, CFP_REF_TYPE, get)(CFP_REF_TYPE self) +{ + return static_cast<ZFP_ARRAY_TYPE*>(self.array.object)->operator()(self.x); +} + +static void +_t2(CFP_ARRAY_TYPE, CFP_REF_TYPE, set)(CFP_REF_TYPE self, ZFP_SCALAR_TYPE val) +{ + static_cast<ZFP_ARRAY_TYPE*>(self.array.object)->operator()(self.x) = val; +} + +static void +_t2(CFP_ARRAY_TYPE, CFP_REF_TYPE, copy)(CFP_REF_TYPE self, CFP_REF_TYPE src) +{ + static_cast<ZFP_ARRAY_TYPE*>(self.array.object)->operator()(self.x) = + static_cast<ZFP_ARRAY_TYPE*>(src.array.object)->operator()(src.x); +} + +static zfp_bool +_t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, lt)(CFP_PTR_TYPE lhs, CFP_PTR_TYPE rhs) +{ + return lhs.reference.array.object == rhs.reference.array.object && + lhs.reference.x < rhs.reference.x; +} + +static zfp_bool +_t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, gt)(CFP_PTR_TYPE lhs, CFP_PTR_TYPE rhs) +{ + return lhs.reference.array.object == rhs.reference.array.object && + lhs.reference.x > rhs.reference.x; +} + +static zfp_bool +_t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, leq)(CFP_PTR_TYPE lhs, CFP_PTR_TYPE rhs) +{ + return lhs.reference.array.object == rhs.reference.array.object && + lhs.reference.x <= rhs.reference.x; +} + +static zfp_bool +_t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, geq)(CFP_PTR_TYPE lhs, CFP_PTR_TYPE rhs) +{ + return lhs.reference.array.object == rhs.reference.array.object && + lhs.reference.x >= rhs.reference.x; +} + +static zfp_bool +_t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, eq)(CFP_PTR_TYPE lhs, CFP_PTR_TYPE rhs) +{ + return lhs.reference.array.object == rhs.reference.array.object && + lhs.reference.x == rhs.reference.x; +} + +static zfp_bool +_t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, neq)(CFP_PTR_TYPE lhs, CFP_PTR_TYPE rhs) +{ + return !_t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, eq)(lhs, rhs); +} + +static ptrdiff_t +_t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, distance)(CFP_PTR_TYPE first, CFP_PTR_TYPE last) +{ + return last.reference.x - first.reference.x; +} + +static CFP_PTR_TYPE +_t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, next)(CFP_PTR_TYPE p, ptrdiff_t d) +{ + p.reference.x += d; + return p; +} + +static CFP_PTR_TYPE +_t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, prev)(CFP_PTR_TYPE p, ptrdiff_t d) +{ + p.reference.x -= d; + return p; +} + +static CFP_PTR_TYPE +_t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, inc)(CFP_PTR_TYPE p) +{ + p.reference.x++; + return p; +} + +static CFP_PTR_TYPE +_t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, dec)(CFP_PTR_TYPE p) +{ + p.reference.x--; + return p; +} + +static ZFP_SCALAR_TYPE +_t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, get)(CFP_PTR_TYPE self) +{ + return static_cast<ZFP_ARRAY_TYPE*>(self.reference.array.object)->operator()(self.reference.x); +} + +static ZFP_SCALAR_TYPE +_t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, get_at)(CFP_PTR_TYPE self, ptrdiff_t d) +{ + self = _t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, next)(self, d); + return static_cast<ZFP_ARRAY_TYPE*>(self.reference.array.object)->operator()(self.reference.x); +} + +static void +_t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, set)(CFP_PTR_TYPE self, ZFP_SCALAR_TYPE val) +{ + static_cast<ZFP_ARRAY_TYPE*>(self.reference.array.object)->operator()(self.reference.x) = val; +} + +static void +_t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, set_at)(CFP_PTR_TYPE self, ptrdiff_t d, ZFP_SCALAR_TYPE val) +{ + self = _t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, next)(self, d); + static_cast<ZFP_ARRAY_TYPE*>(self.reference.array.object)->operator()(self.reference.x) = val; +} + +static CFP_REF_TYPE +_t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, ref_at)(CFP_PTR_TYPE self, ptrdiff_t d) +{ + self = _t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, next)(self, d); + return self.reference; +} + +static zfp_bool +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, lt)(CFP_ITER_TYPE lhs, CFP_ITER_TYPE rhs) +{ + return lhs.array.object == rhs.array.object && lhs.x < rhs.x; +} + +static zfp_bool +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, gt)(CFP_ITER_TYPE lhs, CFP_ITER_TYPE rhs) +{ + return lhs.array.object == rhs.array.object && lhs.x > rhs.x; +} + +static zfp_bool +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, leq)(CFP_ITER_TYPE lhs, CFP_ITER_TYPE rhs) +{ + return lhs.array.object == rhs.array.object && lhs.x <= rhs.x; +} + +static zfp_bool +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, geq)(CFP_ITER_TYPE lhs, CFP_ITER_TYPE rhs) +{ + return lhs.array.object == rhs.array.object && lhs.x >= rhs.x; +} + +static zfp_bool +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, eq)(CFP_ITER_TYPE lhs, CFP_ITER_TYPE rhs) +{ + return lhs.array.object == rhs.array.object && lhs.x == rhs.x; +} + +static zfp_bool +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, neq)(CFP_ITER_TYPE lhs, CFP_ITER_TYPE rhs) +{ + return !_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, eq)(lhs, rhs); +} + +static ptrdiff_t +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, distance)(CFP_ITER_TYPE first, CFP_ITER_TYPE last) +{ + return last.x - first.x; +} + +static CFP_ITER_TYPE +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, next)(CFP_ITER_TYPE it, ptrdiff_t d) +{ + it.x += d; + return it; +} + +static CFP_ITER_TYPE +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, prev)(CFP_ITER_TYPE it, ptrdiff_t d) +{ + it.x -= d; + return it; +} + +static CFP_ITER_TYPE +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, inc)(CFP_ITER_TYPE it) +{ + it.x++; + return it; +} + +static CFP_ITER_TYPE +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, dec)(CFP_ITER_TYPE it) +{ + it.x--; + return it; +} + +static ZFP_SCALAR_TYPE +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, get)(CFP_ITER_TYPE self) +{ + return
static_cast<ZFP_ARRAY_TYPE*>(self.array.object)->operator()(self.x); +} + +static ZFP_SCALAR_TYPE +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, get_at)(CFP_ITER_TYPE self, ptrdiff_t d) +{ + self = _t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, next)(self, d); + return static_cast<ZFP_ARRAY_TYPE*>(self.array.object)->operator()(self.x); +} + +static void +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, set)(CFP_ITER_TYPE self, ZFP_SCALAR_TYPE val) +{ + static_cast<ZFP_ARRAY_TYPE*>(self.array.object)->operator()(self.x) = val; +} + +static void +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, set_at)(CFP_ITER_TYPE self, ptrdiff_t d, ZFP_SCALAR_TYPE val) +{ + self = _t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, next)(self, d); + static_cast<ZFP_ARRAY_TYPE*>(self.array.object)->operator()(self.x) = val; +} + +static CFP_REF_TYPE +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, ref)(CFP_ITER_TYPE self) +{ + return _t1(CFP_ARRAY_TYPE, ref)(self.array, self.x); +} + +static CFP_REF_TYPE +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, ref_at)(CFP_ITER_TYPE self, ptrdiff_t d) +{ + self = _t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, next)(self, d); + return _t1(CFP_ARRAY_TYPE, ref)(self.array, self.x); +} + +static CFP_PTR_TYPE +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, ptr)(CFP_ITER_TYPE self) +{ + return _t1(CFP_ARRAY_TYPE, ptr)(self.array, self.x); +} + +static CFP_PTR_TYPE +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, ptr_at)(CFP_ITER_TYPE self, ptrdiff_t d) +{ + self = _t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, next)(self, d); + return _t1(CFP_ARRAY_TYPE, ptr)(self.array, self.x); +} + +static size_t +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, i)(CFP_ITER_TYPE self) +{ + return self.x; +}
diff --git a/cfp/template/cfparray2.cpp b/cfp/template/cfparray2.cpp new file mode 100644 index 00000000..f919ff09 --- /dev/null +++ b/cfp/template/cfparray2.cpp @@ -0,0 +1,468 @@ +// utility function: compute onedimensional offset from multidimensional index +static ptrdiff_t +ref_offset(const CFP_REF_TYPE& self) +{ + size_t nx = static_cast<const ZFP_ARRAY_TYPE*>(self.array.object)->size_x(); + return static_cast<ptrdiff_t>(self.x + nx * self.y); +} + +// utility function: compute multidimensional index from onedimensional offset +static void +ref_set_offset(CFP_REF_TYPE& self, size_t offset) +{ + size_t nx = static_cast<const ZFP_ARRAY_TYPE*>(self.array.object)->size_x(); + self.x = offset % nx; offset /= nx; + self.y = offset; +} + +// utility function: compute onedimensional offset from multidimensional index +static ptrdiff_t +iter_offset(const CFP_ITER_TYPE& self) +{ + const ZFP_ARRAY_TYPE* container = static_cast<const ZFP_ARRAY_TYPE*>(self.array.object); + size_t xmin = 0; + size_t xmax = container->size_x(); + size_t ymin = 0; + size_t ymax = container->size_y(); + size_t nx = xmax - xmin; + size_t ny = ymax - ymin; + size_t x = self.x; + size_t y = self.y; + size_t p = 0; + if (y == ymax) + p += nx * ny; + else { + size_t m = ~size_t(3); + size_t by = std::max(y & m, ymin); size_t sy = std::min((by + 4) & m, ymax) - by; p += (by - ymin) * nx; + size_t bx = std::max(x & m, xmin); size_t sx = std::min((bx + 4) & m, xmax) - bx; p += (bx - xmin) * sy; + p += (y - by) * sx; + p += (x - bx); + } + return static_cast<ptrdiff_t>(p); +} + +// utility function: compute multidimensional index from onedimensional offset +static void +iter_set_offset(CFP_ITER_TYPE& self, size_t offset) +{ + const ZFP_ARRAY_TYPE* container = static_cast<const ZFP_ARRAY_TYPE*>(self.array.object); + size_t xmin = 0; + size_t xmax = container->size_x(); + size_t ymin = 0; + size_t ymax = container->size_y(); + size_t nx = xmax - xmin; + size_t ny = ymax - ymin; + size_t p = offset; + size_t x, y; + if (p == nx * ny) { + x = xmin; + y = ymax; + } + else { + size_t m = ~size_t(3); + size_t by = std::max((ymin + p / nx) & m, ymin); size_t sy = std::min((by + 4) & m, ymax) - by; p -= (by - ymin) * nx; + size_t bx = std::max((xmin + p / sy) & m, xmin); size_t sx = std::min((bx + 4) & m, xmax) - bx; p -= (bx - xmin) * sy; + y = by + p / sx; p -= (y - by) * sx; + x = bx + p; p -= (x - bx); + } + self.x = x; + self.y = y; +} + +static CFP_ARRAY_TYPE +_t1(CFP_ARRAY_TYPE, ctor)(size_t nx, size_t ny, double rate, const ZFP_SCALAR_TYPE* p, size_t cache_size) +{ + CFP_ARRAY_TYPE a; + a.object = new ZFP_ARRAY_TYPE(nx, ny, rate, p, cache_size); + return a; +} + +static size_t +_t1(CFP_ARRAY_TYPE, size_x)(CFP_ARRAY_TYPE self) +{ + return static_cast<ZFP_ARRAY_TYPE*>(self.object)->size_x(); +} + +static size_t +_t1(CFP_ARRAY_TYPE, size_y)(CFP_ARRAY_TYPE self) +{ + return static_cast<ZFP_ARRAY_TYPE*>(self.object)->size_y(); +} + +static void +_t1(CFP_ARRAY_TYPE, resize)(CFP_ARRAY_TYPE self, size_t nx, size_t ny, zfp_bool clear) +{ + static_cast<ZFP_ARRAY_TYPE*>(self.object)->resize(nx, ny, !!clear); +} + +static ZFP_SCALAR_TYPE +_t1(CFP_ARRAY_TYPE, get)(CFP_ARRAY_TYPE self, size_t i, size_t j) +{ + return static_cast<ZFP_ARRAY_TYPE*>(self.object)->operator()(i, j); +} + +static void +_t1(CFP_ARRAY_TYPE, set)(CFP_ARRAY_TYPE self, size_t i, size_t j, ZFP_SCALAR_TYPE val) +{ + static_cast<ZFP_ARRAY_TYPE*>(self.object)->operator()(i, j) = val; +} + +static CFP_REF_TYPE +_t1(CFP_ARRAY_TYPE, ref)(CFP_ARRAY_TYPE self, size_t i, size_t j) +{ + CFP_REF_TYPE r; + r.array = self; + r.x = i; + r.y = j; + return r; +} + +static CFP_REF_TYPE +_t1(CFP_ARRAY_TYPE, ref_flat)(CFP_ARRAY_TYPE self, size_t i) +{ + CFP_REF_TYPE r; + r.array = self; + ref_set_offset(r, i); + return r; +} + +static CFP_PTR_TYPE +_t1(CFP_ARRAY_TYPE, ptr)(CFP_ARRAY_TYPE self, size_t i, size_t j) +{ + CFP_PTR_TYPE p; + p.reference = _t1(CFP_ARRAY_TYPE, ref)(self, i, j); + return p; +} + +static CFP_PTR_TYPE +_t1(CFP_ARRAY_TYPE, ptr_flat)(CFP_ARRAY_TYPE self, size_t i) +{ + CFP_PTR_TYPE p; + p.reference = _t1(CFP_ARRAY_TYPE, ref_flat)(self, i); + return p; +} + +static CFP_ITER_TYPE +_t1(CFP_ARRAY_TYPE, begin)(CFP_ARRAY_TYPE self) +{ + CFP_ITER_TYPE it; + it.array = self; + it.x = 0; + it.y = 0; + return it; +} + +static CFP_ITER_TYPE +_t1(CFP_ARRAY_TYPE, end)(CFP_ARRAY_TYPE self) +{ + CFP_ITER_TYPE it; + it.array = self; + it.x = 0; + it.y = static_cast<ZFP_ARRAY_TYPE*>(self.object)->size_y(); + return it; +} + +static ZFP_SCALAR_TYPE +_t2(CFP_ARRAY_TYPE, CFP_REF_TYPE, get)(CFP_REF_TYPE self) +{ + return static_cast<ZFP_ARRAY_TYPE*>(self.array.object)->operator()(self.x, self.y); +} + +static void
+_t2(CFP_ARRAY_TYPE, CFP_REF_TYPE, set)(CFP_REF_TYPE self, ZFP_SCALAR_TYPE val) +{ + static_cast<ZFP_ARRAY_TYPE*>(self.array.object)->operator()(self.x, self.y) = val; +} + +static void +_t2(CFP_ARRAY_TYPE, CFP_REF_TYPE, copy)(CFP_REF_TYPE self, CFP_REF_TYPE src) +{ + static_cast<ZFP_ARRAY_TYPE*>(self.array.object)->operator()(self.x, self.y) = + static_cast<ZFP_ARRAY_TYPE*>(src.array.object)->operator()(src.x, src.y); +} + +static zfp_bool +_t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, lt)(CFP_PTR_TYPE lhs, CFP_PTR_TYPE rhs) +{ + return lhs.reference.array.object == rhs.reference.array.object && ref_offset(lhs.reference) < ref_offset(rhs.reference); +} + +static zfp_bool +_t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, gt)(CFP_PTR_TYPE lhs, CFP_PTR_TYPE rhs) +{ + return lhs.reference.array.object == rhs.reference.array.object && ref_offset(lhs.reference) > ref_offset(rhs.reference); +} + +static zfp_bool +_t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, leq)(CFP_PTR_TYPE lhs, CFP_PTR_TYPE rhs) +{ + return lhs.reference.array.object == rhs.reference.array.object && ref_offset(lhs.reference) <= ref_offset(rhs.reference); +} + +static zfp_bool +_t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, geq)(CFP_PTR_TYPE lhs, CFP_PTR_TYPE rhs) +{ + return lhs.reference.array.object == rhs.reference.array.object && ref_offset(lhs.reference) >= ref_offset(rhs.reference); +} + +static zfp_bool +_t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, eq)(CFP_PTR_TYPE lhs, CFP_PTR_TYPE rhs) +{ + return lhs.reference.array.object == rhs.reference.array.object && + lhs.reference.x == rhs.reference.x && + lhs.reference.y == rhs.reference.y; +} + +static zfp_bool +_t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, neq)(CFP_PTR_TYPE lhs, CFP_PTR_TYPE rhs) +{ + return !_t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, eq)(lhs, rhs); +} + +static ptrdiff_t +_t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, distance)(CFP_PTR_TYPE first, CFP_PTR_TYPE last) +{ + return ref_offset(last.reference) - ref_offset(first.reference); +} + +static CFP_PTR_TYPE +_t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, next)(CFP_PTR_TYPE p, ptrdiff_t d) +{ + ref_set_offset(p.reference, ref_offset(p.reference) + d); + return p; +} + +static CFP_PTR_TYPE +_t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, prev)(CFP_PTR_TYPE p, ptrdiff_t d) +{ + return _t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, next)(p, -d); +} + +static CFP_PTR_TYPE +_t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, inc)(CFP_PTR_TYPE p) +{ + return _t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, next)(p, +1); +} + +static CFP_PTR_TYPE +_t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, dec)(CFP_PTR_TYPE p) +{ + return _t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, next)(p, -1); +} + +static ZFP_SCALAR_TYPE +_t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, get)(CFP_PTR_TYPE self) +{ + return static_cast<ZFP_ARRAY_TYPE*>(self.reference.array.object)->operator()(self.reference.x, self.reference.y); +} + +static ZFP_SCALAR_TYPE +_t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, get_at)(CFP_PTR_TYPE self, ptrdiff_t d) +{ + self = _t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, next)(self, d); + return static_cast<ZFP_ARRAY_TYPE*>(self.reference.array.object)->operator()(self.reference.x, self.reference.y); +} + +static void +_t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, set)(CFP_PTR_TYPE self, ZFP_SCALAR_TYPE val) +{ + static_cast<ZFP_ARRAY_TYPE*>(self.reference.array.object)->operator()(self.reference.x, self.reference.y) = val; +} + +static void
+_t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, set_at)(CFP_PTR_TYPE self, ptrdiff_t d, ZFP_SCALAR_TYPE val) +{ + self = _t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, next)(self, d); + static_cast<ZFP_ARRAY_TYPE*>(self.reference.array.object)->operator()(self.reference.x, self.reference.y) = val; +} + +static CFP_REF_TYPE +_t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, ref_at)(CFP_PTR_TYPE self, ptrdiff_t d) +{ + self = _t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, next)(self, d); + return self.reference; +} + +static zfp_bool +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, lt)(CFP_ITER_TYPE lhs, CFP_ITER_TYPE rhs) +{ + return lhs.array.object == rhs.array.object && iter_offset(lhs) < iter_offset(rhs); +} + +static zfp_bool +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, gt)(CFP_ITER_TYPE lhs, CFP_ITER_TYPE rhs) +{ + return lhs.array.object == rhs.array.object && iter_offset(lhs) > iter_offset(rhs); +} + +static zfp_bool +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, leq)(CFP_ITER_TYPE lhs, CFP_ITER_TYPE rhs) +{ + return lhs.array.object == rhs.array.object && iter_offset(lhs) <= iter_offset(rhs); +} + +static zfp_bool +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, geq)(CFP_ITER_TYPE lhs, CFP_ITER_TYPE rhs) +{ + return lhs.array.object == rhs.array.object && iter_offset(lhs) >= iter_offset(rhs); +} + +static zfp_bool +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, eq)(CFP_ITER_TYPE lhs, CFP_ITER_TYPE rhs) +{ + return lhs.array.object == rhs.array.object && + lhs.x == rhs.x && + lhs.y == rhs.y; +} + +static zfp_bool +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, neq)(CFP_ITER_TYPE lhs, CFP_ITER_TYPE rhs) +{ + return !_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, eq)(lhs, rhs); +} + +static ptrdiff_t +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, distance)(CFP_ITER_TYPE first, CFP_ITER_TYPE last) +{ + return iter_offset(last) - iter_offset(first); +} + +static CFP_ITER_TYPE +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, next)(CFP_ITER_TYPE it, ptrdiff_t d) +{ + iter_set_offset(it, iter_offset(it) + d); + return it; +} + +static CFP_ITER_TYPE +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, prev)(CFP_ITER_TYPE it, ptrdiff_t d) +{ + iter_set_offset(it, iter_offset(it) - d); + return it; +} + +static CFP_ITER_TYPE +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, inc)(CFP_ITER_TYPE it) +{ + const ZFP_ARRAY_TYPE* container = static_cast<const ZFP_ARRAY_TYPE*>(it.array.object); + size_t xmin = 0; + size_t xmax = container->size_x(); + size_t ymin = 0; + size_t ymax = container->size_y(); + size_t m = ~size_t(3); + ++it.x; + if (!(it.x & 3u) || it.x == xmax) { + it.x = std::max((it.x - 1) & m, xmin); + ++it.y; + if (!(it.y & 3u) || it.y == ymax) { + it.y = std::max((it.y - 1) & m, ymin); + // done with block; advance to next + it.x = (it.x + 4) & m; + if (it.x >= xmax) { + it.x = xmin; + it.y = (it.y + 4) & m; + if (it.y >= ymax) + it.y = ymax; + } + } + } + return it; +} + +static CFP_ITER_TYPE +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, dec)(CFP_ITER_TYPE it) +{ + const ZFP_ARRAY_TYPE* container = static_cast<const ZFP_ARRAY_TYPE*>(it.array.object); + size_t xmin = 0; + size_t xmax = container->size_x(); + size_t ymin = 0; + size_t ymax = container->size_y(); + size_t m = ~size_t(3); + if (it.y == ymax) { + it.x = xmax - 1; + it.y = ymax - 1; + } + else { + if (!(it.x & 3u) || it.x == xmin) { + it.x = std::min((it.x + 4) & m, xmax); + if (!(it.y & 3u) || it.y == ymin) { + it.y = std::min((it.y + 4) & m, ymax); + // done with block; advance to next + it.x = (it.x - 1) & m; + if (it.x <= xmin) { + it.x = xmax; + it.y = (it.y - 1) & m; + if (it.y <= ymin) + it.y = ymin; + } + } + --it.y; + } + --it.x; + } + return it; +} + +static ZFP_SCALAR_TYPE +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, get)(CFP_ITER_TYPE self) +{ + return static_cast<ZFP_ARRAY_TYPE*>(self.array.object)->operator()(self.x, self.y); +} + +static ZFP_SCALAR_TYPE +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, get_at)(CFP_ITER_TYPE self, ptrdiff_t d) +{ + self = _t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, next)(self, d); + return static_cast<ZFP_ARRAY_TYPE*>(self.array.object)->operator()(self.x, self.y); +} + +static void
+_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, set)(CFP_ITER_TYPE self, ZFP_SCALAR_TYPE val) +{ + static_cast<ZFP_ARRAY_TYPE*>(self.array.object)->operator()(self.x, self.y) = val; +} + +static void
+_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, set_at)(CFP_ITER_TYPE self, ptrdiff_t d, ZFP_SCALAR_TYPE val) +{ + self = _t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, next)(self, d); + static_cast<ZFP_ARRAY_TYPE*>(self.array.object)->operator()(self.x, self.y) = val; +} + +static CFP_REF_TYPE
+_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, ref)(CFP_ITER_TYPE self) +{ + return _t1(CFP_ARRAY_TYPE, ref)(self.array, self.x, self.y); +} + +static CFP_REF_TYPE
+_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, ref_at)(CFP_ITER_TYPE self, ptrdiff_t d) +{ + self = _t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, next)(self, d); + return _t1(CFP_ARRAY_TYPE, ref)(self.array, self.x, self.y); +} + +static CFP_PTR_TYPE
+_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, ptr)(CFP_ITER_TYPE self) +{ + return _t1(CFP_ARRAY_TYPE, ptr)(self.array, self.x, self.y); +} + +static CFP_PTR_TYPE
+_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, ptr_at)(CFP_ITER_TYPE self, ptrdiff_t d) +{ + self = _t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, next)(self, d); + return _t1(CFP_ARRAY_TYPE, ptr)(self.array, self.x, self.y); +} + +static size_t +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, i)(CFP_ITER_TYPE self) +{ + return self.x; +} + +static size_t +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, j)(CFP_ITER_TYPE self) +{ + return self.y; +} diff --git a/cfp/template/cfparray3.cpp b/cfp/template/cfparray3.cpp new file mode 100644 index 00000000..efbdc3ce --- /dev/null +++ b/cfp/template/cfparray3.cpp @@ -0,0 +1,522 @@ +// utility function: compute onedimensional offset from multidimensional index +static ptrdiff_t +ref_offset(const CFP_REF_TYPE& self) +{ + size_t nx = static_cast(self.array.object)->size_x(); + size_t ny = static_cast(self.array.object)->size_y(); + return static_cast(self.x + nx * (self.y + ny * self.z)); +} + +// utility function: compute multidimensional index from onedimensional offset +static void +ref_set_offset(CFP_REF_TYPE& self, size_t offset) +{ + size_t nx = static_cast(self.array.object)->size_x(); + size_t ny = static_cast(self.array.object)->size_y(); + self.x = offset % nx; offset /= nx; + self.y = offset % ny; offset /= ny; + self.z = offset; +} + +// utility function: compute onedimensional offset from multidimensional index +static ptrdiff_t +iter_offset(const CFP_ITER_TYPE& self) +{ + const ZFP_ARRAY_TYPE* container = static_cast(self.array.object); + size_t xmin = 0; + size_t xmax = container->size_x(); + size_t ymin = 0; + size_t ymax = container->size_y(); + size_t zmin = 0; + size_t zmax = container->size_z(); + size_t nx = xmax - xmin; + size_t ny = ymax - ymin; + size_t nz = zmax - zmin; + size_t x = self.x; + size_t y = self.y; + size_t z = self.z; + size_t p = 0; + if (z == zmax) + p += nx * ny * nz; + else { + size_t m = ~size_t(3); + size_t bz = std::max(z & m, zmin); size_t sz = std::min((bz + 4) & m, zmax) - bz; p += (bz - zmin) * nx * ny; + size_t by = 
std::max(y & m, ymin); size_t sy = std::min((by + 4) & m, ymax) - by; p += (by - ymin) * nx * sz; + size_t bx = std::max(x & m, xmin); size_t sx = std::min((bx + 4) & m, xmax) - bx; p += (bx - xmin) * sy * sz; + p += (z - bz) * sx * sy; + p += (y - by) * sx; + p += (x - bx); + } + return p; +} + +// utility function: compute multidimensional index from onedimensional offset +static void +iter_set_offset(CFP_ITER_TYPE& self, size_t offset) +{ + const ZFP_ARRAY_TYPE* container = static_cast(self.array.object); + size_t xmin = 0; + size_t xmax = container->size_x(); + size_t ymin = 0; + size_t ymax = container->size_y(); + size_t zmin = 0; + size_t zmax = container->size_z(); + size_t nx = xmax - xmin; + size_t ny = ymax - ymin; + size_t nz = zmax - zmin; + size_t p = offset; + size_t x, y, z; + if (p == nx * ny * nz) { + x = xmin; + y = ymin; + z = zmax; + } + else { + size_t m = ~size_t(3); + size_t bz = std::max((zmin + p / (nx * ny)) & m, zmin); size_t sz = std::min((bz + 4) & m, zmax) - bz; p -= (bz - zmin) * nx * ny; + size_t by = std::max((ymin + p / (nx * sz)) & m, ymin); size_t sy = std::min((by + 4) & m, ymax) - by; p -= (by - ymin) * nx * sz; + size_t bx = std::max((xmin + p / (sy * sz)) & m, xmin); size_t sx = std::min((bx + 4) & m, xmax) - bx; p -= (bx - xmin) * sy * sz; + z = bz + p / (sx * sy); p -= (z - bz) * sx * sy; + y = by + p / sx; p -= (y - by) * sx; + x = bx + p; p -= (x - bx); + } + self.x = x; + self.y = y; + self.z = z; +} + +static CFP_ARRAY_TYPE +_t1(CFP_ARRAY_TYPE, ctor)(size_t nx, size_t ny, size_t nz, double rate, const ZFP_SCALAR_TYPE* p, size_t cache_size) +{ + CFP_ARRAY_TYPE a; + a.object = new ZFP_ARRAY_TYPE(nx, ny, nz, rate, p, cache_size); + return a; +} + +static size_t +_t1(CFP_ARRAY_TYPE, size_x)(CFP_ARRAY_TYPE self) +{ + return static_cast(self.object)->size_x(); +} + +static size_t +_t1(CFP_ARRAY_TYPE, size_y)(CFP_ARRAY_TYPE self) +{ + return static_cast(self.object)->size_y(); +} + +static size_t +_t1(CFP_ARRAY_TYPE, 
size_z)(CFP_ARRAY_TYPE self) +{ + return static_cast(self.object)->size_z(); +} + +static void +_t1(CFP_ARRAY_TYPE, resize)(CFP_ARRAY_TYPE self, size_t nx, size_t ny, size_t nz, zfp_bool clear) +{ + static_cast(self.object)->resize(nx, ny, nz, !!clear); +} + +static ZFP_SCALAR_TYPE +_t1(CFP_ARRAY_TYPE, get)(CFP_ARRAY_TYPE self, size_t i, size_t j, size_t k) +{ + return static_cast(self.object)->operator()(i, j, k); +} + +static void +_t1(CFP_ARRAY_TYPE, set)(CFP_ARRAY_TYPE self, size_t i, size_t j, size_t k, ZFP_SCALAR_TYPE val) +{ + static_cast(self.object)->operator()(i, j, k) = val; +} + +static CFP_REF_TYPE +_t1(CFP_ARRAY_TYPE, ref)(CFP_ARRAY_TYPE self, size_t i, size_t j, size_t k) +{ + CFP_REF_TYPE r; + r.array = self; + r.x = i; + r.y = j; + r.z = k; + return r; +} + +static CFP_REF_TYPE +_t1(CFP_ARRAY_TYPE, ref_flat)(CFP_ARRAY_TYPE self, size_t i) +{ + CFP_REF_TYPE r; + r.array = self; + ref_set_offset(r, i); + return r; +} + +static CFP_PTR_TYPE +_t1(CFP_ARRAY_TYPE, ptr)(CFP_ARRAY_TYPE self, size_t i, size_t j, size_t k) +{ + CFP_PTR_TYPE p; + p.reference = _t1(CFP_ARRAY_TYPE, ref)(self, i, j, k); + return p; +} + +static CFP_PTR_TYPE +_t1(CFP_ARRAY_TYPE, ptr_flat)(CFP_ARRAY_TYPE self, size_t i) +{ + CFP_PTR_TYPE p; + p.reference = _t1(CFP_ARRAY_TYPE, ref_flat)(self, i); + return p; +} + +static CFP_ITER_TYPE +_t1(CFP_ARRAY_TYPE, begin)(CFP_ARRAY_TYPE self) +{ + CFP_ITER_TYPE it; + it.array = self; + it.x = 0; + it.y = 0; + it.z = 0; + return it; +} + +static CFP_ITER_TYPE +_t1(CFP_ARRAY_TYPE, end)(CFP_ARRAY_TYPE self) +{ + CFP_ITER_TYPE it; + it.array = self; + it.x = 0; + it.y = 0; + it.z = static_cast(self.object)->size_z(); + return it; +} + +static ZFP_SCALAR_TYPE +_t2(CFP_ARRAY_TYPE, CFP_REF_TYPE, get)(CFP_REF_TYPE self) +{ + return static_cast(self.array.object)->operator()(self.x, self.y, self.z); +} + +static void +_t2(CFP_ARRAY_TYPE, CFP_REF_TYPE, set)(CFP_REF_TYPE self, ZFP_SCALAR_TYPE val) +{ + static_cast(self.array.object)->operator()(self.x, 
self.y, self.z) = val; +} + +static void +_t2(CFP_ARRAY_TYPE, CFP_REF_TYPE, copy)(CFP_REF_TYPE self, CFP_REF_TYPE src) +{ + static_cast(self.array.object)->operator()(self.x, self.y, self.z) = + static_cast(src.array.object)->operator()(src.x, src.y, src.z); +} + +static zfp_bool +_t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, lt)(CFP_PTR_TYPE lhs, CFP_PTR_TYPE rhs) +{ + return lhs.reference.array.object == rhs.reference.array.object && ref_offset(lhs.reference) < ref_offset(rhs.reference); +} + +static zfp_bool +_t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, gt)(CFP_PTR_TYPE lhs, CFP_PTR_TYPE rhs) +{ + return lhs.reference.array.object == rhs.reference.array.object && ref_offset(lhs.reference) > ref_offset(rhs.reference); +} + +static zfp_bool +_t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, leq)(CFP_PTR_TYPE lhs, CFP_PTR_TYPE rhs) +{ + return lhs.reference.array.object == rhs.reference.array.object && ref_offset(lhs.reference) <= ref_offset(rhs.reference); +} + +static zfp_bool +_t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, geq)(CFP_PTR_TYPE lhs, CFP_PTR_TYPE rhs) +{ + return lhs.reference.array.object == rhs.reference.array.object && ref_offset(lhs.reference) >= ref_offset(rhs.reference); +} + +static zfp_bool +_t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, eq)(CFP_PTR_TYPE lhs, CFP_PTR_TYPE rhs) +{ + return lhs.reference.array.object == rhs.reference.array.object && + lhs.reference.x == rhs.reference.x && + lhs.reference.y == rhs.reference.y && + lhs.reference.z == rhs.reference.z; +} + +static zfp_bool +_t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, neq)(CFP_PTR_TYPE lhs, CFP_PTR_TYPE rhs) +{ + return !_t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, eq)(lhs, rhs); +} + +static ptrdiff_t +_t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, distance)(CFP_PTR_TYPE first, CFP_PTR_TYPE last) +{ + return ref_offset(last.reference) - ref_offset(first.reference); +} + +static CFP_PTR_TYPE +_t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, next)(CFP_PTR_TYPE p, ptrdiff_t d) +{ + ref_set_offset(p.reference, ref_offset(p.reference) + d); + return p; +} + +static CFP_PTR_TYPE 
+_t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, prev)(CFP_PTR_TYPE p, ptrdiff_t d) +{ + return _t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, next)(p, -d); +} + +static CFP_PTR_TYPE +_t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, inc)(CFP_PTR_TYPE p) +{ + return _t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, next)(p, +1); +} + +static CFP_PTR_TYPE +_t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, dec)(CFP_PTR_TYPE p) +{ + return _t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, next)(p, -1); +} + +static ZFP_SCALAR_TYPE +_t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, get)(CFP_PTR_TYPE self) +{ + return static_cast(self.reference.array.object)->operator()(self.reference.x, self.reference.y, self.reference.z); +} + +static ZFP_SCALAR_TYPE +_t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, get_at)(CFP_PTR_TYPE self, ptrdiff_t d) +{ + self = _t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, next)(self, d); + return static_cast(self.reference.array.object)->operator()(self.reference.x, self.reference.y, self.reference.z); +} + +static void +_t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, set)(CFP_PTR_TYPE self, ZFP_SCALAR_TYPE val) +{ + static_cast(self.reference.array.object)->operator()(self.reference.x, self.reference.y, self.reference.z) = val; +} + +static void +_t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, set_at)(CFP_PTR_TYPE self, ptrdiff_t d, ZFP_SCALAR_TYPE val) +{ + self = _t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, next)(self, d); + static_cast(self.reference.array.object)->operator()(self.reference.x, self.reference.y, self.reference.z) = val; +} + +static CFP_REF_TYPE +_t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, ref_at)(CFP_PTR_TYPE self, ptrdiff_t d) +{ + self = _t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, next)(self, d); + return self.reference; +} + +static zfp_bool +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, lt)(CFP_ITER_TYPE lhs, CFP_ITER_TYPE rhs) +{ + return lhs.array.object == rhs.array.object && iter_offset(lhs) < iter_offset(rhs); +} + +static zfp_bool +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, gt)(CFP_ITER_TYPE lhs, CFP_ITER_TYPE rhs) +{ + return lhs.array.object == rhs.array.object && iter_offset(lhs) > iter_offset(rhs); +} + +static 
zfp_bool +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, leq)(CFP_ITER_TYPE lhs, CFP_ITER_TYPE rhs) +{ + return lhs.array.object == rhs.array.object && iter_offset(lhs) <= iter_offset(rhs); +} + +static zfp_bool +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, geq)(CFP_ITER_TYPE lhs, CFP_ITER_TYPE rhs) +{ + return lhs.array.object == rhs.array.object && iter_offset(lhs) >= iter_offset(rhs); +} + +static zfp_bool +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, eq)(CFP_ITER_TYPE lhs, CFP_ITER_TYPE rhs) +{ + return lhs.array.object == rhs.array.object && + lhs.x == rhs.x && + lhs.y == rhs.y && + lhs.z == rhs.z; +} + +static zfp_bool +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, neq)(CFP_ITER_TYPE lhs, CFP_ITER_TYPE rhs) +{ + return !_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, eq)(lhs, rhs); +} + +static ptrdiff_t +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, distance)(CFP_ITER_TYPE first, CFP_ITER_TYPE last) +{ + return iter_offset(last) - iter_offset(first); +} + +static CFP_ITER_TYPE +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, next)(CFP_ITER_TYPE it, ptrdiff_t d) +{ + iter_set_offset(it, iter_offset(it) + d); + return it; +} + +static CFP_ITER_TYPE +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, prev)(CFP_ITER_TYPE it, ptrdiff_t d) +{ + iter_set_offset(it, iter_offset(it) - d); + return it; +} + +static CFP_ITER_TYPE +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, inc)(CFP_ITER_TYPE it) +{ + const ZFP_ARRAY_TYPE* container = static_cast(it.array.object); + size_t xmin = 0; + size_t xmax = container->size_x(); + size_t ymin = 0; + size_t ymax = container->size_y(); + size_t zmin = 0; + size_t zmax = container->size_z(); + size_t m = ~size_t(3); + ++it.x; + if (!(it.x & 3u) || it.x == xmax) { + it.x = std::max((it.x - 1) & m, xmin); + ++it.y; + if (!(it.y & 3u) || it.y == ymax) { + it.y = std::max((it.y - 1) & m, ymin); + ++it.z; + if (!(it.z & 3u) || it.z == zmax) { + it.z = std::max((it.z - 1) & m, zmin); + // done with block; advance to next + it.x = (it.x + 4) & m; + if (it.x >= xmax) { + it.x = xmin; + it.y = (it.y + 4) & m; + if (it.y >= ymax) { + it.y = 
ymin; + it.z = (it.z + 4) & m; + if (it.z >= zmax) + it.z = zmax; + } + } + } + } + } + return it; +} + +static CFP_ITER_TYPE +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, dec)(CFP_ITER_TYPE it) +{ + const ZFP_ARRAY_TYPE* container = static_cast(it.array.object); + size_t xmin = 0; + size_t xmax = container->size_x(); + size_t ymin = 0; + size_t ymax = container->size_y(); + size_t zmin = 0; + size_t zmax = container->size_z(); + size_t m = ~size_t(3); + if (it.z == zmax) { + it.x = xmax - 1; + it.y = ymax - 1; + it.z = zmax - 1; + } + else { + if (!(it.x & 3u) || it.x == xmin) { + it.x = std::min((it.x + 4) & m, xmax); + if (!(it.y & 3u) || it.y == ymin) { + it.y = std::min((it.y + 4) & m, ymax); + if (!(it.z & 3u) || it.z == zmin) { + it.z = std::min((it.z + 4) & m, zmax); + // done with block; advance to next + it.x = (it.x - 1) & m; + if (it.x <= xmin) { + it.x = xmax; + it.y = (it.y - 1) & m; + if (it.y <= ymin) { + it.y = ymax; + it.z = (it.z - 1) & m; + if (it.z <= zmin) + it.z = zmin; + } + } + } + --it.z; + } + --it.y; + } + --it.x; + } + return it; +} + +static ZFP_SCALAR_TYPE +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, get)(CFP_ITER_TYPE self) +{ + return static_cast(self.array.object)->operator()(self.x, self.y, self.z); +} + +static ZFP_SCALAR_TYPE +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, get_at)(CFP_ITER_TYPE self, ptrdiff_t d) +{ + self = _t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, next)(self, d); + return static_cast(self.array.object)->operator()(self.x, self.y, self.z); +} + +static void +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, set)(CFP_ITER_TYPE self, ZFP_SCALAR_TYPE val) +{ + static_cast(self.array.object)->operator()(self.x, self.y, self.z) = val; +} + +static void +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, set_at)(CFP_ITER_TYPE self, ptrdiff_t d, ZFP_SCALAR_TYPE val) +{ + self = _t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, next)(self, d); + static_cast(self.array.object)->operator()(self.x, self.y, self.z) = val; +} + +static CFP_REF_TYPE +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, ref)(CFP_ITER_TYPE 
self) +{ + return _t1(CFP_ARRAY_TYPE, ref)(self.array, self.x, self.y, self.z); +} + +static CFP_REF_TYPE +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, ref_at)(CFP_ITER_TYPE self, ptrdiff_t d) +{ + self = _t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, next)(self, d); + return _t1(CFP_ARRAY_TYPE, ref)(self.array, self.x, self.y, self.z); +} + +static CFP_PTR_TYPE +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, ptr)(CFP_ITER_TYPE self) +{ + return _t1(CFP_ARRAY_TYPE, ptr)(self.array, self.x, self.y, self.z); +} + +static CFP_PTR_TYPE +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, ptr_at)(CFP_ITER_TYPE self, ptrdiff_t d) +{ + self = _t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, next)(self, d); + return _t1(CFP_ARRAY_TYPE, ptr)(self.array, self.x, self.y, self.z); +} + +static size_t +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, i)(CFP_ITER_TYPE self) +{ + return self.x; +} + +static size_t +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, j)(CFP_ITER_TYPE self) +{ + return self.y; +} + +static size_t +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, k)(CFP_ITER_TYPE self) +{ + return self.z; +} diff --git a/cfp/template/cfparray4.cpp b/cfp/template/cfparray4.cpp new file mode 100644 index 00000000..d55ef47f --- /dev/null +++ b/cfp/template/cfparray4.cpp @@ -0,0 +1,576 @@ +// utility function: compute onedimensional offset from multidimensional index +static ptrdiff_t +ref_offset(const CFP_REF_TYPE& self) +{ + size_t nx = static_cast(self.array.object)->size_x(); + size_t ny = static_cast(self.array.object)->size_y(); + size_t nz = static_cast(self.array.object)->size_z(); + return static_cast(self.x + nx * (self.y + ny * (self.z + nz * self.w))); +} + +// utility function: compute multidimensional index from onedimensional offset +static void +ref_set_offset(CFP_REF_TYPE& self, size_t offset) +{ + size_t nx = static_cast(self.array.object)->size_x(); + size_t ny = static_cast(self.array.object)->size_y(); + size_t nz = static_cast(self.array.object)->size_z(); + self.x = offset % nx; offset /= nx; + self.y = offset % ny; offset /= ny; + self.z = offset % nz; 
offset /= nz; + self.w = offset; +} + +// utility function: compute onedimensional offset from multidimensional index +static ptrdiff_t +iter_offset(const CFP_ITER_TYPE& self) +{ + const ZFP_ARRAY_TYPE* container = static_cast(self.array.object); + size_t xmin = 0; + size_t xmax = container->size_x(); + size_t ymin = 0; + size_t ymax = container->size_y(); + size_t zmin = 0; + size_t zmax = container->size_z(); + size_t wmin = 0; + size_t wmax = container->size_w(); + size_t nx = xmax - xmin; + size_t ny = ymax - ymin; + size_t nz = zmax - zmin; + size_t nw = wmax - wmin; + size_t x = self.x; + size_t y = self.y; + size_t z = self.z; + size_t w = self.w; + size_t p = 0; + if (w == wmax) + p += nx * ny * nz * nw; + else { + size_t m = ~size_t(3); + size_t bw = std::max(w & m, wmin); size_t sw = std::min((bw + 4) & m, wmax) - bw; p += (bw - wmin) * nx * ny * nz; + size_t bz = std::max(z & m, zmin); size_t sz = std::min((bz + 4) & m, zmax) - bz; p += (bz - zmin) * nx * ny * sw; + size_t by = std::max(y & m, ymin); size_t sy = std::min((by + 4) & m, ymax) - by; p += (by - ymin) * nx * sz * sw; + size_t bx = std::max(x & m, xmin); size_t sx = std::min((bx + 4) & m, xmax) - bx; p += (bx - xmin) * sy * sz * sw; + p += (w - bw) * sx * sy * sz; + p += (z - bz) * sx * sy; + p += (y - by) * sx; + p += (x - bx); + } + return static_cast(p); +} + +// utility function: compute multidimensional index from onedimensional offset +static void +iter_set_offset(CFP_ITER_TYPE& self, size_t offset) +{ + const ZFP_ARRAY_TYPE* container = static_cast(self.array.object); + size_t xmin = 0; + size_t xmax = container->size_x(); + size_t ymin = 0; + size_t ymax = container->size_y(); + size_t zmin = 0; + size_t zmax = container->size_z(); + size_t wmin = 0; + size_t wmax = container->size_w(); + size_t nx = xmax - xmin; + size_t ny = ymax - ymin; + size_t nz = zmax - zmin; + size_t nw = wmax - wmin; + size_t p = offset; + size_t x, y, z, w; + if (p == nx * ny * nz * nw) { + x = xmin; + y = 
ymin; + z = zmin; + w = wmax; + } + else { + size_t m = ~size_t(3); + size_t bw = std::max((wmin + p / (nx * ny * nz)) & m, wmin); size_t sw = std::min((bw + 4) & m, wmax) - bw; p -= (bw - wmin) * nx * ny * nz; + size_t bz = std::max((zmin + p / (nx * ny * sw)) & m, zmin); size_t sz = std::min((bz + 4) & m, zmax) - bz; p -= (bz - zmin) * nx * ny * sw; + size_t by = std::max((ymin + p / (nx * sz * sw)) & m, ymin); size_t sy = std::min((by + 4) & m, ymax) - by; p -= (by - ymin) * nx * sz * sw; + size_t bx = std::max((xmin + p / (sy * sz * sw)) & m, xmin); size_t sx = std::min((bx + 4) & m, xmax) - bx; p -= (bx - xmin) * sy * sz * sw; + w = bw + p / (sx * sy * sz); p -= (w - bw) * sx * sy * sz; + z = bz + p / (sx * sy); p -= (z - bz) * sx * sy; + y = by + p / sx; p -= (y - by) * sx; + x = bx + p; p -= (x - bx); + } + self.x = x; + self.y = y; + self.z = z; + self.w = w; +} + +static CFP_ARRAY_TYPE +_t1(CFP_ARRAY_TYPE, ctor)(size_t nx, size_t ny, size_t nz, size_t nw, double rate, const ZFP_SCALAR_TYPE* p, size_t cache_size) +{ + CFP_ARRAY_TYPE a; + a.object = new ZFP_ARRAY_TYPE(nx, ny, nz, nw, rate, p, cache_size); + return a; +} + +static size_t +_t1(CFP_ARRAY_TYPE, size_x)(CFP_ARRAY_TYPE self) +{ + return static_cast(self.object)->size_x(); +} + +static size_t +_t1(CFP_ARRAY_TYPE, size_y)(CFP_ARRAY_TYPE self) +{ + return static_cast(self.object)->size_y(); +} + +static size_t +_t1(CFP_ARRAY_TYPE, size_z)(CFP_ARRAY_TYPE self) +{ + return static_cast(self.object)->size_z(); +} + +static size_t +_t1(CFP_ARRAY_TYPE, size_w)(CFP_ARRAY_TYPE self) +{ + return static_cast(self.object)->size_w(); +} + +static void +_t1(CFP_ARRAY_TYPE, resize)(CFP_ARRAY_TYPE self, size_t nx, size_t ny, size_t nz, size_t nw, zfp_bool clear) +{ + static_cast(self.object)->resize(nx, ny, nz, nw, !!clear); +} + +static ZFP_SCALAR_TYPE +_t1(CFP_ARRAY_TYPE, get)(CFP_ARRAY_TYPE self, size_t i, size_t j, size_t k, size_t l) +{ + return static_cast(self.object)->operator()(i, j, k, l); +} + +static 
void +_t1(CFP_ARRAY_TYPE, set)(CFP_ARRAY_TYPE self, size_t i, size_t j, size_t k, size_t l, ZFP_SCALAR_TYPE val) +{ + static_cast(self.object)->operator()(i, j, k, l) = val; +} + +static CFP_REF_TYPE +_t1(CFP_ARRAY_TYPE, ref)(CFP_ARRAY_TYPE self, size_t i, size_t j, size_t k, size_t l) +{ + CFP_REF_TYPE r; + r.array = self; + r.x = i; + r.y = j; + r.z = k; + r.w = l; + return r; +} + +static CFP_REF_TYPE +_t1(CFP_ARRAY_TYPE, ref_flat)(CFP_ARRAY_TYPE self, size_t i) +{ + CFP_REF_TYPE r; + r.array = self; + ref_set_offset(r, i); + return r; +} + +static CFP_PTR_TYPE +_t1(CFP_ARRAY_TYPE, ptr)(CFP_ARRAY_TYPE self, size_t i, size_t j, size_t k, size_t l) +{ + CFP_PTR_TYPE p; + p.reference = _t1(CFP_ARRAY_TYPE, ref)(self, i, j, k, l); + return p; +} + +static CFP_PTR_TYPE +_t1(CFP_ARRAY_TYPE, ptr_flat)(CFP_ARRAY_TYPE self, size_t i) +{ + CFP_PTR_TYPE p; + p.reference = _t1(CFP_ARRAY_TYPE, ref_flat)(self, i); + return p; +} + +static CFP_ITER_TYPE +_t1(CFP_ARRAY_TYPE, begin)(CFP_ARRAY_TYPE self) +{ + CFP_ITER_TYPE it; + it.array = self; + it.x = 0; + it.y = 0; + it.z = 0; + it.w = 0; + return it; +} + +static CFP_ITER_TYPE +_t1(CFP_ARRAY_TYPE, end)(CFP_ARRAY_TYPE self) +{ + CFP_ITER_TYPE it; + it.array = self; + it.x = 0; + it.y = 0; + it.z = 0; + it.w = static_cast(self.object)->size_w(); + return it; +} + +static ZFP_SCALAR_TYPE +_t2(CFP_ARRAY_TYPE, CFP_REF_TYPE, get)(CFP_REF_TYPE self) +{ + return static_cast(self.array.object)->operator()(self.x, self.y, self.z, self.w); +} + +static void +_t2(CFP_ARRAY_TYPE, CFP_REF_TYPE, set)(CFP_REF_TYPE self, ZFP_SCALAR_TYPE val) +{ + static_cast(self.array.object)->operator()(self.x, self.y, self.z, self.w) = val; +} + +static void +_t2(CFP_ARRAY_TYPE, CFP_REF_TYPE, copy)(CFP_REF_TYPE self, CFP_REF_TYPE src) +{ + static_cast(self.array.object)->operator()(self.x, self.y, self.z, self.w) = + static_cast(src.array.object)->operator()(src.x, src.y, src.z, src.w); +} + +static zfp_bool +_t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, 
lt)(CFP_PTR_TYPE lhs, CFP_PTR_TYPE rhs) +{ + return lhs.reference.array.object == rhs.reference.array.object && ref_offset(lhs.reference) < ref_offset(rhs.reference); +} + +static zfp_bool +_t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, gt)(CFP_PTR_TYPE lhs, CFP_PTR_TYPE rhs) +{ + return lhs.reference.array.object == rhs.reference.array.object && ref_offset(lhs.reference) > ref_offset(rhs.reference); +} + +static zfp_bool +_t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, leq)(CFP_PTR_TYPE lhs, CFP_PTR_TYPE rhs) +{ + return lhs.reference.array.object == rhs.reference.array.object && ref_offset(lhs.reference) <= ref_offset(rhs.reference); +} + +static zfp_bool +_t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, geq)(CFP_PTR_TYPE lhs, CFP_PTR_TYPE rhs) +{ + return lhs.reference.array.object == rhs.reference.array.object && ref_offset(lhs.reference) >= ref_offset(rhs.reference); +} + +static zfp_bool +_t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, eq)(CFP_PTR_TYPE lhs, CFP_PTR_TYPE rhs) +{ + return lhs.reference.array.object == rhs.reference.array.object && + lhs.reference.x == rhs.reference.x && + lhs.reference.y == rhs.reference.y && + lhs.reference.z == rhs.reference.z && + lhs.reference.w == rhs.reference.w; +} + +static zfp_bool +_t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, neq)(CFP_PTR_TYPE lhs, CFP_PTR_TYPE rhs) +{ + return !_t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, eq)(lhs, rhs); +} + +static ptrdiff_t +_t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, distance)(CFP_PTR_TYPE first, CFP_PTR_TYPE last) +{ + return ref_offset(last.reference) - ref_offset(first.reference); +} + +static CFP_PTR_TYPE +_t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, next)(CFP_PTR_TYPE p, ptrdiff_t d) +{ + ref_set_offset(p.reference, ref_offset(p.reference) + d); + return p; +} + +static CFP_PTR_TYPE +_t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, prev)(CFP_PTR_TYPE p, ptrdiff_t d) +{ + return _t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, next)(p, -d); +} + +static CFP_PTR_TYPE +_t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, inc)(CFP_PTR_TYPE p) +{ + return _t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, next)(p, +1); +} + +static 
CFP_PTR_TYPE +_t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, dec)(CFP_PTR_TYPE p) +{ + return _t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, next)(p, -1); +} + +static ZFP_SCALAR_TYPE +_t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, get)(CFP_PTR_TYPE self) +{ + return static_cast(self.reference.array.object)->operator()(self.reference.x, self.reference.y, self.reference.z, self.reference.w); +} + +static ZFP_SCALAR_TYPE +_t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, get_at)(CFP_PTR_TYPE self, ptrdiff_t d) +{ + self = _t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, next)(self, d); + return static_cast(self.reference.array.object)->operator()(self.reference.x, self.reference.y, self.reference.z, self.reference.w); +} + +static void +_t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, set)(CFP_PTR_TYPE self, ZFP_SCALAR_TYPE val) +{ + static_cast(self.reference.array.object)->operator()(self.reference.x, self.reference.y, self.reference.z, self.reference.w) = val; +} + +static void +_t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, set_at)(CFP_PTR_TYPE self, ptrdiff_t d, ZFP_SCALAR_TYPE val) +{ + self = _t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, next)(self, d); + static_cast(self.reference.array.object)->operator()(self.reference.x, self.reference.y, self.reference.z, self.reference.w) = val; +} + +static CFP_REF_TYPE +_t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, ref_at)(CFP_PTR_TYPE self, ptrdiff_t d) +{ + self = _t2(CFP_ARRAY_TYPE, CFP_PTR_TYPE, next)(self, d); + return self.reference; +} + +static zfp_bool +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, lt)(CFP_ITER_TYPE lhs, CFP_ITER_TYPE rhs) +{ + return lhs.array.object == rhs.array.object && iter_offset(lhs) < iter_offset(rhs); +} + +static zfp_bool +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, gt)(CFP_ITER_TYPE lhs, CFP_ITER_TYPE rhs) +{ + return lhs.array.object == rhs.array.object && iter_offset(lhs) > iter_offset(rhs); +} + +static zfp_bool +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, leq)(CFP_ITER_TYPE lhs, CFP_ITER_TYPE rhs) +{ + return lhs.array.object == rhs.array.object && iter_offset(lhs) <= iter_offset(rhs); +} + +static zfp_bool 
+_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, geq)(CFP_ITER_TYPE lhs, CFP_ITER_TYPE rhs) +{ + return lhs.array.object == rhs.array.object && iter_offset(lhs) >= iter_offset(rhs); +} + +static zfp_bool +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, eq)(CFP_ITER_TYPE lhs, CFP_ITER_TYPE rhs) +{ + return lhs.array.object == rhs.array.object && + lhs.x == rhs.x && + lhs.y == rhs.y && + lhs.z == rhs.z && + lhs.w == rhs.w; +} + +static zfp_bool +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, neq)(CFP_ITER_TYPE lhs, CFP_ITER_TYPE rhs) +{ + return !_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, eq)(lhs, rhs); +} + +static ptrdiff_t +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, distance)(CFP_ITER_TYPE first, CFP_ITER_TYPE last) +{ + return iter_offset(last) - iter_offset(first); +} + +static CFP_ITER_TYPE +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, next)(CFP_ITER_TYPE it, ptrdiff_t d) +{ + iter_set_offset(it, iter_offset(it) + d); + return it; +} + +static CFP_ITER_TYPE +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, prev)(CFP_ITER_TYPE it, ptrdiff_t d) +{ + iter_set_offset(it, iter_offset(it) - d); + return it; +} + +static CFP_ITER_TYPE +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, inc)(CFP_ITER_TYPE it) +{ + const ZFP_ARRAY_TYPE* container = static_cast(it.array.object); + size_t xmin = 0; + size_t xmax = container->size_x(); + size_t ymin = 0; + size_t ymax = container->size_y(); + size_t zmin = 0; + size_t zmax = container->size_z(); + size_t wmin = 0; + size_t wmax = container->size_w(); + size_t m = ~size_t(3); + ++it.x; + if (!(it.x & 3u) || it.x == xmax) { + it.x = std::max((it.x - 1) & m, xmin); + ++it.y; + if (!(it.y & 3u) || it.y == ymax) { + it.y = std::max((it.y - 1) & m, ymin); + ++it.z; + if (!(it.z & 3u) || it.z == zmax) { + it.z = std::max((it.z - 1) & m, zmin); + ++it.w; + if (!(it.w & 3u) || it.w == wmax) { + it.w = std::max((it.w - 1) & m, wmin); + // done with block; advance to next + it.x = (it.x + 4) & m; + if (it.x >= xmax) { + it.x = xmin; + it.y = (it.y + 4) & m; + if (it.y >= ymax) { + it.y = ymin; + it.z = (it.z + 4) & m; + if 
(it.z >= zmax) { + it.z = zmin; + it.w = (it.w + 4) & m; + if (it.w >= wmax) + it.w = wmax; + } + } + } + } + } + } + } + return it; +} + +static CFP_ITER_TYPE +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, dec)(CFP_ITER_TYPE it) +{ + const ZFP_ARRAY_TYPE* container = static_cast(it.array.object); + size_t xmin = 0; + size_t xmax = container->size_x(); + size_t ymin = 0; + size_t ymax = container->size_y(); + size_t zmin = 0; + size_t zmax = container->size_z(); + size_t wmin = 0; + size_t wmax = container->size_w(); + size_t m = ~size_t(3); + if (it.w == wmax) { + it.x = xmax - 1; + it.y = ymax - 1; + it.z = zmax - 1; + it.w = wmax - 1; + } + else { + if (!(it.x & 3u) || it.x == xmin) { + it.x = std::min((it.x + 4) & m, xmax); + if (!(it.y & 3u) || it.y == ymin) { + it.y = std::min((it.y + 4) & m, ymax); + if (!(it.z & 3u) || it.z == zmin) { + it.z = std::min((it.z + 4) & m, zmax); + if (!(it.w & 3u) || it.w == wmin) { + it.w = std::min((it.w + 4) & m, wmax); + // done with block; advance to next + it.x = (it.x - 1) & m; + if (it.x <= xmin) { + it.x = xmax; + it.y = (it.y - 1) & m; + if (it.y <= ymin) { + it.y = ymax; + it.z = (it.z - 1) & m; + if (it.z <= zmin) { + it.z = zmax; + it.w = (it.w - 1) & m; + if (it.w <= wmin) + it.w = wmin; + } + } + } + } + --it.w; + } + --it.z; + } + --it.y; + } + --it.x; + } + return it; +} + +static ZFP_SCALAR_TYPE +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, get)(CFP_ITER_TYPE self) +{ + return static_cast(self.array.object)->operator()(self.x, self.y, self.z, self.w); +} + +static ZFP_SCALAR_TYPE +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, get_at)(CFP_ITER_TYPE self, ptrdiff_t d) +{ + self = _t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, next)(self, d); + return static_cast(self.array.object)->operator()(self.x, self.y, self.z, self.w); +} + +static void +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, set)(CFP_ITER_TYPE self, ZFP_SCALAR_TYPE val) +{ + static_cast(self.array.object)->operator()(self.x, self.y, self.z, self.w) = val; +} + +static void +_t2(CFP_ARRAY_TYPE, 
CFP_ITER_TYPE, set_at)(CFP_ITER_TYPE self, ptrdiff_t d, ZFP_SCALAR_TYPE val) +{ + self = _t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, next)(self, d); + static_cast<ZFP_ARRAY_TYPE*>(self.array.object)->operator()(self.x, self.y, self.z, self.w) = val; +} +
+static CFP_REF_TYPE +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, ref)(CFP_ITER_TYPE self) +{ + return _t1(CFP_ARRAY_TYPE, ref)(self.array, self.x, self.y, self.z, self.w); +} +
+static CFP_REF_TYPE +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, ref_at)(CFP_ITER_TYPE self, ptrdiff_t d) +{ + self = _t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, next)(self, d); + return _t1(CFP_ARRAY_TYPE, ref)(self.array, self.x, self.y, self.z, self.w); +} +
+static CFP_PTR_TYPE +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, ptr)(CFP_ITER_TYPE self) +{ + return _t1(CFP_ARRAY_TYPE, ptr)(self.array, self.x, self.y, self.z, self.w); +} +
+static CFP_PTR_TYPE +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, ptr_at)(CFP_ITER_TYPE self, ptrdiff_t d) +{ + self = _t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, next)(self, d); + return _t1(CFP_ARRAY_TYPE, ptr)(self.array, self.x, self.y, self.z, self.w); +} +
+static size_t +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, i)(CFP_ITER_TYPE self) +{ + return self.x; +} +
+static size_t +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, j)(CFP_ITER_TYPE self) +{ + return self.y; +} +
+static size_t +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, k)(CFP_ITER_TYPE self) +{ + return self.z; +} +
+static size_t +_t2(CFP_ARRAY_TYPE, CFP_ITER_TYPE, l)(CFP_ITER_TYPE self) +{ + return self.w; +}
diff --git a/cfp/template/cfpheader.cpp b/cfp/template/cfpheader.cpp new file mode 100644 index 00000000..b9f61917 --- /dev/null +++ b/cfp/template/cfpheader.cpp @@ -0,0 +1,166 @@ +static CFP_HEADER_TYPE +_t1(CFP_HEADER_TYPE, ctor_buffer)(const void* data, size_t bytes) +{ + CFP_HEADER_TYPE h; + h.object = 0; + + try { + // construct generic header and query array type + header hdr(data, bytes); + uint dims = hdr.dimensionality(); + zfp_type scalar_type = hdr.scalar_type(); + // construct array-specific header + switch (dims) { + case 1: + if
(scalar_type == zfp_type_float) + h.object = new zfp::array1f::header(data, bytes); + else if (scalar_type == zfp_type_double) + h.object = new zfp::array1d::header(data, bytes); + break; + case 2: + if (scalar_type == zfp_type_float) + h.object = new zfp::array2f::header(data, bytes); + else if (scalar_type == zfp_type_double) + h.object = new zfp::array2d::header(data, bytes); + break; + case 3: + if (scalar_type == zfp_type_float) + h.object = new zfp::array3f::header(data, bytes); + else if (scalar_type == zfp_type_double) + h.object = new zfp::array3d::header(data, bytes); + break; + case 4: + if (scalar_type == zfp_type_float) + h.object = new zfp::array4f::header(data, bytes); + else if (scalar_type == zfp_type_double) + h.object = new zfp::array4d::header(data, bytes); + break; + } + } + catch (...) {} + return h; +} +
+static CFP_HEADER_TYPE +_t1(CFP_HEADER_TYPE, ctor_array1f)(cfp_array1f a) +{ + CFP_HEADER_TYPE h; + h.object = new zfp::array1f::header(*static_cast<const zfp::array1f*>(a.object)); + return h; +} +
+static CFP_HEADER_TYPE +_t1(CFP_HEADER_TYPE, ctor_array1d)(cfp_array1d a) +{ + CFP_HEADER_TYPE h; + h.object = new zfp::array1d::header(*static_cast<const zfp::array1d*>(a.object)); + return h; +} +
+static CFP_HEADER_TYPE +_t1(CFP_HEADER_TYPE, ctor_array2f)(cfp_array2f a) +{ + CFP_HEADER_TYPE h; + h.object = new zfp::array2f::header(*static_cast<const zfp::array2f*>(a.object)); + return h; +} +
+static CFP_HEADER_TYPE +_t1(CFP_HEADER_TYPE, ctor_array2d)(cfp_array2d a) +{ + CFP_HEADER_TYPE h; + h.object = new zfp::array2d::header(*static_cast<const zfp::array2d*>(a.object)); + return h; +} +
+static CFP_HEADER_TYPE +_t1(CFP_HEADER_TYPE, ctor_array3f)(cfp_array3f a) +{ + CFP_HEADER_TYPE h; + h.object = new zfp::array3f::header(*static_cast<const zfp::array3f*>(a.object)); + return h; +} +
+static CFP_HEADER_TYPE +_t1(CFP_HEADER_TYPE, ctor_array3d)(cfp_array3d a) +{ + CFP_HEADER_TYPE h; + h.object = new zfp::array3d::header(*static_cast<const zfp::array3d*>(a.object)); + return h; +} +
+static CFP_HEADER_TYPE +_t1(CFP_HEADER_TYPE, ctor_array4f)(cfp_array4f a) +{ + CFP_HEADER_TYPE h; + h.object = new zfp::array4f::header(*static_cast<const zfp::array4f*>(a.object)); + return h; +} +
+static CFP_HEADER_TYPE +_t1(CFP_HEADER_TYPE, ctor_array4d)(cfp_array4d a) +{ + CFP_HEADER_TYPE h; + h.object = new zfp::array4d::header(*static_cast<const zfp::array4d*>(a.object)); + return h; +} +
+static void +_t1(CFP_HEADER_TYPE, dtor)(CFP_HEADER_TYPE self) +{ + delete static_cast<zfp::array::header*>(self.object); +} +
+static zfp_type +_t1(CFP_HEADER_TYPE, scalar_type)(CFP_HEADER_TYPE self) +{ + return static_cast<const zfp::array::header*>(self.object)->scalar_type(); +} +
+static uint +_t1(CFP_HEADER_TYPE, dimensionality)(CFP_HEADER_TYPE self) +{ + return static_cast<const zfp::array::header*>(self.object)->dimensionality(); +} +
+static size_t +_t1(CFP_HEADER_TYPE, size_x)(CFP_HEADER_TYPE self) +{ + return static_cast<const zfp::array::header*>(self.object)->size_x(); +} +
+static size_t +_t1(CFP_HEADER_TYPE, size_y)(CFP_HEADER_TYPE self) +{ + return static_cast<const zfp::array::header*>(self.object)->size_y(); +} +
+static size_t +_t1(CFP_HEADER_TYPE, size_z)(CFP_HEADER_TYPE self) +{ + return static_cast<const zfp::array::header*>(self.object)->size_z(); +} +
+static size_t +_t1(CFP_HEADER_TYPE, size_w)(CFP_HEADER_TYPE self) +{ + return static_cast<const zfp::array::header*>(self.object)->size_w(); +} +
+static double +_t1(CFP_HEADER_TYPE, rate)(CFP_HEADER_TYPE self) +{ + return static_cast<const zfp::array::header*>(self.object)->rate(); +} +
+static const void* +_t1(CFP_HEADER_TYPE, data)(CFP_HEADER_TYPE self) +{ + return static_cast<const zfp::array::header*>(self.object)->data(); +} +
+static size_t +_t1(CFP_HEADER_TYPE, size_bytes)(CFP_HEADER_TYPE self, uint mask) +{ + return static_cast<const zfp::array::header*>(self.object)->size_bytes(mask); +}
diff --git a/cmake/appveyor.cmake b/cmake/appveyor.cmake index 6633b16b..29cc7906 100644 --- a/cmake/appveyor.cmake +++ b/cmake/appveyor.cmake @@ -11,7 +11,10 @@ set(CTEST_SITE "appveyor") set(CTEST_CMAKE_GENERATOR "${GENERATOR}") set(CTEST_BUILD_NAME "$ENV{APPVEYOR_REPO_BRANCH}-${job_details}") set(cfg_options + -DCMAKE_BUILD_TYPE=$ENV{BUILD_TYPE} + -DBUILD_TESTING_FULL=ON -DBUILD_CFP=${BUILD_CFP} + -DBUILD_ZFPY=${BUILD_ZFPY} -DZFP_WITH_OPENMP=${BUILD_OPENMP}
-DZFP_WITH_CUDA=${BUILD_CUDA} ) @@ -46,6 +49,19 @@ if(BUILD_CFP) endif() endif() +if(BUILD_ZFPY) + set(CTEST_SITE "${CTEST_SITE}_zfpy$ENV{PYTHON_VERSION}") + + # sanitize python include dir path (ex. windows vs linux slashes) + set(PYTHON_INCLUDE_DIR "") + file(TO_CMAKE_PATH "${CTEST_SOURCE_DIRECTORY}\\$ENV{VIRTUALENV_NAME}\\Include" PYTHON_INCLUDE_DIR) + + list(APPEND cfg_options + -DPYTHON_INCLUDE_DIR=${PYTHON_INCLUDE_DIR} + -DPYTHON_LIBRARY=$ENV{PYTHON_LIB_PATH} + ) +endif() + if(OMP_TESTS_ONLY) list(APPEND cfg_options -DZFP_OMP_TESTS_ONLY=1 diff --git a/cmake/travis.cmake b/cmake/travis.cmake deleted file mode 100644 index f2bf844b..00000000 --- a/cmake/travis.cmake +++ /dev/null @@ -1,87 +0,0 @@ - -set(CTEST_SOURCE_DIRECTORY "$ENV{TRAVIS_BUILD_DIR}") -set(CTEST_BINARY_DIRECTORY "$ENV{TRAVIS_BUILD_DIR}/build") - -set(CTEST_COMMAND ctest) -include(${CTEST_SOURCE_DIRECTORY}/CTestConfig.cmake) -set(CTEST_SITE "travis") -set(CTEST_CMAKE_GENERATOR "Unix Makefiles") -set(CTEST_BUILD_NAME "$ENV{TRAVIS_BRANCH}-#$ENV{TRAVIS_JOB_NUMBER}") -set(cfg_options - -DCMAKE_C_STANDARD=${C_STANDARD} - -DCMAKE_CXX_STANDARD=${CXX_STANDARD} - -DBUILD_CFP=${BUILD_CFP} - -DBUILD_ZFPY=${BUILD_ZFPY} - -DBUILD_ZFORP=${BUILD_ZFORP} - -DZFP_WITH_OPENMP=${BUILD_OPENMP} - -DZFP_WITH_CUDA=${BUILD_CUDA} - ) - -# Add the variants to the testers name so that we can report multiple -# times from the same CI builder -if(BUILD_OPENMP) - set(CTEST_SITE "${CTEST_SITE}_openmp") -endif() - -if(BUILD_CUDA) - set(CTEST_SITE "${CTEST_SITE}_cuda") -endif() - -if(BUILD_CFP) - set(CTEST_SITE "${CTEST_SITE}_cfp") - - if(CFP_NAMESPACE) - list(APPEND cfg_options - -DCFP_NAMESPACE=${CFP_NAMESPACE} - ) - set(CTEST_SITE "${CTEST_SITE}namespace") - endif() -endif() - -if(BUILD_ZFPY) - set(CTEST_SITE "${CTEST_SITE}_zfpy$ENV{PYTHON_VERSION}") - list(APPEND cfg_options - -DPYTHON_INCLUDE_DIR=$ENV{PYTHON_INCLUDE_DIR} - -DPYTHON_LIBRARY=$ENV{PYTHON_LIBRARY} - -DPYTHON_EXECUTABLE=$ENV{PYTHON_EXECUTABLE} - ) -endif() - 
-if(BUILD_ZFORP) - set(CTEST_SITE "${CTEST_SITE}_zforp$ENV{FORTRAN_STANDARD}") - list(APPEND cfg_options - -DCMAKE_FORTRAN_FLAGS='-std=f$ENV{FORTRAN_STANDARD}' - ) -endif() - -if(WITH_COVERAGE) - list(APPEND cfg_options - -DCMAKE_C_FLAGS=-coverage - -DCMAKE_CXX_FLAGS=-coverage - -DCMAKE_Fortran_FLAGS=-coverage - ) - set(CTEST_SITE "${CTEST_SITE}_coverage") -endif() - -if(OMP_TESTS_ONLY) - list(APPEND cfg_options - -DZFP_OMP_TESTS_ONLY=1 - ) -endif() - -ctest_start(Experimental TRACK Travis) -ctest_configure(OPTIONS "${cfg_options}") -ctest_submit(PARTS Update Notes Configure) -ctest_build(FLAGS -j1) -ctest_submit(PARTS Build) -ctest_test(PARALLEL_LEVEL 6 RETURN_VALUE rv) -ctest_submit(PARTS Test) - -if(WITH_COVERAGE) - ctest_coverage() - ctest_submit(PARTS Coverage) -endif() - -if(NOT rv EQUAL 0) - message(FATAL_ERROR "Test failures occurred.") -endif() diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 00000000..22a0d97e --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,23 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line. +SPHINXOPTS = +SPHINXBUILD = /usr/bin/python3 -msphinx +SPHINXPROJ = zfp +SOURCEDIR = source +BUILDDIR = build + +all: + @$(MAKE) html + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
+%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 00000000..5601f5c0 --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,36 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=python -msphinx +) +set SOURCEDIR=source +set BUILDDIR=build +set SPHINXPROJ=zfp + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The Sphinx module was not found. Make sure you have Sphinx installed, + echo.then set the SPHINXBUILD environment variable to point to the full + echo.path of the 'sphinx-build' executable. Alternatively you may add the + echo.Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% + +:end +popd
diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 00000000..f0c7424a --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1,4 @@ +sphinx-fortran==1.1.1 + +# required by sphinx-fortran but not installed on RTD +six
diff --git a/docs/source/algorithm.rst b/docs/source/algorithm.rst new file mode 100644 index 00000000..fd4c72f3 --- /dev/null +++ b/docs/source/algorithm.rst @@ -0,0 +1,155 @@ +.. include:: defs.rst + +.. _algorithm: + +Algorithm +========= + +|zfp| uses two different algorithms to support :ref:`lossy <algorithm-lossy>` +and :ref:`lossless <algorithm-lossless>` compression. These algorithms are +described in detail below. + +.. _algorithm-lossy: + +Lossy Compression +----------------- + +The |zfp| lossy compression scheme is based on the idea of breaking a +*d*-dimensional array into independent blocks of |4powd| values each, +e.g., |4by4by4| values in three dimensions.
Each block is +compressed/decompressed entirely independently from all other blocks. In +this sense, |zfp| is similar to current hardware texture compression schemes +for image coding implemented on graphics cards and mobile devices. + +The lossy compression scheme implemented in this version of |zfp| has evolved +from the method described in the :ref:`original paper `, and can +conceptually be thought of as consisting of eight sequential steps (in +practice some steps are consolidated or exist only for illustrative +purposes): + + 1. The *d*-dimensional array is partitioned into blocks of dimensions + |4powd|. If the array dimensions are not multiples of four, then + blocks near the boundary are padded to the next multiple of four. This + padding is invisible to the application. + + 2. The independent floating-point values in a block are converted to what + is known as a block-floating-point representation, which uses a single, + common floating-point exponent for all |4powd| values. The effect of + this conversion is to turn each floating-point value into a 31- or 63-bit + signed integer. If the values in the block are all zero or are smaller + in magnitude than the fixed-accuracy tolerance (see below), then only a + single bit is stored with the block to indicate that it is "empty" and + expands to all zeros. Note that the block-floating-point conversion and + empty-block encoding are not performed if the input data is represented + as integers rather than floating-point numbers. + + 3. The integers are decorrelated using a custom, high-speed, near orthogonal + transform similar to the discrete cosine transform used in JPEG image + coding. The transform exploits separability and is implemented + efficiently in-place using the lifting scheme, requiring only + 2.5 *d* integer additions and 1.5 *d* bit shifts by one per integer in + *d* dimensions. If the data is "smooth," then this transform will turn + most integers into small signed values clustered around zero. 
+ + 4. The signed integer coefficients are reordered in a manner similar to + JPEG zig-zag ordering so that statistically they appear in a roughly + monotonically decreasing order. Coefficients corresponding to low + frequencies tend to have larger magnitude and are listed first. In 3D, + coefficients corresponding to frequencies *i*, *j*, *k* in the three + dimensions are ordered by *i* + *j* + *k* first and then by + *i*\ :sup:`2` + *j*\ :sup:`2` + *k*\ :sup:`2`. + + 5. The two's complement signed integers are converted to their negabinary + (base negative two) representation using one addition and one bit-wise + exclusive or per integer. Because negabinary has no single dedicated + sign bit, these integers are subsequently treated as unsigned. Unlike + sign-magnitude representations, the leftmost one-bit in negabinary + simultaneously encodes the sign and approximate magnitude of a number. + Moreover, unlike two's complement, numbers small in magnitude have many + leading zeros in negabinary regardless of sign, which facilitates + encoding. + + 6. The bits that represent the list of |4powd| integers are transposed so + that instead of being ordered by coefficient they are ordered by bit + plane, from most to least significant bit. Viewing each bit plane as + an unsigned integer, with the lowest bit corresponding to the lowest + frequency coefficient, the anticipation is that the first several of + these transposed integers are small, because the coefficients are + assumed to be ordered by magnitude. + + 7. The transform coefficients are compressed losslessly using embedded + coding by exploiting the property that the coefficients tend to have many + leading zeros that need not be encoded explicitly. Each bit plane is + encoded in two parts, from lowest to highest bit. First, the *n* lowest + bits are emitted verbatim, where *n* is the smallest number such that + the |4powd| |minus| *n* highest bits in all previous bit planes are all + zero. Initially, *n* = 0. 
Then, a variable-length representation of the + remaining |4powd| |minus| *n* bits, *x*, is encoded. For such an integer + *x*, a single bit is emitted to indicate if *x* = 0, in which case we are + done with the current bit plane. If not, then bits of *x* are emitted, + starting from the lowest bit, until a one-bit is emitted. This triggers + another test whether this is the highest set bit of *x*, and the result + of this test is output as a single bit. If not, then the procedure + repeats until all *m* of *x*'s value bits have been output, where + 2\ :sup:`m-1` |leq| *x* < 2\ :sup:`m`. This can be thought of as a + run-length encoding of the zeros of *x*, where the run lengths are + expressed in unary. The total number of value bits, *n*, in this bit + plane is then incremented by *m* before being passed to the next bit + plane, which is encoded by first emitting its *n* lowest bits. The + assumption is that these bits correspond to *n* coefficients whose most + significant bits have already been output, i.e., these *n* bits are + essentially random and not compressible. Following this, the remaining + |4powd| |minus| *n* bits of the bit plane are run-length encoded as + described above, which potentially results in *n* being increased. + + As an example, *x* = 000001001101000 with *m* = 10 is encoded as + **0**\ 100\ **1**\ 1\ **1**\ 10\ **1**\ 1000\ **1**, where the bits in + boldface indicate "group tests" that determine if the remainder of *x* + (to the left) contains any one-bits. Again, this variable-length code + is generated and parsed from right to left. + + 8. The embedded coder emits one bit at a time, with each successive bit + potentially improving the accuracy of the approximation. The early + bits are most important and have the greatest impact on accuracy, + with the last few bits providing very small changes. 
The resulting + compressed bit stream can be truncated at any point and still allow for + a valid approximate reconstruction of the original block of values. + The final step truncates the bit stream in one of three ways: to a fixed + number of bits (the fixed-rate mode); after some fixed number of bit + planes have been encoded (the fixed-precision mode); or until a lowest + bit plane number has been encoded, as expressed in relation to the common + floating-point exponent within the block (the fixed-accuracy mode). + +Various parameters are exposed for controlling the quality and compressed +size of a block, and can be specified by the user at a very fine +granularity. These parameters are discussed :ref:`here <modes>`. + +.. _algorithm-lossless: + +Lossless Compression +-------------------- + +The reversible (lossless) compression algorithm shares most steps with +the lossy algorithm. The main differences are steps 2, 3, and 8, which are +the only sources of error. Since step 2 may introduce loss in the conversion +to |zfp|'s block-floating-point representation, the reversible algorithm adds +a test to see if this conversion is lossless. It does so by converting the +values back to the source format and testing the result for bitwise equality +with the uncompressed data. If this test passes, then a modified +decorrelating transform is performed in step 3 that uses reversible integer +subtraction operations only. Finally, step 8 is modified so that no one-bits +are truncated in the variable-length bit stream. However, all least +significant bit planes with all-zero bits are truncated, and the number of +encoded bit planes is recorded in step 7. As with lossy compression, a +floating-point block consisting of all ("positive") zeros is represented as +a single bit, making it possible to efficiently encode sparse data.
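The negabinary mapping of step 5 above is fully specified by the text (one addition and one bit-wise exclusive or each way); a minimal self-contained C++ sketch using the conventional odd-bit mask (names and layout here are illustrative, not taken from the zfp sources):

```cpp
#include <cassert>
#include <cstdint>

// Step 5: two's complement -> negabinary (base -2), one addition and one
// exclusive or. The mask has ones in the odd bit positions, i.e., the bits
// whose place value is negative in base -2.
static const uint32_t kNbMask = 0xaaaaaaaau;

static inline uint32_t int2uint(int32_t x) {
  return ((uint32_t)x + kNbMask) ^ kNbMask;
}

// Inverse mapping: one exclusive or and one subtraction. Relies on the
// usual two's complement wraparound in the unsigned-to-signed conversion.
static inline int32_t uint2int(uint32_t x) {
  return (int32_t)((x ^ kNbMask) - kNbMask);
}
```

Note how values of small magnitude map to small unsigned codes regardless of sign (for example, -1 maps to 3 and 2 maps to 6), so the many leading zeros that the bit-plane coding of steps 6 and 7 exploits are preserved.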
+ +If the block-floating-point transform is not lossless, then the reversible +compression algorithm falls back on a simpler scheme that reinterprets +floating-point values as integers via *type punning*. This lossless +conversion from floating-point to integer data replaces step 2, and the +algorithm proceeds from there with the modified step 3. Moreover, this +conversion ensures that special values like infinities, NaNs, and negative +zero are preserved. + +The lossless algorithm handles integer data also, for which step 2 is omitted. diff --git a/docs/source/arrays.rst b/docs/source/arrays.rst new file mode 100644 index 00000000..31962e82 --- /dev/null +++ b/docs/source/arrays.rst @@ -0,0 +1,859 @@ +.. include:: defs.rst +.. _arrays: + +Compressed-Array C++ Classes +============================ + +.. cpp:namespace:: zfp + +|zfp|'s compressed arrays are C++ classes, plus :ref:`C wrappers ` around +these classes, that implement random-accessible single- and multi-dimensional +floating-point arrays. Since its first release, |zfp| provides *fixed-rate* +arrays, :code:`zfp::array`, that support both read and write access to +individual array elements. As of |carrrelease|, |zfp| also supports +read-only arrays, :code:`zfp::const_array`, for data that is static or is +updated only infrequently. The read-only arrays support all of +|zfp|'s :ref:`compression modes ` including variable-rate +and lossless compression. + +For fixed-rate arrays, the storage size, specified in number of bits per +array element, is set by the user. Such arbitrary storage is achieved via +|zfp|'s lossy :ref:`fixed-rate compression ` mode, by +partitioning each *d*-dimensional array into blocks of |4powd| values +and compressing each block to a fixed number of bits. The more smoothly +the array values vary along each dimension, the more accurately |zfp| can +represent them. In other words, these arrays are not suitable for +representing data where adjacent elements are not correlated. 
Rather, +the expectation is that the array represents a regularly sampled and +predominantly continuous function, such as a temperature field in a physics +simulation. + +The *rate*, measured in number of bits per array element, can be specified +in fractions of a bit (but see FAQs :ref:`#12 ` and +:ref:`#18 ` for limitations). |zfp| supports 1D, 2D, 3D, and (as +of version |4darrrelease|) 4D arrays. For higher-dimensional arrays, +consider using an array of |zfp| arrays. Note that array dimensions need not +be multiples of four; |zfp| transparently handles partial blocks on array +boundaries. + +Read-only arrays allow setting compression mode and parameters on +construction, and can optionally be initialized with uncompressed data. +These arrays do not allow updating individual array elements, though +the contents of the whole array may be updated by re-compressing and +overwriting the array. This may be useful in applications that decompress +the whole array, perform a computation that updates its contents (e.g., +a stencil operation that advances the solution of a PDE), and then compress +to memory the updated array. + +The C++ templated array classes are implemented entirely as header files +that call the |zfp| C library to perform compression and decompression. +These arrays cache decompressed blocks to reduce the number of compression +and decompression calls. Whenever an array value is read, the corresponding +block is first looked up in the cache, and if found the uncompressed value +is returned. Otherwise the block is first decompressed and stored in the +cache. Whenever an array element is written (whether actually modified or +not), a "dirty bit" is set with its cached block to indicate that the block +must be compressed back to persistent storage when evicted from the cache. + +This section documents the public interface to the array classes, including +base classes and member accessor classes like proxy references/pointers, +iterators, and views. 
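The block cache and its dirty-bit write-back policy described above can be illustrated with a toy model (hypothetical types; "compression" is modeled as a plain copy, whereas the real arrays cache decompressed |4powd| blocks and compress them on eviction):

```cpp
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <vector>

// Toy single-slot write-back cache over 1D blocks of 4 values.
struct ToyBlockCache {
  static const size_t kBlock = 4;     // values per 1D block
  std::vector<double> store;          // stands in for compressed storage
  std::vector<double> cached;         // "decompressed" copy of one block
  size_t cached_index = size_t(-1);   // which block is cached, if any
  bool dirty = false;                 // cached block modified?

  explicit ToyBlockCache(size_t n) : store(n, 0.0) {}

  // Fetch the block containing element i, first writing back the
  // currently cached block if it is dirty (eviction).
  double* fetch(size_t i) {
    size_t b = i / kBlock;
    if (b != cached_index) {
      flush();                        // write back dirty block
      cached.assign(store.begin() + b * kBlock,
                    store.begin() + (b + 1) * kBlock);  // "decompress"
      cached_index = b;
    }
    return &cached[i % kBlock];
  }

  double get(size_t i) { return *fetch(i); }
  void set(size_t i, double v) { *fetch(i) = v; dirty = true; }

  // "Compress" the modified cached block back to persistent storage.
  void flush() {
    if (dirty && cached_index != size_t(-1))
      std::copy(cached.begin(), cached.end(),
                store.begin() + cached_index * kBlock);
    dirty = false;
  }
};
```

As in the real arrays, a write only marks the cached block dirty; persistent storage is updated on eviction or on an explicit flush, which is why :cpp:func:`array::flush_cache` must be called before serializing an array.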
+ +The following sections are available: + +* :ref:`array_classes` +* :ref:`carray_classes` +* :ref:`caching` +* :ref:`serialization` +* :ref:`references` +* :ref:`pointers` +* :ref:`iterators` +* :ref:`views` +* :ref:`codec` +* :ref:`index` + + +.. _array_classes: + +Read-Write Fixed-Rate Arrays +---------------------------- + +There are eight array classes for 1D, 2D, 3D, and 4D read-write arrays, +each of which can represent single- or double-precision values. +Although these arrays store values in a form different from conventional +single- and double-precision floating point, the user interacts with the +arrays via floats and doubles. + +The array classes can often serve as direct substitutes for C/C++ +single- and multi-dimensional floating-point arrays and STL vectors, but +have the benefit of allowing fine control over storage size. All classes +below belong to the :cpp:any:`zfp` namespace. + +.. note:: + Much of the compressed-array API was modified in |zfp| |64bitrelease| + to support 64-bit indexing of very large arrays. In particular, array + dimensions and indices now use the :code:`size_t` type instead of + :code:`uint` and strides use the :code:`ptrdiff_t` type instead of + :code:`int`. + +.. _array_base_class: + +Base Class +^^^^^^^^^^ + +.. cpp:class:: array + + Virtual base class for common array functionality. + +---- + +.. cpp:function:: zfp_type array::scalar_type() const + + Return the underlying scalar type (:c:type:`zfp_type`) of the array. + +---- + +.. cpp:function:: uint array::dimensionality() const + + Return the dimensionality (aka. rank) of the array: 1, 2, 3, or 4. + +---- + +.. cpp:function:: array::header array::get_header() const + + Deprecated function as of |zfp| |crpirelease|. See the :ref:`header` + section on how to construct a header. + +---- + +.. _array_factory: +.. 
cpp:function:: static array* array::construct(const header& h, const void* buffer = 0, size_t buffer_size_bytes = 0) + + Construct a compressed-array object whose scalar type, dimensions, and rate + are given by the :ref:`header
` *h*. Return a base class pointer + upon success. The optional *buffer* points to compressed data that, when + passed, is copied into the array. If *buffer* is absent, the array is + default initialized with all zeroes. The optional *buffer_size_bytes* + parameter specifies the buffer length in bytes. When passed, a comparison + is made to ensure that the buffer size is at least as large as the size + implied by the header. If this function fails for any reason, an + :cpp:class:`exception` is thrown. + + +Common Methods +^^^^^^^^^^^^^^ + +The following methods are common to 1D, 2D, 3D, and 4D arrays, but are +implemented in the array class specific to each dimensionality rather than +in the base class. + +.. cpp:function:: size_t array::size() const + + Total number of elements in array, e.g., *nx* |times| *ny* |times| *nz* for + 3D arrays. + +---- + +.. cpp:function:: double array::rate() const + + Return rate in bits per value. + +---- + +.. cpp:function:: double array::set_rate(double rate) + + Set desired compression rate in bits per value. Return the closest rate + supported. See FAQ :ref:`#12 ` and FAQ :ref:`#18 ` + for discussions of the rate granularity. This method destroys the previous + contents of the array. + +---- + +.. cpp:function:: size_t array::size_bytes(uint mask = ZFP_DATA_ALL) const + + Return storage size of components of array data structure indicated by + *mask*. The mask is constructed via bitwise OR of + :ref:`predefined constants `. + Available as of |zfp| |carrrelease|. + +---- + +.. cpp:function:: size_t array::compressed_size() const + + Return number of bytes of storage for the compressed data. This amount + does not include the small overhead of other class members or the size + of the cache. Rather, it reflects the size of the memory buffer + returned by :cpp:func:`compressed_data`. + +---- + +.. cpp:function:: void* array::compressed_data() const + + Return pointer to compressed data for read or write access. 
The size + of the buffer is given by :cpp:func:`compressed_size`. + +.. note:: + As of |zfp| |crpirelease|, the return value is :code:`void*` rather than + :code:`uchar*` to simplify pointer conversion and to dispel any misconception + that the compressed data needs only :code:`uchar` alignment. Compressed + streams are always word aligned (see :c:var:`stream_word_bits` and + :c:macro:`BIT_STREAM_WORD_TYPE`). + +---- + +.. cpp:function:: size_t array::cache_size() const + + Return the cache size in number of bytes. + +---- + +.. cpp:function:: void array::set_cache_size(size_t bytes) + + Set minimum cache size in bytes. The actual size is always a power of two + bytes and consists of at least one block. If *bytes* is zero, then a + default cache size is used, which requires the array dimensions to be known. + +---- + +.. cpp:function:: void array::clear_cache() const + + Empty cache without compressing modified cached blocks, i.e., discard any + cached updates to the array. + +---- + +.. cpp:function:: virtual void array::flush_cache() const + + Flush cache by compressing all modified cached blocks back to persistent + storage and emptying the cache. This method should be called before + writing the compressed representation of the array to disk, for instance. + +---- + +.. cpp:function:: void array::get(Scalar* p) const + + Decompress entire array and store at *p*, for which sufficient storage must + have been allocated. The uncompressed array is assumed to be contiguous + (with default strides) and stored in the usual "row-major" order, i.e., with + *x* varying faster than *y*, *y* varying faster than *z*, etc. + +---- + +.. cpp:function:: void array::set(const Scalar* p) + + Initialize array by copying and compressing data stored at *p*. The + uncompressed data is assumed to be stored as in the :cpp:func:`get` + method. If *p* = 0, then the array is zero-initialized. + +---- + +.. 
cpp:function:: const_reference array::operator[](size_t index) const + + Return :ref:`const reference ` to scalar stored at given flat + index (inspector). For a 3D array, :code:`index = x + nx * (y + ny * z)`. + +.. note:: + As of |zfp| |crpirelease|, the return value is no longer :code:`Scalar` but + is a :ref:`const reference ` to the corresponding array element + (conceptually equivalent to :code:`const Scalar&`). This API change was + necessary to allow obtaining a const pointer to the element when the array + itself is const qualified, e.g., :code:`const_pointer p = &a[index];`. + +---- + +.. _lvref_idx: +.. cpp:function:: reference array::operator[](size_t index) + + Return :ref:`proxy reference ` to scalar stored at given flat + index (mutator). For a 3D array, :code:`index = x + nx * (y + ny * z)`. + +---- + +.. cpp:function:: iterator array::begin() + + Return random-access mutable iterator to beginning of array. + +---- + +.. cpp:function:: iterator array::end() + + Return random-access mutable iterator to end of array. As with STL iterators, + the end points to a virtual element just past the last valid array element. + +---- + +.. cpp:function:: const_iterator array::begin() const +.. cpp:function:: const_iterator array::cbegin() const + + Return random-access const iterator to beginning of array. + +---- + +.. cpp:function:: const_iterator array::end() const +.. cpp:function:: const_iterator array::cend() const + + Return random-access const iterator to end of array. + +.. note:: + Const :ref:`references `, :ref:`pointers `, and + :ref:`iterators ` are available as of |zfp| |crpirelease|. + +1D, 2D, 3D, and 4D Arrays +^^^^^^^^^^^^^^^^^^^^^^^^^ + +Below are classes and methods specific to each array dimensionality and +template scalar type (:code:`float` or :code:`double`). Since the classes +and methods share obvious similarities regardless of dimensionality, only +one generic description for all dimensionalities is provided. 
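The flat indexing convention used by :code:`operator[]` above, with *x* varying fastest, can be made concrete with a small helper (illustrative only, not part of the zfp API):

```cpp
#include <cassert>
#include <cstddef>

// Flat index for a 3D array, as given above: index = x + nx * (y + ny * z).
static inline size_t flat_index_3d(size_t x, size_t y, size_t z,
                                   size_t nx, size_t ny) {
  return x + nx * (y + ny * z);
}

// Inverse: recover (x, y, z) from a flat index.
static inline void unflatten_3d(size_t index, size_t nx, size_t ny,
                                size_t* x, size_t* y, size_t* z) {
  *x = index % nx;
  *y = (index / nx) % ny;
  *z = index / (nx * ny);
}
```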
+ +Note: In the class declarations below, the class template for the scalar +type is omitted for readability, e.g., +:code:`class array1` is used as shorthand for +:code:`template <typename Scalar> class array1`. Wherever the type +:code:`Scalar` appears, it refers to this template argument. + +.. + .. cpp:class:: template<typename Scalar> array1 : public array + .. cpp:class:: template<typename Scalar> array2 : public array + .. cpp:class:: template<typename Scalar> array3 : public array + .. cpp:class:: template<typename Scalar> array4 : public array + +.. cpp:class:: array1 : public array +.. cpp:class:: array2 : public array +.. cpp:class:: array3 : public array +.. cpp:class:: array4 : public array + + This is a 1D, 2D, 3D, or 4D array that inherits basic functionality + from the generic :cpp:class:`array` base class. The template argument, + :cpp:type:`Scalar`, specifies the floating-point type returned for array + elements. The suffixes :code:`f` and :code:`d` can also be appended + to each class to indicate float or double type, e.g., + :cpp:class:`array1f` is a synonym for :cpp:class:`array1\<float\>`. + +---- + +.. cpp:class:: arrayANY : public array + + Fictitious class used to refer to any one of :cpp:class:`array1`, + :cpp:class:`array2`, :cpp:class:`array3`, and :cpp:class:`array4`. + This class is not part of the |zfp| API. + +---- + +.. _array_ctor_default: +.. cpp:function:: array1::array1() +.. cpp:function:: array2::array2() +.. cpp:function:: array3::array3() +.. cpp:function:: array4::array4() + + Default constructor. Creates an empty array whose size and rate are both + zero. + +.. note:: + The default constructor is useful when the array size or rate is not known at + time of construction. Before the array can become usable, however, it must + be :ref:`resized <array_resize>` and its rate must be set via + :cpp:func:`array::set_rate`. These two tasks can be performed in either + order.
Furthermore, the desired cache size should be set using + :cpp:func:`array::set_cache_size`, as the default constructor creates a + cache that holds only one |zfp| block, i.e., the minimum possible. + +---- + +.. _array_ctor: +.. cpp:function:: array1::array1(size_t n, double rate, const Scalar* p = 0, size_t cache_size = 0) +.. cpp:function:: array2::array2(size_t nx, size_t ny, double rate, const Scalar* p = 0, size_t cache_size = 0) +.. cpp:function:: array3::array3(size_t nx, size_t ny, size_t nz, double rate, const Scalar* p = 0, size_t cache_size = 0) +.. cpp:function:: array4::array4(size_t nx, size_t ny, size_t nz, size_t nw, double rate, const Scalar* p = 0, size_t cache_size = 0) + + Constructor of array with dimensions *n* (1D), *nx* |times| *ny* (2D), + *nx* |times| *ny* |times| *nz* (3D), or + *nx* |times| *ny* |times| *nz* |times| *nw* (4D) using *rate* bits per + value, at least *cache_size* bytes of cache, and optionally initialized + from flat, uncompressed array *p*. If *cache_size* is zero, a default + cache size suitable for the array dimensions is chosen. + +---- + +.. _array_ctor_header: +.. cpp:function:: array1::array1(const array::header& h, const void* buffer = 0, size_t buffer_size_bytes = 0) +.. cpp:function:: array2::array2(const array::header& h, const void* buffer = 0, size_t buffer_size_bytes = 0) +.. cpp:function:: array3::array3(const array::header& h, const void* buffer = 0, size_t buffer_size_bytes = 0) +.. cpp:function:: array4::array4(const array::header& h, const void* buffer = 0, size_t buffer_size_bytes = 0) + + Constructor from previously :ref:`serialized ` compressed + array. The :ref:`header
`, *h*, contains array metadata, while the + optional *buffer* points to the compressed data that is to be copied to the + array. The optional *buffer_size_bytes* parameter specifies the *buffer* + length. If the constructor fails, an :ref:`exception ` is thrown. + See :cpp:func:`array::construct` for further details on the *buffer* and + *buffer_size_bytes* parameters. + +---- + +.. _array_copy_constructor: +.. cpp:function:: array1::array1(const array1& a) +.. cpp:function:: array2::array2(const array2& a) +.. cpp:function:: array3::array3(const array3& a) +.. cpp:function:: array4::array4(const array4& a) + + Copy constructor. Performs a deep copy. + +---- + +.. cpp:function:: virtual array1::~array1() +.. cpp:function:: virtual array2::~array2() +.. cpp:function:: virtual array3::~array3() +.. cpp:function:: virtual array4::~array4() + + Virtual destructor (allows for inheriting from |zfp| arrays). + +---- + +.. _array_copy: +.. cpp:function:: array1& array1::operator=(const array1& a) +.. cpp:function:: array2& array2::operator=(const array2& a) +.. cpp:function:: array3& array3::operator=(const array3& a) +.. cpp:function:: array4& array4::operator=(const array4& a) + + Assignment operator. Performs a deep copy. + +---- + +.. _array_dims: +.. cpp:function:: size_t array2::size_x() const +.. cpp:function:: size_t array2::size_y() const +.. cpp:function:: size_t array3::size_x() const +.. cpp:function:: size_t array3::size_y() const +.. cpp:function:: size_t array3::size_z() const +.. cpp:function:: size_t array4::size_x() const +.. cpp:function:: size_t array4::size_y() const +.. cpp:function:: size_t array4::size_z() const +.. cpp:function:: size_t array4::size_w() const + + Return array dimensions. + +---- + +.. _array_resize: +.. cpp:function:: void array1::resize(size_t n, bool clear = true) +.. cpp:function:: void array2::resize(size_t nx, size_t ny, bool clear = true) +.. 
cpp:function:: void array3::resize(size_t nx, size_t ny, size_t nz, bool clear = true) +.. cpp:function:: void array4::resize(size_t nx, size_t ny, size_t nz, size_t nw, bool clear = true) + + Resize the array (all previously stored data will be lost). If *clear* is + true, then the array elements are all initialized to zero. + +.. note:: + It is often desirable (though not a requirement) to also set the cache size + when resizing an array, e.g., in proportion to the array size; + see :cpp:func:`array::set_cache_size`. This is particularly important when + the array is default constructed, which initializes the cache size to the + minimum possible of only one |zfp| block. + +---- + +.. _array_accessor: +.. cpp:function:: const_reference array1::operator()(size_t i) const +.. cpp:function:: const_reference array2::operator()(size_t i, size_t j) const +.. cpp:function:: const_reference array3::operator()(size_t i, size_t j, size_t k) const +.. cpp:function:: const_reference array4::operator()(size_t i, size_t j, size_t k, size_t l) const + + Return const reference to element stored at multi-dimensional index given by + *i*, *j*, *k*, and *l* (inspector). + +.. note:: + As of |zfp| |crpirelease|, the return value is no longer :code:`Scalar` but + is a :ref:`const reference ` to the corresponding array element + (essentially equivalent to :code:`const Scalar&`). This API change was + necessary to allow obtaining a const pointer to the element when the array + itself is const qualified, e.g., + :code:`const_pointer p = &a(i, j, k);`. + +---- + +.. _lvref: +.. cpp:function:: reference array1::operator()(size_t i) +.. cpp:function:: reference array2::operator()(size_t i, size_t j) +.. cpp:function:: reference array3::operator()(size_t i, size_t j, size_t k) +.. cpp:function:: reference array4::operator()(size_t i, size_t j, size_t k, size_t l) + + Return :ref:`proxy reference ` to scalar stored at + multi-dimensional index given by *i*, *j*, *k*, and *l* (mutator). 
+ + +.. _carray_classes: + +Read-Only Variable-Rate Arrays +------------------------------ + +Read-only arrays are preferable in applications that store static data, +e.g., constant tables or simulation output, or data that is updated only +periodically as a whole, such as when advancing the solution +of a partial differential equation. Because such updates have to be applied +to the whole array, one may choose to tile large arrays into smaller |zfp| +arrays to support finer granularity updates. Read-only arrays have the +benefit of supporting all of |zfp|'s :ref:`compression modes `, most +of which provide higher accuracy per bit stored than fixed-rate mode. + +The read-only arrays share an API with the read-write fixed-rate arrays, +with only a few differences: + +- All methods other than those that specify array-wide settings, such as + compression mode and parameters, array dimensions, and array contents, + are :code:`const` qualified. There are, thus, no methods for obtaining + a writeable reference, pointer, or iterator. Consequently, one may not + initialize such arrays one element at a time. Rather, the user initializes + the whole array by passing a pointer to uncompressed data. + +- Whereas the constructors for fixed-rate arrays accept a *rate* parameter, + the read-only arrays allow specifying any compression mode and + corresponding parameters (if any) via a :c:type:`zfp_config` object. + +- Additional methods are available for setting and querying compression + mode and parameters after construction. + +- Read-only arrays are templated on a block index class that encodes the + bit offset to each block of data. Multiple index classes are available + that trade compactness and speed of access. The default :cpp:class:`hybrid4` + index represents 64-bit offsets using only 24 bits of amortized storage per + block. An "implicit" index is available for fixed-rate read-only arrays, + which computes rather than stores offsets to equal-sized blocks. + +.. 
note:: + Whereas variable-rate compression almost always improves accuracy per bit + of compressed data over fixed rate, one should also weigh the storage and + compute overhead associated with the block index needed for variable-rate + storage. The actual storage overhead can be determined by passing + :c:macro:`ZFP_DATA_INDEX` to :cpp:func:`const_array::size_bytes`. This + overhead tends to be small for 3D and 4D arrays. + +Array initialization may be done at construction time, by passing a pointer +to uncompressed data, or via the method :cpp:func:`const_array::set`, +which overwrites the contents of the whole array. This method may be +called more than once to update (i.e., re-initialize) the array. + +Read-only arrays support a subset of references, pointers, iterators, and +views; in particular those with a :code:`const_` prefix. + +Currently, not all capabilities of read-write arrays are available for +read-only arrays. For example, (de)serialization and construction from a +view have not yet been implemented, and there are no C bindings. + +Read-only arrays derive from the :ref:`array base class `. +Additional methods are documented below. + +.. + .. cpp:class:: template<typename Scalar> const_array1 : public array + .. cpp:class:: template<typename Scalar> const_array2 : public array + .. cpp:class:: template<typename Scalar> const_array3 : public array + .. cpp:class:: template<typename Scalar> const_array4 : public array + +.. cpp:class:: const_array1 : public array +.. cpp:class:: const_array2 : public array +.. cpp:class:: const_array3 : public array +.. cpp:class:: const_array4 : public array + + 1D, 2D, 3D, or 4D read-only array that inherits basic functionality + from the generic :cpp:class:`array` base class. The template argument, + :cpp:type:`Scalar`, specifies the floating type returned for array + elements. The suffixes :code:`f` and :code:`d` can also be appended + to each class to indicate float or double type, e.g., + :cpp:class:`const_array1f` is a synonym for + :cpp:class:`const_array1\<float\>`. + +---- + +..
cpp:class:: const_array : public array + + Fictitious class used to denote one of the 1D, 2D, 3D, and 4D read-only + array classes. This pseudo base class serves only to document the API + shared among the four arrays. + +---- + +.. _carray_ctor_default: +.. cpp:function:: const_array1::const_array1() +.. cpp:function:: const_array2::const_array2() +.. cpp:function:: const_array3::const_array3() +.. cpp:function:: const_array4::const_array4() + + Default constructor. Creates an empty array whose size is zero and whose + compression mode is unspecified. The array's cache size is initialized to + the minimum possible, which can have performance implications; see + :ref:`this note `. + +---- + +.. _carray_ctor: +.. cpp:function:: const_array1::const_array1(size_t n, const zfp_config& config, const Scalar* p = 0, size_t cache_size = 0) +.. cpp:function:: const_array2::const_array2(size_t nx, size_t ny, const zfp_config& config, const Scalar* p = 0, size_t cache_size = 0) +.. cpp:function:: const_array3::const_array3(size_t nx, size_t ny, size_t nz, const zfp_config& config, const Scalar* p = 0, size_t cache_size = 0) +.. cpp:function:: const_array4::const_array4(size_t nx, size_t ny, size_t nz, size_t nw, const zfp_config& config, const Scalar* p = 0, size_t cache_size = 0) + + Constructor of array with dimensions *n* (1D), *nx* |times| *ny* (2D), + *nx* |times| *ny* |times| *nz* (3D), or + *nx* |times| *ny* |times| *nz* |times| *nw* (4D). The compression mode and + parameters are given by *config* (see :ref:`configuration `). + The array uses at least *cache_size* bytes of cache, and is optionally + initialized from flat, uncompressed array *p*. If *cache_size* is zero, + a default cache size suitable for the array dimensions is chosen. + +---- + +.. cpp:function:: const_array1::const_array1(const const_array1& a) +.. cpp:function:: const_array2::const_array2(const const_array2& a) +.. cpp:function:: const_array3::const_array3(const const_array3& a) +.. 
cpp:function:: const_array4::const_array4(const const_array4& a) + + Copy constructor. Performs a deep copy. + +---- + +.. cpp:function:: virtual const_array1::~const_array1() +.. cpp:function:: virtual const_array2::~const_array2() +.. cpp:function:: virtual const_array3::~const_array3() +.. cpp:function:: virtual const_array4::~const_array4() + + Virtual destructor (allows for inheritance). + +---- + +.. _carray_copy: +.. cpp:function:: const_array1& const_array1::operator=(const const_array1& a) +.. cpp:function:: const_array2& const_array2::operator=(const const_array2& a) +.. cpp:function:: const_array3& const_array3::operator=(const const_array3& a) +.. cpp:function:: const_array4& const_array4::operator=(const const_array4& a) + + Assignment operator. Performs a deep copy. + +---- + +.. cpp:function:: size_t const_array::size() const + + Total number of elements in array, e.g., *nx* |times| *ny* |times| *nz* for + 3D arrays. + +---- + +.. _carray_dims: +.. cpp:function:: size_t const_array2::size_x() const +.. cpp:function:: size_t const_array2::size_y() const +.. cpp:function:: size_t const_array3::size_x() const +.. cpp:function:: size_t const_array3::size_y() const +.. cpp:function:: size_t const_array3::size_z() const +.. cpp:function:: size_t const_array4::size_x() const +.. cpp:function:: size_t const_array4::size_y() const +.. cpp:function:: size_t const_array4::size_z() const +.. cpp:function:: size_t const_array4::size_w() const + + Return array dimensions. + +---- + +.. _carray_resize: +.. cpp:function:: void const_array1::resize(size_t n, bool clear = true) +.. cpp:function:: void const_array2::resize(size_t nx, size_t ny, bool clear = true) +.. cpp:function:: void const_array3::resize(size_t nx, size_t ny, size_t nz, bool clear = true) +.. cpp:function:: void const_array4::resize(size_t nx, size_t ny, size_t nz, size_t nw, bool clear = true) + + Resize the array (all previously stored data will be lost). 
If *clear* is + true, then the array elements are all initialized to zero. See also + :ref:`this note `. + +---- + +.. cpp:function:: zfp_mode const_array::mode() const + + Currently selected :ref:`compression mode `. If not yet + specified, :code:`zfp_mode_null` is returned. + +---- + +.. cpp:function:: double const_array::rate() const + + Return rate in compressed bits per value when + :ref:`fixed-rate mode ` is enabled, else zero. + +---- + +.. cpp:function:: uint const_array::precision() const + + Return precision in uncompressed bits per value when + :ref:`fixed-precision mode ` is enabled, else zero. + +---- + +.. cpp:function:: double const_array::accuracy() const + + Return accuracy as absolute error tolerance when + :ref:`fixed-accuracy mode ` is enabled, else zero. + +---- + +.. cpp:function:: void const_array::params(uint* minbits, uint* maxbits, uint* maxprec, int* minexp) const + + :ref:`Expert mode ` compression parameters (available for + all compression modes). Pointers may be :code:`null` if the corresponding + parameter is not requested. + +---- + +.. cpp:function:: void const_array::set_reversible() + + Enable :ref:`reversible mode `. This method destroys + the previous contents of the array. + +---- + +.. cpp:function:: double const_array::set_rate(double rate) + + Set desired rate in compressed bits per value (enables + :ref:`fixed-rate mode `). This method destroys the + previous contents of the array. See also :cpp:func:`array::set_rate`. + +.. note:: + Whereas the :ref:`read-write fixed-rate arrays ` + (:cpp:class:`zfp::array`) require that block storage is word aligned, the + read-only arrays (:cpp:class:`zfp::const_array`) are not subject to such + restrictions and therefore support finer rate granularity. For a + *d*-dimensional :cpp:class:`const_array`, the rate granularity is + 4\ :sup:`-d` bits/value, e.g., a quarter bit/value for 1D arrays. + +---- + +..
cpp:function:: uint const_array::set_precision(uint precision) + + Set desired precision in uncompressed bits per value (enables + :ref:`fixed-precision mode `). This method destroys + the previous contents of the array. + +---- + +.. cpp:function:: double const_array::set_accuracy(double tolerance) + + Set desired accuracy as absolute error tolerance (enables + :ref:`fixed-accuracy mode `). This method destroys + the previous contents of the array. + +---- + +.. cpp:function:: bool const_array::set_params(uint minbits, uint maxbits, uint maxprec, int minexp) + + Set :ref:`expert mode ` parameters. This method destroys the + previous contents of the array. Return whether the codec supports the + combination of parameters. + +---- + +.. cpp:function:: void const_array::set_config(const zfp_config& config) + + Set compression mode and parameters given by *config* + (see :ref:`configuration `). This is a more general + method for setting compression parameters such as rate, precision, accuracy, + and :ref:`expert mode ` parameters. + +---- + +.. cpp:function:: size_t const_array::size_bytes(uint mask = ZFP_DATA_ALL) const + + Return storage size of components of array data structure indicated by + *mask*. The mask is constructed via bitwise OR of + :ref:`predefined constants `. + +---- + +.. cpp:function:: size_t const_array::compressed_size() const + + Return number of bytes of storage for the compressed data. This amount + does not include the small overhead of other class members or the size + of the cache. Rather, it reflects the size of the memory buffer + returned by :cpp:func:`compressed_data`. + +---- + +.. cpp:function:: void* const_array::compressed_data() const + + Return pointer to compressed data for read or write access. The size + of the buffer is given by :cpp:func:`compressed_size`. + +---- + +.. cpp:function:: size_t const_array::cache_size() const + + Return the cache size in number of bytes. + +---- + +.. 
cpp:function:: void const_array::set_cache_size(size_t bytes) + + Set minimum cache size in bytes. The actual size is always a power of two + bytes and consists of at least one block. If *bytes* is zero, then a + default cache size is used, which requires the array dimensions to be known. + +---- + +.. cpp:function:: void const_array::clear_cache() const + + Empty cache. + +---- + +.. cpp:function:: void const_array::get(Scalar* p) const + + Decompress entire array and store at *p*, for which sufficient storage must + have been allocated. The uncompressed array is assumed to be contiguous + (with default strides) and stored in the usual "row-major" order, i.e., with + *x* varying faster than *y*, *y* varying faster than *z*, etc. + +---- + +.. cpp:function:: void const_array::set(const Scalar* p, bool compact = true) + + Initialize array by copying and compressing floating-point data stored at + *p*. If *p* = 0, then the array is zero-initialized. The uncompressed data + is assumed to be stored as in the :cpp:func:`get` method. Since the size of + compressed data may not be known a priori, this method conservatively + allocates enough space to hold it. If *compact* is true, any unused storage + for compressed data is freed after initialization. + +---- + +.. _const_array_accessor: +.. cpp:function:: const_reference const_array1::operator()(size_t i) const +.. cpp:function:: const_reference const_array2::operator()(size_t i, size_t j) const +.. cpp:function:: const_reference const_array3::operator()(size_t i, size_t j, size_t k) const +.. cpp:function:: const_reference const_array4::operator()(size_t i, size_t j, size_t k, size_t l) const + + Return const reference to element stored at multi-dimensional index given by + *i*, *j*, *k*, and *l* (inspector). + +---- + +.. cpp:function:: const_reference const_array::operator[](size_t index) const + + Return :ref:`const reference ` to scalar stored at given flat + index (inspector). 
For a 3D array, :code:`index = x + nx * (y + ny * z)`. + +---- + +.. cpp:function:: const_iterator const_array::begin() const +.. cpp:function:: const_iterator const_array::cbegin() const + + Return random-access const iterator to beginning of array. + +---- + +.. cpp:function:: const_iterator const_array::end() const +.. cpp:function:: const_iterator const_array::cend() const + + Return random-access const iterator to end of array. + +.. include:: caching.inc +.. include:: serialization.inc +.. include:: references.inc +.. include:: pointers.inc +.. include:: iterators.inc +.. include:: views.inc +.. include:: codec.inc +.. include:: index.inc diff --git a/docs/source/bit-stream.rst b/docs/source/bit-stream.rst new file mode 100644 index 00000000..c77d2ce3 --- /dev/null +++ b/docs/source/bit-stream.rst @@ -0,0 +1,303 @@ +.. include:: defs.rst + +.. _bs-api: + +Bit Stream API +============== + +|zfp| relies on low-level functions for bit stream I/O, e.g., for +reading/writing single bits or groups of bits. |zfp|'s bit streams +support random access (with some caveats) and, optionally, strided +access. The functions read from and write to main memory allocated +by the user. For performance reasons, buffer overruns are not guarded +against. + +From an implementation standpoint, bit streams are read from and written +to memory in increments of *words* of bits. The constant power-of-two +word size is configured at :ref:`compile time `, and is limited +to 8, 16, 32, or 64 bits. + +The bit stream API is publicly exposed and may be used to write additional +information such as metadata into the |zfp| compressed stream and to +manipulate whole or partial bit streams. Moreover, we envision releasing +the bit stream functions as a separate library in the future that may be +used, for example, in other compressors. + +Stream readers and writers are synchronized by making corresponding calls. +For each write call, there is a corresponding read call.
This ensures +that reader and writer agree on the position within the stream and the +number of bits buffered, if any. The API below reflects this duality. + +A bit stream is in either read mode or write mode; a stream that has just +been rewound to the beginning may be used for either. When in read mode, +only read calls should be made, and similarly for write mode. + +.. _bs-strides: + +Strided Streams +--------------- + +Bit streams may be strided by sequentially reading/writing a few words at +a time and then skipping over some user-specified number of words. This +allows, for instance, |zfp| to interleave the first few bits of all +compressed blocks in order to support progressive access. To enable +strided access, which does carry a small performance penalty, the +macro :c:macro:`BIT_STREAM_STRIDED` must be defined during compilation. + +Strides are specified in terms of a *block size*---a power-of-two number +of contiguous words---and a *delta*, which specifies how many words to +advance the stream by to get to the next contiguous block. These bit +stream blocks are entirely independent of the |4powd| blocks used for +compression in |zfp|. Setting *delta* to zero ensures a non-strided, +sequential layout. + +.. _bs-macros: + +Macros +------ + +Two compile-time macros are used to influence the behavior: +:c:macro:`BIT_STREAM_WORD_TYPE` and :c:macro:`BIT_STREAM_STRIDED`. +These are documented in the :ref:`installation ` +section. + +.. _bs-types: + +Types +----- + +.. c:type:: bitstream_word + + Bits are buffered and read/written in units of words. By default, the + bit stream word type is 64 bits, but may be set to 8, 16, or 32 bits + by setting the macro :c:macro:`BIT_STREAM_WORD_TYPE` to :c:type:`uint8`, + :c:type:`uint16`, or :c:type:`uint32`, respectively. Larger words + tend to give higher throughput, while 8-bit words are needed to ensure + endian independence (see FAQ :ref:`#11 `). + +..
note:: + To avoid potential name clashes, this type was renamed in + |zfp| |64bitrelease| from the shorter and more ambiguous type name + :code:`word`. + +---- + +.. c:type:: bitstream_offset + + Type holding the offset, measured in number of bits, into the bit stream + where the next bit will be read or written. This type allows referencing + bits in streams at least 2\ :sup:`64` bits long. Note that it is possible + that :code:`sizeof(bitstream_offset) > sizeof(size_t)`, since a buffer of + :code:`SIZE_MAX` bytes holds :code:`SIZE_MAX * CHAR_BIT` bits, and bit + offsets into it may therefore exceed the range of :code:`size_t`. + +---- + +.. c:type:: bitstream_size + + Alias for :c:type:`bitstream_offset` that signifies the bit length of a + stream or substream rather than an offset into it. + +---- + +.. c:type:: bitstream_count + + Type sufficient to count the number of bits read or written in functions + like :c:func:`stream_read_bits` and :c:func:`stream_write_bits`. + :code:`sizeof(bitstream_count) <= sizeof(bitstream_size)`. + +---- + +.. c:type:: bitstream + + The bit stream struct maintains all the state associated with a bit + stream. This struct is passed to all bit stream functions. Its members + should not be accessed directly. + :: + + struct bitstream { + bitstream_count bits; // number of buffered bits (0 <= bits < word size) + bitstream_word buffer; // incoming/outgoing bits (buffer < 2^bits) + bitstream_word* ptr; // pointer to next word to be read/written + bitstream_word* begin; // beginning of stream + bitstream_word* end; // end of stream (not enforced) + size_t mask; // one less than the block size in number of words (if BIT_STREAM_STRIDED) + ptrdiff_t delta; // number of words between consecutive blocks (if BIT_STREAM_STRIDED) + }; + +.. _bs-data: + +Constants +--------- + +.. c:var:: const size_t stream_word_bits + + The number of bits in a word. The size of a flushed bit stream will be + a multiple of this number of bits. See :c:macro:`BIT_STREAM_WORD_TYPE` + and :c:func:`stream_alignment`. + +..
_bs-functions: + +Functions +--------- + +.. c:function:: bitstream* stream_open(void* buffer, size_t bytes) + + Allocate a :c:type:`bitstream` struct and associate it with the memory + buffer allocated by the caller. + +---- + +.. c:function:: void stream_close(bitstream* stream) + + Close the bit stream and deallocate *stream*. + +---- + +.. c:function:: bitstream* stream_clone(const bitstream* stream) + + Create a copy of *stream* that points to the same memory buffer. + +---- + +.. c:function:: bitstream_count stream_alignment() + + Word size in bits. This is a functional form of the constant + :c:var:`stream_word_bits` and returns the same value. + Available since |zfp| |crpirelease|. + +---- + +.. c:function:: void* stream_data(const bitstream* stream) + + Return pointer to the beginning of bit stream *stream*. + +---- + +.. c:function:: size_t stream_size(const bitstream* stream) + + Return position of stream pointer in number of bytes, which equals the + end of stream if no seeks have been made. Note that additional bits + may be buffered and not reported unless the stream has been flushed. + +---- + +.. c:function:: size_t stream_capacity(const bitstream* stream) + + Return byte size of memory buffer associated with *stream* specified + in :c:func:`stream_open`. + +---- + +.. c:function:: uint stream_read_bit(bitstream* stream) + + Read a single bit from *stream*. + +---- + +.. c:function:: uint stream_write_bit(bitstream* stream, uint bit) + + Write single *bit* to *stream*. *bit* must be one of 0 or 1. + The value of *bit* is returned. + +---- + +.. c:function:: uint64 stream_read_bits(bitstream* stream, bitstream_count n) + + Read and return 0 |leq| *n* |leq| 64 bits from *stream*. + +---- + +.. c:function:: uint64 stream_write_bits(bitstream* stream, uint64 value, bitstream_count n) + + Write 0 |leq| *n* |leq| 64 low bits of *value* to *stream*. Return any + remaining bits from *value*, i.e., *value* >> *n*. + +---- + +.. 
c:function:: bitstream_offset stream_rtell(const bitstream* stream) + + Return bit offset to next bit to be read. + +---- + +.. c:function:: bitstream_offset stream_wtell(const bitstream* stream) + + Return bit offset to next bit to be written. + +---- + +.. c:function:: void stream_rewind(bitstream* stream) + + Rewind stream to beginning of memory buffer. Following this call, the + stream may either be read or written. + +---- + +.. c:function:: void stream_rseek(bitstream* stream, bitstream_offset offset) + + Position stream for reading at given bit offset. This places the + stream in read mode. + +---- + +.. c:function:: void stream_wseek(bitstream* stream, bitstream_offset offset) + + Position stream for writing at given bit offset. This places the + stream in write mode. + +---- + +.. c:function:: void stream_skip(bitstream* stream, bitstream_count n) + + Skip over the next *n* bits, i.e., without reading them. + +---- + +.. c:function:: void stream_pad(bitstream* stream, bitstream_count n) + + Append *n* zero-bits to *stream*. + +---- + +.. c:function:: bitstream_count stream_align(bitstream* stream) + + Align stream on next word boundary by skipping bits, i.e., without reading + them. No skipping is done if the stream is already word aligned. Return + the number of skipped bits, if any. + +---- + +.. c:function:: bitstream_count stream_flush(bitstream* stream) + + Write out any remaining buffered bits. When one or more bits are + buffered, append zero-bits to the stream to align it on a word boundary. + Return the number of bits of padding, if any. + +---- + +.. c:function:: void stream_copy(bitstream* dst, bitstream* src, bitstream_size n) + + Copy *n* bits from *src* to *dst*, advancing both bit streams. + +---- + +.. c:function:: size_t stream_stride_block(const bitstream* stream) + + Return stream block size in number of words. The block size is always + one word unless strided streams are enabled. See :ref:`bs-strides` + for more information. 
+ +---- + +.. c:function:: ptrdiff_t stream_stride_delta(const bitstream* stream) + + Return stream delta in number of words between blocks. See + :ref:`bs-strides` for more information. + +---- + +.. c:function:: int stream_set_stride(bitstream* stream, size_t block, ptrdiff_t delta) + + Set block size, *block*, in number of words and spacing, *delta*, in number + of blocks for :ref:`strided access `. Return nonzero upon + success. Requires :c:macro:`BIT_STREAM_STRIDED`. diff --git a/docs/source/caching.inc b/docs/source/caching.inc new file mode 100644 index 00000000..374963e0 --- /dev/null +++ b/docs/source/caching.inc @@ -0,0 +1,38 @@ +.. _caching: + +Caching +------- + +As mentioned above, the array classes maintain a software write-back cache +of at least one uncompressed block. When a block in this cache is evicted +(e.g., due to a conflict), it is compressed back to permanent storage only +if it was modified while stored in the cache. + +The cache size to use is specified by the user and is an important +parameter that needs careful consideration in order to balance the extra +memory usage, performance, and quality (recall that data loss is incurred +only when a block is evicted from the cache and compressed). Although the +best choice varies from one application to another, we suggest allocating +at least two "layers" of blocks, e.g., 2 |times| (*nx* / 4) |times| (*ny* / 4) +blocks for 3D arrays, for applications that stream through the array and +perform stencil computations such as gathering data from neighboring elements. +This allows limiting the cache misses to compulsory ones. If the *cache_size* +parameter provided to the constructor is set to zero bytes, then a default +cache size of at least |sqrt|\ *n* blocks is used, where *n* is the total +number of blocks contained in the array. + +The cache size can be set during construction, or can be set at a later time +via :cpp:func:`array::set_cache_size`.
Note that if *cache_size* = 0, then +the array dimensions must have already been specified for the default size +to be computed correctly. When the cache is resized, it is first flushed +if not already empty. The cache can also be flushed explicitly if desired +by calling :cpp:func:`array::flush_cache`. To empty the cache without +compressing any cached data, call :cpp:func:`array::clear_cache`. To query +the byte size of the cache, use :cpp:func:`array::cache_size`. + +By default, a direct-mapped cache is used with a hash function that maps +block indices to cache lines. A faster but more collision-prone hash +can be enabled by defining the preprocessor macro +:c:macro:`ZFP_WITH_CACHE_FAST_HASH`. +A two-way skew-associative cache is enabled by defining the preprocessor +macro :c:macro:`ZFP_WITH_CACHE_TWOWAY`. diff --git a/docs/source/cfp.rst b/docs/source/cfp.rst new file mode 100644 index 00000000..87f474e9 --- /dev/null +++ b/docs/source/cfp.rst @@ -0,0 +1,983 @@ +.. include:: defs.rst +.. index:: + single: cfp +.. _cfp: + +Compressed-Array C Bindings +=========================== + +.. cpp:namespace:: zfp + +|zfp| |cfprelease| adds |cfp|: C language bindings for compressed arrays +via wrappers around the :ref:`C++ classes `. |zfp| |crpirelease| +modifies its API (see below). + +The C API has been designed to facilitate working with compressed arrays +without the benefits of C++ operator overloading and self-aware objects, +which greatly simplify the syntax. Whereas one possible design considered +was to map each C++ method to a C function with a prefix, such as +:code:`zfp_array3d_get(a, i, j, k)` in place of :code:`a(i, j, k)` for +accessing an element of a 3D array of doubles, such code would quickly +become unwieldy when part of longer expressions. + +Instead, |cfp| uses the notion of nested C *namespaces* that are structs +of function pointers, such as :code:`cfp.array3d`.
Although this may +seem no more concise than a design based on prefixes, the user may alias +these namespaces (somewhat similar to C++ :code:`using namespace` +declarations) using far shorter names via C macros or local variables. +For instance:: + + const cfp_array3d_api _ = cfp.array3d; // _ is a namespace alias + cfp_array3d a = _.ctor(nx, ny, nz, rate, 0, 0); + double value = _.get(a, i, j, k); + _.set(a, i, j, k, value + 1); + +which is a substitute for the C++ code +:: + + zfp::array3d a(nx, ny, nz, rate, 0, 0); + double value = a(i, j, k); + a(i, j, k) = value + 1; + +Because the underlying C++ array objects have no corresponding C +representation, and because C objects are not self aware (they have no +implicit :code:`this` pointer), the C interface interacts with compressed +arrays through array object *pointers*, wrapped in structs, that |cfp| +converts to pointers to the corresponding C++ objects. As a consequence, +|cfp| compressed arrays must be allocated on the heap and must be explicitly +freed via designated destructor functions to avoid memory leaks (this is +not necessary for references, pointers, and iterators, which have their +own C representation). The C++ constructors are mapped to C by allocating +objects via C++ :code:`new`. Moreover, the C API requires passing an array +*self pointer* (wrapped within a cfp array struct) in order to manipulate +the array. + +As with the :ref:`C++ classes `, array elements can be +accessed via multidimensional array indexing, e.g., :code:`get(array, i, j)`, +and via flat, linear indexing, e.g., :code:`get_flat(array, i + nx * j)`. + +.. note:: + + The |cfp| API changed in |zfp| |crpirelease| by wrapping array + *self pointers* in structs to align the interface more closely with the + C++ API and to avoid confusion when discussing arrays (now + :code:`cfp.array` rather than :code:`cfp.array*`) and pointers to + arrays (now :code:`cfp.array*` rather than :code:`cfp.array**`). 
+ Furthermore, |zfp| |crpirelease| adds support for proxy references, + proxy pointers, and iterators that also wrap C++ classes. Manipulating + those indirectly via pointers (like the old |cfp| arrays) would require + additional user effort to destroy dynamically allocated lightweight objects + and would also reduce code readability, e.g., :code:`cfp_ptr1d*` (whose + corresponding C++ type is :code:`zfp::array1d::pointer*`) reads more + naturally as a raw pointer to a proxy pointer than an indirectly referenced + proxy pointer object that the user must remember to implicitly dereference. + +The following sections are available: + +* :ref:`cfp_arrays` +* :ref:`cfp_serialization` +* :ref:`cfp_references` +* :ref:`cfp_pointers` +* :ref:`cfp_iterators` + + +.. _cfp_arrays: + +Arrays +------ + +|cfp| implements eight array types for 1D, 2D, 3D, and 4D arrays of floats and +doubles. These array types share many functions that have the same signature. +To reduce redundancy in the documentation, we define fictitious types +:c:type:`cfp_arrayf` and :c:type:`cfp_arrayd` for *N*-dimensional +(1 |leq| *N* |leq| 4) arrays of floats or doubles, :c:type:`cfp_array1`, +:c:type:`cfp_array2`, :c:type:`cfp_array3`, and :c:type:`cfp_array4` for +1D, 2D, 3D, and 4D arrays of either floats or doubles, and :c:type:`cfp_array` +for arrays of any dimensionality and type. We also make use of corresponding +namespaces, e.g., :c:struct:`cfp.array1` refers to the API common to +one-dimensional arrays of floats or doubles. These types and namespaces are +not actually part of the |cfp| API. + +.. note:: + + The |cfp| array API makes use of :code:`const` qualifiers for :code:`struct` + parameters (passed by value) merely to indicate when the corresponding object + is not modified, e.g., :code:`const cfp_array1f self`. This construction + serves to document functions that are analogous to :code:`const` qualified + C++ member functions. + +.. 
note:: + + Support for 4D arrays was added to cfp in version |crpirelease|. + +.. c:type:: cfp_array1f +.. c:type:: cfp_array1d +.. c:type:: cfp_array2f +.. c:type:: cfp_array2d +.. c:type:: cfp_array3f +.. c:type:: cfp_array3d +.. c:type:: cfp_array4f +.. c:type:: cfp_array4d + + Opaque types for 1D, 2D, 3D, and 4D compressed arrays of floats and doubles. + +---- + +.. c:type:: cfp_array1 +.. c:type:: cfp_array2 +.. c:type:: cfp_array3 +.. c:type:: cfp_array4 + + Fictitious types denoting 1D, 2D, 3D, and 4D arrays of any scalar type. + +---- + +.. c:type:: cfp_arrayf +.. c:type:: cfp_arrayd + + Fictitious types denoting any-dimensional arrays of floats and doubles. + +---- + +.. c:type:: cfp_array + + Fictitious type denoting array of any dimensionality and scalar type. + +---- + +.. c:struct:: cfp + + .. c:struct:: array1f + .. c:struct:: array1d + .. c:struct:: array2f + .. c:struct:: array2d + .. c:struct:: array3f + .. c:struct:: array3d + .. c:struct:: array4f + .. c:struct:: array4d + .. c:struct:: header + + Nested C "namespaces" for encapsulating the |cfp| API. The outer + :c:struct:`cfp` namespace may be redefined at compile-time via the macro + :c:macro:`CFP_NAMESPACE`, e.g., to avoid symbol clashes. The inner + namespaces hold function pointers to the |cfp| wrappers documented below. + +---- + +.. _cfp_ctor: +.. c:function:: cfp_array1f cfp.array1f.ctor(size_t nx, double rate, const float* p, size_t cache_size) +.. c:function:: cfp_array1d cfp.array1d.ctor(size_t nx, double rate, const double* p, size_t cache_size) +.. c:function:: cfp_array2f cfp.array2f.ctor(size_t nx, size_t ny, double rate, const float* p, size_t cache_size) +.. c:function:: cfp_array2d cfp.array2d.ctor(size_t nx, size_t ny, double rate, const double* p, size_t cache_size) +.. c:function:: cfp_array3f cfp.array3f.ctor(size_t nx, size_t ny, size_t nz, double rate, const float* p, size_t cache_size) +.. 
c:function:: cfp_array3d cfp.array3d.ctor(size_t nx, size_t ny, size_t nz, double rate, const double* p, size_t cache_size) +.. c:function:: cfp_array4f cfp.array4f.ctor(size_t nx, size_t ny, size_t nz, size_t nw, double rate, const float* p, size_t cache_size) +.. c:function:: cfp_array4d cfp.array4d.ctor(size_t nx, size_t ny, size_t nz, size_t nw, double rate, const double* p, size_t cache_size) + + :ref:`Array constructors `. + If *p* is not :code:`NULL`, then the array is initialized from uncompressed + storage; otherwise the array is zero initialized. *cache_size* is the + minimum size cache (in bytes) to use. If *cache_size* is zero, a default + size is chosen. + +---- + +.. c:function:: cfp_array cfp.array.ctor_default() + + Default constructor. Allocate an empty array that later can be + :ref:`resized ` and whose rate and cache size can be + set by :c:func:`cfp.array.set_rate` and + :c:func:`cfp.array.set_cache_size`. + +---- + +.. c:function:: cfp_array cfp.array.ctor_copy(const cfp_array src) + + :ref:`Copy constructor `. + +---- + +.. _cfp_ctor_header: +.. c:function:: cfp_array cfp.array.ctor_header(const cfp_header h, const void* buffer, size_t buffer_size_bytes); + + Constructor from metadata given by the :ref:`header ` *h* + and optionally initialized with compressed data from *buffer* of + size *buffer_size_bytes*. + See :ref:`corresponding C++ constructor `. + +---- + +.. c:function:: void cfp.array.dtor(cfp_array self) + + Destructor. The destructor not only deallocates any compressed data + owned by the array, but also frees memory for itself, invalidating + the *self* object upon return. Note that the user must explicitly + call the destructor to avoid memory leaks. + +---- + +.. c:function:: void cfp.array.deep_copy(cfp_array self, const cfp_array src) + + Perform a deep copy of *src* analogous to the + :ref:`C++ assignment operator `. + +---- + +.. _cfp_inspectors: +.. c:function:: float cfp.array1f.get(const cfp_array1f self, size_t i) +.. 
c:function:: float cfp.array2f.get(const cfp_array2f self, size_t i, size_t j) +.. c:function:: float cfp.array3f.get(const cfp_array3f self, size_t i, size_t j, size_t k) +.. c:function:: float cfp.array4f.get(const cfp_array4f self, size_t i, size_t j, size_t k, size_t l) +.. c:function:: double cfp.array1d.get(const cfp_array1d self, size_t i) +.. c:function:: double cfp.array2d.get(const cfp_array2d self, size_t i, size_t j) +.. c:function:: double cfp.array3d.get(const cfp_array3d self, size_t i, size_t j, size_t k) +.. c:function:: double cfp.array4d.get(const cfp_array4d self, size_t i, size_t j, size_t k, size_t l) + + :ref:`Array inspectors ` via multidimensional indexing. + +---- + +.. _cfp_mutators: +.. c:function:: void cfp.array1f.set(const cfp_array1f self, size_t i, float val) +.. c:function:: void cfp.array2f.set(const cfp_array2f self, size_t i, size_t j, float val) +.. c:function:: void cfp.array3f.set(const cfp_array3f self, size_t i, size_t j, size_t k, float val) +.. c:function:: void cfp.array4f.set(const cfp_array4f self, size_t i, size_t j, size_t k, size_t l, float val) +.. c:function:: void cfp.array1d.set(const cfp_array1d self, size_t i, double val) +.. c:function:: void cfp.array2d.set(const cfp_array2d self, size_t i, size_t j, double val) +.. c:function:: void cfp.array3d.set(const cfp_array3d self, size_t i, size_t j, size_t k, double val) +.. c:function:: void cfp.array4d.set(const cfp_array4d self, size_t i, size_t j, size_t k, size_t l, double val) + + :ref:`Array mutators ` for assigning values to array elements via + multidimensional indexing. + +---- + +.. c:function:: float cfp.arrayf.get_flat(const cfp_arrayf self, size_t index) +.. c:function:: double cfp.arrayd.get_flat(const cfp_arrayd self, size_t index) + + Flat index array inspectors; see :cpp:func:`array::operator[]`. + +---- + +.. c:function:: void cfp.arrayf.set_flat(cfp_arrayf self, size_t index, float val) +.. 
c:function:: void cfp.arrayd.set_flat(cfp_arrayd self, size_t index, double val) + + Flat index array mutators; set array element with flat *index* to *val*. + +---- + +.. c:function:: void cfp.arrayf.get_array(const cfp_arrayf self, float* p) +.. c:function:: void cfp.arrayd.get_array(const cfp_arrayd self, double* p) + + Decompress entire array; see :cpp:func:`array::get`. + +---- + +.. c:function:: void cfp.arrayf.set_array(cfp_arrayf self, const float* p) +.. c:function:: void cfp.arrayd.set_array(cfp_arrayd self, const double* p) + + Initialize entire array; see :cpp:func:`array::set`. + +---- + +.. c:function:: size_t cfp.array2.size_x(const cfp_array2 self) +.. c:function:: size_t cfp.array2.size_y(const cfp_array2 self) +.. c:function:: size_t cfp.array3.size_x(const cfp_array3 self) +.. c:function:: size_t cfp.array3.size_y(const cfp_array3 self) +.. c:function:: size_t cfp.array3.size_z(const cfp_array3 self) +.. c:function:: size_t cfp.array4.size_x(const cfp_array4 self) +.. c:function:: size_t cfp.array4.size_y(const cfp_array4 self) +.. c:function:: size_t cfp.array4.size_z(const cfp_array4 self) +.. c:function:: size_t cfp.array4.size_w(const cfp_array4 self) + + :ref:`Array dimensions `. + +---- + +.. c:function:: size_t cfp.array.size(const cfp_array self) + + See :cpp:func:`array::size`. + +---- + +.. _cfp_resize: +.. c:function:: void cfp.array1.resize(cfp_array1 self, size_t n, zfp_bool clear) +.. c:function:: void cfp.array2.resize(cfp_array2 self, size_t nx, size_t ny, zfp_bool clear) +.. c:function:: void cfp.array3.resize(cfp_array3 self, size_t nx, size_t ny, size_t nz, zfp_bool clear) +.. c:function:: void cfp.array4.resize(cfp_array4 self, size_t nx, size_t ny, size_t nz, size_t nw, zfp_bool clear) + + :ref:`Resize array `. + +---- + +.. c:function:: double cfp.array.rate(const cfp_array self) + + See :cpp:func:`array::rate`. + +---- + +.. 
c:function:: double cfp.array.set_rate(cfp_array self, double rate) + + See :cpp:func:`array::set_rate`. + +---- + +.. c:function:: size_t cfp.array.cache_size(const cfp_array self) + + See :cpp:func:`array::cache_size`. + +---- + +.. c:function:: void cfp.array.set_cache_size(cfp_array self, size_t cache_size) + + See :cpp:func:`array::set_cache_size`. + +---- + +.. c:function:: void cfp.array.clear_cache(const cfp_array self) + + See :cpp:func:`array::clear_cache`. + +---- + +.. c:function:: void cfp.array.flush_cache(const cfp_array self) + + See :cpp:func:`array::flush_cache`. + +---- + +.. c:function:: size_t cfp.array.size_bytes(const cfp_array self, uint mask) + + See :cpp:func:`array::size_bytes`. + +---- + +.. c:function:: size_t cfp.array.compressed_size(const cfp_array self) + + See :cpp:func:`array::compressed_size`. + +---- + +.. c:function:: void* cfp.array.compressed_data(const cfp_array self) + + See :cpp:func:`array::compressed_data`. + +---- + +.. c:function:: cfp_ref1 cfp.array1.ref(cfp_array1 self, size_t i) +.. c:function:: cfp_ref2 cfp.array2.ref(cfp_array2 self, size_t i, size_t j) +.. c:function:: cfp_ref3 cfp.array3.ref(cfp_array3 self, size_t i, size_t j, size_t k) +.. c:function:: cfp_ref4 cfp.array4.ref(cfp_array4 self, size_t i, size_t j, size_t k, size_t l) + + Reference :ref:`constructor ` via multidimensional indexing. + +---- + +.. c:function:: cfp_ref cfp.array.ref_flat(cfp_array self, size_t i) + + Reference :ref:`constructor ` via flat indexing. + +---- + +.. c:function:: cfp_ptr1 cfp.array1.ptr(cfp_array1 self, size_t i) +.. c:function:: cfp_ptr2 cfp.array2.ptr(cfp_array2 self, size_t i, size_t j) +.. c:function:: cfp_ptr3 cfp.array3.ptr(cfp_array3 self, size_t i, size_t j, size_t k) +.. c:function:: cfp_ptr4 cfp.array4.ptr(cfp_array4 self, size_t i, size_t j, size_t k, size_t l) + + Obtain pointer to array element via multidimensional indexing. + +---- + +.. 
c:function:: cfp_ptr cfp.array.ptr_flat(cfp_array self, size_t i) + + Obtain pointer to array element via flat indexing. + +---- + +.. c:function:: cfp_iter cfp.array.begin(cfp_array self) + + Return iterator to beginning of array; + see :cpp:func:`array::begin()`. + +---- + +.. c:function:: cfp_iter cfp.array.end(cfp_array self) + + Return iterator to end of array; + see :cpp:func:`array::end()`. + + +.. _cfp_serialization: + +Serialization +------------- + +.. cpp:namespace:: zfp + +|zfp| |crpirelease| adds |cfp| array :ref:`serialization `. +Like |zfp|'s C++ arrays, |cfp| arrays can be serialized and deserialized to +and from sequential storage. As with the C++ arrays, (de)serialization is +done with the assistance of a header class, :c:type:`cfp_header`. Currently, +|cfp| provides no :ref:`factory function `---the caller must +either know which type of array (dimensionality and scalar type) to +:ref:`construct ` at compile-time or obtain this information at +run-time from a header :ref:`constructed ` from a memory +buffer. + +.. _cfp_header: + +Header +^^^^^^ + +:c:type:`cfp_header` is a wrapper around :cpp:class:`array::header`. +Although the header type is shared among all array types, the header API +is accessed through the associated array type whose metadata the header +describes. For example, :code:`cfp.array3f.header.ctor(const cfp_array3f a)` +constructs a header for a :c:type:`cfp_array3f`. The header is dynamically +allocated and must be explicitly destructed via +:c:func:`cfp.array.header.dtor`. + +.. c:type:: cfp_header + + Wrapper around :cpp:class:`array::header`. + +---- + +.. c:function:: cfp_header cfp.array.header.ctor(const cfp_array a); + + :ref:`Construct ` a header that describes the metadata of an + existing array *a*. + +---- + +.. c:function:: cfp_header cfp.array.header.ctor_buffer(const void* data, size_t size) + + :ref:`Construct ` a header from header *data* buffer + of given byte *size*. + +---- + +.. 
c:function:: void cfp.array.header.dtor(cfp_header self); + + Destructor. Deallocates all data associated with the header. The user + must call the destructor to avoid memory leaks. + +---- + +.. cpp:namespace:: zfp::array + +.. c:function:: zfp_type cfp.array.header.scalar_type(const cfp_header self); + + Scalar type associated with array. See :cpp:func:`header::scalar_type`. + +---- + +.. c:function:: uint cfp.array.header.dimensionality(const cfp_header self); + + Dimensionality associated with array. + See :cpp:func:`header::dimensionality`. + +---- + +.. c:function:: size_t cfp.array.header.size_x(const cfp_header self); +.. c:function:: size_t cfp.array.header.size_y(const cfp_header self); +.. c:function:: size_t cfp.array.header.size_z(const cfp_header self); +.. c:function:: size_t cfp.array.header.size_w(const cfp_header self); + + :ref:`Array dimensions `. Unused dimensions have a size of zero. + +---- + +.. c:function:: double cfp.array.header.rate(const cfp_header self); + + Rate in bits/value. See :cpp:func:`header::rate`. + +---- + +.. c:function:: const void* cfp.array.header.data(const cfp_header self); + + Pointer to header data buffer needed for serializing the header. + See :cpp:func:`header::data`. + +---- + +.. c:function:: size_t cfp.array.header.size_bytes(const cfp_header self, uint mask); + + When *mask* = :c:macro:`ZFP_DATA_HEADER`, byte size of header data buffer needed + for serializing the header. See :cpp:func:`header::size_bytes`. + + +Array Accessors +--------------- + +.. cpp:namespace:: zfp::arrayANY + +|zfp| |crpirelease| adds |cfp| support for proxy +:ref:`references ` and :ref:`pointers ` to individual +array elements, as well as :ref:`iterators ` for traversing arrays. +These are analogues to the corresponding C++ classes. As with +:ref:`arrays `, fictitious types and namespaces are used to +shorten the documentation. + +.. _cfp_rpi_value_semantics: +.. 
note:: + + Unlike the case of arrays, for which the surrounding struct stores a pointer + to the underlying array object to allow modifications of the array, the + |cfp| proxy reference, proxy pointer, and iterator objects are all passed + by value, and hence none of the functions below modify the *self* argument. + To increment a pointer, for instance, one should call + :code:`p = cfp.array.pointer.inc(p)`. Note that while the references, + pointers, and iterators are not themselves modified, the array elements + that they reference can be modified. + +.. _cfp_references: + +References +---------- + +|cfp| proxy references wrap the C++ :ref:`reference ` classes. +References are constructed via :c:func:`cfp.array.ref`, +:c:func:`cfp.array.pointer.ref`, and :c:func:`cfp.array.iterator.ref` +(as well as associated :code:`ref_flat` and :code:`ref_at` calls). + +.. note:: + + |cfp| references exist primarily to provide parity with |zfp| references. + As references do not exist in C, the preferred way of accessing arrays is + via :ref:`proxy pointers `, :ref:`iterators `, + or :ref:`index-based array accessors `. + + |cfp| references do provide the same guarantees as C++ references, + functioning as aliases to initialized members of the |cfp| wrapped |zfp| + array. This is with the caveat that they are only accessed via |cfp| API + calls (use of the :code:`=` C assignment operator to shallow copy a + :c:type:`cfp_ref` is also allowed in this case). + +.. c:type:: cfp_ref1f +.. c:type:: cfp_ref2f +.. c:type:: cfp_ref3f +.. c:type:: cfp_ref4f +.. c:type:: cfp_ref1d +.. c:type:: cfp_ref2d +.. c:type:: cfp_ref3d +.. c:type:: cfp_ref4d + + Opaque types for proxy references to 1D, 2D, 3D, and 4D compressed float or + double array elements. + +---- + +.. c:type:: cfp_ref1 +.. c:type:: cfp_ref2 +.. c:type:: cfp_ref3 +.. c:type:: cfp_ref4 + + Fictitious types denoting references into 1D, 2D, 3D, and 4D arrays of any + scalar type. + +---- + +.. c:type:: cfp_reff +.. 
c:type:: cfp_refd + + Fictitious types denoting references into float or double arrays of any + dimensionality. + +---- + +.. c:type:: cfp_ref + + Fictitious type denoting reference into array of any dimensionality and + scalar type. + +---- + +.. c:function:: float cfp.arrayf.reference.get(const cfp_reff self) +.. c:function:: double cfp.arrayd.reference.get(const cfp_refd self) + + Retrieve value referenced by *self*. + +---- + +.. c:function:: void cfp.arrayf.reference.set(cfp_reff self, float val) +.. c:function:: void cfp.arrayd.reference.set(cfp_refd self, double val) + + Update value referenced by *self*; + see :cpp:func:`reference::operator=()`. + +---- + +.. c:function:: cfp_ptr cfp.array.reference.ptr(cfp_ref self) + + Obtain proxy pointer to value referenced by *self*; + see :cpp:func:`reference::operator&()`. + +---- + +.. c:function:: void cfp.array.reference.copy(cfp_ref self, const cfp_ref src) + + Copy value referenced by *src* to value referenced by *self*; + see :cpp:func:`reference::operator=()`. This performs a + deep copy. This is in contrast to :code:`self = src`, which performs + only a shallow copy. + + +.. _cfp_pointers: + +Pointers +-------- + +|cfp| proxy pointers wrap the C++ :ref:`pointer ` classes. +Pointers are constructed via :c:func:`cfp.array.ptr` and +:c:func:`cfp.array.reference.ptr` (and associated :code:`ptr_flat` and +:code:`ptr_at` calls). All pointers are +:ref:`passed by value ` +and are themselves not modified by these functions. + +.. note:: + + As with :cpp:class:`array::pointer`, :c:type:`cfp_ptr` indexing is + based on element-wise ordering and is unaware of |zfp| blocks. This + may result in a suboptimal access pattern if sequentially + accessing array members. To take advantage of |zfp| block + traversal optimization, see :ref:`iterators `. + +.. c:type:: cfp_ptr1f +.. c:type:: cfp_ptr2f +.. c:type:: cfp_ptr3f +.. c:type:: cfp_ptr4f +.. c:type:: cfp_ptr1d +.. c:type:: cfp_ptr2d +.. c:type:: cfp_ptr3d +.. 
c:type:: cfp_ptr4d + + Opaque types for proxy pointers to 1D, 2D, 3D, and 4D compressed float or + double array elements. + +---- + +.. c:type:: cfp_ptr1 +.. c:type:: cfp_ptr2 +.. c:type:: cfp_ptr3 +.. c:type:: cfp_ptr4 + + Fictitious types denoting pointers into 1D, 2D, 3D, and 4D arrays of any + scalar type. + +---- + +.. c:type:: cfp_ptrf +.. c:type:: cfp_ptrd + + Fictitious types denoting pointers into float or double arrays of any + dimensionality. + +---- + +.. c:type:: cfp_ptr + + Fictitious type denoting pointer into array of any dimensionality and + scalar type. + +---- + +.. c:function:: float cfp.arrayf.pointer.get(const cfp_ptrf self) +.. c:function:: double cfp.arrayd.pointer.get(const cfp_ptrd self) + + Dereference operator; :code:`*self`. + See :cpp:func:`pointer::operator*()`. + +---- + +.. c:function:: float cfp.arrayf.pointer.get_at(const cfp_ptrf self, ptrdiff_t d) +.. c:function:: double cfp.arrayd.pointer.get_at(const cfp_ptrd self, ptrdiff_t d) + + Offset dereference operator; :code:`self[d]`. + See :cpp:func:`pointer::operator[]()`. + +---- + +.. c:function:: void cfp.arrayf.pointer.set(cfp_ptrf self, float val) +.. c:function:: void cfp.arrayd.pointer.set(cfp_ptrd self, double val) + + Dereference operator with assignment; :code:`*self = val`. + See :cpp:func:`pointer::operator*()`. + +---- + +.. c:function:: void cfp.arrayf.pointer.set_at(cfp_ptrf self, ptrdiff_t d, float val) +.. c:function:: void cfp.arrayd.pointer.set_at(cfp_ptrd self, ptrdiff_t d, double val) + + Offset dereference operator with assignment; :code:`self[d] = val`. + See :cpp:func:`pointer::operator[]()`. + +---- + +.. c:function:: cfp_ref cfp.array.pointer.ref(cfp_ptr self) + + Get proxy reference to element stored at :code:`*self`. + See :cpp:func:`pointer::operator*()`. + +---- + +.. c:function:: cfp_ref cfp.array.pointer.ref_at(cfp_ptr self, ptrdiff_t d) + + Get proxy reference to element stored at :code:`self[d]`. + See :cpp:func:`pointer::operator[]()`. 
+
+----
+
+.. c:function:: zfp_bool cfp.array.pointer.lt(const cfp_ptr lhs, const cfp_ptr rhs)
+.. c:function:: zfp_bool cfp.array.pointer.gt(const cfp_ptr lhs, const cfp_ptr rhs)
+.. c:function:: zfp_bool cfp.array.pointer.leq(const cfp_ptr lhs, const cfp_ptr rhs)
+.. c:function:: zfp_bool cfp.array.pointer.geq(const cfp_ptr lhs, const cfp_ptr rhs)
+
+  Return true if the two pointers satisfy the given
+  :ref:`relationship `;
+  :code:`lhs < rhs`, :code:`lhs > rhs`, :code:`lhs <= rhs`, :code:`lhs >= rhs`.
+
+----
+
+.. c:function:: zfp_bool cfp.array.pointer.eq(const cfp_ptr lhs, const cfp_ptr rhs)
+
+  Compare two proxy pointers for equality; :code:`lhs == rhs`.
+  The pointers must be to elements with the same index within the same
+  array to satisfy equality. See :cpp:func:`pointer::operator==()`.
+
+----
+
+.. c:function:: zfp_bool cfp.array.pointer.neq(const cfp_ptr lhs, const cfp_ptr rhs)
+
+  Compare two proxy pointers for inequality; :code:`lhs != rhs`.
+  The pointers are not equal if they point to different arrays or to
+  elements with different index within the same array. See
+  :cpp:func:`pointer::operator!=()`.
+
+----
+
+.. c:function:: ptrdiff_t cfp.array.pointer.distance(const cfp_ptr first, const cfp_ptr last)
+
+  Return the difference between two proxy pointers in number of linear array
+  elements; :code:`last - first`. See :cpp:func:`pointer::operator-()`.
+
+----
+
+.. c:function:: cfp_ptr cfp.array.pointer.next(const cfp_ptr p, ptrdiff_t d)
+
+  Return the result of incrementing pointer by *d* elements; :code:`p + d`.
+  See :cpp:func:`pointer::operator+()`.
+
+----
+
+.. c:function:: cfp_ptr cfp.array.pointer.prev(const cfp_ptr p, ptrdiff_t d)
+
+  Return the result of decrementing pointer by *d* elements; :code:`p - d`.
+  See :cpp:func:`pointer::operator-()`.
+
+----
+
+.. c:function:: cfp_ptr cfp.array.pointer.inc(const cfp_ptr p)
+
+  Return the result of incrementing pointer by one element; :code:`p + 1`.
+  See :cpp:func:`pointer::operator++()`.
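Because proxy pointers are passed by value (see the note under Array Accessors), incrementing one never mutates it in place; one writes :code:`p = cfp.array.pointer.inc(p)`. The following self-contained mock, which is not the real |cfp| API but a miniature analogue over a plain C array, illustrates this struct-of-function-pointers calling convention:

```c
#include <stddef.h>

/* Mock of the cfp calling convention (NOT the real cfp API): a proxy
 * "pointer" is a small struct passed by value, and all operations are
 * functions reached through a struct of function pointers that acts
 * as a namespace. */
typedef struct { double* data; size_t index; } mock_ptr;

static double   mock_get(mock_ptr p)            { return p.data[p.index]; }
static void     mock_set(mock_ptr p, double v)  { p.data[p.index] = v; }
static mock_ptr mock_inc(mock_ptr p)            { p.index++; return p; } /* returns a new value; argument unchanged */
static int      mock_neq(mock_ptr a, mock_ptr b){ return a.data != b.data || a.index != b.index; }

typedef struct {
  double   (*get)(mock_ptr);
  void     (*set)(mock_ptr, double);
  mock_ptr (*inc)(mock_ptr);
  int      (*neq)(mock_ptr, mock_ptr);
} mock_ptr_api;

static const mock_ptr_api ptr_ns = { mock_get, mock_set, mock_inc, mock_neq };

double sum_with_mock_ptrs(double* a, size_t n)
{
  mock_ptr p = { a, 0 }, end = { a, n };
  double sum = 0;
  /* p is never modified in place; each "increment" reassigns it */
  for (; ptr_ns.neq(p, end); p = ptr_ns.inc(p))
    sum += ptr_ns.get(p);
  return sum;
}
```

The real :code:`cfp.array.pointer` namespace follows the same pattern, with :c:type:`cfp_ptr` playing the role of :code:`mock_ptr`.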
+ +---- + +.. c:function:: cfp_ptr cfp.array.pointer.dec(const cfp_ptr p) + + Return the result of decrementing pointer by one element; :code:`p - 1`. + See :cpp:func:`pointer::operator--()`. + + +.. _cfp_iterators: + +Iterators +--------- + +|cfp| random-access iterators wrap the C++ :ref:`iterator ` classes. +All iterators are :ref:`passed by value ` and +are themselves not modified by these functions. Iterators are constructed +similar to C++ iterators via :c:func:`cfp.array.begin` and +:c:func:`cfp.array.end`. Iterator usage maps closely to equivalent C++ +iterator syntax. For example, to set an array to all ones:: + + // _ and _iter are namespace aliases + const cfp_array3d_api _ = cfp.array3d; + const cfp_iter3d_api _iter = _.iterator; + + cfp_array3d a = _.ctor(nx, ny, nz, rate, 0, 0); + cfp_iter3d it; + + for (it = _.begin(a); _iter.neq(it, _.end(a)); it = _iter.inc(it)) + _iter.set(it, 1.0); + +.. c:type:: cfp_iter1f +.. c:type:: cfp_iter2f +.. c:type:: cfp_iter3f +.. c:type:: cfp_iter4f +.. c:type:: cfp_iter1d +.. c:type:: cfp_iter2d +.. c:type:: cfp_iter3d +.. c:type:: cfp_iter4d + + Opaque types for block iterators over 1D, 2D, 3D, and 4D compressed float + or double array elements. + +---- + +.. c:type:: cfp_iter1 +.. c:type:: cfp_iter2 +.. c:type:: cfp_iter3 +.. c:type:: cfp_iter4 + + Fictitious types denoting iterators over 1D, 2D, 3D, and 4D arrays of any + scalar type. + +---- + +.. c:type:: cfp_iterf +.. c:type:: cfp_iterd + + Fictitious types denoting iterators over float or double arrays of any + dimensionality. + +---- + +.. c:type:: cfp_iter + + Fictitious type denoting iterator over array of any dimensionality and + scalar type. + +---- + +.. c:function:: float cfp.arrayf.iterator.get(const cfp_iterf self) +.. c:function:: double cfp.arrayd.iterator.get(const cfp_iterd self) + + Return element referenced by iterator; :code:`*self`. + See :cpp:func:`iterator::operator*()`. + +---- + +.. 
c:function:: float cfp.array1f.iterator.get_at(const cfp_iter1f self, ptrdiff_t d) +.. c:function:: double cfp.array1d.iterator.get_at(const cfp_iter1d self, ptrdiff_t d) + + Return element *d* elements (may be negative) from iterator; :code:`self[d]`. + See :cpp:func:`iterator::operator[]()`. + +---- + +.. c:function:: void cfp.arrayf.iterator.set(cfp_iterf self, float val) +.. c:function:: void cfp.arrayd.iterator.set(cfp_iterd self, double val) + + Update element referenced by iterator; :code:`*self = val`. + See :cpp:func:`iterator::operator*()`. + +---- + +.. c:function:: void cfp.array1f.iterator.set_at(cfp_iter1 self, ptrdiff_t d, float val) +.. c:function:: void cfp.array1d.iterator.set_at(cfp_iter1 self, ptrdiff_t d, double val) + + Update element *d* elements (may be negative) from iterator; + :code:`self[d] = val`. + See :cpp:func:`iterator::operator[]()`. + +---- + +.. c:function:: cfp_ref cfp.array.iterator.ref(cfp_iter self) + + Return reference to element referenced by iterator; :code:`*self`. + See :cpp:func:`iterator::operator*()`. + +---- + +.. c:function:: cfp_ref cfp.array.iterator.ref_at(cfp_iter self, ptrdiff_t d) + + Return reference to an element offset *d* elements (may be negative) from + iterator; :code:`self[d]`. + See :cpp:func:`iterator::operator[]()`. + +---- + +.. c:function:: cfp_ptr cfp.array.iterator.ptr(cfp_iter self) + + Return pointer to element referenced by iterator; + :code:`&*self`. + +---- + +.. c:function:: cfp_ptr cfp.array.iterator.ptr_at(cfp_iter self, ptrdiff_t d) + + Return pointer to element offset *d* elements (may be negative) from + iterator; :code:`&self[d]`. + +---- + +.. c:function:: size_t cfp.array.iterator.i(const cfp_iter self) +.. c:function:: size_t cfp.array.iterator.j(const cfp_iter self) +.. c:function:: size_t cfp.array.iterator.k(const cfp_iter self) +.. 
c:function:: size_t cfp.array.iterator.l(const cfp_iter self) + + Return *i*, *j*, *k*, and *l* component of array element referenced by + iterator; see :cpp:func:`iterator::i()`, :cpp:func:`iterator::j()`, + :cpp:func:`iterator::k()`, and :cpp:func:`iterator::l()`. + +---- + +.. c:function:: zfp_bool cfp.array.iterator.lt(const cfp_iter lhs, const cfp_iter rhs) +.. c:function:: zfp_bool cfp.array.iterator.gt(const cfp_iter lhs, const cfp_iter rhs) +.. c:function:: zfp_bool cfp.array.iterator.leq(const cfp_iter lhs, const cfp_iter rhs) +.. c:function:: zfp_bool cfp.array.iterator.geq(const cfp_iter lhs, const cfp_iter rhs) + + Return true if the two iterators satisfy the given + :ref:`relationship `; + :code:`lhs < rhs`, :code:`lhs > rhs`, :code:`lhs <= rhs`, :code:`lhs >= rhs`. + +---- + +.. c:function:: zfp_bool cfp.array.iterator.eq(const cfp_iter lhs, const cfp_iter rhs) + + Return whether two iterators are equal; :code:`lhs == rhs`. + See :cpp:func:`iterator::operator==()`. + +---- + +.. c:function:: zfp_bool cfp.array.iterator.neq(const cfp_iter lhs, const cfp_iter rhs) + + Return whether two iterators are not equal; :code:`lhs != rhs`. + See :cpp:func:`iterator::operator!=()`. + +---- + +.. c:function:: ptrdiff_t cfp.array.iterator.distance(const cfp_iter first, const cfp_iter last) + + Return the difference between two iterators; :code:`last - first`. + See :cpp:func:`iterator::operator-()`. + +---- + +.. c:function:: cfp_iter cfp.array.iterator.next(const cfp_iter it, ptrdiff_t d) + + Return the result of advancing iterator by *d* elements; :code:`it + d`. + See :cpp:func:`iterator::operator+()`. + +---- + +.. c:function:: cfp_iter cfp.array.iterator.prev(const cfp_iter it, ptrdiff_t d) + + Return the result of decrementing iterator by *d* elements; :code:`it - d`. + See :cpp:func:`iterator::operator-()`. + +---- + +.. 
c:function:: cfp_iter cfp.array.iterator.inc(const cfp_iter it) + + Return the result of incrementing iterator by one element; + :code:`it + 1`. See :cpp:func:`iterator::operator++()`. + +---- + +.. c:function:: cfp_iter cfp.array.iterator.dec(const cfp_iter it) + + Return the result of decrementing iterator by one element; + :code:`it - 1`. See :cpp:func:`iterator::operator--()`. diff --git a/docs/source/codec.inc b/docs/source/codec.inc new file mode 100644 index 00000000..136e42a3 --- /dev/null +++ b/docs/source/codec.inc @@ -0,0 +1,258 @@ +.. index:: + single: Codec +.. _codec: + +Codec +----- + +.. cpp:namespace:: zfp + +|zfp| arrays are partitioned into independent blocks that are compressed +and decompressed using a *codec* (encoder/decoder). This codec defaults +to the |zfp| compression scheme, but can in principle be any compression +scheme or number representation that represents *d*-dimensional blocks +of |4powd| values. The :cpp:class:`zfp::array` and +:cpp:class:`zfp::const_array` classes take such a codec class as an +optional template parameter. + +This section documents the API that prospective codecs must support to +interface with the |zfp| compressed-array classes. Any one codec supports a +specific scalar type (e.g., :code:`float` or :code:`double`), denoted +:code:`Scalar` below, and data dimensionality (1D, 2D, 3D, or 4D). If the +codec does not support a certain compression mode, it should throw an +:ref:`exception ` when the user attempts to invoke that mode. +Codecs reside in the :code:`zfp::codec` namespace, e.g., +:code:`zfp::codec::zfp3` is the default codec for 3D arrays. + +As of |zfp| |cpprelease|, there is in addition to the default |zfp| codec +a "generic" codec that allows storing data in |zfp| arrays in "uncompressed" +form using any scalar type (specified as a template parameter). This +"internal" scalar type may differ from the "external" scalar type exposed +to the user through the :cpp:class:`zfp::array` API. 
For instance, the +internal type may be :code:`float` while the external type is :code:`double`, +which provides for 2:1 fixed-rate "compression" using IEEE 754 floating point. + +.. cpp:namespace:: zfp::codec + +.. cpp:class:: codec + + Fictitious class encapsulating the codec API. This may be thought of as + a base class for the classes below specialized on dimensionality. + +---- + +.. cpp:class:: codec1 +.. cpp:class:: codec2 +.. cpp:class:: codec3 +.. cpp:class:: codec4 + + Fictitious classes encapsulating the codec API specialized for a given + data dimensionality (1D, 2D, 3D, or 4D). + +---- + +.. cpp:function:: codec& codec::operator=(const codec& c) + + Assignment operator. Performs a deep copy. This method is invoked when + performing a :ref:`deep copy ` of an array. + +---- + +.. cpp:function:: size_t codec::buffer_size(const zfp_field* field) const + + Maximum buffer size needed to encode the *field* of given scalar type and + dimensions (see :c:func:`zfp_stream_maximum_size`). The size should be + based on the current compression mode and parameters. This method is + called to determine how large a buffer to allocate and pass to + :cpp:func:`codec::open`. + +---- + +.. cpp:function:: void codec::open(void* data, size_t size) + + Open codec for (de)compression to/from buffer pointed to by *data* of + *size* bytes. The caller is responsible for allocating and deallocating + this buffer, whose *size* is given by :cpp:func:`codec::buffer_size`. + +---- + +.. cpp:function:: void codec::close() + + Close codec for (de)compression. + +---- + +.. cpp:function:: zfp_mode codec::mode() const + + Currently selected :ref:`compression mode `. See :c:enum:`zfp_mode`. + +---- + +.. cpp:function:: double codec::rate() const + + Rate in compressed bits/value when :ref:`fixed-rate mode ` + is selected. See :c:func:`zfp_stream_rate`. + +---- + +.. 
cpp:function:: uint codec::precision() const + + Precision in uncompressed bits/value when + :ref:`fixed-precision mode ` is selected. See + :c:func:`zfp_stream_precision`. + +---- + +.. cpp:function:: double codec::accuracy() const + + Accuracy as absolute error tolerance when + :ref:`fixed-accuracy mode ` is selected. See + :c:func:`zfp_stream_accuracy`. + +---- + +.. cpp:function:: void codec::params(uint* minbits, uint* maxbits, uint* maxprec, int* minexp) const + + Compression parameters for any compression mode. These pointer parameters + may be :code:`null` if only a subset of parameters is requested. + See :c:func:`zfp_stream_params`. + +---- + +.. cpp:function:: void codec::set_reversible() + + Enable :ref:`reversible mode `. + +---- + +.. cpp:function:: double codec::set_rate(double rate, bool align) + + Set desired *rate* in number of compressed bits/value. When *align* = true, + blocks are word aligned, as needed for random access writes. Return + the closest rate supported. See :c:func:`zfp_stream_set_rate`. + +---- + +.. cpp:function:: uint codec::set_precision(uint precision) + + Set precision in number of uncompressed bits/value. Return the actual + precision selected. See :c:func:`zfp_stream_set_precision`. + +---- + +.. cpp:function:: double codec::set_accuracy(double tolerance) + + Set accuracy as absolute error tolerance. Return the closest tolerance + supported. See :c:func:`zfp_stream_set_accuracy`. + +---- + +.. cpp:function:: bool codec::set_params(uint minbits, uint maxbits, uint maxprec, int minexp) + + Set expert mode parameters. Return :code:`true` on success. + See :c:func:`zfp_stream_set_params`. + +---- + +.. cpp:function:: bool codec::set_thread_safety(bool safety) + + Enable or disable thread safety. This function is called whenever |zfp| + is built with OpenMP support and when the number of mutable or immutable + :ref:`private views ` of an array changes. 
When + two or more private views of an array are accessed by separate threads, + multiple blocks may be compressed or decompressed simultaneously. The + codec then has to take care that there are no race conditions on the data + structures (e.g., :c:type:`bitstream`) used for (de)compression. + +---- + +.. cpp:function:: size_t codec::size_bytes(uint mask = ZFP_DATA_ALL) const + + Return storage size of components of codec data structure indicated by + *mask*. The mask is constructed via bitwise OR of + :ref:`predefined constants `. + +---- + +.. cpp:function:: static size_t codec::alignment() + + Memory alignment in number of bytes required by codec. + +.. cpp:var:: static const zfp_type codec::type; + + :c:type:`Scalar type ` compressed by codec. + +---- + +.. cpp:function:: size_t codec::encode_block(bitstream_offset offset, const Scalar* block) const + + Encode contiguous *block* of |4powd| scalars and store at specified bit + *offset* within compressed-data buffer. Return the number of bits of + compressed storage for the block, excluding any necessary padding. This + method must flush any buffered compressed data without counting any padding + (e.g., for byte alignment) in the compressed size (unless the codec requires + alignment of the bit offsets). + +---- + +.. cpp:function:: size_t codec::decode_block(bitstream_offset offset, Scalar* block) const + + Decode contiguous *block* of |4powd| scalars from specified bit *offset* + within compressed-data buffer (see :cpp:func:`codec::encode_block`). + Return number of bits of compressed data decoded, excluding any padding + bits, i.e., the same value reported in encoding. + +---- + +.. cpp:function:: size_t codec1::encode_block(bitstream_offset offset, uint shape, const Scalar* block) const +.. cpp:function:: size_t codec2::encode_block(bitstream_offset offset, uint shape, const Scalar* block) const +.. cpp:function:: size_t codec3::encode_block(bitstream_offset offset, uint shape, const Scalar* block) const +.. 
cpp:function:: size_t codec4::encode_block(bitstream_offset offset, uint shape, const Scalar* block) const + + Encode contiguous *block* of data of given *shape* and store at specified + bit *offset* within compressed-data buffer. Return the number of bits of + compressed storage for the block (see also :cpp:func:`codec::encode_block`). + + The *shape* is a (2 |times| *d*)-bit encoding of the size of the + *d*-dimensional block. For each successive pair of bits *s* of *shape*, + the block size in the corresponding dimension is *n* = 4 - *s*, where + 0 |leq| *s* |leq| 3. Thus, *shape* = 0 implies a full block of |4powd| + values. The size of the fastest varying dimension is specified in the + least significant bits of *shape*. + +---- + +.. cpp:function:: size_t codec1::decode_block(bitstream_offset offset, uint shape, Scalar* block) const +.. cpp:function:: size_t codec2::decode_block(bitstream_offset offset, uint shape, Scalar* block) const +.. cpp:function:: size_t codec3::decode_block(bitstream_offset offset, uint shape, Scalar* block) const +.. cpp:function:: size_t codec4::decode_block(bitstream_offset offset, uint shape, Scalar* block) const + + Decode contiguous *block* of data of given *shape* from specified bit + *offset* within compressed-data buffer (see also + :cpp:func:`codec1::encode_block`). Return number of bits of compressed + data decoded, excluding any padding bits, i.e., the same value reported + in encoding. + +---- + +.. cpp:function:: size_t codec1::encode_block_strided(bitstream_offset offset, uint shape, const Scalar* p, ptrdiff_t sx) const +.. cpp:function:: size_t codec2::encode_block_strided(bitstream_offset offset, uint shape, const Scalar* p, ptrdiff_t sx, ptrdiff_t sy) const +.. cpp:function:: size_t codec3::encode_block_strided(bitstream_offset offset, uint shape, const Scalar* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz) const +.. 
cpp:function:: size_t codec4::encode_block_strided(bitstream_offset offset, uint shape, const Scalar* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw) const + + Encode block of data stored at *p* with strides *sx*, *sy*, *sz*, + and *sw*. See :c:type:`zfp_field` for information on strided storage. + The *shape*, *offset*, and return value are as in + :cpp:func:`codec1::encode_block`. + +---- + +.. cpp:function:: size_t codec1::decode_block_strided(bitstream_offset offset, uint shape, Scalar* p, ptrdiff_t sx) const +.. cpp:function:: size_t codec2::decode_block_strided(bitstream_offset offset, uint shape, Scalar* p, ptrdiff_t sx, ptrdiff_t sy) const +.. cpp:function:: size_t codec3::decode_block_strided(bitstream_offset offset, uint shape, Scalar* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz) const +.. cpp:function:: size_t codec4::decode_block_strided(bitstream_offset offset, uint shape, Scalar* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw) const + + Decode block to strided storage pointed to by *p* with strides *sx*, *sy*, + *sz*, and *sw*. See :c:type:`zfp_field` for information on strided storage. + The *shape*, *offset*, and return value are as in + :cpp:func:`codec1::decode_block`. diff --git a/docs/source/conf.py b/docs/source/conf.py new file mode 100644 index 00000000..0536c39e --- /dev/null +++ b/docs/source/conf.py @@ -0,0 +1,181 @@ +# -*- coding: utf-8 -*- +# +# zfp documentation build configuration file, created by +# sphinx-quickstart on Mon Jul 3 18:31:07 2017. +# +# This file is execfile()d with the current directory set to its +# containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. 
If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# +# import os +# import sys +# sys.path.insert(0, os.path.abspath('.')) + + +# -- General configuration ------------------------------------------------ + +# If your documentation needs a minimal Sphinx version, state it here. +# +# needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = ['sphinx.ext.imgmath', 'sphinxfortran.fortran_domain'] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix(es) of source filenames. +# You can specify multiple suffix as a list of string: +# +# source_suffix = ['.rst', '.md'] +source_suffix = '.rst' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. +project = u'zfp' +copyright = u'2014-2023, LLNL-CODE-663824' +author = u'Peter Lindstrom, Danielle Asher' + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The short X.Y version. +version = u'1.0' +# The full version, including alpha/beta/rc tags. +release = u'1.0.1' + +# The release date (as the RTD server is in another time zone). +today = u'Dec 15, 2023' + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +# +# This is also used if you do content translation via gettext catalogs. +# Usually you set "language" from the command line for these cases. +language = 'en' + +# Enable automatic numbering of figures referenced by :numref:. +numfig = True + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. 
+# This patterns also effect to html_static_path and html_extra_path +exclude_patterns = [] + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + +# If true, `todo` and `todoList` produce output, else they produce nothing. +todo_include_todos = False + + +# -- Options for HTML output ---------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = 'pyramid' + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +# +# html_theme_options = {} + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +#html_static_path = ['_static'] +html_static_path = [] + +# Custom sidebar templates, must be a dictionary that maps document names +# to template names. +# +# This is required for the alabaster theme +# refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars +#html_sidebars = { +# '**': [ +# 'about.html', +# 'navigation.html', +# 'relations.html', # needs 'show_related': True theme option to display +# 'searchbox.html', +# 'donate.html', +# ] +#} + + +# -- Options for HTMLHelp output ------------------------------------------ + +# Output file base name for HTML help builder. +htmlhelp_basename = 'zfpdoc' + + +# -- Options for LaTeX output --------------------------------------------- + +latex_elements = { + # The paper size ('letterpaper' or 'a4paper'). + # + # 'papersize': 'letterpaper', + + # The font size ('10pt', '11pt' or '12pt'). + # + # 'pointsize': '10pt', + + # Additional stuff for the LaTeX preamble. 
+ # + # 'preamble': '', + + # Latex figure (float) alignment + # + # 'figure_align': 'htbp', + + 'inputenc' : '\\usepackage[utf8x]{inputenc}', + 'utf8extra': '', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, +# author, documentclass [howto, manual, or own class]). +latex_documents = [ + (master_doc, 'zfp.tex', u'zfp Documentation', + u'\shortstack[l]{Peter Lindstrom\\\\Danielle Asher}', 'manual'), +] + + +# -- Options for manual page output --------------------------------------- + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [ + (master_doc, 'zfp', u'zfp Documentation', + [author], 1) +] + + +# -- Options for Texinfo output ------------------------------------------- + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + (master_doc, 'zfp', u'zfp Documentation', + author, 'zfp', 'One line description of project.', + 'Miscellaneous'), +] + + + diff --git a/docs/source/contributors.rst b/docs/source/contributors.rst new file mode 100644 index 00000000..45e945c2 --- /dev/null +++ b/docs/source/contributors.rst @@ -0,0 +1,24 @@ +.. include:: defs.rst +.. _contributors: + +Contributors +============ + +* |zfp| development team + + - Peter Lindstrom + - Danielle Asher + +* Major contributors + + - Chuck Atkins + - Stephen Herbein + - Mark Kim + - Matt Larsen + - Mark Miller + - Markus Salasoo + - David Wade + - Haiying Xu + +For a full list of contributors, see the +`GitHub Contributors `__ page. diff --git a/docs/source/defs.rst b/docs/source/defs.rst new file mode 100644 index 00000000..b0210c1f --- /dev/null +++ b/docs/source/defs.rst @@ -0,0 +1,42 @@ +.. |times| unicode:: 0x00d7 +.. |minus| unicode:: 0x2212 +.. |leq| unicode:: 0x2264 +.. |geq| unicode:: 0x2265 +.. |approx| unicode:: 0x2248 +.. 
|sqrt| unicode:: 0x221a +.. |check| unicode:: 0x2713 +.. |reg| unicode:: 0x00ae +.. |tm| unicode:: 0x2122 +.. |zfp| replace:: zfp +.. |cfp| replace:: cfp +.. |zforp| replace:: zFORp +.. |zfpy| replace:: zfPy +.. |libzfp| replace:: :file:`libzfp` +.. |libcfp| replace:: :file:`libcfp` +.. |libzforp| replace:: :file:`libzFORp` +.. |zfpcmd| replace:: :program:`zfp` +.. |testzfp| replace:: :program:`testzfp` +.. |4powd| replace:: 4\ :sup:`d` +.. |4by4| replace:: 4 |times| 4 +.. |4by4by4| replace:: 4 |times| 4 |times| 4 +.. |4by4by4by4| replace:: 4 |times| 4 |times| 4 |times| 4 +.. |proxyrelease| replace:: 0.5.2 +.. |omprelease| replace:: 0.5.3 +.. |dcrelease| replace:: 0.5.3 +.. |4drelease| replace:: 0.5.4 +.. |viewsrelease| replace:: 0.5.4 +.. |cudarelease| replace:: 0.5.4 +.. |cfprelease| replace:: 0.5.4 +.. |revrelease| replace:: 0.5.5 +.. |zforprelease| replace:: 0.5.5 +.. |zfpyrelease| replace:: 0.5.5 +.. |csizerelease| replace:: 0.5.5 +.. |crpirelease| replace:: 1.0.0 +.. |raiterrelease| replace:: 1.0.0 +.. |64bitrelease| replace:: 1.0.0 +.. |boolrelease| replace:: 1.0.0 +.. |4darrrelease| replace:: 1.0.0 +.. |fieldrelease| replace:: 1.0.0 +.. |carrrelease| replace:: 1.0.0 +.. |cpprelease| replace:: 1.0.0 +.. |verrelease| replace:: 1.0.0 diff --git a/docs/source/directions.rst b/docs/source/directions.rst new file mode 100644 index 00000000..adff6246 --- /dev/null +++ b/docs/source/directions.rst @@ -0,0 +1,85 @@ +.. include:: defs.rst + +.. _directions: + +Future Directions +================= + +|zfp| is actively being developed and plans have been made to add a number of +important features, including: + +- **Tagging of missing values**. |zfp| currently assumes that arrays are + dense, i.e., each array element stores a valid numerical value. In many + science applications this is not the case. For instance, in climate + modeling, ocean temperature is not defined over land. 
In other + applications, the domain is not rectangular but irregular and embedded in a + rectangular array. Such examples of sparse arrays demand a mechanism to tag + values as missing or indeterminate. Current solutions often rely on tagging + missing values as NaNs or special, often very large sentinel values outside + the normal range, which can lead to poor compression and complete loss of + accuracy in nearby valid values. See FAQ :ref:`#7 `. + +- **Support for NaNs and infinities**. Similar to missing values, some + applications store special IEEE floating-point values that are supported + by |zfp| only in :ref:`reversible mode `. + In fact, for all lossy compression modes, the presence of such values will + currently result in undefined behavior and loss of data for all values + within a block that contains non-finite values. + +- **Support for more general data types**. |zfp| currently does not + directly support half and quad precision floating point. Nor is there + support for 8- and 16-bit integers. With the emergence of new number + representations like *posits* and *bfloat16*, we envision the need for + a more general interface and a single unified |zfp| representation that + would allow for *conversion* between |zfp| and *any* number representation. + We are working on developing an uncompressed interchange format that acts + like an intermediary between |zfp| and other number formats. This format + decouples the |zfp| compression pipeline from the external number type and + allows new number formats to be supported via user-defined conversion + functions to and from the common interchange format. + +- **Progressive decompression**. Streaming large data sets from remote + storage for visualization can be time consuming, even when the data is + compressed. Progressive streaming allows the data to be reconstructed + at reduced precision over the entire domain, with quality increasing + progressively as more data arrives. 
The low-level bit stream interface + already supports progressive access by interleaving bits across blocks + (see FAQ :ref:`#13 `), but |zfp| lacks a high-level API + for generating and accessing progressive streams. + +- **Parallel compression**. |zfp|'s data partitioning into blocks invites + opportunities for data parallelism on multithreaded platforms by dividing + the blocks among threads. An OpenMP implementation of parallel + compression is available that produces compressed streams that + are identical to serially compressed streams. However, parallel + decompression is not yet supported. |zfp| also supports compression and + decompression on the GPU via CUDA. However, only fixed-rate mode is + so far supported. + +- **Variable-rate arrays**. |zfp| currently offers only fixed-rate + compressed arrays with random-access write support; |zfp| |carrrelease| + further provides read-only variable-rate arrays. Fixed-rate arrays waste + bits in smooth regions with little information content while too few bits + may be allocated to accurately preserve sharp features such as shocks and + material interfaces, which tend to drive the physics in numerical + simulations. A candidate solution has been developed for variable-rate + arrays that support read-write random access with modest storage overhead. + We expect to release this capability in the near future. + +- **Array operations**. |zfp|'s compressed arrays currently support basic + indexing and initialization, but lack array-wise operations such as + arithmetic, reductions, etc. Some such operations can exploit the + higher precision (than IEEE-754) supported by |zfp|, as well as accelerated + blockwise computations that need not fully decompress and convert the + |zfp| representation to IEEE-754. + +- **Language bindings**. The main compression codec is written in C89 to + facilitate calls from other languages. |zfp|'s compressed arrays, on + the other hand, are written in C++. 
|zfp| |cfprelease| and |zforprelease| + add C wrappers around compressed arrays and Fortran and Python bindings to + the high-level C API. Work is planned to provide additional language + bindings for C, C++, Fortran, and Python to expose the majority of |zfp|'s + capabilities through all of these programming languages. + +Please `contact us `__ with requests for +features not listed above. diff --git a/docs/source/disclaimer.inc b/docs/source/disclaimer.inc new file mode 100644 index 00000000..58a69ba0 --- /dev/null +++ b/docs/source/disclaimer.inc @@ -0,0 +1,9 @@ +.. note:: + In multidimensional arrays, the order in which dimensions are specified + is important. In |zfp|, the memory layout convention is such that *x* + varies faster than *y*, which varies faster than *z*, and hence *x* should + map to the innermost (rightmost) array dimension in a C array and to the + leftmost dimension in a Fortran array. Getting the order of dimensions + right is crucial for good compression and accuracy. See the discussion of + :ref:`dimensions and strides ` and FAQ :ref:`#0 ` for + further information. diff --git a/docs/source/examples.rst b/docs/source/examples.rst new file mode 100644 index 00000000..c73084d0 --- /dev/null +++ b/docs/source/examples.rst @@ -0,0 +1,205 @@ +.. include:: defs.rst + +Code Examples +============= + +The :file:`examples` directory includes ten programs that make use of the +compressor. + +.. _ex-simple: + +Simple Compressor +----------------- + +The :program:`simple` program is a minimal example that shows how to call +the compressor and decompressor on a double-precision 3D array. Without +the :code:`-d` option, it will compress the array and write the compressed +stream to standard output. With the :code:`-d` option, it will instead +read the compressed stream from standard input and decompress the array:: + + simple > compressed.zfp + simple -d < compressed.zfp + +For a more elaborate use of the compressor, see the +:ref:`zfp utility `. + +.. 
_ex-array: + +Compressed-Array C++ Classes +---------------------------- + +The :program:`array` program shows how to declare, write to, and read from +|zfp|'s compressed-array C++ objects (in this case, 2D double-precision +arrays), which is essentially as straightforward as working with STL vectors. +This example initializes a 2D array with a linear ramp of 12 |times| 8 = 96 +values using only four bits of storage per value, which using uncompressed +storage would not be enough to distinguish more than 16 different values. +For more advanced compressed-array features, see the +:ref:`tutorial `. + +.. _ex-diffusion: + +Diffusion Solver +---------------- + +The :program:`diffusion` example is a simple forward Euler solver for the +heat equation on a 2D regular grid, and is intended to show how to declare +and work with |zfp|'s compressed arrays, as well as give an idea of how +changing the compression parameters and cache size affects the error in the +solution and solution time. The usage is:: + + diffusion [options] + -a <tolerance> : absolute error tolerance (requires -c) + -b <blocks> : cache size in number of 4x4 blocks + -c : use read-only arrays (needed for -a, -p, -R) + -d : use double-precision tiled arrays + -f : use single-precision tiled arrays + -h : use half-precision tiled arrays + -i : traverse arrays using iterators instead of integer indices + -j : use OpenMP parallel execution (requires -r) + -n <nx> <ny> : grid dimensions + -p <precision> : precision in uncompressed bits/value (requires -c) + -r <rate> : rate in compressed bits/value + -R : reversible mode (requires -c) + -t <nt> : number of time steps + +Here *rate* specifies the exact number of compressed bits to store per +double-precision floating-point value; *nx* and *ny* specify the grid size +(default = 128 |times| 128); *nt* specifies the number of time steps to take +(the default is to run until time *t* = 1); and *blocks* is the number of +uncompressed blocks to cache (default = *nx* / 2). 
The :code:`-i` option +enables array traversal via iterators instead of indices. + +The :code:`-j` option enables OpenMP parallel execution, which makes use +of both mutable and immutable :ref:`private views ` +for thread-safe array access. Note that this example has not been +optimized for parallel performance, but rather serves to show how to +work with |zfp|'s compressed arrays in a multithreaded setting. + +This example also illustrates how :ref:`read-only arrays ` +(:code:`-c`) may be used in conjunction with fixed-rate (:code:`-r`), +fixed-precision (:code:`-p`), fixed-accuracy (:code:`-a`), +or reversible (:code:`-R`) mode. + +The output lists for each time step the current rate of the state array and +in parentheses any additional storage, e.g., for the block +:ref:`cache ` and :ref:`index ` data structures, both in bits +per array element. Running diffusion with the following arguments:: + + diffusion -r 8 + diffusion -r 12 + diffusion -r 16 + diffusion -r 24 + diffusion + +should result in this final output:: + + sum=0.995170 error=4.044954e-07 + sum=0.998151 error=1.237837e-07 + sum=0.998345 error=1.212734e-07 + sum=0.998346 error=1.212716e-07 + sum=0.998346 error=1.212716e-07 + +For speed and quality comparison, the solver solves the same problem using +uncompressed double-precision row-major arrays when compression parameters +are omitted. If one of :code:`-h`, :code:`-f`, :code:`-d` is specified, +uncompressed tiled arrays are used. These arrays are based on the |zfp| +array classes but make use of the :ref:`generic codec `, which +stores blocks as uncompressed scalars of the specified type (:code:`half`, +:code:`float`, or :code:`double`) while utilizing a double-precision block +cache (like |zfp|'s compressed arrays). + +The :program:`diffusionC` program is the same example written entirely +in C using the |cfp| :ref:`wrappers ` around the C++ compressed array +classes. + +.. 
_ex-speed: + +Speed Benchmark +--------------- + +The :program:`speed` program takes two optional parameters:: + + speed [rate] [blocks] + +It measures the throughput of compression and decompression of 3D +double-precision data (in megabytes of uncompressed data per second). +By default, a rate of 1 bit/value and two million blocks are +processed. + +.. _ex-pgm: + +PGM Image Compression +--------------------- + +The :program:`pgm` program illustrates how |zfp| can be used to compress +grayscale images in the +`pgm format `_. The usage is:: + + pgm <param> <input.pgm >output.pgm + +If :code:`param` is positive, it is interpreted as the rate in bits per pixel, +which ensures that each block of |4by4| pixels is compressed to a fixed +number of bits, as in texture compression codecs. +If :code:`param` is negative, then fixed-precision mode is used with precision +:code:`-param`, which tends to give higher quality for the same rate. This +use of |zfp| is not intended to compete with existing texture and image +compression formats, but exists merely to demonstrate how to compress 8-bit +integer data with |zfp|. See FAQs :ref:`#20 ` and +:ref:`#21 ` for information on the effects of setting the +precision. + +.. _ex-ppm: + +PPM Image Compression +--------------------- + +The :program:`ppm` program is analogous to the :program:`pgm` example, but +has been designed for compressing color images in the +`ppm format `_. Rather than +compressing RGB channels independently, ppm exploits common strategies for +color image compression such as color channel decorrelation and chroma +subsampling. + +The usage is essentially the same as for :ref:`pgm `:: + + ppm <param> <input.ppm >output.ppm + +where a positive :code:`param` specifies the rate in bits per pixel; when +negative, it specifies the precision (number of bit planes to encode) in +fixed-precision mode. + +.. 
_ex-inplace: + +In-place Compression +-------------------- + +The :program:`inplace` example shows how one might use zfp to perform in-place +compression and decompression when memory is at a premium. Here the +floating-point array is overwritten with compressed data, which is later +decompressed back in place. This example also shows how to make use of +some of the low-level features of zfp, such as its low-level, block-based +compression API and bit stream functions that perform seeks on the bit +stream. The program takes one optional argument:: + + inplace [tolerance] + +which specifies the fixed-accuracy absolute tolerance to use during +compression. Please see FAQ :ref:`#19 ` for more on the +limitations of in-place compression. + +.. _ex-iterators: + +Iterators +--------- + +The :program:`iterator` example illustrates how to use |zfp|'s +compressed-array iterators and pointers for traversing arrays. For +instance, it gives an example of sorting a 1D compressed array +using :cpp:func:`std::sort`. This example takes no command-line +options. + +The :program:`iteratorC` example illustrates the equivalent |cfp| +iterator operations. It closely follows the usage shown in the +:program:`iterator` example with some minor differences. It +likewise takes no command-line options. diff --git a/docs/source/execution.rst b/docs/source/execution.rst new file mode 100644 index 00000000..7a69cf2d --- /dev/null +++ b/docs/source/execution.rst @@ -0,0 +1,310 @@ +.. include:: defs.rst + +.. index:: + single: Parallel execution +.. _execution: + +Parallel Execution +================== + +As of |zfp| |omprelease|, parallel compression (but not decompression) is +supported on multicore processors via `OpenMP `_ +threads. +|zfp| |cudarelease| adds `CUDA `_ +support for fixed-rate compression and decompression on the GPU. + +Since |zfp| partitions arrays into small independent blocks, a +large amount of data parallelism is inherent in the compression scheme that +can be exploited. 
In principle, concurrency is limited only by the number +of blocks that make up an array, though in practice each thread is +responsible for compressing a *chunk* of several contiguous blocks. + +.. note:: + |zfp| parallel compression is confined to shared memory on a single + compute node or GPU. No effort is made to coordinate compression across + distributed memory on networked compute nodes, although |zfp|'s fine-grained + partitioning of arrays should facilitate distributed parallel compression. + +This section describes the |zfp| parallel compression algorithm and explains +how to configure |libzfp| and enable parallel compression at run time via +its :ref:`high-level C API `. + +.. note:: + Parallel compression is not supported via the :ref:`low-level API `, + which ignores all execution policy settings and always executes in serial. + +.. _exec-policies: + +Execution Policies +------------------ + +|zfp| supports multiple *execution policies*, which dictate how (e.g., +sequentially, in parallel) and where (e.g., on the CPU or GPU) arrays are +compressed. Currently three execution policies are available: +``serial``, ``omp``, and ``cuda``. The default mode is +``serial``, which ensures sequential compression on a single thread. +The ``omp`` and ``cuda`` execution policies allow for data-parallel +compression on multiple threads. + +The execution policy is set by :c:func:`zfp_stream_set_execution` and +pertains to a particular :c:type:`zfp_stream`. Hence, each stream +(and array) may use a policy suitable for that stream. For instance, +very small arrays are likely best compressed in serial, while parallel +compression is best reserved for very large arrays that can take the +most advantage of concurrent execution. + +As outlined in FAQ :ref:`#23 `, the final compressed stream +is independent of execution policy. + + +Execution Parameters +-------------------- + +Each execution policy allows tailoring the execution via its associated +*execution parameters*. 
Examples include number of threads, chunk size, +scheduling, etc. The ``serial`` and ``cuda`` policies have no +parameters. The subsections below discuss the ``omp`` parameters. + +Whenever the execution policy is changed via +:c:func:`zfp_stream_set_execution`, its parameters (if any) are initialized +to their defaults, overwriting any prior setting. + + +OpenMP Thread Count +^^^^^^^^^^^^^^^^^^^ + +By default, the number of threads to use is given by the current setting +of the OpenMP internal control variable *nthreads-var*. Unless the +calling thread has explicitly requested a thread count via the OpenMP +API, this control variable usually defaults to the number of threads +supported by the hardware (e.g., the number of available cores). + +To set the number of requested threads to be used by |zfp|, which may +differ from the thread count of encapsulating or surrounding OpenMP +parallel regions, call :c:func:`zfp_stream_set_omp_threads`. + +The user is advised to call the |zfp| API functions to modify OpenMP +behavior rather than make direct OpenMP calls. For instance, use +:c:func:`zfp_stream_set_omp_threads` rather than +:code:`omp_set_num_threads()`. To indicate that the current OpenMP +settings should be used, for instance as determined by the global +OpenMP environment variable :envvar:`OMP_NUM_THREADS`, pass a thread +count of zero (the default setting) to :c:func:`zfp_stream_set_omp_threads`. + +Note that |zfp| does not modify *nthreads-var* or other control variables +but uses a :code:`num_threads` clause on the OpenMP :code:`#pragma` line. +Hence, no OpenMP state is changed and any subsequent OpenMP code is not +impacted by |zfp|'s parallel compression. + + +.. index:: + single: Chunks +.. _chunks: + +OpenMP Chunk Size +^^^^^^^^^^^^^^^^^ + +The *d*-dimensional array is partitioned into *chunks*, with each chunk +representing a contiguous sequence of :ref:`blocks ` of |4powd| +array elements each. 
Chunks represent the unit of parallel work assigned +to a thread. By default, the array is partitioned so that each thread +processes one chunk. However, the user may override this behavior by +setting the chunk size (in number of |zfp| blocks) via +:c:func:`zfp_stream_set_omp_chunk_size`. See FAQ :ref:`#25 ` +for a discussion of chunk sizes and parallel performance. + + +OpenMP Scheduling +^^^^^^^^^^^^^^^^^ + +|zfp| does not specify how to schedule chunk processing. The schedule +used is given by the OpenMP *def-sched-var* internal control variable. +If load balance is poor, it may be improved by using smaller chunks, +which may or may not impact performance depending on the OpenMP schedule +in use. Future versions of |zfp| may allow specifying how threads are +mapped to chunks, whether to use static or dynamic scheduling, etc. + + +.. _exec-mode: + +Fixed- vs. Variable-Rate Compression +------------------------------------ + +Following partitioning into chunks, |zfp| assigns each chunk to a thread. +If there are more chunks than threads supported, chunks are processed in +unspecified order. + +In :ref:`variable-rate mode `, there is no way to predict the exact +number of bits that each chunk compresses to. Therefore, |zfp| allocates +a temporary memory buffer for each chunk. Once all chunks have been +compressed, they are concatenated into a single bit stream in serial, +after which the temporary buffers are deallocated. + +In :ref:`fixed-rate mode `, the final location of each +chunk's bit stream is known ahead of time, and |zfp| may not have to +allocate temporary buffers. However, if the chunks are not aligned on +:ref:`word boundaries `, then race conditions may occur. In other +words, for chunk size *C*, rate *R*, and word size *W*, the rate and chunk +size must be such that *C* |times| |4powd| |times| *R* is a multiple of *W* +to avoid temporary buffers. Since *W* is a small power of two no larger +than 64, this is usually an easy requirement to satisfy. 
+ +When chunks are whole multiples of the word size, no temporary buffers +are allocated and the threads write compressed data directly to the +target buffer. +The CUDA implementation uses atomics to avoid race conditions, and therefore +does not need temporary buffers, regardless of chunk alignment. + + +Using OpenMP +------------ + +In order to use OpenMP compression, |zfp| must be built with OpenMP +support. If built with CMake, OpenMP support is automatically enabled when +available. To manually disable OpenMP support, see the +:c:macro:`ZFP_WITH_OPENMP` macro. + +To avoid compilation errors on systems with spotty OpenMP support +(e.g., macOS), OpenMP is by default disabled in GNU builds. To enable +OpenMP, see :ref:`gnu_builds` and the :c:macro:`ZFP_WITH_OPENMP` macro. + + +Using CUDA +---------- + +CUDA support is by default disabled. Enabling it requires an installation +of CUDA and a compatible host compiler. Furthermore, the +:c:macro:`ZFP_WITH_CUDA` macro must be set and |zfp| must be built with +CMake. See :c:macro:`ZFP_WITH_CUDA` for further details. + +Device Memory Management +^^^^^^^^^^^^^^^^^^^^^^^^ + +The CUDA version of |zfp| supports both host and device memory. If device +memory is allocated for fields or compressed streams, this is automatically +detected and handled in a consistent manner. For example, with compression, +if host memory pointers are provided for both the field and compressed +stream, then device memory will transparently be allocated and the uncompressed +data will be copied to the GPU. Once compression completes, the compressed +stream is copied back to the host and device memory is deallocated. If both +pointers are device pointers, then no copies are made. Additionally, any +combination of host and device pointers is supported. + +.. _cuda-limitations: + +CUDA Limitations +^^^^^^^^^^^^^^^^ + +The CUDA implementation has a number of limitations: + +* Only the :ref:`fixed-rate mode ` is supported.
+ Other modes will be supported in a future release. +* 4D arrays are not supported. +* :ref:`Headers
` are not supported. Any header already present in + the stream will be silently overwritten on compression. +* |zfp| must be built with a :c:macro:`ZFP_BIT_STREAM_WORD_SIZE` of 64 bits. +* Although :ref:`strides ` are supported, fields must be contiguous + when stored in host memory, i.e., with no unused memory addresses between + the minimum and maximum address spanned by the field (see + :c:func:`zfp_field_is_contiguous`). This requirement avoids having to copy + and allocate more temporary memory than needed to hold the array if it were + not strided. Note that the strides can still be arbitrary as long as they + serve only to permute the array elements. Moreover, this restriction + applies only to the CUDA execution policy and the case where the + uncompressed field resides on the host. + +We expect to address these limitations over time. + + +Setting the Execution Policy +---------------------------- + +Enabling parallel compression at run time is often as simple as +calling :c:func:`zfp_stream_set_execution` +:: + + if (zfp_stream_set_execution(stream, zfp_exec_omp)) { + // use OpenMP parallel compression + ... + zfpsize = zfp_compress(stream, field); + } + +before calling :c:func:`zfp_compress`. Replacing :code:`zfp_exec_omp` +with :code:`zfp_exec_cuda` enables CUDA execution. If OpenMP or CUDA is +disabled or not supported, then the return value of functions setting these +execution policies and parameters will indicate failure. Execution +parameters are optional and may be set using the functions discussed above. + +The source code for the |zfpcmd| command-line tool includes further examples +on how to set the execution policy. To use parallel compression and +decompression in this tool, see the :option:`-x` command-line option. + +.. note:: + As of |zfp| |cudarelease|, the execution policy refers to both + compression and decompression. 
The OpenMP implementation does not + yet support decompression, and hence :c:func:`zfp_decompress` will + fail if the execution policy is not reset to :code:`zfp_exec_serial` + before calling the decompressor. Similarly, the CUDA implementation + supports only fixed-rate mode and will fail if other compression modes + are specified. + +The following table summarizes which execution policies are supported +with which :ref:`compression modes `: + + +---------------------------------+---------+---------+---------+ + | (de)compression mode | serial | OpenMP | CUDA | + +===============+=================+=========+=========+=========+ + | | fixed rate | |check| | |check| | |check| | + | +-----------------+---------+---------+---------+ + | | fixed precision | |check| | |check| | | + | compression +-----------------+---------+---------+---------+ + | | fixed accuracy | |check| | |check| | | + | +-----------------+---------+---------+---------+ + | | reversible | |check| | |check| | | + +---------------+-----------------+---------+---------+---------+ + | | fixed rate | |check| | | |check| | + | +-----------------+---------+---------+---------+ + | | fixed precision | |check| | | | + | decompression +-----------------+---------+---------+---------+ + | | fixed accuracy | |check| | | | + | +-----------------+---------+---------+---------+ + | | reversible | |check| | | | + +---------------+-----------------+---------+---------+---------+ + +:c:func:`zfp_compress` and :c:func:`zfp_decompress` both return zero if the +current execution policy is not supported for the requested compression +mode. + + +Parallel Compression +-------------------- + +Once the execution policy and parameters have been selected, compression +is executed by calling :c:func:`zfp_compress` from a single thread. This +function in turn inspects the execution policy given by the +:c:type:`zfp_stream` argument and dispatches the appropriate function +for executing compression. 
+ + +Parallel Decompression +---------------------- + +Parallel decompression is in principle possible using the same strategy +as used for compression. However, in |zfp|'s +:ref:`variable-rate modes `, the compressed blocks do not occupy +fixed storage, and therefore the decompressor needs to be instructed +where each compressed block resides in the bit stream to enable +parallel decompression. Because the |zfp| bit stream does not currently +store such information, variable-rate parallel decompression is not yet +supported, though plans are to make such functionality available in the +near future. + +The CUDA implementation supports fixed-rate decompression. OpenMP +fixed-rate decompression has been implemented and will be released in the +near future. + +Future versions of |zfp| will allow efficient encoding of block sizes and/or +offsets to allow each thread to quickly locate the blocks it is responsible +for decompressing, which will allow for variable-rate compression and +decompression. Such capabilities are already present in the implementation +of the |zfp| :ref:`read-only arrays `. diff --git a/docs/source/faq.rst b/docs/source/faq.rst new file mode 100644 index 00000000..b0362993 --- /dev/null +++ b/docs/source/faq.rst @@ -0,0 +1,1270 @@ +.. include:: defs.rst + +.. cpp:namespace:: zfp + +FAQ +=== + +The following is a list of answers to frequently asked questions. For +questions not answered here or elsewhere in the documentation, please +`e-mail us `__. + +Questions answered in this FAQ: + + 0. :ref:`Do zfp arrays use C or Fortran order? ` + #. :ref:`Can zfp compress vector fields? ` + #. :ref:`Should I declare a 2D array as zfp::array1d a(nx * ny, rate)? ` + #. :ref:`How can I initialize a zfp compressed array from disk? ` + #. :ref:`Can I use zfp to represent dense linear algebra matrices? ` + #. :ref:`Can zfp compress logically regular but geometrically irregular data? ` + #. 
:ref:`Does zfp handle infinities, NaNs, and subnormal floating-point numbers? ` + #. :ref:`Can zfp handle data with some missing values? ` + #. :ref:`Can I use zfp to store integer data? ` + #. :ref:`Can I compress 32-bit integers using zfp? ` + #. :ref:`Why does zfp corrupt memory if my allocated buffer is too small? ` + #. :ref:`Are zfp compressed streams portable across platforms? ` + #. :ref:`How can I achieve finer rate granularity? ` + #. :ref:`Can I generate progressive zfp streams? ` + #. :ref:`How do I initialize the decompressor? ` + #. :ref:`Must I use the same parameters during compression and decompression? ` + #. :ref:`Do strides have to match during compression and decompression? ` + #. :ref:`Why does zfp sometimes not respect my error tolerance? ` + #. :ref:`Why is the actual rate sometimes not what I requested? ` + #. :ref:`Can zfp perform compression in place? ` + #. :ref:`Can zfp bound the point-wise relative error? ` + #. :ref:`Does zfp support lossless compression? ` + #. :ref:`Why is my actual, measured error so much smaller than the tolerance? ` + #. :ref:`Are parallel compressed streams identical to serial streams? ` + #. :ref:`Are zfp arrays and other data structures thread-safe? ` + #. :ref:`Why does parallel compression performance not match my expectations? ` + #. :ref:`Why are compressed arrays so slow? ` + #. :ref:`Do compressed arrays use reference counting? ` + #. :ref:`How large a buffer is needed for compressed storage? ` + #. :ref:`How can I print array values? ` + #. :ref:`What is known about zfp compression errors? ` + +------------------------------------------------------------------------------- + +..
_q-layout: + +Q0: *Do zfp arrays use C or Fortran order?* + +*This is such an important question that we added it as question zero to our +FAQ, but do not let this C'ism fool you.* + +A: |zfp| :ref:`compressed-array classes ` and uncompressed +:ref:`fields ` assume that the leftmost index varies fastest, which +often is referred to as Fortran order. By convention, |zfp| uses *x* (or *i*) +to refer to the leftmost index, then *y* (or *j*), and so on. + +.. warning:: + It is critical that the order of dimensions is specified correctly to + achieve good compression and accuracy. If the order of dimensions is + transposed, |zfp| will still compress the data, but with no indication + that the order was wrong. Compression ratio and/or accuracy will likely + suffer significantly, however. Please see + :ref:`this section ` for further discussion. + +In C order, the rightmost index varies fastest (e.g., *x* in +:code:`arr[z][y][x]`), meaning that if we increment the rightmost index we +move to the next consecutive address in memory. If an uncompressed array, +:code:`arr`, is stored in C order, we would for compatibility with |zfp| +let *x* be the rightmost index in :code:`arr` but the leftmost index in the +compressed |zfp| array, :code:`zarr`, e.g.,:: + + const size_t nx = 5; + const size_t ny = 3; + const size_t nz = 2; + float arr[nz][ny][nx] = { ... }; + zfp::array3f zarr(nx, ny, nz, rate, &arr[0][0][0]); + +Then :code:`arr[z][y][x]` and :code:`zarr(x, y, z)` refer to the same element, +as do :code:`(&arr[0][0][0])[sx * x + sy * y + sz * z]` and +:code:`zarr[sx * x + sy * y + sz * z]`, where +:: + + ptrdiff_t sx = &arr[0][0][1] - &arr[0][0][0]; // sx = 1 + ptrdiff_t sy = &arr[0][1][0] - &arr[0][0][0]; // sy = nx = 5 + ptrdiff_t sz = &arr[1][0][0] - &arr[0][0][0]; // sz = nx * ny = 15 + +Here *sx*, *sy*, and *sz* are the *strides* along the three dimensions, +with *sx* < *sy* < *sz*. + +Of course, C vs.
Fortran ordering matters only for multidimensional arrays +and when the array dimensions (*nx*, *ny*, *nz*) are not all equal. + +Note that |zfp| :ref:`fields ` also support strides, which can be +used to represent more general layouts than C and Fortran order, including +non-contiguous storage, reversed dimensions via negative strides, and +other advanced layouts. With the default strides, however, it is correct +to think of |zfp| as using Fortran order. + +For uncompressed data stored in C order, one easily translates to |zfp| +Fortran order by reversing the order of dimensions or by specifying +appropriate :ref:`strides `. We further note that |zfp| provides +:ref:`nested views ` of arrays that support C indexing syntax, +e.g., :code:`view[z][y][x]` corresponds to :code:`arr(x, y, z)`. + +.. note:: + The |zfp| :ref:`NumPy interface ` uses the strides of the NumPy array + to infer the correct layout. Although NumPy arrays use C order by default, + |zfp| handles such arrays correctly regardless of their memory layout. The + actual order of dimensions for compressed storage are, however, reversed so + that NumPy arrays in C order are traversed sequentially during compression. + +Why does |zfp| use Fortran order when C is today a far more common language? +This choice is somewhat arbitrary yet has strong proponents in either camp, +similar to the preference between :ref:`little and big endian ` +byte order. We believe that a single 2D array storing an (*x*, *y*) image is +most naturally extended to a sequence of *nt* time-varying images by +*appending* (not prepending) a time dimension *t* as (*x*, *y*, *t*). This +is the convention used in mathematics, e.g., we use (*x*, *y*) coordinates in +2D and (*x*, *y*, *z*) coordinates in 3D. 
Using Fortran order, each time +slice, *t*, is still a 2D contiguous image, while C order +(:code:`arr[x][y][t]`) would suggest that appending the *t* dimension now +gives us *nx* 2D arrays indexed by (*y*, *t*), even though without the *t* +dimension the images would be indexed by (*x*, *y*). + +------------------------------------------------------------------------------- + +.. _q-vfields: + +Q1: *Can zfp compress vector fields?* + +I have a 2D vector field +:: + + double velocity[ny][nx][2]; + +of dimensions *nx* |times| *ny*. Can I use a 3D |zfp| array to store this as:: + + array3d velocity(2, nx, ny, rate); + +A: Although this could be done, zfp assumes that consecutive values are +related. The two velocity components (*vx*, *vy*) are almost assuredly +independent and would not be correlated. This will severely hurt the +compression rate or quality. Instead, consider storing *vx* and *vy* as +two separate 2D scalar arrays:: + + array2d vx(nx, ny, rate); + array2d vy(nx, ny, rate); + +or as +:: + + array2d velocity[2] = {array2d(nx, ny, rate), array2d(nx, ny, rate)}; + +------------------------------------------------------------------------------- + +.. _q-array2d: + +Q2: *Should I declare a 2D array as zfp::array1d a(nx * ny, rate)?* + +I have a 2D scalar field of dimensions *nx* |times| *ny* that I allocate as +:: + + double* a = new double[nx * ny]; + +and index as +:: + + a[x + nx * y] + +Should I use a corresponding zfp array +:: + + array1d a(nx * ny, rate); + +to store my data in compressed form? + +A: Although this is certainly possible, if the scalar field exhibits +coherence in both spatial dimensions, then far better results can be +achieved by using a 2D array:: + + array2d a(nx, ny, rate); + +Although both compressed arrays can be indexed as above, the 2D array can +exploit smoothness in both dimensions and improve the quality dramatically +for the same rate. 
+ +Since |zfp| 0.5.2, proxy pointers are also available that act much like +the flat :code:`double*`. + +------------------------------------------------------------------------------- + +.. _q-read: + +Q3: *How can I initialize a zfp compressed array from disk?* + +I have a large, uncompressed, 3D data set:: + + double a[nz][ny][nx]; + +stored on disk that I would like to read into a compressed array. This data +set will not fit in memory uncompressed. What is the best way of doing this? + +A: Using a |zfp| array:: + + array3d a(nx, ny, nz, rate); + +the most straightforward (but perhaps not best) way is to read one +floating-point value at a time and copy it into the array:: + + for (size_t z = 0; z < nz; z++) + for (size_t y = 0; y < ny; y++) + for (size_t x = 0; x < nx; x++) { + double f; + if (fread(&f, sizeof(f), 1, file) == 1) + a(x, y, z) = f; + else { + // handle I/O error + } + } + +Note, however, that if the array cache is not large enough, then this may +compress blocks before they have been completely filled. Therefore it is +recommended that the cache holds at least one complete layer of blocks, +i.e., (*nx* / 4) |times| (*ny* / 4) blocks in the example above. + +To avoid inadvertent evictions of partially initialized blocks, it is better +to buffer four layers of *nx* |times| *ny* values each at a time, when +practical, and to completely initialize one block after another, which is +facilitated using |zfp|'s iterators:: + + double* buffer = new double[nx * ny * 4]; + int zmin = -4; + for (zfp::array3d::iterator it = a.begin(); it != a.end(); it++) { + int x = it.i(); + int y = it.j(); + int z = it.k(); + if (z > zmin + 3) { + // read another layer of blocks + if (fread(buffer, sizeof(*buffer), nx * ny * 4, file) != nx * ny * 4) { + // handle I/O error + } + zmin += 4; + } + a(x, y, z) = buffer[x + nx * (y + ny * (z - zmin))]; + } + +Iterators have been available since |zfp| 0.5.2. 
+ +------------------------------------------------------------------------------- + +.. _q-matrix: + +Q4: *Can I use zfp to represent dense linear algebra matrices?* + +A: Yes, but your mileage may vary. Dense matrices, unlike smooth scalar +fields, rarely exhibit correlation between adjacent rows and columns. Thus, +the quality or compression ratio may suffer. + +------------------------------------------------------------------------------- + +.. _q-structured: + +Q5: *Can zfp compress logically regular but geometrically irregular data?* + +My data is logically structured but irregularly sampled, e.g., it is +rectilinear, curvilinear, or Lagrangian, or uses an irregular spacing of +quadrature points. Can I still use zfp to compress it? + +A: Yes, as long as the data is (or can be) represented as a logical +multidimensional array, though your mileage may vary. |zfp| has been designed +for uniformly sampled data, and compression will in general suffer the more +irregular the sampling is. + +------------------------------------------------------------------------------- + +.. _q-valid: + +Q6: *Does zfp handle infinities, NaNs, and subnormal floating-point numbers?* + +A: Yes, but only in :ref:`reversible mode `. + +|zfp|'s lossy compression modes currently support only finite +floating-point values. If a block contains a NaN or an infinity, undefined +behavior is invoked due to the C math function :c:func:`frexp` being +undefined for non-numbers. Subnormal numbers are, however, handled correctly. + +------------------------------------------------------------------------------- + +.. _q-missing: + +Q7: *Can zfp handle data with some missing values?* + +My data has some missing values that are flagged by very large numbers, e.g., +1e30. Is that OK? + +A: Although all finite numbers are "correctly" handled, such large sentinel +values are likely to pollute nearby values, because all values within a block +are expressed with respect to a common largest exponent.
The presence of +very large values may result in complete loss of precision of nearby, valid +numbers. Currently no solution to this problem is available, but future +versions of |zfp| will likely support a bit mask to tag values that should be +excluded from compression. + +------------------------------------------------------------------------------- + +.. _q-integer: + +Q8: *Can I use zfp to store integer data?* + +Can I use zfp to store integer data such as 8-bit quantized images or 16-bit +digital elevation models? + +A: Yes (as of version 0.4.0), but the data has to be promoted to 32-bit signed +integers first. This should be done one block at a time using an appropriate +:code:`zfp_promote_*_to_int32` function call (see :ref:`ll-utilities`). Future +versions of |zfp| may provide a high-level interface that automatically +performs promotion and demotion. + +Note that the promotion functions shift the low-precision integers into the +most significant bits of 31-bit (not 32-bit) integers and also convert unsigned +to signed integers. Do use these functions rather than simply casting 8-bit +integers to 32 bits to avoid wasting compressed bits to encode leading zeros. +Moreover, in fixed-precision mode, set the precision relative to the precision +of the (unpromoted) source data. + +As of version 0.5.1, integer data is supported both by the low-level API and +high-level calls :c:func:`zfp_compress` and :c:func:`zfp_decompress`. + +------------------------------------------------------------------------------- + +.. _q-int32: + +Q9: *Can I compress 32-bit integers using zfp?* + +I have some 32-bit integer data. Can I compress it using |zfp|'s 32-bit +integer support? + +A: Yes, this can safely be done in :ref:`reversible mode `. + +In other (lossy) modes, the answer depends. +|zfp| compression of 32-bit and 64-bit integers requires that each +integer *f* have magnitude \|\ *f*\ \| < 2\ :sup:`30` and +\|\ *f*\ \| < 2\ :sup:`62`, respectively. 
To handle signed integers that +span the entire range |minus|\ 2\ :sup:`31` |leq| x < 2\ :sup:`31`, or +unsigned integers 0 |leq| *x* < 2\ :sup:`32`, the data has to be promoted to +64 bits first. + +As with floating-point data, the integers should ideally represent a +quantized continuous function rather than, say, categorical data or set of +indices. Depending on compression settings and data range, the integers may +or may not be losslessly compressed. If fixed-precision mode is used, the +integers may be stored at less precision than requested. +See :ref:`Q21 ` for more details on precision and lossless +compression. + +------------------------------------------------------------------------------- + +.. _q-overrun: + +Q10: *Why does zfp corrupt memory if my allocated buffer is too small?* + +Why does |zfp| corrupt memory rather than return an error code if not enough +memory is allocated for the compressed data? + +A: This is for performance reasons. |zfp| was primarily designed for fast +random access to fixed-rate compressed arrays, where checking for buffer +overruns is unnecessary. Adding a test for every compressed byte output +would significantly compromise performance. + +One way around this problem (when not in fixed-rate mode) is to use the +:c:data:`maxbits` parameter in conjunction with the maximum precision or +maximum absolute error parameters to limit the size of compressed blocks. +Finally, the function :c:func:`zfp_stream_maximum_size` returns a conservative +buffer size that is guaranteed to be large enough to hold the compressed data +and the optional header. + +------------------------------------------------------------------------------- + +.. index:: + single: Endianness +.. _q-portability: + +Q11: *Are zfp compressed streams portable across platforms?* + +Are |zfp| compressed streams portable across platforms? Are there, for +example, endianness issues? + +A: Yes, |zfp| can write portable compressed streams. 
To ensure portability +across different endian platforms, the bit stream must however be written +in increments of single bytes on big endian processors (e.g., PowerPC, SPARC), +which is achieved by compiling |zfp| with an 8-bit (single-byte) word size:: + + -DBIT_STREAM_WORD_TYPE=uint8 + +See :c:macro:`BIT_STREAM_WORD_TYPE`. Note that on little endian processors +(e.g., Intel x86-64 and AMD64), the word size does not affect the bit stream +produced, and thus the default word size may be used. By default, |zfp| uses +a word size of 64 bits, which results in the coarsest rate granularity but +fastest (de)compression. If cross-platform portability is not needed, then +the maximum word size is recommended (but see also :ref:`Q12 `). + +When using 8-bit words, |zfp| produces a compressed stream that is byte order +independent, i.e., the exact same compressed sequence of bytes is generated +on little and big endian platforms. When decompressing such streams, +floating-point and integer values are recovered in the native byte order of +the machine performing decompression. The decompressed values can be used +immediately without the need for byte swapping and without having to worry +about the byte order of the computer that generated the compressed stream. + +Finally, |zfp| assumes that the floating-point format conforms to IEEE 754. +Issues may arise on architectures that do not support IEEE floating point. + +------------------------------------------------------------------------------- + +.. _q-granularity: + +Q12: *How can I achieve finer rate granularity?* + +A: For *d*-dimensional data, |zfp| supports a rate granularity of 1 / |4powd| +bits, i.e., the rate can be specified in increments of a fraction of a bit. +Such fine rate selection is always available for sequential compression +(e.g., when calling :c:func:`zfp_compress`). 
+ +Unlike in sequential compression, |zfp|'s +:ref:`read-write compressed-array classes ` require +random-access writes, which are supported only at the granularity of whole +words. By default, a word is 64 bits, which gives a rate granularity of +64 / |4powd| bits in *d* dimensions, i.e., 16 bits in 1D, 4 bits in 2D, 1 bit +in 3D, and 0.25 bits in 4D. +:ref:`Read-only compressed arrays ` support the same fine +granularity as sequential compression. + +To achieve finer granularity, build |zfp| with a smaller (but as large as +possible) stream word size, e.g.:: + + -DBIT_STREAM_WORD_TYPE=uint8 + +gives the finest possible granularity, but at the expense of (de)compression +speed. See :c:macro:`BIT_STREAM_WORD_TYPE`. + +------------------------------------------------------------------------------- + +.. _q-progressive: + +Q13: *Can I generate progressive zfp streams?* + +A: Yes, but it requires some coding effort. There is currently no high-level +support for progressive |zfp| streams. To implement progressive fixed-rate +streams, the fixed-length bit streams should be interleaved among the blocks +that make up an array. For instance, if a 3D array uses 1024 bits per block, +then those 1024 bits could be broken down into, say, 16 pieces of 64 bits +each, resulting in 16 discrete quality settings. By storing the blocks +interleaved such that the first 64 bits of all blocks are contiguous, +followed by the next 64 bits of all blocks, etc., one can achieve progressive +decompression by setting the :c:member:`zfp_stream.maxbits` parameter (see +:c:func:`zfp_stream_set_params`) to the number of bits per block received so +far. + +To enable interleaving of blocks, |zfp| must first be compiled with:: + + -DBIT_STREAM_STRIDED + +to enable strided bit stream access.
In the example above, if the stream +word size is 64 bits and there are *n* blocks, then:: + + stream_set_stride(stream, m, n); + +implies that after every *m* 64-bit words have been decoded, the bit stream +is advanced by *m* |times| *n* words to the next set of m 64-bit words +associated with the block. + +------------------------------------------------------------------------------- + +.. _q-init: + +Q14: *How do I initialize the decompressor?* + +A: The :c:type:`zfp_stream` and :c:type:`zfp_field` objects usually need to +be initialized with the same values as they had during compression (but see +:ref:`Q15 ` for exceptions). +These objects hold the compression mode and parameters, and field data like +the scalar type and dimensions. By default, these parameters are not stored +with the compressed stream (the "codestream") and prior to |zfp| 0.5.0 had to +be maintained separately by the application. + +Since version 0.5.0, functions exist for reading and writing a 12- to 19-byte +header that encodes compression and field parameters. For applications that +wish to embed only the compression parameters, e.g., when the field dimensions +are already known, there are separate functions that encode and decode this +information independently. + +------------------------------------------------------------------------------- + +.. _q-same: + +Q15: *Must I use the same parameters during compression and decompression?* + +A: Not necessarily. When decompressing one block at a time, it is possible +to use more tightly constrained :c:type:`zfp_stream` parameters during +decompression than were used during compression. For instance, one may use a +smaller :c:member:`zfp_stream.maxbits`, +smaller :c:member:`zfp_stream.maxprec`, or larger :c:member:`zfp_stream.minexp` +during decompression to process fewer compressed bits than are stored, and to +decompress the array more quickly at a lower precision. 
This may be useful +in situations where the precision and accuracy requirements are not known a +priori, thus forcing conservative settings during compression, or when the +compressed stream is used for multiple purposes. For instance, visualization +usually has less stringent precision requirements than quantitative data +analysis. This feature of decompressing to a lower precision is particularly +useful when the stream is stored progressively (see :ref:`Q13 `). + +Note that one may not use less constrained parameters during decompression, +e.g., one cannot ask for more than :c:member:`zfp_stream.maxprec` bits of +precision when decompressing. Furthermore, the parameters must agree between +compression and decompression when calling the high-level API function +:c:func:`zfp_decompress`. + +Currently float arrays have a different compressed representation from +compressed double arrays due to differences in exponent width. It is not +possible to compress a double array and then decompress (demote) the result +to floats, for instance. Future versions of the |zfp| codec may use a unified +representation that does allow this. + +------------------------------------------------------------------------------- + +.. _q-strides: + +Q16: *Do strides have to match during compression and decompression?* + +A: No. For instance, a 2D vector field:: + + float in[ny][nx][2]; + +could be compressed as two scalar fields with strides *sx* = 2, +*sy* = 2 |times| *nx*, and with pointers :code:`&in[0][0][0]` and +:code:`&in[0][0][1]` to the first value of each scalar field. These two +scalar fields can later be decompressed as non-interleaved fields:: + + float out[2][ny][nx]; + +using strides *sx* = 1, *sy* = *nx* and pointers :code:`&out[0][0][0]` +and :code:`&out[1][0][0]`. + +------------------------------------------------------------------------------- + +.. 
_q-tolerance: + +Q17: *Why does zfp sometimes not respect my error tolerance?* + +A: First, |zfp| does not support +:ref:`fixed-accuracy mode ` for integer data and +will ignore any tolerance requested via :c:func:`zfp_stream_set_accuracy` +or associated :ref:`expert mode ` parameter settings. So this +FAQ pertains to floating-point data only. + +The short answer is that, given finite precision, the |zfp| and IEEE +floating-point number systems represent distinct subsets of the reals +(or, in case of |zfp|, blocks of reals). Although these subsets have +significant overlap, they are not equal. Consequently, there are some +combinations of floating-point values that |zfp| cannot represent exactly; +conversely, there are some |zfp| blocks that cannot be represented exactly +as IEEE floating point. If the user-specified tolerance is smaller than the +difference between the IEEE floating-point representation to be compressed +and its closest |zfp| representation, then the tolerance necessarily will +be violated (except in :ref:`reversible mode `). In +practice, absolute tolerances have to be extremely small relative to the +numbers being compressed for this issue to occur, however. + +Note that this issue is not particular to |zfp| but occurs in the conversion +between any two number systems of equal precision; we may just as well fault +IEEE floating point for not being able to represent all |zfp| blocks +accurately enough! By analogy, not all 32-bit integers can be represented +exactly in 32-bit floating point. The integer 123456789 is one example; the +closest float is 123456792. And, obviously, not all floats (e.g., 0.5) can +be represented exactly as integers. + +To further demonstrate this point, let us consider a concrete example. |zfp| +does not store each floating-point scalar value independently but represents +a group of values (4, 16, 64, or 256 values, depending on dimensionality) as +linear combinations like averages by evaluating arithmetic expressions. 
+Just like in uncompressed IEEE floating-point arithmetic, both representation +error and roundoff error in the least significant bit(s) often occur. + +To illustrate this, consider compressing the following 1D array of four +floats +:: + + float f[4] = { 1, 1e-1, 1e-2, 1e-3 }; + +using the |zfp| command-line tool:: + + zfp -f -1 4 -a 0 -i input.dat -o output.dat + +In spite of an error tolerance of zero, the reconstructed values are:: + + float g[4] = { 1, 1e-1, 9.999998e-03, 9.999946e-04 }; + +with a (computed) maximum error of 5.472e-9. Because f[3] = 1e-3 can only +be approximately represented in radix-2 floating-point, the actual error +is even smaller: 5.424e-9. This reconstruction error is primarily due to +|zfp|'s block-floating-point representation, which expresses the four values +in a block relative to a single, common binary exponent. Such exponent +alignment occurs also in regular IEEE floating-point operations like addition. +For instance, +:: + + float x = (f[0] + f[3]) - 1; + +should of course result in :code:`x = f[3] = 1e-3`, but due to exponent +alignment a few of the least significant bits of f[3] are lost in the +rounded result of the addition, giving :code:`x = 1.0000467e-3` and a +roundoff error of 4.668e-8. Similarly, +:: + + float sum = f[0] + f[1] + f[2] + f[3]; + +should return :code:`sum = 1.111`, but is computed as 1.1110000610. Moreover, +the value 1.111 cannot even be represented exactly in (radix-2) floating-point; +the closest float is 1.1109999. Thus the computed error +:: + + float error = sum - 1.111f; + +which itself has some roundoff error, is 1.192e-7. + +*Phew*! Note how the error introduced by |zfp| (5.472e-9) is in fact one to +two orders of magnitude smaller than the roundoff errors (4.668e-8 and +1.192e-7) introduced by IEEE floating point in these computations. This lower +error is in part due to |zfp|'s use of 30-bit significands compared to IEEE's +24-bit single-precision significands. 
Note that data sets with a large dynamic +range, e.g., where adjacent values differ a lot in magnitude, are more +susceptible to representation errors. + +The moral of the story is that error tolerances smaller than machine epsilon +(relative to the data range) cannot always be satisfied by |zfp|. Nor are such +tolerances necessarily meaningful for representing floating-point data that +originated in floating-point arithmetic expressions, since accumulated +roundoff errors are likely to swamp compression errors. Because such +roundoff errors occur frequently in floating-point arithmetic, insisting on +lossless compression on the grounds of accuracy is tenuous at best. + +------------------------------------------------------------------------------- + +.. _q-rate: + +Q18: *Why is the actual rate sometimes not what I requested?* + +A: In principle, |zfp| allows specifying the size of a compressed block in +increments of single bits, thus allowing very fine-grained tuning of the +bit rate. There are, however, cases when the desired rate does not exactly +agree with the effective rate, and users are encouraged to check the return +value of :c:func:`zfp_stream_set_rate`, which gives the actual rate. + +There are several reasons why the requested rate may not be honored. First, +the rate is specified in bits/value, while |zfp| always represents a block +of |4powd| values in *d* dimensions, i.e., using +*N* = |4powd| |times| *rate* bits. *N* must be an integer number of bits, +which constrains the actual rate to be a multiple of 1 / |4powd|. The actual +rate is computed by rounding |4powd| times the desired rate. + +Second, if the array dimensions are not multiples of four, then |zfp| pads the +dimensions to the next higher multiple of four. 
Thus, the total number of +bits for a 2D array of dimensions *nx* |times| *ny* is computed in terms of +the number of blocks *bx* |times| *by*:: + + bitsize = (4 * bx) * (4 * by) * rate + +where *nx* |leq| 4 |times| bx < *nx* + 4 and +*ny* |leq| 4 |times| *by* < *ny* + 4. When amortizing bitsize over the +*nx* |times| *ny* values, a slightly higher rate than requested may result. + +Third, to support updating compressed blocks, as is needed by |zfp|'s +compressed array classes, the user may request write random access to the +fixed-rate stream. To support this, each block must be aligned on a stream +word boundary (see :ref:`Q12 `), and therefore the rate when +write random access is requested must be a multiple of *wordsize* / |4powd| +bits. By default *wordsize* = 64 bits. Even when write random access is +not requested, the compressed stream is written in units of *wordsize*. +Hence, once the stream is flushed, either by a :c:func:`zfp_compress` or +:c:func:`zfp_stream_flush` call, to output any buffered bits, its size +will be a multiple of *wordsize* bits. + +Fourth, for floating-point data, each block must hold at least the common +exponent and one additional bit, which places a lower bound on the rate. + +Finally, the user may optionally include a header with each array. Although +the header is small, it must be accounted for in the rate. The function +:c:func:`zfp_stream_maximum_size` conservatively includes space for a header, +for instance. + +Aside from these caveats, |zfp| is guaranteed to meet the exact rate specified. + +------------------------------------------------------------------------------- + +.. _q-inplace: + +Q19: *Can zfp perform compression in place?* + +A: Because the compressed data tends to be far smaller than the uncompressed +data, it is natural to ask if the compressed stream can overwrite the +uncompressed array to avoid having to allocate separate storage for the +compressed stream. 
|zfp| does allow for the possibility of such in-place +compression, but with several caveats and restrictions: + + 1. A bitstream must be created whose buffer points to the beginning of + uncompressed (and to be compressed) storage. + + 2. The array must be compressed using |zfp|'s low-level API. In particular, + the data must already be partitioned and organized into contiguous blocks + so that all values of a block can be pulled out once and then replaced + with the corresponding shorter compressed representation. + + 3. No one compressed block can occupy more space than its corresponding + uncompressed block so that the not-yet compressed data is not overwritten. + This is usually easily accomplished in fixed-rate mode, although the + expert interface also allows guarding against this in all modes using the + :c:member:`zfp_stream.maxbits` parameter. This parameter should be set to + :code:`maxbits = 4^d * sizeof(type) * 8`, where *d* is the array + dimensionality (1, 2, 3, or 4) and where *type* is the scalar type of the + uncompressed data. + + 4. No header information may be stored in the compressed stream. + +In-place decompression can also be achieved, but in addition to the above +constraints requires even more care: + + 1. The data must be decompressed in reverse block order, so that the last + block is decompressed first to the end of the block array. This requires + the user to maintain a pointer to uncompressed storage and to seek via + :c:func:`stream_rseek` to the proper location in the compressed stream + where the block is stored. + + 2. The space allocated to the compressed stream must be large enough to + also hold the uncompressed data. + +An :ref:`example ` is provided that shows how in-place compression +can be done. + +------------------------------------------------------------------------------- + +.. _q-relerr: + +Q20: *Can zfp bound the point-wise relative error?* + +A: Yes, but with some caveats. 
First, we define the relative error in a value +*f* approximated by *g* as \|\ *f* - *g*\ \| / \|\ *f*\ \|, which converges to +\|\ log(*f* / *g*)\ \| = \|\ log(*f*) - \ log(*g*)\| as *g* approaches *f*, +where log(*f*) denotes the natural logarithm of *f*. +Below, we discuss three strategies for relative error control that may be +applicable depending on the properties of the underlying floating-point data. + +If all floating-point values to be compressed are normalized, i.e., with no +nonzero subnormal values smaller in magnitude than +2\ :sup:`-126` |approx| 10\ :sup:`-38` (for floats) or +2\ :sup:`-1022` |approx| 10\ :sup:`-308` (for doubles), then the relative error +can be bounded using |zfp|'s :ref:`expert mode ` settings by +invoking :ref:`reversible mode `. This is achieved by +truncating (zeroing) some number of least significant bits of all +floating-point values and then losslessly compressing the result. The +*q* least significant bits of *n*-bit floating-point numbers (*n* = 32 +for floats and *n* = 64 for doubles) are truncated by |zfp| by specifying a +maximum precision of *p* = *n* |minus| *q*. The resulting point-wise relative +error is then at most 2\ :sup:`q - 23` (for floats) or 2\ :sup:`q - 52` +(for doubles). + +.. note:: + For large enough *q*, floating-point exponent bits will be discarded, + in which case the bound no longer holds, but then the relative error + is already above 100%. Also, as mentioned, the bound does not hold + for subnormals; however, such values are likely too small for relative + errors to be meaningful. + +To bound the relative error, set the expert mode parameters to:: + + minbits = 0 + maxbits = 0 + maxprec = p + minexp = ZFP_MIN_EXP - 1 = -1075 + +For example, using the |zfpcmd| command-line tool, set the parameters using +:option:`-c` :code:`0 0 p -1075`. 
+ +Note that while the above approach respects the error bound when the +above conditions are met, it uses |zfp| for a purpose it was not designed +for, and the compression ratio may not be competitive with those obtained +using compressors designed to bound the relative error. + +Other forms of relative error control can be achieved using |zfp|'s lossy +compression modes. In :ref:`fixed-accuracy mode `, +the *absolute error* \|\ *f* - *g*\ \| is bounded by a user-specified error +tolerance. For a field whose values are all positive (or all negative), we +may pre-transform values by taking the natural logarithm, replacing +each value *f* with log(*f*) before compression, and then exponentiating +values after decompression. This ensures that +\|\ log(*f*) - log(*g*)\ \| = \|\ log(*f* / *g*)\ \| is bounded. (Note, +however, that many implementations of the math library make no guarantees +on the accuracy of the logarithm function.) For fields whose values are +signed, an approximate bound can be achieved by using +log(*f*) |approx| asinh(*f* / 2), where asinh is the inverse of the +hyperbolic sine function, which is defined for both positive and negative +numbers. One benefit of this approach is that it de-emphasizes the +importance of relative errors for small values that straddle zero, where +relative errors rarely make sense, e.g., because of round-off and other +errors already present in the data. + +Finally, in :ref:`fixed-precision mode `, the +precision of |zfp| transform coefficients is fixed, resulting in an error +that is no more than a constant factor of the largest (in magnitude) +value, *fmax*, within the same |zfp| block. This can be thought of as a +weaker version of relative error, where the error is measured relative +to values in a local neighborhood. 
+ +In fixed-precision mode, |zfp| cannot bound the point-wise relative error +due to its use of a block-floating-point representation, in which all +values within a block are represented in relation to a single common +exponent. For a high enough dynamic range within a block, there may +simply not be enough precision available to guard against loss. For +instance, a block containing the values 2\ :sup:`0` = 1 and 2\ :sup:`-n` +would require a precision of *n* + 3 bits to represent losslessly, and +|zfp| uses at most 64-bit integers to represent values. Thus, if +*n* |geq| 62, then 2\ :sup:`-n` is replaced with 0, which is a 100% +relative error. Note that such loss also occurs when, for instance, +2\ :sup:`0` and 2\ :sup:`-n` are added using floating-point arithmetic +(see also :ref:`Q17 `). + +As alluded to, it is possible to bound the error relative to the largest +value, *fmax*, within a block, which if the magnitude of values does not +change too rapidly may serve as a reasonable proxy for point-wise relative +errors. + +One might then ask if using |zfp|'s fixed-precision mode with *p* bits of +precision ensures that the block-wise relative error is at most +2\ :sup:`-p` |times| *fmax*. This is, unfortunately, not the case, because +the requested precision, *p*, is ensured only for the transform coefficients. +During the inverse transform of these quantized coefficients the quantization +error may amplify. That being said, it is possible to derive a bound on the +error in terms of *p* that would allow choosing an appropriate precision. +Such a bound is derived below. + +Let +:: + + emax = floor(log2(fmax)) + +be the largest base-2 exponent within a block. 
For transform coefficient
+precision, *p*, one can show that the maximum absolute error, *err*, is
+bounded by::
+
+  err <= k(d) * (2^emax / 2^p) <= k(d) * (fmax / 2^p)
+
+Here *k*\ (*d*) is a constant that depends on the data dimensionality *d*::
+
+  k(d) = 20 * (15/4)^(d-1)
+
+so that in 1D, 2D, 3D, and 4D we have::
+
+  k(1) = 20
+  k(2) = 75
+  k(3) = 1125/4
+  k(4) = 16875/16
+
+Thus, to guarantee *n* bits of accuracy in the decompressed data, we need
+to choose a higher precision, *p*, for the transform coefficients::
+
+  p(n, d) = n + ceil(log2(k(d))) = n + 2 * d + 3
+
+so that
+::
+
+  p(n, 1) = n + 5
+  p(n, 2) = n + 7
+  p(n, 3) = n + 9
+  p(n, 4) = n + 11
+
+This *p* value should be used in the call to
+:c:func:`zfp_stream_set_precision`.
+
+Note, again, that some values in the block may have leading zeros when
+expressed relative to 2\ :sup:`emax`, and these leading zeros are counted
+toward the *n*-bit precision. Using decimal to illustrate this, suppose
+we used 4-digit precision for a 1D block containing these four values::
+
+  -1.41421e+1 ~ -1.414e+1 = -1414 * (10^1 / 1000)
+  +2.71828e-1 ~ +0.027e+1 =   +27 * (10^1 / 1000)
+  +3.14159e-6 ~ +0.000e+1 =     0 * (10^1 / 1000)
+  +1.00000e+0 ~ +0.100e+1 =  +100 * (10^1 / 1000)
+
+with the values in the middle column aligned to the common base-10 exponent
++1, and with the values on the right expressed as scaled integers. These
+are all represented using four digits of precision, but some of those digits
+are leading zeros.
+
+-------------------------------------------------------------------------------
+
+.. _q-lossless:
+
+Q21: *Does zfp support lossless compression?*
+
+A: Yes. As of |zfp| |revrelease|, bit-for-bit lossless compression is
+supported via the :ref:`reversible compression mode `.
+This mode supports both integer and floating-point data.
+
+In addition, it is sometimes possible to ensure lossless compression using
+|zfp|'s fixed-precision and fixed-accuracy modes. 
For integer data, |zfp| +can with few exceptions ensure lossless compression in +:ref:`fixed-precision mode `. +For a given *n*-bit integer type (*n* = 32 or *n* = 64), consider compressing +*p*-bit signed integer data, with the sign bit counting toward the precision. +In other words, there are exactly 2\ :sup:`p` possible signed integers. If +the integers are unsigned, then subtract 2\ :sup:`p-1` first so that they +range from |minus|\ 2\ :sup:`p-1` to 2\ :sup:`p-1` - 1. + +Lossless integer compression in fixed-precision mode is achieved by first +promoting the *p*-bit integers to *n* - 1 bits (see :ref:`Q8 `) +such that all integer values fall in +[|minus|\ 2\ :sup:`30`, +2\ :sup:`30`), when *n* = 32, or in +[|minus|\ 2\ :sup:`62`, +2\ :sup:`62`), when *n* = 64. In other words, the +*p*-bit integers first need to be shifted left by *n* - *p* - 1 bits. After +promotion, the data should be compressed in zfp's fixed-precision mode using:: + + q = p + 4 * d + 1 + +bits of precision to ensure no loss, where *d* is the data dimensionality +(1 |leq| d |leq| 4). Consequently, the *p*-bit data can be losslessly +compressed as long as *p* |leq| *n* - 4 |times| *d* - 1. The table below +lists the maximum precision *p* that can be losslessly compressed using 32- +and 64-bit integer types. + + = ==== ==== + d n=32 n=64 + = ==== ==== + 1 27 59 + 2 23 55 + 3 19 51 + 4 15 47 + = ==== ==== + +Although lossless compression is possible as long as the precision constraint +is met, the precision needed to guarantee no loss is generally much higher +than the precision intrinsic in the uncompressed data. Therefore, we +recommend using the :ref:`reversible mode ` when lossless +compression is desired. + +The minimum precision, *q*, given above is often larger than what +is necessary in practice. There are worst-case inputs that do require such +large *q* values, but they are quite rare. 
+
+The reason for expanded precision, i.e., why *q* > *p*, is that |zfp|'s
+decorrelating transform computes averages of integers, and this transform is
+applied *d* times in *d* dimensions. Each average of two *p*-bit numbers
+requires *p* + 1 bits to avoid loss, and each transform can be thought of as
+involving up to four such averaging operations.
+
+For floating-point data, fully lossless compression with |zfp| usually
+requires :ref:`reversible mode `, as the other compression
+modes are unlikely to guarantee bit-for-bit exact reconstructions. However,
+if the dynamic range is low or varies slowly such that values
+within a |4powd| block have the same or similar exponent, then the
+precision gained by discarding the 8 or 11 bits of the common floating-point
+exponents can offset the precision lost in the decorrelating transform. For
+instance, if all values in a block have the same exponent, then lossless
+compression is obtained using
+*q* = 26 + 4 |times| *d* |leq| 32 bits of precision for single-precision data
+and *q* = 55 + 4 |times| *d* |leq| 64 bits of precision for double-precision
+data. Of course, the constraint imposed by the available integer precision
+*n* implies that lossless compression of such data is possible only in 1D for
+single-precision data and only in 1D and 2D for double-precision data.
+Finally, to preserve special values such as negative zero, plus and minus
+infinity, and NaNs, reversible mode is needed.
+
+-------------------------------------------------------------------------------
+
+.. _q-abserr:
+
+Q22: *Why is my actual, measured error so much smaller than the tolerance?*
+
+A: For two reasons. The way |zfp| bounds the absolute error in
+:ref:`fixed-accuracy mode ` is by keeping all transform
+coefficient bits whose place value exceeds the tolerance while discarding the
+less significant bits. 
Each such bit has a place value that is a power of +two, and therefore the tolerance must first be rounded down to the next +smaller power of two, which itself will introduce some slack. This possibly +lower, effective tolerance is returned by the +:c:func:`zfp_stream_set_accuracy` call. + +Second, the quantized coefficients are then put through an inverse transform. +This linear transform will combine signed quantization errors that, in the +worst case, may cause them to add up and increase the error, even though the +average (RMS) error remains the same, i.e., some errors cancel while others +compound. For *d*-dimensional data, *d* such inverse transforms are applied, +with the possibility of errors cascading across transforms. To account for +the worst possible case, zfp has to conservatively lower its internal error +tolerance further, once for each of the *d* transform passes. + +Unless the data is highly oscillatory or noisy, the error is not likely to +be magnified much, leaving an observed error in the decompressed data that +is much lower than the prescribed tolerance. In practice, the observed +maximum error tends to be about 4-8 times lower than the error tolerance +for 3D data, while the difference is smaller for 2D and 1D data. + +We recommend experimenting with tolerances and evaluating what error levels +are appropriate for each application, e.g., by starting with a low, +conservative tolerance and successively doubling it. The distribution of +errors produced by |zfp| is approximately Gaussian (see +:ref:`Q30 `), so even if the maximum error may seem large at +an individual grid point, most errors tend to be much smaller and tightly +clustered around zero. + +------------------------------------------------------------------------------- + +.. 
_q-parallel: + +Q23: *Are parallel compressed streams identical to serial streams?* + +A: Yes, it matters not what execution policy is used; the final compressed +stream produced by :c:func:`zfp_compress` depends only on the uncompressed +data and compression settings. + +To support future parallel decompression, in particular variable-rate +streams, it will be necessary to also store an index of where (at what +bit offset) each compressed block is stored in the stream. Extensions to the +current |zfp| format are being considered to support parallel decompression. + +Regardless, the execution policy and parameters such as number of threads +do not need to be the same for compression and decompression. + +------------------------------------------------------------------------------- + +.. _q-thread-safety: + +Q24: *Are zfp's compressed arrays and other data structures thread-safe?* + +A: Yes, compressed arrays can be made thread-safe; no, data structures +like :c:type:`zfp_stream` and :c:type:`bitstream` are not necessarily +thread-safe. As of |zfp| |viewsrelease|, thread-safe read and write access +to compressed arrays via OpenMP threads is provided through the use of +:ref:`private views `, although these come with +certain restrictions and requirements such as the need for the user to +enforce cache coherence. Please see the documentation on +:ref:`views ` for further details. + +As far as C objects, |zfp|'s parallel OpenMP compressor assigns one +:c:type:`zfp_stream` per thread, each of which uses its own private +:c:type:`bitstream`. Users who wish to make parallel calls to |zfp|'s +:ref:`low-level functions ` are advised to consult the source +files :file:`ompcompress.c` and :file:`parallel.c`. + +Finally, the |zfp| API is thread-safe as long as multiple threads do not +simultaneously call API functions and pass the same :c:type:`zfp_stream` +or :c:type:`bitstream` object. 
+ +------------------------------------------------------------------------------- + +.. _q-omp-perf: + +Q25: *Why does parallel compression performance not match my expectations?* + +A: |zfp| partitions arrays into chunks and assigns each chunk to an OpenMP +thread. A chunk is a sequence of consecutive *d*-dimensional blocks, each +composed of |4powd| values. If there are fewer chunks than threads, then +full processor utilization will not be achieved. + +The number of chunks is by default set to the number of threads, but can +be modified by the user via :c:func:`zfp_stream_set_omp_chunk_size`. +One reason for using more chunks than threads is to provide for better +load balance. If compression ratios vary significantly across the array, +then threads that process easy-to-compress blocks may finish well ahead +of threads in charge of difficult-to-compress blocks. By breaking chunks +into smaller units, OpenMP is given the opportunity to balance the load +better (though the effect of using smaller chunks depends on OpenMP +thread scheduling). If chunks are too small, however, then the overhead +of allocating and initializing chunks and assigning threads to them may +dominate. Experimentation with chunk size may improve performance, though +chunks ought to be at least several hundred blocks each. + +In variable-rate mode, compressed chunk sizes are not known ahead of time. +Therefore the compressed chunks must be concatenated into a single stream +following compression. This task is performed sequentially on a single +thread, and will inevitably limit parallel efficiency. + +Other reasons for poor parallel performance include compressing arrays +that are too small to offset the overhead of thread creation and +synchronization. Arrays should ideally consist of thousands of blocks +to offset the overhead of setting up parallel compression. + +------------------------------------------------------------------------------- + +.. 
_q-1d-speed: + +Q26: *Why are compressed arrays so slow?* + +A: This is likely due to the use of a very small cache. Prior to |zfp| +|csizerelease|, all arrays used two 'layers' of blocks as default cache +size, which is reasonable for 2D and higher-dimensional arrays (as long +as they are not too 'skinny'). In 1D, however, this implies that the +cache holds only two blocks, which is likely to cause excessive thrashing. + +As of version |csizerelease|, the default cache size is roughly proportional +to the square root of the total number of array elements, regardless of +array dimensionality. While this tends to reduce thrashing, we suggest +experimenting with larger cache sizes of at least a few kilobytes to ensure +acceptable performance. + +Note that compressed arrays constructed with the +:ref:`default constructor ` will +have an initial cache size of only one block. Therefore, users should call +:cpp:func:`array::set_cache_size` after :ref:`resizing ` +such arrays to ensure a large enough cache. + +Depending on factors such as rate, cache size, array access pattern, +array access primitive (e.g., indices vs. iterators), and arithmetic +intensity, we usually observe an application slow-down of 1-10x when +switching from uncompressed to compressed arrays. + +------------------------------------------------------------------------------- + +.. _q-ref-count: + +Q27: *Do compressed arrays use reference counting?* + +A: It is possible to reference compressed-array elements via proxy +:ref:`references ` and :ref:`pointers `, through +:ref:`iterators `, and through :ref:`views `. Such +indirect references are valid only during the lifetime of the underlying +array. No reference counting and garbage collection is used to keep the +array alive if there are external references to it. Such references +become invalid once the array is destructed, and dereferencing them will +likely lead to segmentation faults. 
+ +------------------------------------------------------------------------------- + +.. _q-max-size: + +Q28: *How large a buffer is needed for compressed storage?* + +A: :c:func:`zfp_compress` requires that memory has already been allocated to +hold the compressed data. But often the compressed size is data dependent +and not known a priori. The function :c:func:`zfp_stream_maximum_size` +returns a buffer size that is guaranteed to be large enough. This function, +which should be called *after* setting the desired compression mode and +parameters, computes the largest possible compressed data size based on the +current compression settings and array size. Note that by the pigeonhole +principle, any (lossless) compressor must expand at least one input, so this +buffer size may be larger than the size of the uncompressed input data. +:c:func:`zfp_compress` returns the actual number of bytes of compressed +storage. + +When compressing individual blocks using the :ref:`low-level API `, +it is useful to know the maximum number of bits that a compressed block +can occupy. In addition to the :c:macro:`ZFP_MAX_BITS` macro, the following +table lists the maximum block size (in bits) for each scalar type, whether +:ref:`reversible mode ` is used, and block dimensionality. + + +--------+---------+-------+-------+-------+-------+ + | type | rev. 
| 1D    | 2D    | 3D    | 4D    |
+  +========+=========+=======+=======+=======+=======+
+  |        |         |   131 |   527 |  2111 |  8447 |
+  | int32  +---------+-------+-------+-------+-------+
+  |        | |check| |   136 |   532 |  2116 |  8452 |
+  +--------+---------+-------+-------+-------+-------+
+  |        |         |   140 |   536 |  2120 |  8456 |
+  | float  +---------+-------+-------+-------+-------+
+  |        | |check| |   146 |   542 |  2126 |  8462 |
+  +--------+---------+-------+-------+-------+-------+
+  |        |         |   259 |  1039 |  4159 | 16639 |
+  | int64  +---------+-------+-------+-------+-------+
+  |        | |check| |   265 |  1045 |  4165 | 16645 |
+  +--------+---------+-------+-------+-------+-------+
+  |        |         |   271 |  1051 |  4171 | 16651 |
+  | double +---------+-------+-------+-------+-------+
+  |        | |check| |   278 |  1058 |  4178 | 16658 |
+  +--------+---------+-------+-------+-------+-------+
+
+-------------------------------------------------------------------------------
+
+.. _q-printf:
+
+Q29: *How can I print array values?*
+
+Consider the following seemingly reasonable piece of code::
+
+  #include <cstdio>
+  #include "zfp/array1.hpp"
+
+  int main()
+  {
+    zfp::array1<double> a(100, 16.0);
+    printf("%f\n", a[0]); // does not compile
+    return 0;
+  }
+
+The compiler will complain about :code:`a[0]` being a non-POD object. This
+is because :code:`a[0]` is a :ref:`proxy reference ` object
+rather than a :code:`double`. To make this work, :code:`a[0]` must be
+explicitly converted to :code:`double`, e.g., using a cast::
+
+  printf("%f\n", (double)a[0]);
+
+For similar reasons, one may not use :code:`scanf` to initialize the value
+of :code:`a[0]` because :code:`&a[0]` is a :ref:`proxy pointer `
+object, not a :code:`double*`. Rather, one must use a temporary variable,
+e.g.
+::
+
+  double t;
+  scanf("%lf", &t);
+  a[0] = t;
+
+Note that using :code:`iostream`, expressions like
+::
+
+  std::cout << a[0] << std::endl;
+
+do work, but
+::
+
+  std::cin >> a[0];
+
+does not.
+
+-------------------------------------------------------------------------------
+
+.. 
_q-err-dist: + +Q30: *What is known about zfp compression errors?* + +A: Significant effort has been spent on characterizing compression errors +resulting from |zfp|, as detailed in the following publications: + +* P. Lindstrom, + "`Error Distributions of Lossy Floating-Point Compressors `__," + JSM 2017 Proceedings. +* J. Diffenderfer, A. Fox, J. Hittinger, G. Sanders, P. Lindstrom, + "`Error Analysis of ZFP Compression for Floating-Point Data `__," + SIAM Journal on Scientific Computing, 2019. +* D. Hammerling, A. Baker, A. Pinard, P. Lindstrom, + "`A Collaborative Effort to Improve Lossy Compression Methods for Climate Data `__," + 5th International Workshop on Data Analysis and Reduction for Big Scientific Data, 2019. +* A. Fox, J. Diffenderfer, J. Hittinger, G. Sanders, P. Lindstrom. + "`Stability Analysis of Inline ZFP Compression for Floating-Point Data in Iterative Methods `__," + SIAM Journal on Scientific Computing, 2020. + +In short, |zfp| compression errors are roughly normally distributed as a +consequence of the central limit theorem, and can be bounded. Because the +error distribution is normal and because the worst-case error is often much +larger than errors observed in practice, it is common that measured errors +are far smaller than the absolute error tolerance specified in +:ref:`fixed-accuracy mode ` +(see :ref:`Q22 `). + +It is known that |zfp| errors can be slightly biased and correlated (see +:numref:`zfp-rounding` and the third paper above). Recent work has been +done to combat such issues by supporting optional +:ref:`rounding modes `. + +.. _zfp-rounding: +.. figure:: zfp-rounding.pdf + :figwidth: 90 % + :align: center + :alt: "zfp rounding modes" + + |zfp| errors are normally distributed. This figure illustrates the + agreement between theoretical (lines) and observed (dots) error + distributions (*X*, *Y*, *Z*, *W*) for 1D blocks. 
Without proper rounding + (left), errors are biased and depend on the relative location within a |zfp| + block, resulting in errors not centered on zero. With proper rounding + (right), errors are both smaller and unbiased. diff --git a/docs/source/high-level-api.rst b/docs/source/high-level-api.rst new file mode 100644 index 00000000..7819bb41 --- /dev/null +++ b/docs/source/high-level-api.rst @@ -0,0 +1,1047 @@ +.. include:: defs.rst + +.. _hl-api: + +High-Level C API +================ + +The |libzfp| C API provides functionality for sequentially compressing and +decompressing whole integer and floating-point arrays or single blocks. It +is broken down into a :ref:`high-level API ` and a +:ref:`low-level API `. The high-level API handles compression of +entire arrays and supports a variety of back-ends (e.g., serial, OpenMP). +The low-level API exists for processing individual, possibly partial blocks +as well as reduced-precision integer data less than 32 bits wide. +Both C APIs are declared in :file:`zfp.h`. + +The following sections are available: + +* :ref:`hl-macros` +* :ref:`hl-types` +* :ref:`hl-data` +* :ref:`hl-functions` + + * :ref:`hl-func-bitstream` + * :ref:`hl-func-stream` + * :ref:`hl-func-exec` + * :ref:`hl-func-config` + * :ref:`hl-func-field` + * :ref:`hl-func-codec` + +.. _hl-macros: + +Macros +------ + +.. _version-id: + +.. c:macro:: ZFP_VERSION_MAJOR +.. c:macro:: ZFP_VERSION_MINOR +.. c:macro:: ZFP_VERSION_PATCH +.. c:macro:: ZFP_VERSION_TWEAK + + Macros identifying the |zfp| library version + (*major*.\ *minor*.\ *patch*.\ *tweak*). :c:macro:`ZFP_VERSION_TWEAK` + is new as of |zfp| |verrelease| and is used to mark intermediate develop + versions (unofficial releases). + +---- + +.. c:macro:: ZFP_VERSION_DEVELOP + + Macro signifying that the current version is an intermediate version that + differs from the last official release. This macro is undefined for + official releases; when defined, its value equals 1. 
Note that this + macro may be defined even if the four :ref:`version identifiers ` + have not changed. Available as of |zfp| |verrelease|. + +---- + +.. c:macro:: ZFP_VERSION + + A single integer constructed from the four + :ref:`version identifiers `. This integer can be generated by + :c:macro:`ZFP_MAKE_VERSION` or :c:macro:`ZFP_MAKE_FULLVERSION`. Its value + equals the global constant :c:data:`zfp_library_version`. + +.. note:: + Although :c:macro:`ZFP_VERSION` increases monotonically with release date + and with the four :ref:`version identifiers ` it depends on, + the mapping to :c:macro:`ZFP_VERSION` changed with the introduction of + :c:macro:`ZFP_VERSION_TWEAK` in |zfp| |verrelease|. + + Going forward, we recommend using :c:macro:`ZFP_MAKE_VERSION` or + :c:macro:`ZFP_MAKE_FULLVERSION` in conditional code that depends on + :c:macro:`ZFP_VERSION`, e.g., + :code:`#if ZFP_VERSION >= ZFP_MAKE_VERSION(1, 0, 0)`. + Note that such constructions should not be used with older versions of + |zfp|, e.g., :code:`if (zfp_library_version == ZFP_MAKE_VERSION(0, 5, 5))` + will not give the expected result with binary versions of |libzfp| before + version |verrelease|. + +---- + +.. c:macro:: ZFP_VERSION_STRING + + :c:macro:`ZFP_VERSION_STRING` is a string literal composed of the four + :ref:`version identifiers `. It is a component of + :c:data:`zfp_version_string`. + +---- + +.. c:macro:: ZFP_MAKE_VERSION(major, minor, patch) +.. c:macro:: ZFP_MAKE_VERSION_STRING(major, minor, patch) + + Utility macros for constructing :c:macro:`ZFP_VERSION` and + :c:macro:`ZFP_VERSION_STRING`, respectively. Available as of + |zfp| |verrelease|, these macros may be used by applications to test + for a certain |zfp| version number, e.g., + :code:`#if ZFP_VERSION >= ZFP_MAKE_VERSION(1, 0, 0)`. + +---- + +.. c:macro:: ZFP_MAKE_FULLVERSION(major, minor, patch, tweak) +.. 
c:macro:: ZFP_MAKE_FULLVERSION_STRING(major, minor, patch, tweak) + + Utility macros for constructing :c:macro:`ZFP_VERSION` and + :c:macro:`ZFP_VERSION_STRING`, respectively. Includes tweak version + used by intermediate develop versions. Available as of + |zfp| |verrelease|, these macros may be used by applications to test + for a certain |zfp| version number, e.g., + :code:`#if ZFP_VERSION >= ZFP_MAKE_FULLVERSION(1, 0, 0, 2)`. + +---- + +.. c:macro:: ZFP_CODEC + + Macro identifying the version of the compression CODEC. See also + :c:data:`zfp_codec_version`. + +---- + +.. c:macro:: ZFP_MIN_BITS +.. c:macro:: ZFP_MAX_BITS +.. c:macro:: ZFP_MAX_PREC +.. c:macro:: ZFP_MIN_EXP + + Default compression parameter settings that impose no constraints. + The largest possible compressed block size, corresponding to 4D blocks + of doubles, is given by :c:macro:`ZFP_MAX_BITS`. See also + :c:type:`zfp_stream`. + +---- + +.. c:macro:: ZFP_META_NULL + + Null representation of the 52-bit encoding of field metadata. This value + is returned by :c:func:`zfp_field_metadata` when the field metadata cannot + be encoded in 64 bits, such as when the array dimensions are too large + (see :ref:`limitations`). In addition to signaling error, this value + is guaranteed not to represent valid metadata. + +---- + +.. _header-macros: + +The :code:`ZFP_HEADER` bit mask specifies which portions of a header to output +(if any). The constants below should be bitwise ORed together. Use +:c:macro:`ZFP_HEADER_FULL` to output all header information available. The +compressor and decompressor must agree on which parts of the header to +read/write. See :c:func:`zfp_read_header` and :c:func:`zfp_write_header` +for how to read and write header information. + +.. c:macro:: ZFP_HEADER_MAGIC + + Magic constant that identifies the data as a |zfp| stream compressed using + a particular CODEC version. + +.. 
c:macro:: ZFP_HEADER_META + + Array size and scalar type information stored in the :c:type:`zfp_field` + struct. + +.. c:macro:: ZFP_HEADER_MODE + + Compression mode and parameters stored in the :c:type:`zfp_stream` struct. + +.. c:macro:: ZFP_HEADER_FULL + + Full header information (bitwise OR of all :code:`ZFP_HEADER` constants). + +---- + +.. c:macro:: ZFP_MAGIC_BITS +.. c:macro:: ZFP_META_BITS +.. c:macro:: ZFP_MODE_SHORT_BITS +.. c:macro:: ZFP_MODE_LONG_BITS +.. c:macro:: ZFP_HEADER_MAX_BITS +.. c:macro:: ZFP_MODE_SHORT_MAX + + Number of bits used by each portion of the header. These macros are + primarily informational and should not be accessed by the user through + the high-level API. For most common compression parameter settings, + only :c:macro:`ZFP_MODE_SHORT_BITS` bits of header information are stored + to encode the mode (see :c:func:`zfp_stream_mode`). + +---- + +.. _data-macros: + +The :code:`ZFP_DATA` bit mask specifies which portions of array data +structures to compute total storage size for. These constants should be +bitwise ORed together. Use :c:macro:`ZFP_DATA_ALL` to count all storage used. + +.. c:macro:: ZFP_DATA_UNUSED + + Allocated but unused data. + +.. c:macro:: ZFP_DATA_PADDING + + Padding for alignment purposes. + +.. c:macro:: ZFP_DATA_META + + Class members and other fixed-size storage. + +.. c:macro:: ZFP_DATA_MISC + + Miscellaneous uncategorized storage. + +.. c:macro:: ZFP_DATA_PAYLOAD + + Compressed data encoding array elements. + +.. c:macro:: ZFP_DATA_INDEX + + Block :ref:`index ` information. + +.. c:macro:: ZFP_DATA_CACHE + + Uncompressed :ref:`cached ` data. + +.. c:macro:: ZFP_DATA_HEADER + + :ref:`header` information. + +.. c:macro:: ZFP_DATA_ALL + + All storage (bitwise OR of all :code:`ZFP_DATA` constants). + +---- + +.. c:macro:: ZFP_ROUND_FIRST +.. c:macro:: ZFP_ROUND_NEVER +.. 
c:macro:: ZFP_ROUND_LAST + + Available rounding modes for :c:macro:`ZFP_ROUNDING_MODE`, which + specifies at build time how |zfp| performs rounding in lossy compression + mode. + + +.. _hl-types: + +Types +----- + +.. c:type:: zfp_stream + + The :c:type:`zfp_stream` struct encapsulates all information about the + compressed stream for a single block or a collection of blocks that + represent an array. See the section on :ref:`compression modes ` + for a description of the members of this struct. + :: + + typedef struct { + uint minbits; // minimum number of bits to store per block + uint maxbits; // maximum number of bits to store per block + uint maxprec; // maximum number of bit planes to store + int minexp; // minimum floating point bit plane number to store + bitstream* stream; // compressed bit stream + zfp_execution exec; // execution policy and parameters + } zfp_stream; + +---- + +.. c:type:: zfp_execution + + The :c:type:`zfp_stream` also stores information about how to execute + compression, e.g., sequentially or in parallel. The execution is determined + by the policy and any policy-specific parameters such as number of + threads. + :: + + typedef struct { + zfp_exec_policy policy; // execution policy (serial, omp, cuda, ...) + void* params; // execution parameters + } zfp_execution; + +.. warning:: + As of |zfp| |verrelease| :c:type:`zfp_execution` replaces the former + :code:`zfp_exec_params` with a :code:`void*` to the associated + :code:`zfp_exec_params` type (e.g., :c:type:`zfp_exec_params_omp`) to + limit ABI-breaking changes due to future extensions to |zfp| execution + policies. + +---- + +.. c:type:: zfp_exec_policy + + Currently three execution policies are available: serial, OpenMP parallel, + and CUDA parallel. + :: + + typedef enum { + zfp_exec_serial = 0, // serial execution (default) + zfp_exec_omp = 1, // OpenMP multi-threaded execution + zfp_exec_cuda = 2 // CUDA parallel execution + } zfp_exec_policy; + +---- + +.. 
c:type:: zfp_exec_params_omp + + Execution parameters for OpenMP parallel compression. These are + initialized to default values. When nonzero, they indicate the number + of threads to request for parallel compression and the number of + consecutive blocks to assign to each thread. + :: + + typedef struct { + uint threads; // number of requested threads + uint chunk_size; // number of blocks per chunk + } zfp_exec_params_omp; + +---- + +.. _mode_struct: +.. c:type:: zfp_mode + + Enumerates the compression modes. + :: + + typedef enum { + zfp_mode_null = 0, // an invalid configuration of the 4 params + zfp_mode_expert = 1, // expert mode (4 params set manually) + zfp_mode_fixed_rate = 2, // fixed rate mode + zfp_mode_fixed_precision = 3, // fixed precision mode + zfp_mode_fixed_accuracy = 4, // fixed accuracy mode + zfp_mode_reversible = 5 // reversible (lossless) mode + } zfp_mode; + +---- + +.. _config_struct: +.. c:type:: zfp_config + + Encapsulates compression mode and parameters (if any). + :: + + typedef struct { + zfp_mode mode; // compression mode + union { + double rate; // compressed bits/value (negative for word alignment) + uint precision; // uncompressed bits/value + double tolerance; // absolute error tolerance + struct { + uint minbits; // min number of compressed bits/block + uint maxbits; // max number of compressed bits/block + uint maxprec; // max number of uncompressed bits/value + int minexp; // min floating point bit plane number to store + } expert; // expert mode arguments + } arg; // arguments corresponding to compression mode + } zfp_config; + +---- + +.. c:type:: zfp_type + + Enumerates the scalar types supported by the compressor and describes the + uncompressed array. The compressor and decompressor must use the same + :c:type:`zfp_type`, e.g., one cannot compress doubles and decompress to + floats or integers. 
+ :: + + typedef enum { + zfp_type_none = 0, // unspecified type + zfp_type_int32 = 1, // 32-bit signed integer + zfp_type_int64 = 2, // 64-bit signed integer + zfp_type_float = 3, // single precision floating point + zfp_type_double = 4 // double precision floating point + } zfp_type; + +---- + +.. _field: +.. index:: + single: Strided Arrays +.. c:type:: zfp_field + + The uncompressed array is described by the :c:type:`zfp_field` struct, which + encodes the array's scalar type, dimensions, and memory layout. + :: + + typedef struct { + zfp_type type; // scalar type (e.g., int32, double) + size_t nx, ny, nz, nw; // sizes (zero for unused dimensions) + ptrdiff_t sx, sy, sz, sw; // strides (zero for contiguous array a[nw][nz][ny][nx]) + void* data; // pointer to array data + } zfp_field; + + For example, a static multidimensional C array declared as + :: + + double array[n1][n2][n3][n4]; + + would be described by a :c:type:`zfp_field` with members + :: + + type = zfp_type_double; + nx = n4; ny = n3; nz = n2; nw = n1; + sx = 1; sy = n4; sz = n3 * n4; sw = n2 * n3 * n4; + data = &array[0][0][0][0]; + + The strides, when nonzero, specify how the array is laid out in memory. + Strides can be used in case multiple fields are stored interleaved via + "array of struct" (AoS) rather than "struct of array" (SoA) storage, + or if the dimensions should be transposed during (de)compression. + Strides may even be negative, allowing one or more dimensions to be + traversed in reverse order. Given 4D array indices (*x*, *y*, *z*, *w*), + the corresponding array element is stored at + :: + + data[x * sx + y * sy + z * sz + w * sw] + + where :code:`data` is a pointer to the first array element. + + +.. _new-field: +.. 
warning:: + The :c:type:`zfp_field` struct was modified in |zfp| |64bitrelease| to + use :code:`size_t` and :code:`ptrdiff_t` for array dimensions and + strides, respectively, to support 64-bit addressing of very large arrays + (previously, :code:`uint` and :code:`int` were used). This ABI + incompatible change may require rebuilding applications that use |zfp| + and may in some cases also require code changes to handle pointers to + :code:`size_t` instead of pointers to :code:`uint` (see + :c:func:`zfp_field_size`, for instance). + +.. _indexing: +.. index:: + single: C order + single: Fortran order +.. warning:: + It is paramount that the field dimensions, *nx*, *ny*, *nz*, and *nw*, + and strides, *sx*, *sy*, *sz*, and *sw*, be correctly mapped to how the + uncompressed array is laid out in memory. Although compression will + still succeed if array dimensions are accidentally transposed, compression + ratio and/or accuracy may suffer greatly. Since the leftmost index, *x*, + is assumed to vary fastest, |zfp| can be thought of as assuming + Fortran ordering. For C ordered arrays, the user should transpose + the dimensions or specify strides to properly describe the memory layout. + See :ref:`this FAQ ` for further details. + +.. c:type:: zfp_bool + + :c:type:`zfp_bool` is new as of |zfp| |boolrelease|. Although merely + an alias for :code:`int`, this type serves to document that a return + value or function parameter should be treated as Boolean. Two enumerated + constants are available:: + + enum { + zfp_false = 0, + zfp_true = !zfp_false + }; + + The reason why :c:type:`zfp_bool` is not an enumerated type itself is + that in C++ this would require an explicit cast between the :code:`bool` + type resulting from logical expressions, e.g., + :code:`zfp_bool done = static_cast<bool>(queue.empty() && work == 0)`. + Such casts from :code:`bool` to a non-enumerated :code:`int` are not + necessary. 
+ + The |zfp| |boolrelease| API has changed to use :c:type:`zfp_bool` in + place of :code:`int` where appropriate; this change should not affect + existing code. + +.. _hl-data: + +Constants +--------- + +.. c:var:: const uint zfp_codec_version + + The version of the compression CODEC implemented by this version of the |zfp| + library. The library can decompress files generated by the same CODEC only. + To ensure that the :file:`zfp.h` header matches the binary library linked to, + :c:data:`zfp_codec_version` should match :c:macro:`ZFP_CODEC`. + +---- + +.. c:var:: const uint zfp_library_version + + The library version. The binary library and headers are compatible if + :c:data:`zfp_library_version` matches :c:macro:`ZFP_VERSION`. + +---- + +.. c:var:: const char* const zfp_version_string + + A constant string representing the |zfp| library version and release date. + One can search for this string in executables and libraries that link to + |libzfp| when built as a static library. + +.. _hl-functions: + +Functions +--------- + + +.. c:function:: size_t zfp_type_size(zfp_type type) + + Return byte size of the given scalar type, e.g., + :code:`zfp_type_size(zfp_type_float) = 4`. + +.. _hl-func-bitstream: + +Compressed Stream +^^^^^^^^^^^^^^^^^ + +.. c:function:: zfp_stream* zfp_stream_open(bitstream* stream) + + Allocate compressed stream and associate it with bit stream for reading + and writing bits to/from memory. *stream* may be :c:macro:`NULL` and + attached later via :c:func:`zfp_stream_set_bit_stream`. + +---- + +.. c:function:: void zfp_stream_close(zfp_stream* stream) + + Close and deallocate compressed stream. This does not affect the + attached bit stream. + +---- + +.. c:function:: void zfp_stream_rewind(zfp_stream* stream) + + Rewind bit stream to beginning for compression or decompression. + +---- + +.. 
c:function:: bitstream* zfp_stream_bit_stream(const zfp_stream* stream) + + Return bit stream associated with compressed stream (see + :c:func:`zfp_stream_set_bit_stream`). + +---- + +.. c:function:: void zfp_stream_set_bit_stream(zfp_stream* stream, bitstream* bs) + + Associate bit stream with compressed stream. + +---- + +.. c:function:: size_t zfp_stream_compressed_size(const zfp_stream* stream) + + Number of bytes of compressed storage. This function returns the + current byte offset within the bit stream from the beginning of the + bit stream memory buffer. To ensure all buffered compressed data has + been output call :c:func:`zfp_stream_flush` first. + +---- + +.. c:function:: size_t zfp_stream_maximum_size(const zfp_stream* stream, const zfp_field* field) + + Conservative estimate of the compressed byte size for the compression + parameters stored in *stream* and the array whose scalar type and dimensions + are given by *field*. This function may be used to determine how large a + memory buffer to allocate to safely hold the entire compressed array. + The buffer may then be resized (using :code:`realloc()`) after the actual + number of bytes is known, as returned by :c:func:`zfp_compress`. + + +.. _hl-func-stream: + +Compression Parameters +^^^^^^^^^^^^^^^^^^^^^^ + +.. c:function:: zfp_mode zfp_stream_compression_mode(const zfp_stream* stream) + + Return compression mode associated with compression parameters. Return + :code:`zfp_mode_null` when compression parameters are invalid. + +---- + +.. c:function:: void zfp_stream_set_reversible(zfp_stream* stream) + + Enable :ref:`reversible ` (lossless) compression. + +---- + +.. c:function:: double zfp_stream_rate(const zfp_stream* stream, uint dims) + + Return rate in compressed bits per value if *stream* is in + :ref:`fixed-rate mode ` (see :c:func:`zfp_stream_set_rate`), + else zero. *dims* is the dimensionality of the compressed data. + +---- + +.. 
c:function:: double zfp_stream_set_rate(zfp_stream* stream, double rate, zfp_type type, uint dims, zfp_bool align) + + Set *rate* for :ref:`fixed-rate mode ` in compressed bits + per value. The target scalar *type* and array *dimensionality* are needed + to correctly translate the rate to the number of bits per block. The + Boolean *align* should be :code:`zfp_true` if + :ref:`word alignment ` is needed, e.g., to support random + access writes of blocks for |zfp|'s :ref:`compressed arrays `. + Such alignment may further constrain the rate. The closest supported rate + is returned, which may differ from the requested rate. + +---- + +.. c:function:: uint zfp_stream_precision(const zfp_stream* stream) + + Return precision in uncompressed bits per value if *stream* is in + :ref:`fixed-precision mode ` (see + :c:func:`zfp_stream_set_precision`), else zero. + +---- + +.. c:function:: uint zfp_stream_set_precision(zfp_stream* stream, uint precision) + + Set *precision* for :ref:`fixed-precision mode `. + The precision specifies how many uncompressed bits per value to store, + and indirectly governs the relative error. The actual precision is + returned, e.g., in case the desired precision is out of range. To + preserve a certain floating-point mantissa or integer precision in the + decompressed data, see FAQ :ref:`#21 `. + +---- + +.. c:function:: double zfp_stream_accuracy(const zfp_stream* stream) + + Return accuracy as an absolute error tolerance if *stream* is in + :ref:`fixed-accuracy mode ` (see + :c:func:`zfp_stream_set_accuracy`), else zero. + +---- + +.. c:function:: double zfp_stream_set_accuracy(zfp_stream* stream, double tolerance) + + Set absolute error *tolerance* for + :ref:`fixed-accuracy mode `. The tolerance ensures + that values in the decompressed array differ from the input array by no + more than this tolerance (in all but exceptional circumstances; see + FAQ :ref:`#17 `). 
This compression mode should be used only + with floating-point (not integer) data. + +---- + +.. c:function:: uint64 zfp_stream_mode(const zfp_stream* stream) + + Return compact encoding of compression parameters. If the return value + is no larger than :c:macro:`ZFP_MODE_SHORT_MAX`, then the least significant + :c:macro:`ZFP_MODE_SHORT_BITS` (12 in the current version) suffice to + encode the parameters. Otherwise all 64 bits are needed, and the low + :c:macro:`ZFP_MODE_SHORT_BITS` bits will be all ones. Thus, this + variable-length encoding can be used to economically encode and decode + the compression parameters, which is especially important if the parameters + are to vary spatially over small regions. Such spatially adaptive coding + would have to be implemented via the :ref:`low-level API `. + +---- + +.. c:function:: zfp_mode zfp_stream_set_mode(zfp_stream* stream, uint64 mode) + + Set all compression parameters from compact integer representation. + See :c:func:`zfp_stream_mode` for how to encode the parameters. Return + the mode associated with the newly-set compression parameters. If the + decoded compression parameters are invalid, they are not set and the + function returns :code:`zfp_mode_null`. + +---- + +.. c:function:: void zfp_stream_params(const zfp_stream* stream, uint* minbits, uint* maxbits, uint* maxprec, int* minexp) + + Query :ref:`compression parameters `. For any parameter not + needed, pass :c:macro:`NULL` for the corresponding pointer. + +---- + +.. c:function:: zfp_bool zfp_stream_set_params(zfp_stream* stream, uint minbits, uint maxbits, uint maxprec, int minexp) + + Set all compression parameters directly. See the section on + :ref:`expert mode ` for a discussion of the parameters. + The return value is :code:`zfp_true` upon success. + + +.. _hl-func-exec: + +Execution Policy +^^^^^^^^^^^^^^^^ + +.. c:function:: zfp_exec_policy zfp_stream_execution(const zfp_stream* stream) + + Return current :ref:`execution policy `. + +---- + +.. 
c:function:: uint zfp_stream_omp_threads(const zfp_stream* stream) + + Return number of OpenMP threads to request for compression. + See :c:func:`zfp_stream_set_omp_threads`. + +---- + +.. c:function:: uint zfp_stream_omp_chunk_size(const zfp_stream* stream) + + Return number of blocks to compress together per OpenMP thread. + See :c:func:`zfp_stream_set_omp_chunk_size`. + +---- + +.. c:function:: zfp_bool zfp_stream_set_execution(zfp_stream* stream, zfp_exec_policy policy) + + Set :ref:`execution policy `. If different from the previous + policy, initialize the execution parameters to their default values. + :code:`zfp_true` is returned if the execution policy is supported. + +---- + +.. c:function:: zfp_bool zfp_stream_set_omp_threads(zfp_stream* stream, uint threads) + + Set the number of OpenMP threads to use during compression. If *threads* + is zero, then the number of threads is given by the value of the OpenMP + *nthreads-var* internal control variable when :c:func:`zfp_compress` is + called (usually the maximum number available). This function also sets + the execution policy to OpenMP. Upon success, :code:`zfp_true` is returned. + +---- + +.. c:function:: zfp_bool zfp_stream_set_omp_chunk_size(zfp_stream* stream, uint chunk_size) + + Set the number of consecutive blocks to compress together per OpenMP thread. + If zero, use one chunk per thread. This function also sets the execution + policy to OpenMP. Upon success, :code:`zfp_true` is returned. + + +.. _hl-func-config: + +Compression Configuration +^^^^^^^^^^^^^^^^^^^^^^^^^ + +These functions encode a desired compression mode and associated parameters +(if any) in a single struct, e.g., for configuring |zfp|'s +:ref:`read-only array classes `. + +.. c:function:: zfp_config zfp_config_none() + + Unspecified configuration. + +---- + +.. c:function:: zfp_config zfp_config_rate(double rate, zfp_bool align) + + :ref:`Fixed-rate mode ` using *rate* compressed bits per + value. 
When *align* is true, word alignment is enforced to further + constrain the rate (see :c:func:`zfp_stream_set_rate`). + +---- + +.. c:function:: zfp_config zfp_config_precision(uint precision) + + :ref:`Fixed-precision mode ` using *precision* + uncompressed bits per value (see also :c:func:`zfp_stream_set_precision`). + +---- + +.. c:function:: zfp_config zfp_config_accuracy(double tolerance) + + :ref:`Fixed-accuracy mode ` with absolute error no + larger than *tolerance* (see also :c:func:`zfp_stream_set_accuracy`). + +---- + +.. c:function:: zfp_config zfp_config_reversible() + + :ref:`Reversible (lossless) mode ` (see also + :c:func:`zfp_stream_set_reversible`). + +---- + +.. c:function:: zfp_config zfp_config_expert(uint minbits, uint maxbits, uint maxprec, int minexp) + + :ref:`Expert mode ` with given parameters (see also + :c:func:`zfp_stream_set_params`). + + +.. _hl-func-field: + +Array Metadata +^^^^^^^^^^^^^^ + +.. c:function:: zfp_field* zfp_field_alloc() + + Allocates and returns a default initialized :c:type:`zfp_field` struct. + The caller must free this struct using :c:func:`zfp_field_free`. + +---- + +.. c:function:: zfp_field* zfp_field_1d(void* pointer, zfp_type type, size_t nx) + + Allocate and return a field struct that describes an existing 1D array, + :code:`a[nx]`, of *nx* uncompressed scalars of given *type* stored at + *pointer*, which may be :c:macro:`NULL` and specified later. + +---- + +.. c:function:: zfp_field* zfp_field_2d(void* pointer, zfp_type type, size_t nx, size_t ny) + + Allocate and return a field struct that describes an existing 2D array, + :code:`a[ny][nx]`, of *nx* |times| *ny* uncompressed scalars of given + *type* stored at *pointer*, which may be :c:macro:`NULL` and specified + later. + +---- + +.. 
c:function:: zfp_field* zfp_field_3d(void* pointer, zfp_type type, size_t nx, size_t ny, size_t nz) + + Allocate and return a field struct that describes an existing 3D array, + :code:`a[nz][ny][nx]`, of *nx* |times| *ny* |times| *nz* uncompressed + scalars of given *type* stored at *pointer*, which may be :c:macro:`NULL` + and specified later. + +---- + +.. c:function:: zfp_field* zfp_field_4d(void* pointer, zfp_type type, size_t nx, size_t ny, size_t nz, size_t nw) + + Allocate and return a field struct that describes an existing 4D array, + :code:`a[nw][nz][ny][nx]`, of *nx* |times| *ny* |times| *nz* |times| *nw* + uncompressed scalars of given *type* stored at *pointer*, which may be + :c:macro:`NULL` and specified later. + +---- + +.. c:function:: void zfp_field_free(zfp_field* field) + + Free :c:type:`zfp_field` struct previously allocated by one of the functions + above. + +---- + +.. c:function:: void* zfp_field_pointer(const zfp_field* field) + + Return pointer to the first scalar in the field with index + *x* = *y* = *z* = *w* = 0. + +---- + +.. c:function:: void* zfp_field_begin(const zfp_field* field) + + Return pointer to the lowest memory address occupied by the field. + Equals :c:func:`zfp_field_pointer` if all strides are positive. + Available since |zfp| |fieldrelease|. + +---- + +.. c:function:: zfp_type zfp_field_type(const zfp_field* field) + + Return array scalar type. + +---- + +.. c:function:: uint zfp_field_precision(const zfp_field* field) + + Return scalar precision in number of bits, e.g., 32 for + :code:`zfp_type_float`. + +---- + +.. c:function:: uint zfp_field_dimensionality(const zfp_field* field) + + Return array dimensionality (1, 2, 3, or 4). + +---- + +.. c:function:: size_t zfp_field_size(const zfp_field* field, size_t* size) + + Return total number of scalars stored in the array, e.g., + *nx* |times| *ny* |times| *nz* for a 3D array. 
If *size* is not + :c:macro:`NULL`, then store the number of scalars for each dimension, + e.g., :code:`size[0] = nx; size[1] = ny; size[2] = nz` for a 3D array. + +---- + +.. c:function:: size_t zfp_field_size_bytes(const zfp_field* field) + + Return number of bytes spanned by the field payload data. This includes + gaps in memory in case the field layout, as given by the strides, is not + contiguous (see :c:func:`zfp_field_is_contiguous`). Available since + |zfp| |fieldrelease|. + +---- + +.. c:function:: size_t zfp_field_blocks(const zfp_field* field) + + Return total number of *d*-dimensional blocks (whether partial or whole) + spanning the array. Each whole block consists of |4powd| scalars. + Available since |zfp| |fieldrelease|. + +---- + +.. c:function:: zfp_bool zfp_field_stride(const zfp_field* field, ptrdiff_t* stride) + + Return :code:`zfp_false` if the array is stored contiguously as + :code:`a[nx]`, :code:`a[ny][nx]`, :code:`a[nz][ny][nx]`, or + :code:`a[nw][nz][ny][nx]` depending on dimensionality. Return + :code:`zfp_true` if the array is strided and laid out differently in memory. + If *stride* is not :c:macro:`NULL`, then store the stride for each + dimension, e.g., :code:`stride[0] = sx; stride[1] = sy; stride[2] = sz;` + for a 3D array. See :c:type:`zfp_field` for more information on strides. + +---- + +.. c:function:: zfp_bool zfp_field_is_contiguous(const zfp_field* field) + + Return true if the field occupies a contiguous portion of memory. Note + that the field layout may be contiguous even if a raster order traversal + does not visit memory in a monotonically increasing or decreasing order, + e.g., if the layout is simply a permutation of the default layout. + Available since |zfp| |fieldrelease|. 
+ +---- + +.. c:function:: uint64 zfp_field_metadata(const zfp_field* field) + + Return 52-bit compact encoding of the scalar type and array dimensions. + This function returns :c:macro:`ZFP_META_NULL` on failure, e.g., if the + array dimensions are :ref:`too large ` to be encoded in 52 + bits. + +---- + +.. _zfp_field_set: + +.. c:function:: void zfp_field_set_pointer(zfp_field* field, void* pointer) + + Set pointer to first scalar in the array. + +---- + +.. c:function:: zfp_type zfp_field_set_type(zfp_field* field, zfp_type type) + + Set array scalar type. + +---- + +.. c:function:: void zfp_field_set_size_1d(zfp_field* field, size_t nx) + + Specify dimensions of 1D array :code:`a[nx]`. + +---- + +.. c:function:: void zfp_field_set_size_2d(zfp_field* field, size_t nx, size_t ny) + + Specify dimensions of 2D array :code:`a[ny][nx]`. + +---- + +.. c:function:: void zfp_field_set_size_3d(zfp_field* field, size_t nx, size_t ny, size_t nz) + + Specify dimensions of 3D array :code:`a[nz][ny][nx]`. + +---- + +.. c:function:: void zfp_field_set_size_4d(zfp_field* field, size_t nx, size_t ny, size_t nz, size_t nw) + + Specify dimensions of 4D array :code:`a[nw][nz][ny][nx]`. + +---- + +.. c:function:: void zfp_field_set_stride_1d(zfp_field* field, ptrdiff_t sx) + + Specify stride for 1D array: :code:`sx = &a[1] - &a[0]`. + +---- + +.. c:function:: void zfp_field_set_stride_2d(zfp_field* field, ptrdiff_t sx, ptrdiff_t sy) + + Specify strides for 2D array: + :code:`sx = &a[0][1] - &a[0][0]; sy = &a[1][0] - &a[0][0]`. + +---- + +.. c:function:: void zfp_field_set_stride_3d(zfp_field* field, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz) + + Specify strides for 3D array: + :code:`sx = &a[0][0][1] - &a[0][0][0]; sy = &a[0][1][0] - &a[0][0][0]; sz = &a[1][0][0] - &a[0][0][0]`. + +---- + +.. 
c:function:: void zfp_field_set_stride_4d(zfp_field* field, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw) + + Specify strides for 4D array: + :code:`sx = &a[0][0][0][1] - &a[0][0][0][0]; + sy = &a[0][0][1][0] - &a[0][0][0][0]; + sz = &a[0][1][0][0] - &a[0][0][0][0]; + sw = &a[1][0][0][0] - &a[0][0][0][0]`. + +---- + +.. c:function:: zfp_bool zfp_field_set_metadata(zfp_field* field, uint64 meta) + + Specify array scalar type and dimensions from compact 52-bit representation. + Return :code:`zfp_true` upon success. See :c:func:`zfp_field_metadata` for + how to encode *meta*. + + +.. _hl-func-codec: + +Compression and Decompression +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. c:function:: size_t zfp_compress(zfp_stream* stream, const zfp_field* field) + + Compress the whole array described by *field* using parameters given by + *stream*. Then flush the stream to emit any buffered bits and align the + stream on a word boundary. The resulting byte offset within the bit stream + is returned, which equals the total number of bytes of compressed storage + if the stream was rewound before the :c:func:`zfp_compress` call. Zero is + returned if compression failed. + +---- + +.. c:function:: size_t zfp_decompress(zfp_stream* stream, zfp_field* field) + + Decompress from *stream* to array described by *field* and align the stream + on the next word boundary. Upon success, the nonzero return value is the + same as would be returned by a corresponding :c:func:`zfp_compress` call, + i.e., the current byte offset or the number of compressed bytes consumed. + Zero is returned if decompression failed. + +---- + +.. _zfp-header: +.. c:function:: size_t zfp_write_header(zfp_stream* stream, const zfp_field* field, uint mask) + + Write an optional variable-length header to the stream that encodes + compression parameters, array metadata, etc. The header information written + is determined by the bit *mask* (see :c:macro:`macros `). 
+ Unlike in :c:func:`zfp_compress`, no word alignment is enforced. See the + :ref:`limitations ` section for limits on the maximum array + size supported by the header. The return value is the number of bits + written, or zero upon failure. + +---- + +.. c:function:: size_t zfp_read_header(zfp_stream* stream, zfp_field* field, uint mask) + + Read header if one was previously written using :c:func:`zfp_write_header`. + The *stream* and *field* data structures are populated with the information + stored in the header, as specified by the bit *mask* (see + :c:macro:`macros `). The caller must ensure that *mask* + agrees between header read and write calls. The return value is the number + of bits read, or zero upon failure. diff --git a/docs/source/index.inc b/docs/source/index.inc new file mode 100644 index 00000000..e6e2232f --- /dev/null +++ b/docs/source/index.inc @@ -0,0 +1,157 @@ +.. index:: + single: Index +.. _index: + +Index +----- + +.. cpp:namespace:: zfp + +To support random access, |zfp| arrays must know where each block is stored +in memory. For fixed-rate arrays, the number of compressed bits per block +is constant, and the bit offset to each block can be quickly computed. For +variable-rate arrays, the compressed block size is data dependent, and +additional information must be stored to index the blocks. Toward this end, +|zfp| arrays make use of an index class that reports the offset and +size (in number of bits) of each block. The :cpp:class:`zfp::array` +and :cpp:class:`zfp::const_array` classes take such an index class as a +template parameter. This index class is new as of |zfp| |carrrelease|, which +introduced variable-rate arrays. + +Because |zfp| is designed primarily for very large arrays, the bit offset +may exceed 32 bits. A straightforward implementation stores the bit +offset to each block as a 64-bit integer, with the block size given by +the difference of consecutive offsets. 
However, this overhead of +64 bits/block may exceed the size of the compressed payload itself for low-dimensional +arrays or in applications like visualization that may store less than one +bit per value (amortized). It is therefore important to consider more +compact representations of the block index. + +|zfp| provides multiple index classes in the :code:`zfp::index` namespace +that balance storage size, range of representable block offsets and sizes, +and speed of access: + +.. cpp:namespace:: zfp::index + +* :cpp:class:`implicit`: Used for fixed-rate storage where only the + fixed number of bits per block is kept. This is the default index for + fixed-rate arrays. + +* :cpp:class:`verbatim`: This and subsequent classes support variable-rate + storage. A full 64-bit offset is stored per block. + +* :cpp:class:`hybrid4`: Four consecutive offsets are encoded together. + The top 32 bits of a 44-bit base offset are stored, with the 12 least + significant bits of this base set to zero. Four unsigned 16-bit deltas + from the base offset complete the representation. The default for + variable-rate arrays, this index offers a good tradeoff between storage, + offset range, and speed. + +* :cpp:class:`hybrid8`: Eight consecutive offsets are encoded together + as two 64-bit words that store the offset to the first block (the base + offset) and the sizes of the first seven blocks, from which the eight + offsets are derived as a prefix sum. One 64-bit word holds the 8 least + significant bits of the base offset and block sizes. The other word + holds another 2 (*d* - 1) bits for the seven block sizes plus the top + 78 - 14 *d* bits of the base offset, where 1 |leq| *d* |leq| 4 is the + data dimensionality. + +Properties of these index classes are summarized in :numref:`index-classes`. + +.. _index-classes: +.. table:: Properties of index classes. Storage is measured in amortized + bits/block; offset and size denote supported ranges + in number of bits.
+ + +-------------+----------+---------+-------------+-----------+--------+ + | index class | variable | storage | offset | size | speed | + | | rate | | | | | + +=============+==========+=========+=============+===========+========+ + | implicit | | 0 | 64 | 64 | high | + +-------------+----------+---------+-------------+-----------+--------+ + | verbatim | |check| | 64 | 64 | 64 | high | + +-------------+----------+---------+-------------+-----------+--------+ + | hybrid4 | |check| | 24 | 44 | 16 | medium | + +-------------+----------+---------+-------------+-----------+--------+ + | hybrid8 | |check| | 16 | 86 - 14 *d* | 6 + 2 *d* | low | + +-------------+----------+---------+-------------+-----------+--------+ + +This section documents the API that prospective block indices must support to +interface with the |zfp| compressed-array classes. + +.. cpp:class:: index + + Fictitious class encapsulating the index API. + +---- + +.. cpp:function:: index::index(size_t blocks) + + Construct index supporting the given number of *blocks*. + +---- + +.. cpp:function:: size_t index::size_bytes(uint mask = ZFP_DATA_ALL) const + +---- + +.. cpp:function:: bitstream_size index::range() const + + Range of bit offsets spanned by index. This equals the total number of + bits of compressed-array data. + +---- + +.. cpp:function:: size_t index::block_size(size_t block_index) const + + Size of compressed block in number of bits. + +---- + +.. cpp:function:: bitstream_offset index::block_offset(size_t block_index) const + + Bit offset to compressed block data. + +---- + +.. cpp:function:: void resize(size_t blocks) + + Resize index to accommodate requested number of blocks. Any stored + index data is destroyed. + +---- + +.. cpp:function:: void clear() + + Clear all data stored by index. + +---- + +.. cpp:function:: void flush() + + Flush any buffered index data. This method is called after all blocks + have been compressed, e.g., in :cpp:func:`array::set`. + +---- + +.. 
cpp:function:: void set_block_size(size_t size) + + Set a fixed compressed block size in number of bits for all blocks. This + method is called when fixed-rate mode is selected. + +---- + +.. cpp:function:: void set_block_size(size_t block_index, size_t size) + + Set compressed block size in number of bits for a single block. For + variable-rate arrays, the zero-based *block_index* is guaranteed to + increase sequentially between calls. This method throws an exception + if the index cannot support the block size or offset. The user may + wish to restrict the block size, e.g., by setting :code:`maxbits` in + :ref:`expert mode `, to guard against such overflow. + +---- + +.. cpp:function:: static bool has_variable_rate() + + Return true if index supports variable-sized blocks. diff --git a/docs/source/index.rst b/docs/source/index.rst new file mode 100644 index 00000000..0a423ea5 --- /dev/null +++ b/docs/source/index.rst @@ -0,0 +1,32 @@ +.. include:: defs.rst + +|zfp| |release| documentation +============================= + +.. toctree:: + :maxdepth: 2 + :caption: Contents + + introduction + license + installation + algorithm + modes + execution + high-level-api + low-level-api + bit-stream + python + zforp + arrays + cfp + tutorial + zfpcmd + examples + testing + faq + issues + limitations + directions + contributors + versions diff --git a/docs/source/installation.rst b/docs/source/installation.rst new file mode 100644 index 00000000..9009fa89 --- /dev/null +++ b/docs/source/installation.rst @@ -0,0 +1,440 @@ +.. include:: defs.rst + +.. _installation: + +Installation +============ + +|zfp| consists of four distinct parts: a compression library written in C, +a set of C++ header files that implement compressed arrays and corresponding +C wrappers, optional Python and Fortran bindings, and a set of C and C++ +examples and utilities. The main compression codec is written in C and +should conform to both the ISO C89 and C99 standards. 
The C++ array classes +are implemented entirely in header files and can be included as is, but since +they call the compression library, applications must link with |libzfp|. + +|zfp| is preferably built using `CMake `__, although the +core library can also be built using GNU make on Linux, macOS, and MinGW. + +|zfp| conforms to various language standards, including C89, C99, C++98, +C++11, and C++14. + +.. note:: + |zfp| requires compiler support for 64-bit integers. + +.. _cmake_builds: + +CMake Builds +------------ + +To build |zfp| using `CMake `__ on Linux or macOS, start +a Unix shell and type:: + + cd zfp-1.0.1 + mkdir build + cd build + cmake .. + make + +To also build the examples, replace the cmake line with:: + + cmake -DBUILD_EXAMPLES=ON .. + +By default, CMake builds will attempt to locate and use +`OpenMP `__. To disable OpenMP, type:: + + cmake -DZFP_WITH_OPENMP=OFF .. + +To build |zfp| using Visual Studio on Windows, start a DOS shell +and type:: + + cd zfp-1.0.1 + mkdir build + cd build + cmake .. + cmake --build . --config Release + +This builds |zfp| in release mode. Replace 'Release' with 'Debug' to +build |zfp| in debug mode. See the instructions for Linux on how to +change the cmake line to also build the example programs. + + +.. _gnu_builds: + +GNU Builds +---------- + +To build |zfp| using `gcc `__ without +`OpenMP `__, type:: + + cd zfp-1.0.1 + gmake + +This builds |libzfp| as a static library as well as the |zfp| +command-line utility. To enable OpenMP parallel compression, type:: + + gmake ZFP_WITH_OPENMP=1 + +.. note:: + GNU builds expose only limited functionality of |zfp|. For instance, + CUDA and Python support are not included. For full functionality, + build |zfp| using CMake. + + +Testing +------- + +To test that |zfp| is working properly, type:: + + ctest + +or using GNU make:: + + gmake test + +If the GNU build or regression tests fail, it is possible that some of +the macros in the file :file:`Config` have to be adjusted. 
Also, the tests +may fail due to minute differences in the computed floating-point fields +being compressed, which will be indicated by checksum errors. If most +tests succeed and the failures result in byte sizes and error values +reasonably close to the expected values, then it is likely that the +compressor is working correctly. + + +.. index:: + single: Build Targets +.. _targets: + +Build Targets +------------- + +To specify which components to build, set the macros below to +:code:`ON` (CMake) or :code:`1` (GNU make), e.g., +:: + + cmake -DBUILD_UTILITIES=OFF -DBUILD_EXAMPLES=ON .. + +or using GNU make +:: + + gmake BUILD_UTILITIES=0 BUILD_EXAMPLES=1 + +Regardless of the settings below, |libzfp| will always be built. + + +.. c:macro:: BUILD_ALL + + Build all subdirectories; enable all options (except + :c:macro:`BUILD_SHARED_LIBS`). + Default: off. + + +.. c:macro:: BUILD_CFP + + Build |libcfp| for C bindings to the compressed-array classes. + Default: off. + + +.. c:macro:: BUILD_ZFPY + + Build |zfpy| for Python bindings to the C API. + + CMake will attempt to automatically detect the Python installation to use. + If CMake finds multiple Python installations, it will use the newest one. + To specify a specific Python installation to use, set + :c:macro:`PYTHON_LIBRARY` and :c:macro:`PYTHON_INCLUDE_DIR` on the + cmake line:: + + cmake -DBUILD_ZFPY=ON -DPYTHON_LIBRARY=/path/to/lib/libpython2.7.so -DPYTHON_INCLUDE_DIR=/path/to/include/python2.7 .. + + CMake default: off. + GNU make default: off and ignored. + + +.. c:macro:: BUILD_ZFORP + + Build |libzforp| for Fortran bindings to the C API. Requires Fortran + standard 2018 or later. GNU make users may specify the Fortran compiler + to use via + :: + + gmake BUILD_ZFORP=1 FC=/path/to/fortran-compiler + + Default: off. + + +.. c:macro:: BUILD_UTILITIES + + Build |zfpcmd| command-line utility for compressing binary files. + Default: on. + + +.. c:macro:: BUILD_EXAMPLES + + Build code examples. + Default: off. 
+ + +.. c:macro:: BUILD_TESTING + + Build |testzfp| tests. + Default: on. + + +.. c:macro:: BUILD_TESTING_FULL + + Build all unit tests. + Default: off. + + +.. c:macro:: BUILD_SHARED_LIBS + + Build shared objects (:file:`.so`, :file:`.dylib`, or :file:`.dll` files). + CMake default: on. + GNU make default: off. + +.. note:: + On macOS, add :code:`OS=mac` when building shared libraries with GNU make. + +.. index:: + single: Configuration +.. _config: + + +Configuration +------------- + +The behavior of |zfp| can be configured at compile time via a set of macros +in the same manner that :ref:`build targets ` are specified, e.g., +:: + + cmake -DZFP_WITH_OPENMP=OFF .. + + +.. c:macro:: ZFP_INT64 +.. c:macro:: ZFP_INT64_SUFFIX +.. c:macro:: ZFP_UINT64 +.. c:macro:: ZFP_UINT64_SUFFIX + + 64-bit signed and unsigned integer types and their literal suffixes. + Platforms on which :code:`long int` is 32 bits wide may require + :code:`long long int` as type and :code:`ll` as suffix. These macros + are relevant **only** when compiling in C89 mode. When compiling in + C99 mode, integer types are taken from :file:`stdint.h`. + Defaults: :code:`long int`, :code:`l`, :code:`unsigned long int`, and + :code:`ul`, respectively. + + +.. c:macro:: ZFP_WITH_OPENMP + + CMake and GNU make macro for enabling or disabling OpenMP support. CMake + builds will by default enable OpenMP when available. Set this macro to + 0 or OFF to disable OpenMP support. For GNU builds, OpenMP is disabled by + default. Set this macro to 1 or ON to enable OpenMP support. See also + OMPFLAGS in :file:`Config` in case the compiler does not recognize + ``-fopenmp``. For example, Apple clang requires + ``OMPFLAGS=-Xclang -fopenmp``, ``LDFLAGS=-lomp``, and an installation of + ``libomp``. + CMake default: on. + GNU make default: off. + + +.. c:macro:: ZFP_WITH_CUDA + + CMake macro for enabling or disabling CUDA support for + GPU compression and decompression. 
When enabled, CUDA and a compatible + host compiler must be installed. For a full list of compatible compilers, + please consult the + `NVIDIA documentation `__. + If a CUDA installation is in the user's path, it will be + automatically found by CMake. Alternatively, the CUDA binary directory + can be specified using the :envvar:`CUDA_BIN_DIR` environment variable. + CMake default: off. + GNU make default: off and ignored. + +.. _rounding: +.. c:macro:: ZFP_ROUNDING_MODE + + **Experimental feature**. By default, |zfp| coefficients are truncated, + not rounded, which can result in biased errors (see + FAQ :ref:`#30 `). To counter this, two rounding modes are + available: :code:`ZFP_ROUND_FIRST` (round during compression; analogous + to mid-tread quantization) and :code:`ZFP_ROUND_LAST` (round during + decompression; analogous to mid-riser quantization). With + :code:`ZFP_ROUND_LAST`, the values returned on decompression are slightly + modified (and usually closer to the original values) without impacting the + compressed data itself. This rounding mode works with all + :ref:`compression modes `. + With :code:`ZFP_ROUND_FIRST`, the values are modified before compression, + thus impacting the compressed stream. This rounding mode tends to be more + effective at reducing bias, but is invoked only with + :ref:`fixed-precision ` and + :ref:`fixed-accuracy ` compression modes. + Both of these rounding modes break the regression tests since they alter + the compressed or decompressed representation, but they may be used with + libraries built with the default rounding mode, :code:`ZFP_ROUND_NEVER`, + and versions of |zfp| that do not support a rounding mode with no adverse + effects. + Note: :c:macro:`ZFP_ROUNDING_MODE` is currently supported only by the + :code:`serial` and :code:`omp` :ref:`execution policies `. + Default: :code:`ZFP_ROUND_NEVER`. + +.. c:macro:: ZFP_WITH_TIGHT_ERROR + + **Experimental feature**. 
When enabled, this feature takes advantage of the + error reduction associated with proper rounding; see + :c:macro:`ZFP_ROUNDING_MODE`. The reduced error due to rounding + allows the tolerance in :ref:`fixed-accuracy mode ` + to be satisfied using fewer bits of compressed data. As a result, when + enabled, the observed maximum absolute error is closer to the tolerance and + the compression ratio is increased. This feature requires the rounding mode + to be :code:`ZFP_ROUND_FIRST` or :code:`ZFP_ROUND_LAST` and is supported + only by the :code:`serial` and :code:`omp` + :ref:`execution policies `. + Default: undefined/off. + +.. c:macro:: ZFP_WITH_DAZ + + When enabled, blocks consisting solely of subnormal floating-point numbers + (tiny numbers close to zero) are treated as blocks of all zeros + (DAZ = denormals-are-zero). The main purpose of this option is to avoid the + potential for floating-point overflow in the |zfp| implementation that may + occur in step 2 of the + :ref:`lossy compression algorithm ` when converting to + |zfp|'s block-floating-point representation (see + `Issue #119 `__). + Such overflow tends to be benign but loses all precision and usually + results in "random" subnormals upon decompression. When enabled, compressed + streams may differ slightly but are decompressed correctly by libraries + built without this option. This option may break some regression tests. + Note: :c:macro:`ZFP_WITH_DAZ` is currently ignored by all + :ref:`execution policies ` other than :code:`serial` and + :code:`omp`. + Default: undefined/off. + +.. c:macro:: ZFP_WITH_ALIGNED_ALLOC + + Use aligned memory allocation in an attempt to align compressed blocks + on hardware cache lines. + Default: undefined/off. + + +.. c:macro:: ZFP_WITH_CACHE_TWOWAY + + Use a two-way skew-associative rather than direct-mapped cache. This + incurs some overhead that may be offset by better cache utilization. + Default: undefined/off. + + +.. 
c:macro:: ZFP_WITH_CACHE_FAST_HASH + + Use a simpler hash function for cache line lookup. This is faster but may + lead to more collisions. + Default: undefined/off. + + +.. c:macro:: ZFP_WITH_CACHE_PROFILE + + Enable cache profiling to gather and print statistics on cache hit and miss + rates. + Default: undefined/off. + + +.. c:macro:: BIT_STREAM_WORD_TYPE + + Unsigned integer type used for buffering bits. Wider types tend to give + higher performance at the expense of lower + :ref:`bit rate granularity `. For portability of compressed + files between little and big endian platforms, + :c:macro:`BIT_STREAM_WORD_TYPE` should be set to :c:type:`uint8`. + Default: :c:type:`uint64`. + + +.. c:macro:: ZFP_BIT_STREAM_WORD_SIZE + + CMake macro for indirectly setting :c:macro:`BIT_STREAM_WORD_TYPE`. Valid + values are 8, 16, 32, 64. + Default: 64. + + +.. c:macro:: BIT_STREAM_STRIDED + + Enable support for strided bit streams that allow for non-contiguous memory + layouts, e.g., to enable progressive access. + Default: undefined/off. + + +.. c:macro:: CFP_NAMESPACE + + Macro for renaming the outermost |cfp| namespace, e.g., to avoid name + clashes. + Default: :code:`cfp`. + + +.. c:macro:: PYTHON_LIBRARY + + Path to the Python library, e.g., :file:`/usr/lib/libpython2.7.so`. + CMake default: undefined/off. + GNU make default: off and ignored. + + +.. c:macro:: PYTHON_INCLUDE_DIR + + Path to the Python include directory, e.g., :file:`/usr/include/python2.7`. + CMake default: undefined/off. + GNU make default: off and ignored. + + + +Dependencies +------------ + +The core |zfp| library and compressed arrays require only a C89 and C++98 +compiler. The optional components have additional dependencies, as outlined +in the sections below. + +CMake +^^^^^ + +CMake builds require version 3.9 or later. CMake is available +`here `__. + +OpenMP +^^^^^^ + +OpenMP support requires OpenMP 2.0 or later. 
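When |zfp| is built with OpenMP support, parallel compression is selected at run time through the execution-policy API. The following minimal sketch assumes a library built with :c:macro:`ZFP_WITH_OPENMP` enabled; on builds without OpenMP, the call simply fails and the serial policy remains in effect.

```c
#include "zfp.h"

/* Request OpenMP (de)compression on an open zfp stream. Returns nonzero
   on success; returns zfp_false if libzfp was built without OpenMP. */
static int enable_omp(zfp_stream* stream)
{
  if (!zfp_stream_set_execution(stream, zfp_exec_omp)) {
    /* OpenMP unavailable; the serial execution policy stays active */
    return 0;
  }
  return 1;
}
```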
+ +CUDA +^^^^ + +CUDA support requires CUDA 7.0 or later, CMake, and a compatible host +compiler (see :c:macro:`ZFP_WITH_CUDA`). + +C/C++ +^^^^^ + +The |zfp| C library and |cfp| C wrappers around the compressed-array +classes conform to the C90 standard +(`ISO/IEC 9899:1990 `__). +The C++ classes conform to the C++98 standard +(`ISO/IEC 14882:1998 `__). + +Python +^^^^^^ + +The optional Python bindings require CMake and the following minimum +versions: + +* Python: Python 2.7 & Python 3.5 +* Cython: 0.22 +* NumPy: 1.8.0 + +The necessary dependencies can be installed using ``pip`` and the |zfp| +:file:`requirements.txt`:: + + pip install -r $ZFP_ROOT/python/requirements.txt + +Fortran +^^^^^^^ + +The optional Fortran bindings require a Fortran 2018 compiler. diff --git a/docs/source/introduction.rst b/docs/source/introduction.rst new file mode 100644 index 00000000..2757eab0 --- /dev/null +++ b/docs/source/introduction.rst @@ -0,0 +1,216 @@ +.. include:: defs.rst +.. _introduction: + +Introduction +============ + +|zfp| is an open-source library for representing multidimensional numerical +arrays in compressed form to reduce storage and bandwidth requirements. +|zfp| consists of four main components: + +* An **efficient number format** for representing small, fixed-size *blocks* + of real values. The |zfp| format usually provides higher accuracy per bit + stored than conventional number formats like IEEE 754 floating point. + +* A set of :ref:`classes ` that implement storage and manipulation + of a **multidimensional array data type**. |zfp| arrays support high-speed + read and write random access to individual array elements and are a + drop-in replacement for :code:`std::vector` and native C/C++ arrays. + |zfp| arrays provide accessors like :ref:`proxy pointers `, + :ref:`iterators `, and :ref:`views `. |zfp| arrays + allow specifying an exact memory footprint or an error tolerance. 
+ +* A :ref:`C library ` for **streaming compression** of partial or + whole arrays of integers or floating-point numbers, e.g., for applications + that read and write large data sets to and from disk. This library + supports fast, parallel (de)compression via OpenMP and CUDA. + +* A **command-line executable** for :ref:`compressing binary files ` + of integer or floating-point arrays, e.g., as a substitute for + general-purpose compressors like :code:`gzip`. + +As a compressor, |zfp| is primarily *lossy*, meaning that the numerical +values are usually only approximately represented, though the user may +specify error tolerances to limit the amount of loss. Fully +:ref:`lossless compression `, where values are represented +exactly, is also supported. + +|zfp| is primarily written in C and C++ but also includes +:ref:`Python ` and :ref:`Fortran ` bindings. +|zfp| is being developed at +`Lawrence Livermore National Laboratory `__ +and is supported by the U.S. Department of Energy's +`Exascale Computing Project `__. +|zfp| is a +`2023 R&D 100 Award Winner `__. + + +Availability +------------ + +|zfp| is freely available as open source on +`GitHub `__ and is distributed under the terms +of a permissive three-clause :ref:`BSD license `. |zfp| may be +:ref:`installed ` using CMake or GNU Make. Installation from +source code is recommended for users who wish to configure the internals of +|zfp| and select which components (e.g., programming models, language +bindings) to install. + +|zfp| is also available through several package managers, including +Conda (both `C/C++ `__ and +`Python `__ packages are available), +`PIP `__, +`Spack `__, and +`MacPorts `__. +`Linux packages `__ are available +for several distributions and may be installed, for example, using :code:`apt` +and :code:`yum`. + + +.. 
_app-support: + +Application Support +------------------- + +|zfp| has been incorporated into several independently developed applications, +plugins, and formats, such as + +* `Compressed file I/O `__ + in `ADIOS `__. + +* `Compression codec `__ + in the `BLOSC `__ meta compressor. + +* `H5Z-ZFP `__ plugin for + `HDF5 `__\ |reg|. |zfp| is also one of the + select compressors shipped with + `HDF5 binaries `__. + +* `Compression functions `__ + for Intel\ |reg| `Integrated Performance Primitives `__. + +* `Compressed MPI messages `__ + in `MVAPICH2-GDR `__. + +* `Compressed file I/O `__ + in `OpenInventor `__\ |tm|. + +* `Compression codec `__ + underlying the + `OpenZGY `__ + format. + +* `Compressed file I/O `__ + in `TTK `__. + +* `Third-party module `__ + in `VTK `__. + +* `Compression worklet `__ + in `VTK-m `__. + +* `Compression codec `__ in `Zarr `__ via `numcodecs `__. + +See +`this list `__ +for other software products that support |zfp|. + + +Usage +----- + +The typical user will interact with |zfp| via one or more of its components, +specifically + +* Via the :ref:`C API ` when doing I/O in an application or otherwise + performing data (de)compression online. High-speed, parallel compression is + supported via OpenMP and CUDA. + +* Via |zfp|'s in-memory :ref:`compressed-array classes ` when + performing computations on very large arrays that demand random access to + array elements, e.g., in visualization, data analysis, or even in numerical + simulation. These classes can often substitute C/C++ arrays and STL + vectors in applications with minimal code changes. + +* Via the |zfp| :ref:`command-line tool ` when compressing + binary files offline. + +* Via :ref:`third-party ` I/O libraries or tools that support |zfp|. 
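The first of these usage patterns, calling the C API for online compression, can be sketched as follows. This is a minimal fixed-accuracy example for a 3D double array; buffer ownership and error handling are simplified for illustration.

```c
#include <stdlib.h>
#include "zfp.h"

/* Compress a 3D array to within the given absolute error tolerance.
   Returns the number of compressed bytes, or 0 on failure. */
size_t compress3d(double* data, size_t nx, size_t ny, size_t nz, double tolerance)
{
  zfp_field* field = zfp_field_3d(data, zfp_type_double, nx, ny, nz);
  zfp_stream* zfp = zfp_stream_open(NULL);
  zfp_stream_set_accuracy(zfp, tolerance);   /* fixed-accuracy mode */

  /* allocate a buffer large enough for the worst case */
  size_t bufsize = zfp_stream_maximum_size(zfp, field);
  void* buffer = malloc(bufsize);
  bitstream* stream = stream_open(buffer, bufsize);
  zfp_stream_set_bit_stream(zfp, stream);
  zfp_stream_rewind(zfp);

  size_t zfpsize = zfp_compress(zfp, field); /* 0 indicates failure */

  zfp_field_free(field);
  zfp_stream_close(zfp);
  stream_close(stream);
  free(buffer);  /* a real application would keep or write the buffer */
  return zfpsize;
}
```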
+ + + +Technology +---------- + +|zfp| compresses *d*-dimensional (1D, 2D, 3D, and 4D) arrays of integer or +floating-point values by partitioning the array into cubical blocks of |4powd| +values, i.e., 4, 16, 64, or 256 values for 1D, 2D, 3D, and 4D arrays, +respectively. Each such block is independently compressed to a fixed- +or variable-length bit string, and these bit strings may be concatenated +into a single stream of bits. + +|zfp| usually truncates each per-block bit string to a fixed number of bits +to meet a storage budget or to some variable length needed to meet a given +error tolerance, as dictated by the compressibility of the data. +The bit string representing any given block may be truncated at any point and +still yield a valid approximation. The early bits are most important; later +bits progressively refine the approximation, similar to how the last few bits +in a floating-point number have less significance than the first several bits. +The trailing bits can usually be discarded (zeroed) with limited impact on +accuracy. + +|zfp| was originally designed for floating-point arrays only but has been +extended to also support integer data, and could for instance be used to +compress images and quantized volumetric data. To achieve high compression +ratios, |zfp| generally uses lossy but optionally error-bounded compression. +Bit-for-bit lossless compression is also possible through one of |zfp|'s +:ref:`compression modes `. + +|zfp| works best for 2D-4D arrays that exhibit spatial correlation, such as +continuous fields from physics simulations, images, regularly sampled terrain +surfaces, etc. Although |zfp| also provides support for 1D arrays, e.g., +for audio signals or even unstructured floating-point streams, the +compression scheme has not been well optimized for this use case, and +compression ratio and quality may not be competitive with floating-point +compressors designed specifically for 1D streams. 
+ +In all use cases, it is important to know how to use |zfp|'s +:ref:`compression modes ` as well as what the +:ref:`limitations ` of |zfp| are. Although it is not critical +to understand the +:ref:`compression algorithm ` itself, having some familiarity with +its major components may help understand what to expect and how |zfp|'s +parameters influence the result. + + +Resources +--------- + +|zfp| is based on the :ref:`algorithm ` described in the following +paper: + +.. _tvcg-paper: + + | Peter Lindstrom + | "`Fixed-Rate Compressed Floating-Point Arrays `__" + | IEEE Transactions on Visualization and Computer Graphics + | 20(12):2674-2683, December 2014 + | `doi:10.1109/TVCG.2014.2346458 `__ + +|zfp| has evolved since the original publication; the algorithm implemented +in the current version is described in: + +.. _siam-paper: + + | James Diffenderfer, Alyson Fox, Jeffrey Hittinger, Geoffrey Sanders, Peter Lindstrom + | "`Error Analysis of ZFP Compression for Floating-Point Data `__" + | SIAM Journal on Scientific Computing + | 41(3):A1867-A1898, 2019 + | `doi:10.1137/18M1168832 `__ + +For more information on |zfp|, please see the |zfp| +`website `__. +For bug reports, please consult the +`GitHub issue tracker `__. +For questions, comments, and requests, please +`contact us `__. diff --git a/docs/source/issues.rst b/docs/source/issues.rst new file mode 100644 index 00000000..031be234 --- /dev/null +++ b/docs/source/issues.rst @@ -0,0 +1,414 @@ +.. include:: defs.rst + +.. _issues: + +Troubleshooting +=============== + +This section is intended for troubleshooting problems with |zfp|, in case +any arise, and primarily focuses on how to correctly make use of |zfp|. If +the decompressed data looks nothing like the original data, or if the +compression ratios obtained seem not so impressive, then it is very likely +that array dimensions or compression parameters have not been set correctly, +in which case this troubleshooting guide could help. 
+ +The problems addressed in this section include: + + #. :ref:`Is the data dimensionality correct? ` + #. :ref:`Do the compressor and decompressor agree on the dimensionality? ` + #. :ref:`Have the "smooth" dimensions been identified? ` + #. :ref:`Are the array dimensions correct? ` + #. :ref:`Are the array dimensions large enough? ` + #. :ref:`Is the data logically structured? ` + #. :ref:`Is the data set embedded in a regular grid? ` + #. :ref:`Have fill values, NaNs, and infinities been removed? ` + #. :ref:`Is the byte order correct? ` + #. :ref:`Is the floating-point precision correct? ` + #. :ref:`Is the integer precision correct? ` + #. :ref:`Is the data provided to the zfp executable a raw binary array? ` + #. :ref:`Has the appropriate compression mode been set? ` + +------------------------------------------------------------------------------- + +.. _p-dimensionality: + +P1: *Is the data dimensionality correct?* + +This is one of the most common problems. First, make sure that |zfp| is given +the correct dimensionality of the data. For instance, an audio stream is a +1D array, an image is a 2D array, a volume grid is a 3D array, and a +time-varying volume is a 4D array. Sometimes a data set is a discrete +collection of lower-dimensional objects. For instance, a stack of unrelated +images (of the same size) could be represented in C as a 3D array:: + + imstack[count][ny][nx] + +but since in this case the images are unrelated, no correlation would be +expected along the third dimension---the underlying dimensionality of the data +here is two. In this case, the images could be compressed one at a time, or +they could be compressed together by treating the array dimensions as:: + + imstack[count * ny][nx] + +Note that |zfp| partitions *d*-dimensional arrays into blocks of |4powd| +values. If *ny* above is not a multiple of four, then some blocks of |4by4| +pixels will contain pixels from different images, which could hurt compression +and/or quality.
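The merged view costs nothing at runtime: both interpretations address the same memory, and only the logical dimensions handed to the compressor change. A hypothetical layout check in plain C (constants chosen arbitrarily for illustration):

```c
#include <assert.h>

/* An image stack imstack[count][ny][nx] occupies the same memory as a
   merged 2D array merged[count * ny][nx]; the element-to-offset mapping
   is identical in both views. */
enum { COUNT = 3, NY = 5, NX = 4 };

static int offset_3d(int i, int y, int x)  /* offset of imstack[i][y][x] */
{
  return (i * NY + y) * NX + x;
}

static int offset_2d(int row, int x)       /* offset of merged[row][x] */
{
  return row * NX + x;
}
```

That is, `imstack[i][y][x]` and `merged[i * NY + y][x]` name the same value.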
Still, this way of creating a single image by stacking multiple +images is far preferable to linearizing each image into a 1D signal, and +then compressing the images as:: + + imstack[count][ny * nx] + +This loses the correlation along the *y* dimension and further introduces +discontinuities unless *nx* is a multiple of four. + +As in the example above, a 2D vector field +:: + + vfield[ny][nx][2] + +could be declared as a 3D array, but the *x*- and *y*-components of the +2D vectors are likely entirely unrelated. In this case, each component +needs to be compressed independently, either by rearranging the data +as two scalar fields:: + + vfield[2][ny][nx] + +or by using strides (see also FAQ :ref:`#1 `). Note that in all +these cases |zfp| will still compress the data, but if the dimensionality is +not correct then the compression ratio will suffer. + +------------------------------------------------------------------------------- + +.. _p-agree: + +P2: *Do the compressor and decompressor agree on the dimensionality?* + +Consider compressing a 3D array:: + + double a[1][1][100] + +with *nx* = 100, *ny* = 1, *nz* = 1, then decompressing the result to a 1D +array:: + + double b[100] + +with *nx* = 100. Although the arrays *a* and *b* occupy the same amount of +memory and are laid out similarly in C, these arrays are not equivalent to +|zfp| because their dimensionalities differ. |zfp| uses different CODECs +to (de)compress 1D, 2D, 3D, and 4D arrays, and the 1D decompressor expects a +compressed bit stream that corresponds to a 1D array. + +What happens in practice in this case is that the array *a* is compressed +using |zfp|'s 3D CODEC, which first pads the array to +:: + + double padded[4][4][100] + +When this array is correctly decompressed using the 3D CODEC, the padded +values are generated but discarded. |zfp|'s 1D decompressor, on the other +hand, expects 100 values, not 100 |times| 4 |times| 4 = 1600 values, and +therefore likely returns garbage.
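The size mismatch can be made concrete: each dimension is padded up to the next multiple of four, so the two interpretations of the same 100 values imply very different element counts. A small sketch (the helper name is hypothetical, not part of the zfp API):

```c
#include <assert.h>
#include <stddef.h>

/* Round a dimension up to the next multiple of four, zfp's block size
   along each dimension (illustrative helper, not part of the zfp API). */
static size_t pad4(size_t n)
{
  return (n + 3) & ~(size_t)3;
}
```

Interpreted as 3D with *nx* = 100, *ny* = *nz* = 1, the padded array holds `pad4(100) * pad4(1) * pad4(1)` = 1600 values, while the 1D interpretation involves only `pad4(100)` = 100 values.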
+ +------------------------------------------------------------------------------- + +.. _p-smooth: + +P3: *Have the "smooth" dimensions been identified?* + +Closely related to :ref:`P1 ` above, some fields simply do +not vary smoothly along all dimensions, and |zfp| can do a good job +compressing only those dimensions that exhibit some coherence. For instance, +consider a table of stock prices indexed by date and stock:: + + price[stocks][dates] + +One could be tempted to compress this as a 2D array, but there is likely +little to no correlation in prices between different stocks. Each such +time series should be compressed independently as a 1D signal. + +What about time-varying images like a video sequence? In this case, it is +likely that there is correlation over time, and that the value of a single +pixel varies smoothly in time. It is also likely that each image exhibits +smoothness along its two spatial dimensions. So this can be treated as a +single, 3D data set. + +How about time-varying volumes, such as +:: + + field[nt][nz][ny][nx] + +As of version |4drelease|, |zfp| supports compression of 4D arrays. Since +all dimensions in this example are likely to be correlated, the 4D array +can be compressed directly. Alternatively, the data could be organized by +the three "smoothest" dimensions and compressed as a 3D array. Given the +organization above, the array could be treated as 3D:: + + field[nt * nz][ny][nx] + +Again, do **not** compress this as a 3D array with the *innermost* +dimensions unfolded:: + + field[nt][nz][ny * nx] + +------------------------------------------------------------------------------- + +.. _p-dimensions: + +P4: *Are the array dimensions correct?* + +This is another common problem that seems obvious, but often the dimensions +are accidentally transposed. Assuming that the smooth dimensions have been +identified, it is important that the dimensions are listed in the correct +order. 
For instance, if the data (in C notation) is organized as:: + + field[d1][d2][d3] + +then the data is organized in memory (or on disk) with the d3 dimension varying +fastest, and hence *nx* = *d3*, *ny* = *d2*, *nz* = *d1* using the |zfp| naming +conventions for the dimensions, e.g., the :ref:`zfp executable ` should +be invoked with:: + + zfp -3 d3 d2 d1 + +in this case. Things will go horribly wrong if |zfp| in this case is called +with *nx* = *d1*, *ny* = *d2*, *nz* = *d3*. The entire data set will still +compress and decompress, but compression ratio and quality will likely suffer +greatly. See :ref:`this FAQ ` for more details. + +------------------------------------------------------------------------------- + +.. _p-large: + +P5: *Are the array dimensions large enough?* + +|zfp| partitions *d*-dimensional data sets into blocks of |4powd| values, e.g., +in 3D a block consists of |4by4by4| values. If the dimensions are not +multiples of four, then |zfp| will "pad" the array to the next larger multiple +of four. Such padding can hurt compression. In particular, if one or more of +the array dimensions are small, then the overhead of such padding could be +significant. + +Consider compressing a collection of 1000 small 3D arrays:: + + field[1000][5][14][2] + +|zfp| would first logically pad this to a larger array:: + + field[1000][8][16][4] + +which is (8 |times| 16 |times| 4) / (5 |times| 14 |times| 2) ~ 3.66 times +larger. Although such padding often compresses well, this still represents +a significant overhead. + +If a large array has been partitioned into smaller pieces, it may be best to +reassemble the larger array. Or, when possible, ensure that the sub-arrays +have dimensions that are multiples of four. + +------------------------------------------------------------------------------- + +.. _p-structured: + +P6: *Is the data logically structured?* + +|zfp| was designed for logically structured data, i.e., Cartesian grids. 
It +works much like an image compressor does, which assumes that the data set is a +structured array of pixels, and it assumes that values vary reasonably smoothly +on average, just like natural images tend to contain large regions of uniform +color or smooth color gradients, like a blue sky, smoothly varying skin tones +of a human's face, etc. Many data sets are not represented on a regular grid. +For instance, an array of particle *xyz* positions:: + + points[count][3] + +is a 2D array, but does not vary smoothly in either dimension. Furthermore, +such unstructured data sets need not be organized in any particular order; +the particles could be listed in any arbitrary order. One could attempt to +sort the particles, for example by the *x* coordinate, to promote smoothness, +but this would still leave the other two dimensions non-smooth. + +Sometimes the underlying dimensions are not even known, and only the total +number of floating-point values is known. For example, suppose we only knew +that the data set contained *n* = *count* |times| 3 values. One might be +tempted to compress this using |zfp|'s 1-dimensional compressor, but once +again this would not work well. Such abuse of |zfp| is much akin to trying +to compress an image using an audio compressor like mp3, or like compressing +an *n*-sample piece of music as an *n*-by-one sized image using an image +compressor like JPEG. The results would likely not be very good. + +Some data sets are logically structured but geometrically irregular. Examples +include fields stored on Lagrangian meshes that have been warped, or on +spectral element grids, which use a non-uniform grid spacing. |zfp| assumes +that the data has been regularly sampled in each dimension, and the more the +geometry of the sampling deviates from uniform, the worse compression gets. +Note that rectilinear grids with different but uniform grid spacing in each +dimension are fine. 
If your application uses very non-uniform sampling, then +resampling onto a uniform grid (if possible) may be advisable. + +Other data sets are "block structured" and consist of piecewise structured +grids that are "glued" together. Rather than treating such data as +unstructured 1D streams, consider partitioning the data set into independent +(possibly overlapping) regular grids. + +------------------------------------------------------------------------------- + +.. _p-embedded: + +P7: *Is the data set embedded in a regular grid?* + +Some applications represent irregular geometry on a Cartesian grid, and leave +portions of the domain unspecified. Consider, for instance, sampling the +density of the Earth onto a Cartesian grid. Here the density for grid points +outside the Earth is unspecified. + +In this case, |zfp| does best by initializing the "background field" to all +zeros. In |zfp|'s :ref:`fixed-accuracy mode `, any +"empty" block that consists of all zeros is represented using a single bit, +and therefore the overhead of representing empty space can be kept low. + +------------------------------------------------------------------------------- + +.. _p-invalid: + +P8: *Have fill values, NaNs, and infinities been removed?* + +It is common to signal unspecified values using what is commonly called a +"fill value," which is a special constant value that tends to be far out of +range of normal values. For instance, in climate modeling the ocean +temperature over land is meaningless, and it is common to use a very large +temperature value such as 1e30 to signal that the temperature is undefined +for such grid points. + +Very large fill values do not play well with |zfp|, because they both introduce +artificial discontinuities and pollute nearby values by expressing them all +with respect to the common largest exponent within their block. 
Assuming +a fill value of 1e30, the value pi in the same block would be represented as:: + + 0.00000000000000000000000000000314159... * 1e30 + +Given finite precision, the small fraction would likely be replaced with zero, +resulting in complete loss of the actual value being stored. + +Other applications use NaNs (special not-a-number values) or infinities as +fill values. These are even more problematic, because they do not have a +defined exponent. |zfp| relies on the C function :c:func:`frexp` to compute +the exponent of the largest (in magnitude) value within a block, but produces +unspecified behavior if that value is not finite. + +|zfp| currently has no independent mechanism for handling fill values. Ideally +such special values would be signalled separately, e.g., using a bit mask, +and then replaced with zeros to ensure that they both compress well and do +not pollute actual data. + +------------------------------------------------------------------------------- + +.. _p-endian: + +P9: *Is the byte order correct?* + +|zfp| generally works with the native byte order (e.g., little or big endian) +of the machine it is compiled on. One needs only be concerned with byte order +when reading raw, binary data into the |zfp| executable, when exchanging +compressed files across platforms, and when varying the bit stream word size +on big endian machines (not common). For instance, to compress a binary +double-precision floating-point file stored in big endian byte order on a +little endian machine, byte swapping must first be done. For example, on +Linux and macOS, 8-byte doubles can be byte swapped using:: + + objcopy -I binary -O binary --reverse-bytes=8 big.bin little.bin + +See also FAQ :ref:`#11 ` for more discussion of byte order. + +------------------------------------------------------------------------------- + +.. 
_p-float-precision: + +P10: *Is the floating-point precision correct?* + +Another obvious problem: please make sure that |zfp| is told whether the data +to compress is an array of single- (32-bit) or double-precision (64-bit) +values, e.g., by specifying the :option:`-f` or :option:`-d` options to the +:program:`zfp` executable or by passing the appropriate :c:type:`zfp_type` +to the C functions. + +------------------------------------------------------------------------------- + +.. _p-int-precision: + +P11: *Is the integer precision correct?* + +|zfp| currently supports compression of 31- or 63-bit signed integers. Shorter +integers (e.g., bytes, shorts) can be compressed but must first be promoted +to one of the longer types. This should always be done using |zfp|'s functions +for :ref:`promotion and demotion `, which perform the bit +shifting and biasing needed to handle both signed and unsigned types. It is not +sufficient to simply cast short integers to longer integers. See also FAQs +:ref:`#8 ` and :ref:`#9 `. + +------------------------------------------------------------------------------- + +.. _p-binary: + +P12: *Is the data provided to the zfp executable a raw binary array?* + +|zfp| expects that the input file is a raw binary array of integers or +floating-point values in the IEEE format, e.g., written to file using +:c:func:`fwrite`. Do not hand |zfp| a text file containing ASCII +floating-point numbers. Strip the file of any header information. +Languages like Fortran tend to store the array size along with the array +data; no such metadata may be embedded in the file. + +------------------------------------------------------------------------------- + +.. _p-mode: + +P13: *Has the appropriate compression mode been set?* + +|zfp| provides three different lossy +:ref:`modes of compression ` that trade storage and accuracy, +plus one :ref:`lossless mode `.
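Of these, fixed-rate mode is the most predictable in terms of storage: every block of |4powd| values receives the same bit budget, so compressed size can be computed up front. A back-of-the-envelope sketch in plain C, ignoring headers and rate granularity (the helper is illustrative, not part of the zfp API):

```c
#include <assert.h>
#include <stddef.h>

/* Approximate payload size in bits for a 2D array compressed at a
   fixed rate of 'rate' bits per value: every 4x4 block of 16 values
   gets the same budget, and partial boundary blocks are padded to
   full blocks.  Headers and rate rounding are ignored. */
static size_t fixed_rate_bits_2d(size_t nx, size_t ny, size_t rate)
{
  size_t bx = (nx + 3) / 4;  /* blocks along x */
  size_t by = (ny + 3) / 4;  /* blocks along y */
  return bx * by * 16 * rate;
}
```

For instance, a 1000 |times| 1000 array of doubles at 8 bits/value occupies about 8,000,000 bits (1,000,000 bytes), an 8:1 ratio relative to uncompressed doubles.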
In +fixed-rate mode, the user specifies the exact number of bits (often in +increments of a fraction of a bit) of compressed storage per value (but see +FAQ :ref:`#18 ` for caveats). From the user's perspective, this +seems a very desirable feature, since it provides a direct mechanism for +specifying how much storage to use. However, there is often a large quality +penalty associated with the fixed-rate mode, because each block of |4powd| +values is allocated the same number of bits. In practice, the information +content over the data set varies significantly, which means that +easy-to-compress regions are assigned too many bits, while too few bits are +available to faithfully represent the more challenging-to-compress regions. +Although one of the unique features of |zfp|, its fixed-rate mode should +primarily be used only when random access to the data is needed. + +|zfp| also provides a fixed-precision mode, where the user specifies how many +uncompressed significant bits to use to represent the floating-point fraction. +This notion of precision may not be what people normally have in mind. For +instance, the C float type is commonly referred to as 32-bit precision. +However, the sign bit and exponent account for nine of those bits and do +not contribute to the number of significant bits of precision. Furthermore, +for normal numbers, IEEE uses a hidden implicit one bit, so most float values +actually have 24 bits of precision. Moreover, |zfp| uses a +block-floating-point representation with a single exponent per block, +which may cause some small values to have several leading zero bits and +therefore less precision than requested. Thus, the effective precision +returned by |zfp| in its fixed-precision mode may in fact vary. In practice, +the precision requested is only an upper bound, though typically at least one +value within a block has the requested precision.
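The loss of leading bits follows from the shared block exponent: a value whose binary exponent is *e*, in a block whose largest exponent is *emax*, is expressed in the common scale and gives up *emax* |minus| *e* of its *p* requested significant bits. A simplified model in plain C (not zfp's exact arithmetic):

```c
#include <assert.h>

/* Simplified block-floating-point model (not zfp's exact arithmetic):
   effective significant bits retained by a value with binary exponent
   'e' in a block whose largest exponent is 'emax', when 'p' bits of
   precision are requested. */
static int effective_bits(int p, int emax, int e)
{
  int bits = p - (emax - e);
  return bits > 0 ? bits : 0;
}
```

With *p* = 24 requested bits, a value of magnitude about 2\ :sup:`-10` sharing a block with values of magnitude about 1 retains roughly 14 bits, while a value near 2\ :sup:`-30` retains none.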
+ +|zfp| supports a fixed-accuracy mode, which, except in rare +circumstances (see FAQ :ref:`#17 `), ensures that the absolute +error is bounded, i.e., the difference between any decompressed and original +value is at most the tolerance specified by the user (but usually several +times smaller). Whenever possible, we recommend using this compression mode, +which, depending on how compressible the data is, yields the smallest +compressed stream that respects the error tolerance. + +As of |zfp| |revrelease|, reversible (lossless) compression is available. +The amount of lossless reduction of floating-point data is usually quite +limited, however, especially for double-precision data. Unless a bit-for-bit +exact reconstruction is needed, we strongly advocate the use of lossy +compression. + +Finally, there is also an expert mode that allows the user to combine the +constraints of fixed rate, precision, and accuracy. See the section on +:ref:`compression modes ` for more details. diff --git a/docs/source/iterators.inc b/docs/source/iterators.inc new file mode 100644 index 00000000..dc80a71d --- /dev/null +++ b/docs/source/iterators.inc @@ -0,0 +1,238 @@ +.. index:: + single: Iterators +.. _iterators: + +Iterators +--------- + +.. cpp:namespace:: zfp + +.. cpp:class:: array1::const_iterator +.. cpp:class:: array2::const_iterator +.. cpp:class:: array3::const_iterator +.. cpp:class:: array4::const_iterator +.. cpp:class:: array1::iterator : public array1::const_iterator +.. cpp:class:: array2::iterator : public array2::const_iterator +.. cpp:class:: array3::iterator : public array3::const_iterator +.. cpp:class:: array4::iterator : public array4::const_iterator + +Iterators provide a mechanism for traversing a possibly +multi-dimensional array---or a :ref:`view ` of a subset of an +array---without having to track array indices or bounds.
+They are also the preferred mechanism, compared to nested index loops, for +initializing arrays, because they sequentially visit the array one block +at a time. This allows all elements of a block to be initialized together +and ensures that the block is not compressed to memory before it has been +fully initialized, which might otherwise result in poor compression and, +consequently, larger compression errors than when the entire block is +initialized as a whole. Note that the iterator traversal order differs in +this respect from traversal by :ref:`pointers `. + +Blocks are visited in raster order similarly to how individual array +elements are indexed, that is, first by *x*, then by *y*, then by *z*, +etc. Within each block, elements are visited in the same raster +order. All |4powd| values in a block are visited before moving on to the +next block (see :numref:`view-indexing`). + +As of |zfp| |raiterrelease|, all iterators provided by |zfp| are random +access iterators (previously, multi-dimensional array iterators were only +forward iterators). |zfp| iterators are +`STL `_ compliant and can +be used in STL algorithms that support random access iterators. + +|zfp| |crpirelease| adds :code:`const` qualified versions of iterators, +given by the :code:`const_iterator` class. Such iterators are available +also for :ref:`read-only arrays `. + +Per STL mandate, the iterators define several types: + +.. cpp:namespace:: zfp::arrayANY + +.. cpp:type:: iterator::value_type + + The scalar type associated with the array that the iterator points into. + +---- + +.. cpp:type:: iterator::difference_type + + Difference between two iterators in number of array elements. + +---- + +.. cpp:type:: iterator::reference + + The :ref:`reference ` type associated with the iterator's parent + array class. + +---- + +.. cpp:type:: iterator::pointer + + The :ref:`pointer ` type associated with the iterator's parent + array class. + +---- + +.. 
cpp:type:: iterator::iterator_category + + Type of iterator: :cpp:type:`std::random_access_iterator_tag`. + +For const iterators, the following additional types are defined: + +.. cpp:type:: const_iterator::const_reference + + The immutable reference type associated with the iterator's container class. + +.. cpp:type:: const_iterator::const_pointer + + The immutable pointer type associated with the iterator's container class. + +The following operations are defined on iterators: + +.. cpp:function:: iterator iterator::operator=(const iterator& it) +.. cpp:function:: const_iterator const_iterator::operator=(const const_iterator& it) + + Assignment (copy) operator. Make the iterator point to the same element + as *it*. + +---- + +.. cpp:function:: reference iterator::operator*() const +.. cpp:function:: const_reference const_iterator::operator*() const + + Dereference operator. Return (const) reference to the value pointed to by + the iterator. + +---- + +.. cpp:function:: reference iterator::operator[](difference_type d) const +.. cpp:function:: const_reference const_iterator::operator[](difference_type d) const + + Offset dereference operator. Return (const) reference to the value *d* + elements relative to the current element in the iteration sequence (*d* may + be negative). This operator executes in constant time regardless of array + dimensionality but is more costly than sequential iteration via + :cpp:func:`iterator::operator++`. + +---- + +.. cpp:function:: iterator iterator::operator+(difference_type d) const +.. cpp:function:: const_iterator const_iterator::operator+(difference_type d) const + + Return a new iterator that has been incremented by *d*. + +---- + +.. cpp:function:: iterator iterator::operator-(difference_type d) const +.. cpp:function:: const_iterator const_iterator::operator-(difference_type d) const + + Return a new iterator that has been decremented by *d*. + +---- + +.. 
cpp:function:: difference_type iterator::operator-(const iterator& it) const +.. cpp:function:: difference_type const_iterator::operator-(const const_iterator& it) const + + Return difference between this iterator and *it* in number of elements. + The difference *p* |minus| *q* between two iterators, *p* and *q*, is + negative if *p* < *q*. The iterators must refer to elements in the same + array. + +---- + +.. cpp:function:: bool iterator::operator==(const iterator& it) const +.. cpp:function:: bool const_iterator::operator==(const const_iterator& it) const + + Return true if the two iterators point to the same element. + +---- + +.. cpp:function:: bool iterator::operator!=(const iterator& it) const +.. cpp:function:: bool const_iterator::operator!=(const const_iterator& it) const + + Return true if the two iterators do not point to the same element. + +---- + +.. _iter_inequalities: +.. cpp:function:: bool iterator::operator<=(const iterator& it) const +.. cpp:function:: bool iterator::operator>=(const iterator& it) const +.. cpp:function:: bool iterator::operator<(const iterator& it) const +.. cpp:function:: bool iterator::operator>(const iterator& it) const +.. cpp:function:: bool const_iterator::operator<=(const const_iterator& it) const +.. cpp:function:: bool const_iterator::operator>=(const const_iterator& it) const +.. cpp:function:: bool const_iterator::operator<(const const_iterator& it) const +.. cpp:function:: bool const_iterator::operator>(const const_iterator& it) const + + Return true if the two iterators satisfy the given relationship. + For two iterators, *p* and *q*, within the same array, *p* < *q* + if and only if *q* can be reached by incrementing *p* one or more times. + +---- + +.. cpp:function:: iterator& iterator::operator++() +.. cpp:function:: const_iterator& const_iterator::operator++() + + Prefix increment (:code:`++it`). Return a reference to the + incremented iterator. + +---- + +.. 
cpp:function:: iterator iterator::operator++(int) +.. cpp:function:: const_iterator const_iterator::operator++(int) + + Postfix increment (:code:`it++`). Return the value of the iterator + before being incremented. + +---- + +.. cpp:function:: iterator& iterator::operator--() +.. cpp:function:: const_iterator& const_iterator::operator--() + + Prefix decrement (:code:`--it`). Return a reference to the + decremented iterator. + +---- + +.. cpp:function:: iterator iterator::operator--(int) +.. cpp:function:: const_iterator const_iterator::operator--(int) + + Postfix decrement (:code:`it--`). Return the value of the + iterator before being decremented. + +---- + +.. cpp:function:: iterator iterator::operator+=(difference_type d) +.. cpp:function:: const_iterator const_iterator::operator+=(difference_type d) + + Increment iterator *d* times. Return value of incremented iterator. + Although :cpp:expr:`++it` and :cpp:expr:`it += 1` are semantically + equivalent, the former is more efficient for multidimensional arrays. + +---- + +.. cpp:function:: iterator iterator::operator-=(difference_type d) +.. cpp:function:: const_iterator const_iterator::operator-=(difference_type d) + + Decrement iterator *d* times. Return value of decremented iterator. + Although :cpp:expr:`--it` and :cpp:expr:`it -= 1` are semantically + equivalent, the former is more efficient for multidimensional arrays. + +---- + +.. cpp:function:: size_t iterator::i() const +.. cpp:function:: size_t iterator::j() const +.. cpp:function:: size_t iterator::k() const +.. cpp:function:: size_t iterator::l() const +.. cpp:function:: size_t const_iterator::i() const +.. cpp:function:: size_t const_iterator::j() const +.. cpp:function:: size_t const_iterator::k() const +.. cpp:function:: size_t const_iterator::l() const + + Return array index or local view index of element pointed to by the + iterator. + :cpp:func:`iterator::i` is defined for all arrays. 
+ :cpp:func:`iterator::j` is defined only for 2D, 3D, and 4D arrays. + :cpp:func:`iterator::k` is defined only for 3D and 4D arrays. + :cpp:func:`iterator::l` is defined only for 4D arrays. diff --git a/docs/source/license.rst b/docs/source/license.rst new file mode 100644 index 00000000..1129b833 --- /dev/null +++ b/docs/source/license.rst @@ -0,0 +1,59 @@ +.. _license: + +License +======= + +| Copyright (c) 2014-2023, Lawrence Livermore National Security, LLC +| All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, +this list of conditions and the disclaimer below. + +2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the disclaimer (as noted below) in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the LLNS/LLNL nor the names of its contributors may +be used to endorse or promote products derived from this software without +specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, +LLC, THE U.S. DEPARTMENT OF ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + +Notice +------ + +This work was produced under the auspices of the U.S. Department of +Energy by Lawrence Livermore National Laboratory under Contract +DE-AC52-07NA27344. + +This work was prepared as an account of work sponsored by an agency of +the United States Government. Neither the United States Government nor +Lawrence Livermore National Security, LLC, nor any of their employees +makes any warranty, expressed or implied, or assumes any legal liability +or responsibility for the accuracy, completeness, or usefulness of any +information, apparatus, product, or process disclosed, or represents that +its use would not infringe privately owned rights. + +Reference herein to any specific commercial product, process, or service +by trade name, trademark, manufacturer, or otherwise does not necessarily +constitute or imply its endorsement, recommendation, or favoring by the +United States Government or Lawrence Livermore National Security, LLC. + +The views and opinions of authors expressed herein do not necessarily +state or reflect those of the United States Government or Lawrence +Livermore National Security, LLC, and shall not be used for advertising +or product endorsement purposes. diff --git a/docs/source/limitations.rst b/docs/source/limitations.rst new file mode 100644 index 00000000..fc0dd37c --- /dev/null +++ b/docs/source/limitations.rst @@ -0,0 +1,87 @@ +.. include:: defs.rst + +.. _limitations: + +Limitations +=========== + +|zfp| has evolved over the years from a research prototype to a production +quality library. However, the API and even the compression codec are +still undergoing changes as new important features are added. + +Below is a list of known limitations of the current version of |zfp|. +See the section on :ref:`directions` for a discussion of planned features +that will address some of these limitations. 
+ +- Special floating-point values like infinity and NaN are supported in + reversible mode but not in |zfp|'s lossy compression modes. Subnormal + floating-point numbers are, however, correctly handled. There is an + implicit assumption that floating point conforms to IEEE-754, though + extensions to other floating-point formats should be possible with + minor effort. + +- The optional |zfp| :ref:`header ` supports arrays with at + most 2\ :sup:`48` elements. The |zfp| header limits each dimension + to 2\ :sup:`48/d` elements in a *d*-dimensional array, i.e., + 2\ :sup:`48`, 2\ :sup:`24`, 2\ :sup:`16`, and 2\ :sup:`12` for 1D through + 4D arrays, respectively. Note that this limitation applies only to + the header; array dimensions are otherwise limited only by the size + supported by :code:`size_t`. + +- The :ref:`compressed-array classes ` have additional size + restrictions. The :ref:`cache ` supports at most + 2\ :sup:`p-1` - 1 blocks, where *p* is the number of bits in a :code:`uint` + (usually *p* = 32). Consequently, the number of elements in a + *d*-dimensional compressed array is at most + |4powd| |times| (2\ :sup:`p-1` - 1), or about 8 billion elements for 1D + arrays. + +- Conventional pointers and references to individual array elements are + not available. That is, constructions like :code:`double* ptr = &a[i];` + are not possible when :code:`a` is a |zfp| array. However, as of + |zfp| |proxyrelease|, :ref:`proxy pointers ` are available that + act much like pointers to uncompressed data. Similarly, operators :code:`[]` + and :code:`()` do not return regular C++ references. Instead, a + :ref:`proxy reference ` class is used (similar to how + `STL bit vectors `__ + are implemented). These proxy references and pointers can, however, safely + be passed to functions and used where regular references and pointers can. + +- The :ref:`read-only array classes ` do not yet support + (de)serialization. 
+ +- |zfp| can potentially provide higher precision than conventional float + and double arrays, but the interface currently does not expose this. + For example, such added precision could be useful in finite difference + computations, where catastrophic cancellation can be an issue when + insufficient precision is available. + +- Only single and double precision floating types are supported. + Generalizations to IEEE half and quad precision would be useful. For + instance, compressed 64-bit-per-value storage of 128-bit quad-precision + numbers could greatly improve the accuracy of double-precision + floating-point computations using the same amount of storage. + The |zfp| compressed-array classes do not yet support integer scalar + types. + +- Complex-valued arrays are not directly supported. Real and imaginary + components must be stored as separate arrays, which may result in lost + opportunities for compression, e.g., if the complex magnitude is constant + and only the phase varies. + +- Version |omprelease| adds support for OpenMP compression. However, + OpenMP decompression is not yet supported. + +- Version |cudarelease| adds support for CUDA compression and decompression. + However, only the fixed-rate compression mode is so far supported. + The CUDA implementation is further subject to + :ref:`additional limitations `. + +- The |cfp| :ref:`C wrappers ` for |zfp|'s compressed arrays support + only a subset of the C++ API. |zfp| |4darrrelease| adds support for proxy + references, pointers, and iterators, but views and read-only arrays are + not yet supported. Furthermore, |cfp| works only with the |zfp| codec. + +- The Python and Fortran bindings do not yet support |zfp|'s compressed-array + classes. Moreover, only a select subset of the + :ref:`high-level API ` is available via Python. 
diff --git a/docs/source/low-level-api.rst b/docs/source/low-level-api.rst new file mode 100644 index 00000000..27a2ba7c --- /dev/null +++ b/docs/source/low-level-api.rst @@ -0,0 +1,437 @@ +.. include:: defs.rst + +.. _ll-api: + +Low-Level C API +=============== + +The |libzfp| low-level C API provides functionality for compressing individual +*d*-dimensional blocks of up to |4powd| values. If a block is not complete, +i.e., contains fewer than |4powd| values, then |zfp|'s partial +block support should be favored over padding the block with, say, zeros +or other fill values. The blocks (de)compressed need not be contiguous +and can be gathered from or scattered to a larger array by setting +appropriate strides. As of |zfp| |cpprelease|, templated C++ wrappers +are also available to simplify calling the low-level API from C++. +The C API is declared in :file:`zfp.h`; the C++ wrappers are found in +:file:`zfp.hpp`. + +.. note:: + Because the unit of parallel work in |zfp| is a *block*, and because the + low-level API operates on individual blocks, this API supports only + the serial :ref:`execution policy `. Any other execution + policy set in :c:type:`zfp_stream` is silently ignored. For parallel + execution, see the :ref:`high-level API `. + +The following topics are available: + +* :ref:`ll-stream` + +* :ref:`ll-encoder` + + * :ref:`ll-1d-encoder` + * :ref:`ll-2d-encoder` + * :ref:`ll-3d-encoder` + * :ref:`ll-4d-encoder` + +* :ref:`ll-decoder` + + * :ref:`ll-1d-decoder` + * :ref:`ll-2d-decoder` + * :ref:`ll-3d-decoder` + * :ref:`ll-4d-decoder` + +* :ref:`ll-utilities` + +* :ref:`ll-cpp-wrappers` + +.. _ll-stream: + +Stream Manipulation +------------------- + +.. c:function:: size_t zfp_stream_flush(zfp_stream* stream) + + Flush bit stream to write out any buffered bits. This function must be + called after the last encode call. The bit stream is aligned on + a stream word boundary following this call. 
The number of zero-bits + written, if any, is returned. + +---- + +.. c:function:: size_t zfp_stream_align(zfp_stream* stream) + + Align bit stream on next word boundary. This function is analogous to + :c:func:`zfp_stream_flush`, but for decoding. That is, wherever the + encoder flushes the stream, the decoder should align it to ensure + synchronization between encoder and decoder. The number of bits skipped, + if any, is returned. + +.. _ll-encoder: + +Encoder +------- + +A function is available for encoding whole or partial blocks of each scalar +type and dimensionality. These functions return the number of bits of +compressed storage for the block being encoded, or zero upon failure. + +.. _ll-1d-encoder: + +1D Data +^^^^^^^ + +.. c:function:: size_t zfp_encode_block_int32_1(zfp_stream* stream, const int32* block) +.. c:function:: size_t zfp_encode_block_int64_1(zfp_stream* stream, const int64* block) +.. c:function:: size_t zfp_encode_block_float_1(zfp_stream* stream, const float* block) +.. c:function:: size_t zfp_encode_block_double_1(zfp_stream* stream, const double* block) + + Encode 1D contiguous block of 4 values. + +---- + +.. c:function:: size_t zfp_encode_block_strided_int32_1(zfp_stream* stream, const int32* p, ptrdiff_t sx) +.. c:function:: size_t zfp_encode_block_strided_int64_1(zfp_stream* stream, const int64* p, ptrdiff_t sx) +.. c:function:: size_t zfp_encode_block_strided_float_1(zfp_stream* stream, const float* p, ptrdiff_t sx) +.. c:function:: size_t zfp_encode_block_strided_double_1(zfp_stream* stream, const double* p, ptrdiff_t sx) + + Encode 1D complete block from strided array with stride *sx*. + +---- + +.. c:function:: size_t zfp_encode_partial_block_strided_int32_1(zfp_stream* stream, const int32* p, size_t nx, ptrdiff_t sx) +.. c:function:: size_t zfp_encode_partial_block_strided_int64_1(zfp_stream* stream, const int64* p, size_t nx, ptrdiff_t sx) +.. 
c:function:: size_t zfp_encode_partial_block_strided_float_1(zfp_stream* stream, const float* p, size_t nx, ptrdiff_t sx) +.. c:function:: size_t zfp_encode_partial_block_strided_double_1(zfp_stream* stream, const double* p, size_t nx, ptrdiff_t sx) + + Encode 1D partial block of size *nx* from strided array with stride *sx*. + +.. _ll-2d-encoder: + +2D Data +^^^^^^^ + +.. c:function:: size_t zfp_encode_block_int32_2(zfp_stream* stream, const int32* block) +.. c:function:: size_t zfp_encode_block_int64_2(zfp_stream* stream, const int64* block) +.. c:function:: size_t zfp_encode_block_float_2(zfp_stream* stream, const float* block) +.. c:function:: size_t zfp_encode_block_double_2(zfp_stream* stream, const double* block) + + Encode 2D contiguous block of |4by4| values. + +---- + +.. c:function:: size_t zfp_encode_block_strided_int32_2(zfp_stream* stream, const int32* p, ptrdiff_t sx, ptrdiff_t sy) +.. c:function:: size_t zfp_encode_block_strided_int64_2(zfp_stream* stream, const int64* p, ptrdiff_t sx, ptrdiff_t sy) +.. c:function:: size_t zfp_encode_block_strided_float_2(zfp_stream* stream, const float* p, ptrdiff_t sx, ptrdiff_t sy) +.. c:function:: size_t zfp_encode_block_strided_double_2(zfp_stream* stream, const double* p, ptrdiff_t sx, ptrdiff_t sy) + + Encode 2D complete block from strided array with strides *sx* and *sy*. + +---- + +.. c:function:: size_t zfp_encode_partial_block_strided_int32_2(zfp_stream* stream, const int32* p, size_t nx, size_t ny, ptrdiff_t sx, ptrdiff_t sy) +.. c:function:: size_t zfp_encode_partial_block_strided_int64_2(zfp_stream* stream, const int64* p, size_t nx, size_t ny, ptrdiff_t sx, ptrdiff_t sy) +.. c:function:: size_t zfp_encode_partial_block_strided_float_2(zfp_stream* stream, const float* p, size_t nx, size_t ny, ptrdiff_t sx, ptrdiff_t sy) +.. 
c:function:: size_t zfp_encode_partial_block_strided_double_2(zfp_stream* stream, const double* p, size_t nx, size_t ny, ptrdiff_t sx, ptrdiff_t sy) + + Encode 2D partial block of size *nx* |times| *ny* from strided array with + strides *sx* and *sy*. + +.. _ll-3d-encoder: + +3D Data +^^^^^^^ + +.. c:function:: size_t zfp_encode_block_int32_3(zfp_stream* stream, const int32* block) +.. c:function:: size_t zfp_encode_block_int64_3(zfp_stream* stream, const int64* block) +.. c:function:: size_t zfp_encode_block_float_3(zfp_stream* stream, const float* block) +.. c:function:: size_t zfp_encode_block_double_3(zfp_stream* stream, const double* block) + + Encode 3D contiguous block of |4by4by4| values. + +---- + +.. c:function:: size_t zfp_encode_block_strided_int32_3(zfp_stream* stream, const int32* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz) +.. c:function:: size_t zfp_encode_block_strided_int64_3(zfp_stream* stream, const int64* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz) +.. c:function:: size_t zfp_encode_block_strided_float_3(zfp_stream* stream, const float* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz) +.. c:function:: size_t zfp_encode_block_strided_double_3(zfp_stream* stream, const double* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz) + + Encode 3D complete block from strided array with strides *sx*, *sy*, and + *sz*. + +---- + +.. c:function:: size_t zfp_encode_partial_block_strided_int32_3(zfp_stream* stream, const int32* p, size_t nx, size_t ny, size_t nz, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz) +.. c:function:: size_t zfp_encode_partial_block_strided_int64_3(zfp_stream* stream, const int64* p, size_t nx, size_t ny, size_t nz, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz) +.. c:function:: size_t zfp_encode_partial_block_strided_float_3(zfp_stream* stream, const float* p, size_t nx, size_t ny, size_t nz, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz) +.. 
c:function:: size_t zfp_encode_partial_block_strided_double_3(zfp_stream* stream, const double* p, size_t nx, size_t ny, size_t nz, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz) + + Encode 3D partial block of size *nx* |times| *ny* |times| *nz* from strided + array with strides *sx*, *sy*, and *sz*. + +.. _ll-4d-encoder: + +4D Data +^^^^^^^ + +.. c:function:: size_t zfp_encode_block_int32_4(zfp_stream* stream, const int32* block) +.. c:function:: size_t zfp_encode_block_int64_4(zfp_stream* stream, const int64* block) +.. c:function:: size_t zfp_encode_block_float_4(zfp_stream* stream, const float* block) +.. c:function:: size_t zfp_encode_block_double_4(zfp_stream* stream, const double* block) + + Encode 4D contiguous block of |4by4by4by4| values. + +---- + +.. c:function:: size_t zfp_encode_block_strided_int32_4(zfp_stream* stream, const int32* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw) +.. c:function:: size_t zfp_encode_block_strided_int64_4(zfp_stream* stream, const int64* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw) +.. c:function:: size_t zfp_encode_block_strided_float_4(zfp_stream* stream, const float* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw) +.. c:function:: size_t zfp_encode_block_strided_double_4(zfp_stream* stream, const double* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw) + + Encode 4D complete block from strided array with strides *sx*, *sy*, *sz*, and + *sw*. + +---- + +.. c:function:: size_t zfp_encode_partial_block_strided_int32_4(zfp_stream* stream, const int32* p, size_t nx, size_t ny, size_t nz, size_t nw, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw) +.. c:function:: size_t zfp_encode_partial_block_strided_int64_4(zfp_stream* stream, const int64* p, size_t nx, size_t ny, size_t nz, size_t nw, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw) +.. 
c:function:: size_t zfp_encode_partial_block_strided_float_4(zfp_stream* stream, const float* p, size_t nx, size_t ny, size_t nz, size_t nw, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw) +.. c:function:: size_t zfp_encode_partial_block_strided_double_4(zfp_stream* stream, const double* p, size_t nx, size_t ny, size_t nz, size_t nw, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw) + + Encode 4D partial block of size *nx* |times| *ny* |times| *nz* |times| *nw* + from strided array with strides *sx*, *sy*, *sz*, and *sw*. + +.. _ll-decoder: + +Decoder +------- + +Each function below decompresses a single block and returns the number of bits +of compressed storage consumed. See corresponding encoder functions above for +further details. + +.. _ll-1d-decoder: + +1D Data +^^^^^^^ + +.. c:function:: size_t zfp_decode_block_int32_1(zfp_stream* stream, int32* block) +.. c:function:: size_t zfp_decode_block_int64_1(zfp_stream* stream, int64* block) +.. c:function:: size_t zfp_decode_block_float_1(zfp_stream* stream, float* block) +.. c:function:: size_t zfp_decode_block_double_1(zfp_stream* stream, double* block) + + Decode 1D contiguous block of 4 values. + +---- + +.. c:function:: size_t zfp_decode_block_strided_int32_1(zfp_stream* stream, int32* p, ptrdiff_t sx) +.. c:function:: size_t zfp_decode_block_strided_int64_1(zfp_stream* stream, int64* p, ptrdiff_t sx) +.. c:function:: size_t zfp_decode_block_strided_float_1(zfp_stream* stream, float* p, ptrdiff_t sx) +.. c:function:: size_t zfp_decode_block_strided_double_1(zfp_stream* stream, double* p, ptrdiff_t sx) + + Decode 1D complete block to strided array with stride *sx*. + +---- + +.. c:function:: size_t zfp_decode_partial_block_strided_int32_1(zfp_stream* stream, int32* p, size_t nx, ptrdiff_t sx) +.. c:function:: size_t zfp_decode_partial_block_strided_int64_1(zfp_stream* stream, int64* p, size_t nx, ptrdiff_t sx) +.. 
c:function:: size_t zfp_decode_partial_block_strided_float_1(zfp_stream* stream, float* p, size_t nx, ptrdiff_t sx) +.. c:function:: size_t zfp_decode_partial_block_strided_double_1(zfp_stream* stream, double* p, size_t nx, ptrdiff_t sx) + + Decode 1D partial block of size *nx* to strided array with stride *sx*. + +.. _ll-2d-decoder: + +2D Data +^^^^^^^ + +.. c:function:: size_t zfp_decode_block_int32_2(zfp_stream* stream, int32* block) +.. c:function:: size_t zfp_decode_block_int64_2(zfp_stream* stream, int64* block) +.. c:function:: size_t zfp_decode_block_float_2(zfp_stream* stream, float* block) +.. c:function:: size_t zfp_decode_block_double_2(zfp_stream* stream, double* block) + + Decode 2D contiguous block of |4by4| values. + +---- + +.. c:function:: size_t zfp_decode_block_strided_int32_2(zfp_stream* stream, int32* p, ptrdiff_t sx, ptrdiff_t sy) +.. c:function:: size_t zfp_decode_block_strided_int64_2(zfp_stream* stream, int64* p, ptrdiff_t sx, ptrdiff_t sy) +.. c:function:: size_t zfp_decode_block_strided_float_2(zfp_stream* stream, float* p, ptrdiff_t sx, ptrdiff_t sy) +.. c:function:: size_t zfp_decode_block_strided_double_2(zfp_stream* stream, double* p, ptrdiff_t sx, ptrdiff_t sy) + + Decode 2D complete block to strided array with strides *sx* and *sy*. + +---- + +.. c:function:: size_t zfp_decode_partial_block_strided_int32_2(zfp_stream* stream, int32* p, size_t nx, size_t ny, ptrdiff_t sx, ptrdiff_t sy) +.. c:function:: size_t zfp_decode_partial_block_strided_int64_2(zfp_stream* stream, int64* p, size_t nx, size_t ny, ptrdiff_t sx, ptrdiff_t sy) +.. c:function:: size_t zfp_decode_partial_block_strided_float_2(zfp_stream* stream, float* p, size_t nx, size_t ny, ptrdiff_t sx, ptrdiff_t sy) +.. c:function:: size_t zfp_decode_partial_block_strided_double_2(zfp_stream* stream, double* p, size_t nx, size_t ny, ptrdiff_t sx, ptrdiff_t sy) + + Decode 2D partial block of size *nx* |times| *ny* to strided array with + strides *sx* and *sy*. + +.. 
_ll-3d-decoder: + +3D Data +^^^^^^^ + +.. c:function:: size_t zfp_decode_block_int32_3(zfp_stream* stream, int32* block) +.. c:function:: size_t zfp_decode_block_int64_3(zfp_stream* stream, int64* block) +.. c:function:: size_t zfp_decode_block_float_3(zfp_stream* stream, float* block) +.. c:function:: size_t zfp_decode_block_double_3(zfp_stream* stream, double* block) + + Decode 3D contiguous block of |4by4by4| values. + +---- + +.. c:function:: size_t zfp_decode_block_strided_int32_3(zfp_stream* stream, int32* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz) +.. c:function:: size_t zfp_decode_block_strided_int64_3(zfp_stream* stream, int64* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz) +.. c:function:: size_t zfp_decode_block_strided_float_3(zfp_stream* stream, float* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz) +.. c:function:: size_t zfp_decode_block_strided_double_3(zfp_stream* stream, double* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz) + + Decode 3D complete block to strided array with strides *sx*, *sy*, and *sz*. + +---- + +.. c:function:: size_t zfp_decode_partial_block_strided_int32_3(zfp_stream* stream, int32* p, size_t nx, size_t ny, size_t nz, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz) +.. c:function:: size_t zfp_decode_partial_block_strided_int64_3(zfp_stream* stream, int64* p, size_t nx, size_t ny, size_t nz, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz) +.. c:function:: size_t zfp_decode_partial_block_strided_float_3(zfp_stream* stream, float* p, size_t nx, size_t ny, size_t nz, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz) +.. c:function:: size_t zfp_decode_partial_block_strided_double_3(zfp_stream* stream, double* p, size_t nx, size_t ny, size_t nz, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz) + + Decode 3D partial block of size *nx* |times| *ny* |times| *nz* to strided + array with strides *sx*, *sy*, and *sz*. + +.. _ll-4d-decoder: + +4D Data +^^^^^^^ + +.. c:function:: size_t zfp_decode_block_int32_4(zfp_stream* stream, int32* block) +.. 
c:function:: size_t zfp_decode_block_int64_4(zfp_stream* stream, int64* block) +.. c:function:: size_t zfp_decode_block_float_4(zfp_stream* stream, float* block) +.. c:function:: size_t zfp_decode_block_double_4(zfp_stream* stream, double* block) + + Decode 4D contiguous block of |4by4by4by4| values. + +---- + +.. c:function:: size_t zfp_decode_block_strided_int32_4(zfp_stream* stream, int32* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw) +.. c:function:: size_t zfp_decode_block_strided_int64_4(zfp_stream* stream, int64* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw) +.. c:function:: size_t zfp_decode_block_strided_float_4(zfp_stream* stream, float* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw) +.. c:function:: size_t zfp_decode_block_strided_double_4(zfp_stream* stream, double* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw) + + Decode 4D complete block to strided array with strides *sx*, *sy*, *sz*, and *sw*. + +---- + +.. c:function:: size_t zfp_decode_partial_block_strided_int32_4(zfp_stream* stream, int32* p, size_t nx, size_t ny, size_t nz, size_t nw, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw) +.. c:function:: size_t zfp_decode_partial_block_strided_int64_4(zfp_stream* stream, int64* p, size_t nx, size_t ny, size_t nz, size_t nw, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw) +.. c:function:: size_t zfp_decode_partial_block_strided_float_4(zfp_stream* stream, float* p, size_t nx, size_t ny, size_t nz, size_t nw, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw) +.. c:function:: size_t zfp_decode_partial_block_strided_double_4(zfp_stream* stream, double* p, size_t nx, size_t ny, size_t nz, size_t nw, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw) + + Decode 4D partial block of size *nx* |times| *ny* |times| *nz* |times| *nw* + to strided array with strides *sx*, *sy*, *sz*, and *sw*. + +.. 
_ll-utilities: + +Utility Functions +----------------- + +These functions convert 8- and 16-bit signed and unsigned integer data to +(by promoting) and from (by demoting) 32-bit integers that can be +(de)compressed by |zfp|'s :code:`int32` functions. These conversion functions +are preferred over simple casting since they eliminate the redundant leading +zeros that would otherwise have to be compressed, and they apply the +appropriate bias for unsigned integer data. + +---- + +.. c:function:: void zfp_promote_int8_to_int32(int32* oblock, const int8* iblock, uint dims) +.. c:function:: void zfp_promote_uint8_to_int32(int32* oblock, const uint8* iblock, uint dims) +.. c:function:: void zfp_promote_int16_to_int32(int32* oblock, const int16* iblock, uint dims) +.. c:function:: void zfp_promote_uint16_to_int32(int32* oblock, const uint16* iblock, uint dims) + + Convert *dims*-dimensional contiguous block to 32-bit integer type. + Use *dims* = 0 to promote a single value. + +---- + +.. c:function:: void zfp_demote_int32_to_int8(int8* oblock, const int32* iblock, uint dims) +.. c:function:: void zfp_demote_int32_to_uint8(uint8* oblock, const int32* iblock, uint dims) +.. c:function:: void zfp_demote_int32_to_int16(int16* oblock, const int32* iblock, uint dims) +.. c:function:: void zfp_demote_int32_to_uint16(uint16* oblock, const int32* iblock, uint dims) + + Convert *dims*-dimensional contiguous block from 32-bit integer type. + Use *dims* = 0 to demote a single value. + +.. _ll-cpp-wrappers: + +C++ Wrappers +------------ + +.. cpp:namespace:: zfp + +To facilitate calling the low-level API from C++, a number of wrappers are +available (as of |zfp| |cpprelease|) that are templated on scalar type and +dimensionality. Each function of the form :code:`zfp_function_type_dims`, +where *type* denotes scalar type and *dims* denotes dimensionality, has a +corresponding C++ wrapper :code:`zfp::function`. 
For example, +the C function :c:func:`zfp_encode_block_float_2` has a C++ wrapper +:cpp:func:`zfp::encode_block\<float, 2>`. Often *dims* can be inferred from +the parameters of overloaded functions, in which case it is omitted as a +template parameter. The C++ wrappers are defined in :file:`zfp.hpp`. + +Encoder +^^^^^^^ + +.. cpp:function:: template<typename Scalar, uint dims> size_t encode_block(zfp_stream* stream, const Scalar* block) + + Encode contiguous block of dimensionality *dims*. + +---- + +.. cpp:function:: template<typename Scalar> size_t encode_block_strided(zfp_stream* stream, const Scalar* p, ptrdiff_t sx) +.. cpp:function:: template<typename Scalar> size_t encode_block_strided(zfp_stream* stream, const Scalar* p, ptrdiff_t sx, ptrdiff_t sy) +.. cpp:function:: template<typename Scalar> size_t encode_block_strided(zfp_stream* stream, const Scalar* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz) +.. cpp:function:: template<typename Scalar> size_t encode_block_strided(zfp_stream* stream, const Scalar* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw) + + Encode complete block from strided array with strides *sx*, *sy*, *sz*, and + *sw*. + +---- + +.. cpp:function:: template<typename Scalar> size_t encode_partial_block_strided(zfp_stream* stream, const Scalar* p, size_t nx, ptrdiff_t sx) +.. cpp:function:: template<typename Scalar> size_t encode_partial_block_strided(zfp_stream* stream, const Scalar* p, size_t nx, size_t ny, ptrdiff_t sx, ptrdiff_t sy) +.. cpp:function:: template<typename Scalar> size_t encode_partial_block_strided(zfp_stream* stream, const Scalar* p, size_t nx, size_t ny, size_t nz, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz) +.. cpp:function:: template<typename Scalar> size_t encode_partial_block_strided(zfp_stream* stream, const Scalar* p, size_t nx, size_t ny, size_t nz, size_t nw, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw) + + Encode partial block of size *nx* |times| *ny* |times| *nz* |times| *nw* + from strided array with strides *sx*, *sy*, *sz*, and *sw*. + +Decoder +^^^^^^^ + +.. 
cpp:function:: template<typename Scalar, uint dims> size_t decode_block(zfp_stream* stream, Scalar* block) + + Decode contiguous block of dimensionality *dims*. + +---- + +.. cpp:function:: template<typename Scalar> size_t decode_block_strided(zfp_stream* stream, Scalar* p, ptrdiff_t sx) +.. cpp:function:: template<typename Scalar> size_t decode_block_strided(zfp_stream* stream, Scalar* p, ptrdiff_t sx, ptrdiff_t sy) +.. cpp:function:: template<typename Scalar> size_t decode_block_strided(zfp_stream* stream, Scalar* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz) +.. cpp:function:: template<typename Scalar> size_t decode_block_strided(zfp_stream* stream, Scalar* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw) + + Decode complete block to strided array with strides *sx*, *sy*, *sz*, and + *sw*. + +---- + +.. cpp:function:: template<typename Scalar> size_t decode_partial_block_strided(zfp_stream* stream, Scalar* p, size_t nx, ptrdiff_t sx) +.. cpp:function:: template<typename Scalar> size_t decode_partial_block_strided(zfp_stream* stream, Scalar* p, size_t nx, size_t ny, ptrdiff_t sx, ptrdiff_t sy) +.. cpp:function:: template<typename Scalar> size_t decode_partial_block_strided(zfp_stream* stream, Scalar* p, size_t nx, size_t ny, size_t nz, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz) +.. cpp:function:: template<typename Scalar> size_t decode_partial_block_strided(zfp_stream* stream, Scalar* p, size_t nx, size_t ny, size_t nz, size_t nw, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw) + + Decode partial block of size *nx* |times| *ny* |times| *nz* |times| *nw* to + strided array with strides *sx*, *sy*, *sz*, and *sw*. diff --git a/docs/source/modes.rst b/docs/source/modes.rst new file mode 100644 index 00000000..4810c6b3 --- /dev/null +++ b/docs/source/modes.rst @@ -0,0 +1,261 @@ +.. include:: defs.rst + +.. index:: + single: Compression mode +.. _modes: + +Compression Modes +================= + +|zfp| accepts one or more parameters for specifying how the data is to be +compressed to meet various constraints on accuracy or size. 
At a high +level, there are five different compression modes that are mutually +exclusive: +:ref:`expert `, +:ref:`fixed-rate `, +:ref:`fixed-precision `, +:ref:`fixed-accuracy `, and +:ref:`reversible ` mode. +The user has to select one of these modes and its corresponding parameters. +In streaming I/O applications, the +:ref:`fixed-accuracy mode ` is preferred, as +it provides the highest quality (in the absolute error sense) per bit of +compressed storage. + +The :c:type:`zfp_stream` struct encapsulates the compression parameters and +other information about the compressed stream. Its members should not be +manipulated directly. Instead, use the access functions (see the +:ref:`C API ` section) for setting and querying them. One can +verify the active compression mode on a :c:type:`zfp_stream` through +:c:func:`zfp_stream_compression_mode`. The members that govern the +compression parameters are described below. + +.. _mode-expert: +.. index:: + single: Compression mode; Expert mode + +Expert Mode +----------- + +The most general mode is the 'expert mode,' which takes four integer +parameters. Although most users will not directly select this mode, +we discuss it first since the other modes can be expressed in terms of +setting expert mode parameters. + +The four parameters denote constraints that are applied to each block +in the :ref:`compression algorithm `. +Compression is terminated as soon as one of these constraints is not met, +which has the effect of truncating the compressed bit stream that encodes +the block. The four constraints are as follows: + +.. c:member:: uint zfp_stream.minbits + + The minimum number of compressed bits used to represent a block. Usually + this parameter equals one bit, unless each and every block is to be stored + using a fixed number of bits to facilitate random access, in which case it + should be set to the same value as :c:member:`zfp_stream.maxbits`. + +.. 
c:member:: uint zfp_stream.maxbits + + The maximum number of bits used to represent a block. This parameter + sets a hard upper bound on compressed block size and governs the rate + in :ref:`fixed-rate mode `. It may also be used as an + upper storage limit to guard against buffer overruns in combination with + the accuracy constraints given by :c:member:`zfp_stream.maxprec` and + :c:member:`zfp_stream.minexp`. + +.. c:member:: uint zfp_stream.maxprec + + The maximum number of bit planes encoded. This parameter governs the number + of most significant uncompressed bits encoded per transform coefficient. + It does not directly correspond to the number of uncompressed mantissa bits + for the floating-point or integer values being compressed, but is closely + :ref:`related `. This is the parameter that specifies the + precision in :ref:`fixed-precision mode `, and it + provides a mechanism for controlling the *relative error*. Note that this + parameter selects how many bit planes to encode regardless of the magnitude + of the common floating-point exponent within the block. + +.. c:member:: int zfp_stream.minexp + + The smallest absolute bit plane number encoded (applies to floating-point + data only; this parameter is ignored for integer data). The place value of + each transform coefficient bit depends on the common floating-point exponent, + *e*, that scales the integer coefficients. If the most significant + coefficient bit has place value 2\ :sup:`e`, then the number of bit planes + encoded is (one plus) the difference between *e* and + :c:member:`zfp_stream.minexp`. As an analogy, consider representing + currency in decimal. Setting :c:member:`zfp_stream.minexp` to -2 would, + if generalized to base 10, ensure that amounts are represented to cent + accuracy, i.e., in units of 10\ :sup:`-2` = $0.01. This parameter governs + the *absolute error* in :ref:`fixed-accuracy mode `. 
+ Note that to achieve a certain accuracy in the decompressed values, the + :c:member:`zfp_stream.minexp` value has to be conservatively lowered since + |zfp|'s inverse transform may magnify the error (see also + FAQs :ref:`#20-22 `). + +Care must be taken to allow all constraints to be met, as encoding +terminates as soon as a single constraint is violated (except +:c:member:`zfp_stream.minbits`, which is satisfied at the end of encoding by +padding zeros). + +.. warning:: + + For floating-point data, the :c:member:`zfp_stream.maxbits` parameter must + be large enough to allow the common block exponent and any control bits to + be encoded. This implies *maxbits* |geq| 9 for single-precision data and + *maxbits* |geq| 12 for double-precision data. Choosing a smaller value is + of no use as it would prevent any fraction (value) bits from being encoded, + resulting in an all-zero decompressed block. More importantly, such a + constraint will not be respected by |zfp| for performance reasons, which + if not accounted for could potentially lead to buffer overruns. + +As mentioned above, other combinations of constraints can be used. +For example, to ensure that the compressed stream is not larger than +the uncompressed one, or that it fits within the amount of memory +allocated, one may in conjunction with other constraints set +:: + + maxbits = 4^d * CHAR_BIT * sizeof(Type) + +where Type is either float or double. The ``minbits`` parameter is useful +only in fixed-rate mode; when ``minbits`` = ``maxbits``, zero-bits are +padded to blocks that compress to fewer than ``maxbits`` bits. + +The effects of the above four parameters are best explained in terms of the +three main compression modes supported by |zfp|, described below. + +.. _mode-fixed-rate: +.. 
index:: + single: Compression mode; Fixed-rate mode + single: Rate + +Fixed-Rate Mode +--------------- + +In fixed-rate mode, each *d*-dimensional compressed block of |4powd| values +is stored using a fixed number of bits given by the parameter +:c:member:`zfp_stream.maxbits`. This number of compressed bits per +*block* is amortized over the |4powd| values to give a *rate* in +bits per *value*:: + + rate = maxbits / 4^d + +This rate is specified in the :ref:`zfp executable ` via the +:option:`-r` option, and programmatically via :c:func:`zfp_stream_set_rate`, +as a floating-point value. Fixed-rate mode can also be achieved via the +expert mode interface by setting +:: + + minbits = maxbits = (1 << (2 * d)) * rate + maxprec = ZFP_MAX_PREC + minexp = ZFP_MIN_EXP + +Note that each block stores a bit to indicate whether the block is empty, +plus a common exponent. Hence :c:member:`zfp_stream.maxbits` must be at +least 9 for single precision and 12 for double precision. + +Fixed-rate mode is needed to support random access to blocks, and also is +the mode used in the implementation of |zfp|'s +:ref:`compressed arrays `. Fixed-rate mode also ensures a +predictable memory/storage footprint, but usually results in far worse +accuracy per bit than the variable-rate fixed-precision and fixed-accuracy +modes. + +.. note:: + Use fixed-rate mode only if you have to bound the compressed size + or need read and write random access to blocks. + +.. _mode-fixed-precision: +.. index:: + single: Compression mode; Fixed-precision mode + +Fixed-Precision Mode +-------------------- + +In fixed-precision mode, the number of bits used to encode a block may +vary, but the number of bit planes (i.e., the precision) encoded for the +transform coefficients is fixed. To achieve the desired precision, +use option :option:`-p` with the :ref:`zfp executable ` or call +:c:func:`zfp_stream_set_precision`. 
In expert mode, fixed precision is +achieved by specifying the precision in :c:member:`zfp_stream.maxprec` +and fully relaxing the size constraints, i.e., +:: + + minbits = ZFP_MIN_BITS + maxbits = ZFP_MAX_BITS + maxprec = precision + minexp = ZFP_MIN_EXP + +Fixed-precision mode is preferable when relative rather than absolute +errors matter. + +.. _mode-fixed-accuracy: +.. index:: + single: Compression mode; Fixed-accuracy mode + +Fixed-Accuracy Mode +------------------- + +In fixed-accuracy mode, all transform coefficient bit planes up to a +minimum bit plane number are encoded. (The actual minimum bit plane +is not necessarily :c:member:`zfp_stream.minexp`, but depends on the +dimensionality, *d*, of the data. The reason for this is that the inverse +transform incurs range expansion, and the amount of expansion depends on +the number of dimensions.) Thus, :c:member:`zfp_stream.minexp` should +be interpreted as the base-2 logarithm of an absolute error tolerance. +In other words, given an uncompressed value, *f*, and a reconstructed +value, *g*, the absolute difference \| *f* |minus| *g* \| is at most +2\ :sup:`minexp`. +(Note that it is not possible to guarantee error tolerances smaller than +machine epsilon relative to the largest value within a block.) This error +tolerance is not always tight (especially for 3D and 4D arrays), but can +conservatively be set so that even for worst-case inputs the error +tolerance is respected. To achieve fixed accuracy to within 'tolerance', +use option :option:`-a` with the :ref:`zfp executable ` or call +:c:func:`zfp_stream_set_accuracy`. The corresponding expert mode +parameters are:: + + minbits = ZFP_MIN_BITS + maxbits = ZFP_MAX_BITS + maxprec = ZFP_MAX_PREC + minexp = floor(log2(tolerance)) + +As in fixed-precision mode, the number of bits used per block is not +fixed but is dictated by the data. Use *tolerance* = 0 to achieve +near-lossless compression (see :ref:`mode-reversible` for guaranteed +lossless compression). 
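The mapping from *tolerance* to *minexp* shown in the parameter list above can be checked with a few lines of plain Python. This is a standalone sketch; the ``ZFP_*`` strings below merely stand in for the C constants, and nothing here calls the zfp library:

```python
import math

def accuracy_params(tolerance):
    """Expert-mode parameters for fixed-accuracy mode: only minexp is
    derived from the tolerance; the other limits are fully relaxed."""
    return {
        "minbits": "ZFP_MIN_BITS",  # stand-ins for the C constants
        "maxbits": "ZFP_MAX_BITS",
        "maxprec": "ZFP_MAX_PREC",
        "minexp": math.floor(math.log2(tolerance)),
    }

params = accuracy_params(1e-3)
print(params["minexp"])         # -10, since 2^-10 <= 1e-3 < 2^-9
print(2.0 ** params["minexp"])  # 0.0009765625, the guaranteed error bound
```

Note that the effective bound 2\ :sup:`minexp` is at most the requested tolerance, never larger.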
Fixed-accuracy mode gives the highest quality +(in terms of absolute error) for a given compression rate, and is +preferable when random access is not needed. + +.. note:: + Fixed-accuracy mode is available for floating-point (not integer) data + only. + +.. index:: + single: Compression mode; Reversible mode + single: Lossless compression +.. _mode-reversible: + +Reversible Mode +--------------- + +As of |zfp| |revrelease|, reversible (lossless) compression is supported. +As with the other compression modes, each block is compressed and decompressed +independently, but reversible mode uses a different compression algorithm +that ensures a bit-for-bit identical reconstruction of integer and +floating-point data. For IEEE-754 floating-point data, reversible mode +preserves special values such as subnormals, infinities, NaNs, and +positive and negative zero. + +The expert mode parameters corresponding to reversible mode are:: + + minbits = ZFP_MIN_BITS + maxbits = ZFP_MAX_BITS + maxprec = ZFP_MAX_PREC + minexp < ZFP_MIN_EXP + +Reversible mode is enabled via :c:func:`zfp_stream_set_reversible` and through +the :option:`-R` command-line option in the :ref:`zfp executable `. +It is supported by both the low- and high-level interfaces and by the serial +and OpenMP execution policies, but it is not yet implemented in CUDA. diff --git a/docs/source/pointers.inc b/docs/source/pointers.inc new file mode 100644 index 00000000..aceb71ad --- /dev/null +++ b/docs/source/pointers.inc @@ -0,0 +1,187 @@ +.. index:: + single: Pointers +.. _pointers: + +Pointers +-------- + +.. cpp:namespace:: zfp + +.. cpp:class:: array1::const_pointer +.. cpp:class:: array2::const_pointer +.. cpp:class:: array3::const_pointer +.. cpp:class:: array4::const_pointer +.. cpp:class:: array1::pointer : public array1::const_pointer +.. cpp:class:: array2::pointer : public array2::const_pointer +.. cpp:class:: array3::pointer : public array3::const_pointer +.. 
cpp:class:: array4::pointer : public array4::const_pointer + +Similar to :ref:`references `, |zfp| supports proxy pointers +(also known as fancy pointers) to individual array elements. From the +user's perspective, such pointers behave much like regular pointers to +uncompressed data, e.g., instead of +:: + + float a[ny][nx]; // uncompressed 2D array of floats + float* p = &a[0][0]; // point to first array element + p[nx] = 1; // set a[1][0] = 1 + *++p = 2; // set a[0][1] = 2 + +one would write +:: + + zfp::array2<float> a(nx, ny, rate); // compressed 2D array of floats + zfp::array2<float>::pointer p = &a(0, 0); // point to first array element + p[nx] = 1; // set a(0, 1) = 1 + *++p = 2; // set a(1, 0) = 2 + +However, even though |zfp|'s proxy pointers point to individual scalars, +they are associated with the array that those scalars are stored in, including +the array's dimensionality. Pointers into arrays of different dimensionality +have incompatible types. Moreover, pointers to elements in different arrays +are incompatible. For example, one cannot take the difference between +pointers into two different arrays. + +Unlike |zfp|'s proxy references, its proxy pointers support traversing +arrays using conventional pointer arithmetic. In particular, unlike the +:ref:`iterators ` below, |zfp|'s pointers are oblivious to the +fact that the compressed arrays are partitioned into blocks, and the pointers +traverse arrays element by element as though the arrays were flattened to +one-dimensional arrays. That is, if :code:`p` points to the first element +of a 3D array :code:`a(nx, ny, nz)`, then +:code:`a(i, j, k) == p[i + nx * (j + ny * k)]`. In other words, pointer +indexing follows the same order as flat array indexing +(see :cpp:func:`array::operator[]`). + +A pointer remains valid during the lifetime of the array into which it points. 
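The flat-index correspondence above can be verified without zfp at all. The following standalone Python sketch uses a hypothetical 4 x 3 x 2 array (any sizes work) and confirms that incrementing a pointer visits elements with *i* varying fastest:

```python
# Dimensions of a hypothetical 3D array a(nx, ny, nz).
nx, ny, nz = 4, 3, 2

def flat_index(i, j, k):
    """Flattened position of a(i, j, k): a(i, j, k) == p[i + nx * (j + ny * k)]."""
    return i + nx * (j + ny * k)

# Enumerating (k, j, i) with i innermost visits indices 0, 1, 2, ... in order,
# which is exactly the traversal order of ++p.
order = [flat_index(i, j, k) for k in range(nz) for j in range(ny) for i in range(nx)]
assert order == list(range(nx * ny * nz))
print(flat_index(1, 2, 0))  # 9
```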
+Like conventional pointers, proxy pointers can be passed to other functions +and manipulated there, for instance, by passing the pointer by reference via +:code:`pointer&`. + +As of |zfp| |crpirelease|, const qualified pointers :code:`const_pointer` +are available, and conceptually are equivalent to :code:`const Scalar*`. +Pointers are available for :ref:`read-only arrays ` also. + +The following operators are defined for proxy pointers. Below *p* refers +to the pointer being acted upon. + +.. cpp:namespace:: zfp::arrayANY + +.. cpp:function:: pointer pointer::operator=(const pointer& q) +.. cpp:function:: const_pointer const_pointer::operator=(const const_pointer& q) + + Assignment operator. Assigns *q* to *p*. + +---- + +.. cpp:function:: reference pointer::operator*() const +.. cpp:function:: const_reference const_pointer::operator*() const + + Dereference operator. Return proxy (const) reference to the value pointed + to by *p*. + +---- + +.. cpp:function:: reference pointer::operator[](ptrdiff_t d) const +.. cpp:function:: const_reference const_pointer::operator[](ptrdiff_t d) const + + Offset dereference operator. Return proxy (const) reference to the value + stored at :code:`p[d]`. + +---- + +.. cpp:function:: pointer pointer::operator+(ptrdiff_t d) const +.. cpp:function:: const_pointer const_pointer::operator+(ptrdiff_t d) const + + Return a copy of the pointer incremented by *d*. + +---- + +.. cpp:function:: pointer pointer::operator-(ptrdiff_t d) const +.. cpp:function:: const_pointer const_pointer::operator-(ptrdiff_t d) const + + Return a copy of the pointer decremented by *d*. + +---- + +.. cpp:function:: ptrdiff_t pointer::operator-(const pointer& q) const +.. cpp:function:: ptrdiff_t const_pointer::operator-(const const_pointer& q) const + + Return difference *p - q*. Defined only for pointers within the same + array. + +---- + +.. cpp:function:: bool pointer::operator==(const pointer& q) const +.. 
cpp:function:: bool const_pointer::operator==(const const_pointer& q) const + + Return true if *p* and *q* point to the same array element. + +---- + +.. cpp:function:: bool pointer::operator!=(const pointer& q) const +.. cpp:function:: bool const_pointer::operator!=(const const_pointer& q) const + + Return true if *p* and *q* do not point to the same array element. + This operator returns false if *p* and *q* do not point into the same array. + +---- + +.. _ptr_inequalities: +.. cpp:function:: bool pointer::operator<=(const pointer& q) const +.. cpp:function:: bool pointer::operator>=(const pointer& q) const +.. cpp:function:: bool pointer::operator<(const pointer& q) const +.. cpp:function:: bool pointer::operator>(const pointer& q) const +.. cpp:function:: bool const_pointer::operator<=(const const_pointer& q) const +.. cpp:function:: bool const_pointer::operator>=(const const_pointer& q) const +.. cpp:function:: bool const_pointer::operator<(const const_pointer& q) const +.. cpp:function:: bool const_pointer::operator>(const const_pointer& q) const + + Return true if the two pointers satisfy the given relationship. These operators + return false if *p* and *q* do not point into the same array. + +---- + +.. cpp:function:: pointer& pointer::operator++() +.. cpp:function:: const_pointer& const_pointer::operator++() + + Prefix increment pointer, i.e., :code:`++p`. Return reference to + the incremented pointer. + +---- + +.. cpp:function:: pointer& pointer::operator--() +.. cpp:function:: const_pointer& const_pointer::operator--() + + Prefix decrement pointer, i.e., :code:`--p`. Return reference to + the decremented pointer. + +---- + +.. cpp:function:: pointer pointer::operator++(int) +.. cpp:function:: const_pointer const_pointer::operator++(int) + + Postfix increment pointer, i.e., :code:`p++`. Return a copy of + the pointer before it was incremented. + +---- + +.. cpp:function:: pointer pointer::operator--(int) +.. 
cpp:function:: const_pointer const_pointer::operator--(int) + + Postfix decrement pointer, i.e., :code:`p--`. Return a copy of + the pointer before it was decremented. + +---- + +.. cpp:function:: pointer pointer::operator+=(ptrdiff_t d) +.. cpp:function:: const_pointer const_pointer::operator+=(ptrdiff_t d) + + Increment pointer by *d*. Return a copy of the incremented pointer. + +---- + +.. cpp:function:: pointer pointer::operator-=(ptrdiff_t d) +.. cpp:function:: const_pointer const_pointer::operator-=(ptrdiff_t d) + + Decrement pointer by *d*. Return a copy of the decremented pointer. diff --git a/docs/source/python.rst b/docs/source/python.rst new file mode 100644 index 00000000..92373c81 --- /dev/null +++ b/docs/source/python.rst @@ -0,0 +1,155 @@ +.. include:: defs.rst + +.. py:module:: zfpy + +.. _zfpy: + +Python Bindings +=============== + +|zfp| |zfpyrelease| adds |zfpy|: Python bindings that allow compressing +and decompressing `NumPy `_ integer and +floating-point arrays. The |zfpy| implementation is based on +`Cython `_ and requires both NumPy and Cython +to be installed. Currently, |zfpy| supports only serial execution. + +The |zfpy| API is limited to two functions, for compression and +decompression, which are described below. + +Compression +----------- + +.. py:function:: compress_numpy(arr, tolerance = -1, rate = -1, precision = -1, write_header = True) + + Compress NumPy array, *arr*, and return a compressed byte stream. The + non-expert :ref:`compression mode ` is selected by setting one of + *tolerance*, *rate*, or *precision*. If none of these arguments is + specified, then :ref:`reversible mode ` is used. By + default, a header that encodes array shape and scalar type as well as + compression parameters is prepended, which can be omitted by setting + *write_header* to *False*. If this function fails for any reason, an + exception is thrown. 
+ +|zfpy| compression currently requires a NumPy array +(`ndarray `_) +populated with the data to be compressed. The array metadata (i.e., +shape, strides, and scalar type) are used to automatically populate the +:c:type:`zfp_field` structure passed to :c:func:`zfp_compress`. By default, +all that is required to be passed to the compression function is the +NumPy array; this will result in a stream that includes a header and is +losslessly compressed using the :ref:`reversible mode `. +For example:: + + import zfpy + import numpy as np + + my_array = np.arange(1, 20) + compressed_data = zfpy.compress_numpy(my_array) + decompressed_array = zfpy.decompress_numpy(compressed_data) + + # confirm lossless compression/decompression + np.testing.assert_array_equal(my_array, decompressed_array) + +Using the fixed-accuracy, fixed-rate, or fixed-precision modes simply requires +setting one of the *tolerance*, *rate*, or *precision* arguments, respectively. +For example:: + + compressed_data = zfpy.compress_numpy(my_array, tolerance=1e-3) + decompressed_array = zfpy.decompress_numpy(compressed_data) + + # Note the change from "equal" to "allclose" due to the lossy compression + np.testing.assert_allclose(my_array, decompressed_array, atol=1e-3) + +Since NumPy arrays are C-ordered by default (i.e., the rightmost index +varies fastest) and :c:func:`zfp_compress` assumes Fortran ordering +(i.e., the leftmost index varies fastest), :py:func:`compress_numpy` +automatically reverses the order of dimensions and strides in order to +improve the expected memory access pattern during compression. +The :py:func:`decompress_numpy` function also reverses the order of +dimensions and strides, and therefore decompression will restore the +shape of the original array. Note, however, that the |zfp| stream does +not encode the memory layout of the original NumPy array, and therefore +layout information like strides, contiguity, and C vs. Fortran order +may not be preserved. 
Nevertheless, |zfpy| correctly compresses NumPy +arrays with any memory layout, including Fortran ordering and non-contiguous +storage. + +Byte streams produced by :py:func:`compress_numpy` can be decompressed +by the :ref:`zfp command-line tool `. In general, they cannot +be :ref:`deserialized ` as compressed arrays, however. + +.. note:: + :py:func:`decompress_numpy` requires a header to decompress properly, so do + not set *write_header* = *False* during compression if you intend to + decompress the stream with |zfpy|. + +Decompression +------------- + +.. py:function:: decompress_numpy(compressed_data) + + Decompress a byte stream, *compressed_data*, produced by + :py:func:`compress_numpy` (with header enabled) and return the + decompressed NumPy array. This function throws an exception upon error. + +:py:func:`decompress_numpy` consumes a compressed stream that includes a +header and produces a NumPy array with metadata populated based on the +contents of the header. Stride information is not stored in the |zfp| +header, so :py:func:`decompress_numpy` assumes that the array was compressed +with the first (leftmost) dimension varying fastest (typically referred to as +Fortran-ordering). The returned NumPy array is in C-ordering (the default +for NumPy arrays), so the shape of the returned array is reversed from +the shape information stored in the embedded header. For example, if the +header declares the array to be of shape (*nx*, *ny*, *nz*) = (2, 4, 8), +then the returned NumPy array will have a shape of (8, 4, 2). +Since the :py:func:`compress_numpy` function also reverses the order of +dimensions, arrays both compressed and decompressed with |zfpy| will have +compatible shape. + +.. note:: + Decompressing a stream without a header requires using the + internal :py:func:`_decompress` Python function (or the + :ref:`C API `). + +.. 
py:function:: _decompress(compressed_data, ztype, shape, out = None, tolerance = -1, rate = -1, precision = -1) + + Decompress a headerless compressed stream (if a header is present in + the stream, it will be incorrectly interpreted as compressed data). + *ztype* specifies the array scalar type while *shape* specifies the array + dimensions; both must be known by the caller. The compression mode is + selected by specifying one (or none) of *tolerance*, *rate*, and + *precision*, as in :py:func:`compress_numpy`, and also must be known + by the caller. If *out = None*, a new NumPy array is allocated. Otherwise, + *out* specifies the NumPy array or memory buffer to decompress into. + Regardless, the decompressed NumPy array is returned unless an error occurs, + in which case an exception is thrown. + +In :py:func:`_decompress`, *ztype* is one of the |zfp| supported scalar types +(see :c:type:`zfp_type`), which are available in |zfpy| as +:: + + type_int32 = zfp_type_int32 + type_int64 = zfp_type_int64 + type_float = zfp_type_float + type_double = zfp_type_double + +These can be manually specified (e.g., :code:`zfpy.type_int32`) or generated +from a NumPy *dtype* (e.g., :code:`zfpy.dtype_to_ztype(array.dtype)`). + +If *out* is specified, the data is decompressed into the *out* buffer. +*out* can be a NumPy array or a pointer to memory large enough to hold the +decompressed data. Regardless of the type of *out* and whether it is provided, +:py:func:`_decompress` always returns a NumPy array. If *out* is not +provided, then the array is allocated for the user. If *out* is provided, +then the returned NumPy array is just a pointer to or wrapper around the +user-supplied *out*. If *out* is a NumPy array, then its shape and scalar +type must match the required arguments *shape* and *ztype*. To avoid this +constraint check, use :code:`out = ndarray.data` rather than +:code:`out = ndarray` when calling :py:func:`_decompress`. + +.. 
warning:: + :py:func:`_decompress` is an "experimental" function currently used + internally for testing. It does allow decompression of streams without + headers, but providing too small of an output buffer or incorrectly + specifying the shape or strides can result in segmentation faults. + Use with care. diff --git a/docs/source/references.inc b/docs/source/references.inc new file mode 100644 index 00000000..6812b5a2 --- /dev/null +++ b/docs/source/references.inc @@ -0,0 +1,106 @@ +.. index:: + single: References +.. _references: + +References +---------- + +.. cpp:namespace:: zfp + +.. cpp:class:: array1::const_reference +.. cpp:class:: array2::const_reference +.. cpp:class:: array3::const_reference +.. cpp:class:: array4::const_reference +.. cpp:class:: array1::reference : public array1::const_reference +.. cpp:class:: array2::reference : public array2::const_reference +.. cpp:class:: array3::reference : public array3::const_reference +.. cpp:class:: array4::reference : public array4::const_reference + +Array :ref:`indexing operators ` must return lvalue references that +alias array elements and serve as vehicles for assigning values to those +elements. Unfortunately, |zfp| cannot simply return a standard C++ reference +(e.g., :code:`float&`) to an uncompressed array element since the element in +question may exist only in compressed form or as a transient cached entry that +may be invalidated (evicted) at any point. + +To address this, |zfp| provides *proxies* for references and pointers that +act much like regular references and pointers, but which refer to elements +by array and index rather than by memory address. When assigning to an +array element through such a proxy reference or pointer, the corresponding +element is decompressed to cache (if not already cached) and immediately +updated. + +|zfp| references may be freely passed to other functions and they remain +valid during the lifetime of the corresponding array element. 
One may also +take the address of a reference, which yields a +:ref:`proxy pointer `. When a reference appears as an rvalue in +an expression, it is implicitly converted to a value. + +|zfp| |crpirelease| adds ``const`` qualified versions of references, +pointers, and iterators to support const correctness and potential performance +improvements when only read access is needed. As with STL containers, the +corresponding types are prefixed by ``const_``, e.g., +``const_reference``. The mutable versions of these classes inherit +the read-only API from the corresponding const versions. + +Only references into :ref:`read-write arrays ` are discussed +here; the :ref:`read-only arrays ` support the same +``const_reference`` API. + +.. note:: + Do not confuse :code:`const_reference` and :code:`const reference`. The + former is a reference to an immutable array element, while the latter means + that the proxy reference object itself is immutable. + +References define a single type: + +.. cpp:namespace:: zfp::arrayANY + +.. cpp:type:: reference::value_type +.. cpp:type:: const_reference::value_type + + Scalar type associated with referenced array elements. + +---- + +The following operators are defined for |zfp| references. They act on the +referenced array element in the same manner as operators defined for +conventional C++ references. References are obtained via +:ref:`array inspectors ` +and :ref:`mutators `. + +---- + +.. cpp:function:: value_type reference::operator value_type() const +.. cpp:function:: value_type const_reference::operator value_type() const + + Conversion operator for dereferencing the reference. Return the value + of the referenced array element. + +---- + +.. cpp:function:: pointer reference::operator&() const +.. cpp:function:: const_pointer const_reference::operator&() const + + Return (const) pointer to the referenced array element. + +---- + +.. _ref_copy: +.. 
cpp:function:: reference reference::operator=(const reference& ref) + + Assignment (copy) operator. The referenced element, *elem*, is assigned the + value stored at the element referenced by *ref*. Return :code:`*this`. + +---- + +.. _ref_mutators: +.. cpp:function:: reference reference::operator=(Scalar val) +.. cpp:function:: reference reference::operator+=(Scalar val) +.. cpp:function:: reference reference::operator-=(Scalar val) +.. cpp:function:: reference reference::operator*=(Scalar val) +.. cpp:function:: reference reference::operator/=(Scalar val) + + Assignment and compound assignment operators. For a given operator + :code:`op`, update the referenced element, *elem*, via + *elem* :code:`op` *val*. Return :code:`*this`. diff --git a/docs/source/requirements.txt b/docs/source/requirements.txt new file mode 100644 index 00000000..28fdfbff --- /dev/null +++ b/docs/source/requirements.txt @@ -0,0 +1,3 @@ +# force older sphinx version for readthedocs build +sphinx==1.6.7 +sphinx-fortran diff --git a/docs/source/serialization.inc b/docs/source/serialization.inc new file mode 100644 index 00000000..7f5da8b5 --- /dev/null +++ b/docs/source/serialization.inc @@ -0,0 +1,215 @@ +.. index:: + single: Serialization +.. _serialization: + +Serialization +------------- + +.. cpp:namespace:: zfp + +|zfp|'s read-write compressed arrays can be serialized to sequential, +contiguous storage and later recovered back into an object, e.g., to support +I/O of compressed-array objects. Two pieces of information are needed +to describe a |zfp| array: the raw compressed data, obtained via +:cpp:func:`array::compressed_data` and :cpp:func:`array::compressed_size`, +and a :ref:`header
` that describes the array scalar type, +dimensions, and rate. +The user may concatenate the header and compressed data to form a +fixed-rate byte stream that can be read by the |zfp| +:ref:`command-line tool `. When serializing the array, +the user should first call :cpp:func:`array::flush_cache` before +accessing the raw compressed data. + +There are two primary ways to construct a compressed-array object from +compressed data: via array-specific :ref:`constructors ` +and via a generic :ref:`factory function `: + +- When the array scalar type (i.e., :code:`float` or :code:`double`) and + dimensionality (i.e., 1D, 2D, 3D, or 4D) are already known, the corresponding + array :ref:`constructor ` may be used. If the + scalar type and dimensionality stored in the header do not match + the array class, then an :ref:`exception ` is thrown. + +- |zfp| provides a :ref:`factory function ` that can be used + when the serialized array type is unknown but described in the header. + This function returns a pointer to the abstract base class, + :cpp:class:`array`, which the caller should dynamically cast to the + corresponding derived array, e.g., by examining + :cpp:func:`array::scalar_type` and :cpp:func:`array::dimensionality`. + + The (static) factory function is made available by including + :file:`zfp/factory.hpp`. This header must be included *after* first + including the header files associated with the compressed arrays, i.e., + :file:`zfp/array1.hpp`, :file:`zfp/array2.hpp`, :file:`zfp/array3.hpp`, and + :file:`zfp/array4.hpp`. Only those arrays whose header files are included + can be constructed by the factory function. This design decouples the + array classes so that they may be included independently, for example, + to reduce compilation time. + +Both types of deserialization functions accept an :cpp:class:`array::header`, +an optional buffer holding compressed data, and an optional buffer size. 
+If this buffer is provided, then a separate copy of the compressed data it +holds is made, which is used to initialize the array. If the optional buffer +size is also provided, then these functions throw an +:ref:`exception ` if the size is not at least as large as is +expected from the metadata stored in the header. This safeguard is +implemented to avoid accessing memory beyond the end of the buffer. If no +buffer is provided, then all array elements are default initialized to zero. +The array may later be initialized by directly reading/copying data into the +space pointed to by :cpp:func:`array::compressed_data` and calling +:cpp:func:`array::clear_cache` (in either order). + +Below is a simple example of serialization of a 3D compressed array of doubles +(error checking has been omitted for clarity):: + + zfp::array3d a(nx, ny, nz, rate); + ... + a.flush_cache(); + zfp::array::header h(a); + fwrite(h.data(), h.size_bytes(), 1, file); + fwrite(a.compressed_data(), a.compressed_size(), 1, file); + +We may then deserialize this array using the factory function. The following +example reads the compressed data directly into the array without making a +copy:: + + zfp::array::header h; + fread(h.data(), h.size_bytes(), 1, file); + zfp::array* p = zfp::array::construct(h); + fread(p->compressed_data(), p->compressed_size(), 1, file); + assert(p->dimensionality() == 3 && p->scalar_type() == zfp_type_double); + zfp::array3d& a = *dynamic_cast<zfp::array3d*>(p); + +When the array is no longer in use, call :code:`delete p;` to deallocate it. + +.. note:: + The array serialization API changed significantly in |zfp| |crpirelease|. + The :cpp:func:`array::get_header` function is now deprecated and has been + replaced with a :ref:`header constructor ` that takes an + array as parameter. Exceptions are now part of the main :code:`zfp` + namespace rather than nested within the array header. 
The header is no + longer a simple POD data structure but should be queried for its data + pointer and size. + +.. index:: + single: Header + +.. _header: + +Header +^^^^^^ + +.. cpp:namespace:: zfp + +Short 12-byte headers are used to describe array metadata and compression +parameters when serializing a compressed array. This header is the same as +supported by the :c:func:`zfp_read_header` and :c:func:`zfp_write_header` +functions, using :c:macro:`ZFP_HEADER_FULL` to indicate that complete metadata +is to be stored in the header. The header is also compatible with the |zfp| +:ref:`command-line tool `. Processing of the header may result in an +:ref:`exception ` being thrown. + +.. note:: + Compressed-array headers use |zfp|'s most concise representation of only + 96 bits. Such short headers support compressed blocks up to 2048 bits long. + This implies that the highest rate for 3D arrays is 2048/4\ :sup:`3` = 32 + bits/value; the highest rate for 4D arrays is only 2048/4\ :sup:`4` = 8 + bits/value. 3D and 4D arrays whose rate exceeds these limits cannot be + serialized and result in an exception being thrown. 1D and 2D arrays + support rates up to 512 and 128 bits/value, respectively, which both + are large enough to represent all usable rates. + +.. cpp:class:: array::header + + The header stores information such as scalar type, array dimensions, and + compression parameters such as rate. Compressed-array headers are always + 96 bits long. + +.. cpp:namespace:: zfp::array + +---- + +.. cpp:function:: header::header() + + Default constructor for header. + +---- + +.. _header_ctor: +.. cpp:function:: header::header(const array& a) + + Construct header for compressed-array *a*. Throws an + :ref:`exception ` upon failure. + +---- + +.. _header_ctor_buffer: +.. cpp:function:: header::header(const void* buffer, size_t bytes = 0) + + Deserialize header from memory buffer given by *buffer* of optional + size *bytes*. 
This memory buffer is obtained from an existing + header during serialization via :cpp:func:`header::data` and + :cpp:func:`header::size_bytes`. The constructor throws an + :ref:`exception ` upon failure. + +---- + +.. cpp:function:: zfp_type header::scalar_type() const + + Scalar type associated with array (see :cpp:func:`array::scalar_type`). + +---- + +.. cpp:function:: uint header::dimensionality() const + + Dimensionality associated with array (see :cpp:func:`array::dimensionality`). + +---- + +.. _header_dims: +.. cpp:function:: size_t header::size_x() const +.. cpp:function:: size_t header::size_y() const +.. cpp:function:: size_t header::size_z() const +.. cpp:function:: size_t header::size_w() const + + :ref:`Array dimensions `. Unused dimensions have a size of zero. + +---- + +.. cpp:function:: double header::rate() const + + Rate in bits per value (see :cpp:func:`array::rate`); + +---- + +.. cpp:function:: virtual const void* header::data() const = 0 + + Return pointer to header data. + +---- + +.. cpp:function:: virtual size_t header::size_bytes(uint mask = ZFP_DATA_HEADER) const = 0 + + When *mask* = :c:macro:`ZFP_DATA_HEADER`, return header payload size in + bytes pointed to by :cpp:func:`header::data`. Only those bytes are needed + to (de)serialize a header. The header object stores additional (redundant) + metadata whose size can be queried via :c:macro:`ZFP_DATA_META`. + +.. index:: + single: Exceptions + +.. _exception: + +Exceptions +^^^^^^^^^^ + +.. cpp:class:: exception : public std::runtime_error + + Compressed arrays may throw this exception upon serialization, when + constructing a header via its :ref:`constructor `, or + deserialization, when constructing a compressed array via its + :ref:`constructor ` or + :ref:`factory function `. + The :cpp:func:`exception::what` method returns a :code:`std::string` + error message that indicates the cause of the exception. + Most error messages changed in |zfp| |4darrrelease|. 
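The serializable-rate limits quoted in the header note above follow directly from the 2048-bit block cap. A standalone Python sketch of the arithmetic:

```python
MAX_BLOCK_BITS = 2048  # largest compressed block a 96-bit header can describe

def max_serializable_rate(d):
    """Highest fixed rate (bits/value) that remains serializable for a
    d-dimensional array: 2048 / 4^d (exact here, so integer division)."""
    return MAX_BLOCK_BITS // 4 ** d

for d in (1, 2, 3, 4):
    print(d, max_serializable_rate(d))  # 1 512 / 2 128 / 3 32 / 4 8
```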
diff --git a/docs/source/setup.py b/docs/source/setup.py new file mode 100644 index 00000000..60684932 --- /dev/null +++ b/docs/source/setup.py @@ -0,0 +1,3 @@ +from setuptools import setup + +setup() diff --git a/docs/source/testing.rst b/docs/source/testing.rst new file mode 100644 index 00000000..cb7e6a60 --- /dev/null +++ b/docs/source/testing.rst @@ -0,0 +1,18 @@ +.. include:: defs.rst + +Regression Tests +================ + +The :program:`testzfp` program performs basic regression testing by exercising +a small but important subset of |libzfp| and the compressed-array +classes. It serves as a sanity check that |zfp| has been built properly. +These tests assume the default compiler settings, i.e., with none of the +settings in :file:`Config` or :file:`CMakeLists.txt` modified. By default, +small, synthetic floating-point arrays are used in the test. To test larger +arrays, use the :code:`large` command-line option. When large arrays are +used, the (de)compression throughput is also measured and reported in number +of uncompressed bytes per second. + +More extensive unit and functional tests are available on the |zfp| GitHub +`develop branch `_ in the +:file:`tests` directory. diff --git a/docs/source/tutorial.rst b/docs/source/tutorial.rst new file mode 100644 index 00000000..e43e249e --- /dev/null +++ b/docs/source/tutorial.rst @@ -0,0 +1,620 @@ +.. include:: defs.rst +.. _tutorial: + +Tutorial +======== + +This tutorial provides examples that illustrate how to use the |zfp| +library and compressed arrays, and includes code snippets that show +the order of declarations and function calls needed to use the +compressor. + +This tutorial is divided into three parts: the high-level |libzfp| +:ref:`library `; the low-level +:ref:`compression codecs `; and the +:ref:`compressed array classes ` (in that order). 
Users +interested only in the compressed arrays, which do not directly expose +anything related to compression other than compression +:ref:`rate control `, may safely skip the next two +sections. + +All code examples below are for 3D arrays of doubles, but it should be +clear how to modify the function calls for single precision and for 1D, +2D, or 4D arrays. + +.. _tut-hl: + +High-Level C Interface +---------------------- + +Users concerned only with storing their floating-point data compressed may +use |zfp| as a black box that maps a possibly non-contiguous floating-point +array to a compressed bit stream. The intent of |libzfp| is to provide both +a high- and low-level interface to the compressor that can be called from +both C and C++ (and possibly other languages). |libzfp| supports strided +access, e.g., for compressing vector fields one scalar at a time, or for +compressing arrays of structs. + +Consider compressing the 3D C/C++ array +:: + + // define an uncompressed array + double a[nz][ny][nx]; + +where *nx*, *ny*, and *nz* can be any positive dimensions. + +.. include:: disclaimer.inc + +To invoke the |libzfp| compressor, the dimensions and type must first be +specified in a :c:type:`zfp_field` parameter object that encapsulates the +type, size, and memory layout of the array:: + + // allocate metadata for the 3D array a[nz][ny][nx] + uint dims = 3; + zfp_type type = zfp_type_double; + zfp_field* field = zfp_field_3d(&a[0][0][0], type, nx, ny, nz); + +For single-precision data, use :code:`zfp_type_float`. As of version 0.5.1, +the high-level API also supports integer arrays (:code:`zfp_type_int32` +and :code:`zfp_type_int64`). See FAQs :ref:`#8 ` and +:ref:`#9 ` regarding integer compression. + +Functions similar to :c:func:`zfp_field_3d` exist for declaring 1D, 2D, and +4D arrays. 
If the dimensionality of the array is unknown at this point, then +a generic :c:func:`zfp_field_alloc` call can be made to just allocate a +:c:type:`zfp_field` struct, which can be filled in later using the +:ref:`set ` functions. If the array is non-contiguous, then +:c:func:`zfp_field_set_stride_3d` should be called. + +The :c:type:`zfp_field` parameter object holds information about the +uncompressed array. To specify the compressed array, a :c:type:`zfp_stream` +object must be allocated:: + + // allocate metadata for a compressed stream + zfp_stream* zfp = zfp_stream_open(NULL); + +We may now specify the rate, precision, or accuracy (see :ref:`modes` +for more details on the meaning of these parameters):: + + // set compression mode and parameters + zfp_stream_set_rate(zfp, rate, type, dims, zfp_false); + zfp_stream_set_precision(zfp, precision); + zfp_stream_set_accuracy(zfp, tolerance); + +Note that only one of these three functions should be called. The return +value from these functions gives the actual rate, precision, or tolerance, +and may differ slightly from the argument passed due to constraints imposed +by the compressor, e.g., each block must be stored using a whole number of +bits at least as large as the number of bits in the floating-point exponent; +the precision cannot exceed the number of bits in a floating-point value +(i.e., 32 for single and 64 for double precision); and the tolerance must +be a (possibly negative) power of two. + +The compression parameters have now been specified, but before compression +can occur a buffer large enough to hold the compressed bit stream must be +allocated. 
Another utility function exists for estimating how many bytes +are needed:: + + // allocate buffer for compressed data + size_t bufsize = zfp_stream_maximum_size(zfp, field); + void* buffer = malloc(bufsize); + +Note that :c:func:`zfp_stream_maximum_size` returns the smallest buffer +size necessary to safely compress the data---the *actual* compressed size +may be smaller. If the members of :code:`zfp` and :code:`field` are for +whatever reason not initialized correctly, then +:c:func:`zfp_stream_maximum_size` returns 0. + +Before compression can commence, we must associate the allocated buffer +with a bit stream used by the compressor to read and write bits:: + + // associate bit stream with allocated buffer + bitstream* stream = stream_open(buffer, bufsize); + zfp_stream_set_bit_stream(zfp, stream); + +Compression can be accelerated via OpenMP multithreading (since |zfp| +|omprelease|) and CUDA (since |zfp| |cudarelease|). To enable OpenMP +parallel compression, call:: + + if (!zfp_stream_set_execution(zfp, zfp_exec_omp)) { + // OpenMP not available; handle error + } + +See the section :ref:`execution` for further details on how to configure +|zfp| and its run-time parameters for parallel compression. + +Finally, the array is compressed as follows:: + + // compress entire array + size_t size = zfp_compress(zfp, field); + +If the stream was rewound before calling :c:func:`zfp_compress`, +the return value is the actual number of bytes of compressed storage, +and as already mentioned, *size* |leq| *bufsize*. If *size* = 0, then the +compressor failed. Since |zfp| 0.5.0, the compressor does not rewind the +bit stream before compressing, which allows multiple fields to be compressed +one after the other. The return value from :c:func:`zfp_compress` is always +the total number of bytes of compressed storage so far relative to the memory +location pointed to by *buffer*. 
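Because the stream is not rewound between calls, appending a second field is just another call; a sketch (assuming ``field1`` and ``field2`` are initialized :c:type:`zfp_field` objects and ``zfp`` is configured as above):

```c
/* compress two fields back to back into the same buffer */
zfp_stream_rewind(zfp);                    /* start at beginning of buffer */
size_t size1 = zfp_compress(zfp, field1);  /* bytes used by first field */
size_t size2 = zfp_compress(zfp, field2);  /* cumulative bytes for both fields */
if (size2 == 0) {
  /* compression failed */
}
```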
+ +To decompress the data, the field and compression parameters must be +initialized with the same values as used for compression, either via +the same sequence of function calls as above or by recording these +fields and setting them directly. Metadata such as array dimensions and +compression parameters are by default not stored in the compressed stream. +It is up to the caller to store this information, either separate from +the compressed data, or via the :c:func:`zfp_write_header` and +:c:func:`zfp_read_header` calls, which should precede the corresponding +:c:func:`zfp_compress` and :c:func:`zfp_decompress` calls, respectively. +These calls allow the user to specify what information to store in the header, +including a 'magic' format identifier, the field type and dimensions, and the +compression parameters (see the :ref:`ZFP_HEADER ` macros). + +In addition to this initialization, the bit stream has to be rewound to +the beginning (before reading the header and decompressing the data):: + + // rewind compressed stream and decompress array + zfp_stream_rewind(zfp); + size_t size = zfp_decompress(zfp, field); + +The return value is zero if the decompressor failed. 
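Putting the decompression steps together, a minimal sketch (mirroring the compression calls above; ``buffer`` and ``bufsize`` refer to the previously written compressed stream, and error handling is abbreviated):

```c
/* reconstruct metadata exactly as during compression */
zfp_field* field = zfp_field_3d(&a[0][0][0], zfp_type_double, nx, ny, nz);
zfp_stream* zfp = zfp_stream_open(NULL);
zfp_stream_set_accuracy(zfp, tolerance);   /* same mode and parameters */

/* attach the compressed buffer and rewind before decompressing */
bitstream* stream = stream_open(buffer, bufsize);
zfp_stream_set_bit_stream(zfp, stream);
zfp_stream_rewind(zfp);
if (!zfp_decompress(zfp, field)) {
  /* decompression failed */
}

/* release resources */
zfp_field_free(field);
zfp_stream_close(zfp);
stream_close(stream);
```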
+ +Simple Example +^^^^^^^^^^^^^^ + +Tying it all together, the code example below (see also the +:ref:`simple ` program) shows how to compress a 3D array +:code:`double array[nz][ny][nx]`:: + + // input: (void* array, size_t nx, size_t ny, size_t nz, double tolerance) + + // initialize metadata for the 3D array a[nz][ny][nx] + zfp_type type = zfp_type_double; // array scalar type + zfp_field* field = zfp_field_3d(array, type, nx, ny, nz); // array metadata + + // initialize metadata for a compressed stream + zfp_stream* zfp = zfp_stream_open(NULL); // compressed stream and parameters + zfp_stream_set_accuracy(zfp, tolerance); // set tolerance for fixed-accuracy mode + // zfp_stream_set_precision(zfp, precision); // alternative: fixed-precision mode + // zfp_stream_set_rate(zfp, rate, type, 3, zfp_false); // alternative: fixed-rate mode + + // allocate buffer for compressed data + size_t bufsize = zfp_stream_maximum_size(zfp, field); // capacity of compressed buffer (conservative) + void* buffer = malloc(bufsize); // storage for compressed stream + + // associate bit stream with allocated buffer + bitstream* stream = stream_open(buffer, bufsize); // bit stream to compress to + zfp_stream_set_bit_stream(zfp, stream); // associate with compressed stream + zfp_stream_rewind(zfp); // rewind stream to beginning + + // compress array + size_t zfpsize = zfp_compress(zfp, field); // return value is byte size of compressed stream + + +.. _tut-ll: + +Low-Level C Interface +--------------------- + +For applications that wish to compress or decompress portions of an array +on demand, a low-level interface is available. Since this API is useful +primarily for supporting random access, the user also needs to manipulate +the :ref:`bit stream `, e.g., to position the bit pointer to where +data is to be read or written. Please be advised that the bit stream +functions have been optimized for speed and do not check for buffer +overruns or other types of programmer error. 
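To preview the overall pattern before the individual calls are introduced, here is a sketch of a fixed-rate round trip over a single block (``zfp`` and ``rate`` are assumed to be set up as in the high-level examples; the encode, flush, and seek functions are detailed below):

```c
/* encode one 4x4x4 block at a fixed rate, then decode it again */
double block[64] = { 0 };  /* input values */
double copy[64];           /* decoded values */

zfp_stream_set_rate(zfp, rate, zfp_type_double, 3, zfp_true); /* align blocks for random access */
zfp_stream_rewind(zfp);
zfp_encode_block_double_3(zfp, block);  /* returns bits written */
zfp_stream_flush(zfp);                  /* complete the block */

stream_rseek(zfp_stream_bit_stream(zfp), 0); /* seek back to block start */
zfp_decode_block_double_3(zfp, copy);
```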
+ +Like the high-level API, the low-level API also makes use of the +:c:type:`zfp_stream` parameter object (see previous section) to specify +compression parameters and storage, but does not encapsulate array +metadata in a :c:type:`zfp_field` object. Functions exist for encoding +and decoding complete or partial blocks, with or without strided access. +In non-strided mode, the uncompressed block to be encoded or decoded is +assumed to be stored contiguously. For example, +:: + + // compress a single contiguous block + double block[4 * 4 * 4] = { /* some set of values */ }; + size_t bits = zfp_encode_block_double_3(zfp, block); + +The return value is the number of bits of compressed storage for the block. +For fixed-rate streams, if random write access is desired, then the stream +should also be flushed after each block is encoded:: + + // flush any buffered bits + zfp_stream_flush(zfp); + +This flushing should be done only after the last block has been compressed in +fixed-precision and fixed-accuracy mode, or when random access is not needed +in fixed-rate mode. + +The block above could also have been compressed as follows using strides:: + + // compress a single contiguous block using strides + double block[4][4][4] = { /* some set of values */ }; + ptrdiff_t sx = &block[0][0][1] - &block[0][0][0]; // x stride = 1 + ptrdiff_t sy = &block[0][1][0] - &block[0][0][0]; // y stride = 4 + ptrdiff_t sz = &block[1][0][0] - &block[0][0][0]; // z stride = 16 + size_t bits = zfp_encode_block_strided_double_3(zfp, &block[0][0][0], sx, sy, sz); + +The strides are measured in number of array elements, not in bytes. + +For partial blocks, e.g., near the boundaries of arrays whose dimensions +are not multiples of four, there are corresponding functions that accept +parameters *nx*, *ny*, and *nz* to specify the actual block dimensions, +with 1 |leq| *nx*, *ny*, *nz* |leq| 4. Corresponding functions exist for +decompression. 
Such partial blocks typically do not compress as well as +full blocks and should be avoided if possible. + +To position a bit stream for reading (decompression), use +:: + + // position the stream at given bit offset for reading + stream_rseek(stream, offset); + +where the offset is measured in number of bits from the beginning of the +stream. For writing (compression), a corresponding call exists:: + + // position the stream at given bit offset for writing + stream_wseek(stream, offset); + +Note that it is possible to decompress fewer bits than are stored with a +compressed block to quickly obtain an approximation. This is done by +setting :code:`zfp->maxbits` to fewer bits than used during compression. +For example, to decompress only the first 256 bits of each block:: + + // modify decompression parameters to decode 256 bits per block + uint maxbits; + uint maxprec; + int minexp; + zfp_stream_params(zfp, NULL, &maxbits, &maxprec, &minexp); + assert(maxbits >= 256); + zfp_stream_set_params(zfp, 256, 256, maxprec, minexp); + +This feature may be combined with progressive decompression, as discussed +further in FAQ :ref:`#13 `. + +.. _tut-arrays: + +Compressed C++ Arrays +--------------------- + +.. cpp:namespace:: zfp + +The |zfp| compressed-array API has been designed to facilitate integration +with existing applications. After initial array declaration, a |zfp| array +can often be used in place of a regular C/C++ array or STL vector, e.g., +using flat indexing via :code:`a[index]`, nested indexing :code:`a[k][j][i]` +(via :ref:`nested views `), or using multidimensional indexing +via :code:`a(i)`, :code:`a(i, j)`, :code:`a(i, j, k)`, or +:code:`a(i, j, k, l)`. There are, however, some important differences. For +instance, applications that rely on addresses or references to array elements +may have to be modified to use special proxy classes that implement pointers +and references; see :ref:`limitations`. 
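The proxy-class idea mentioned above can be illustrated standalone, independent of |zfp| (a toy class, not zfp's actual implementation): :code:`operator[]` returns a proxy object rather than a raw :code:`double&`, which lets the container intercept writes, as |zfp| must in order to mark cached blocks dirty.

```cpp
#include <cassert>
#include <cstddef>
#include <vector>

// Minimal stand-in for the proxy-reference pattern used by zfp arrays.
class proxy_array {
public:
  explicit proxy_array(std::size_t n) : data_(n, 0.0), writes_(0) {}

  class reference {
  public:
    reference(proxy_array* a, std::size_t i) : a_(a), i_(i) {}
    operator double() const { return a_->data_[i_]; }  // read path
    reference& operator=(double v)                     // write path
    {
      a_->data_[i_] = v;
      ++a_->writes_;  // the container observes every modification
      return *this;
    }
  private:
    proxy_array* a_;
    std::size_t i_;
  };

  reference operator[](std::size_t i) { return reference(this, i); }
  std::size_t writes() const { return writes_; }

private:
  std::vector<double> data_;
  std::size_t writes_;
};
```

Reads go through the implicit conversion to :code:`double`, writes through the overloaded assignment, so the class always knows when an element changed.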
+ +|zfp|'s compressed arrays do not support special floating-point values like +infinities and NaNs, although subnormal numbers are handled correctly. +Similarly, because the compressor assumes that the array values vary smoothly, +using finite but large values like :c:macro:`HUGE_VAL` in place of +infinities is not advised, as this will introduce large errors in smaller +values within the same block. Future extensions will provide support for +a bit mask to mark the presence of non-values. + +The |zfp| C++ classes are implemented entirely as header files and make +extensive use of C++ templates to reduce code redundancy. These classes +are wrapped in the :cpp:any:`zfp` namespace. + +Currently, there are eight array classes for 1D, 2D, 3D, and 4D arrays, each +of which can represent single- or double-precision values. Although these +arrays store values in a form different from conventional single- and +double-precision floating point, the user interacts with the arrays via +floats and doubles. + +The description below is for 3D arrays of doubles---the necessary changes +for other array types should be obvious. To declare and zero initialize +an array, use +:: + + // declare nx * ny * nz array of compressed doubles + zfp::array3<double> a(nx, ny, nz, rate); + +This declaration is conceptually equivalent to +:: + + double a[nz][ny][nx] = { 0.0 }; + +or using STL vectors +:: + + std::vector<double> a(nx * ny * nz, 0.0); + +but with the user specifying the amount of storage used via the *rate* +parameter. (A predefined type :cpp:type:`array3d` also exists, while +the suffix 'f' is used for floats.) + +.. include:: disclaimer.inc + +Note that the array dimensions can be arbitrary and need not be multiples +of four (see above for a discussion of incomplete blocks). The *rate* +argument specifies how many bits per value (amortized) to store in the +compressed representation.
By default, the block size is restricted to a +multiple of 64 bits, and therefore the rate argument can be specified in +increments of 64 / |4powd| bits in *d* dimensions, i.e. +:: + + 1D arrays: 16-bit granularity + 2D arrays: 4-bit granularity + 3D arrays: 1-bit granularity + 4D arrays: 1/4-bit granularity + +For finer granularity, the :c:macro:`BIT_STREAM_WORD_TYPE` macro needs to +be set to a type narrower than 64 bits during compilation of |libzfp|, +e.g., if set to :c:type:`uint8` the rate granularity becomes 8 / |4powd| +bits in *d* dimensions, or +:: + + 1D arrays: 2-bit granularity + 2D arrays: 1/2-bit granularity + 3D arrays: 1/8-bit granularity + 4D arrays: 1/32-bit granularity + +Note that finer granularity usually implies slightly lower performance. +Also note that because the arrays are stored compressed, their effective +precision is likely to be higher than the user-specified rate. + +The array can also optionally be initialized from an existing contiguous +floating-point array stored at *pointer* with an *x* stride of 1, *y* +stride of *nx*, and *z* stride of *nx* |times| *ny*:: + + // declare and initialize 3D array of doubles + zfp::array3d a(nx, ny, nz, rate, pointer, cache_size); + +The optional *cache_size* argument specifies the minimum number of bytes +to allocate for the cache of uncompressed blocks (see :ref:`tut-caching` +below for more details). + +As of |zfp| 0.5.3, entire arrays may be copied via the copy constructor or +assignment operator:: + + zfp::array3d b(a); // declare array b to be a copy of array a + zfp::array3d c; // declare empty array c + c = a; // copy a to c + +Copies are deep and have value (not reference) semantics. In the above +example, separate storage for *b* and *c* is allocated, and subsequent +modifications to *b* and *c* will not modify *a*. 
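The granularity rule above is easy to check numerically; a small helper (illustrative only, not part of the |zfp| API):

```c
#include <assert.h>

/* Rate granularity in bits per value for d-dimensional arrays:
   word_bits / 4^d, since a block holds 4^d values and compressed
   block sizes are rounded up to a whole number of stream words. */
static double rate_granularity(double word_bits, unsigned dims)
{
    while (dims--)
        word_bits /= 4.0;  /* each dimension multiplies values per block by 4 */
    return word_bits;
}
```

With the default 64-bit word this reproduces the 16, 4, 1, and 1/4 bit granularities listed above; with an 8-bit word (``BIT_STREAM_WORD_TYPE`` set to ``uint8``) it gives the finer 2, 1/2, 1/8, and 1/32 bit steps.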
+ +If not already initialized, a function :cpp:func:`array::set` can be used +to copy uncompressed data to the compressed array:: + + const double* pointer; // pointer to uncompressed, initialized data + a.set(pointer); // initialize compressed array with floating-point data + +Similarly, an :cpp:func:`array::get` function exists for retrieving +uncompressed data:: + + double* pointer; // pointer to where to write uncompressed data + a.get(pointer); // decompress and store the array at pointer + +The compressed representation of an array can also be queried or initialized +directly without having to convert to/from its floating-point representation:: + + size_t bytes = compressed_size(); // number of bytes of compressed storage + void* compressed_data(); // pointer to compressed data + +The array can through this pointer be initialized from offline compressed +storage, but only after its dimensions and rate have been specified (see +above). For this to work properly, the cache must first be emptied via an +:cpp:func:`array::clear_cache` call (see below). + +Through operator overloading, the array can be accessed in one of two ways. +For read accesses, use +:: + + double value = a[index]; // fetch value with given flat array index + double value = a(i, j, k); // fetch value with 3D index (i, j, k) + +These access the same value if and only if +:code:`index = i + nx * (j + ny * k)`. +Note that 0 |leq| *i* < *nx*, 0 |leq| *j* < *ny*, and 0 |leq| *k* < *nz*, +and *i* varies faster than *j*, which varies faster than *k*. + +|zfp| |viewsrelease| adds views to arrays, which among other things can +be used to perform nested indexing:: + + zfp::array3d::nested_view v(&a); + double value = v[k][j][i]; + +A view is a shallow copy of an array or a subset of an array. + +Array values may be written and updated using the usual set of C++ assignment +and compound assignment operators. 
For example:: + + a[index] = value; // set value at flat array index + a(i, j, k) += value; // increment value with 3D index (i, j, k) + +Whereas one might expect these operators to return a (non-const) reference +to an array element, this would allow seating a reference to a value that +currently is cached but is transient, which could be unsafe. Moreover, +this would preclude detecting when an array element is modified. Therefore, +the return type of both operators :code:`[]` and :code:`()` is a proxy +reference class, similar to :code:`std::vector<bool>::reference` from the +STL library. Because read accesses to a mutable object cannot call the +const-qualified accessor, a proxy reference may be returned even for read +calls. For example, in +:: + + a[i] = a[i + 1]; + +the array :code:`a` clearly must be mutable to allow assignment to +:code:`a[i]`, and therefore the read access :code:`a[i + 1]` returns type +:cpp:class:`array::reference`. The value associated with the read access +is obtained via an implicit conversion. + +When the array is const qualified, the operators :code:`[]` and :code:`()` +are inspectors that return a proxy :ref:`const reference ` that +implicitly converts to a value. If used as arguments in :code:`printf` or +other functions that take a variable number of arguments, implicit conversion +is not done and the reference has to be explicitly cast to value, e.g., +:code:`printf("%f", (double)a[i]);`. + +Array dimensions *nx*, *ny*, *nz*, and *nw* can be queried using these functions:: + + size_t size(); // total number of elements nx * ny * nz * nw + size_t size_x(); // nx + size_t size_y(); // ny + size_t size_z(); // nz + size_t size_w(); // nw + +The array dimensions can also be changed dynamically, e.g., if not known +at time of construction, using +:: + + void resize(size_t nx, size_t ny, size_t nz, size_t nw, bool clear = true); + +When *clear* = true, the array is explicitly zeroed.
In either case, all +previous contents of the array are lost. If *nx* = *ny* = *nz* = 0, all +storage is freed. + +Finally, the rate supported by the array may be queried via +:: + + double rate(); // number of compressed bits per value + +and changed using +:: + + void set_rate(rate); // change rate + +This also destroys prior contents. + +As of |zfp| 0.5.2, iterators and proxy objects for pointers and references are +supported. Note that the decompressed value of an array element exists only +intermittently, when the decompressed value is cached. It would not be safe +to return a :code:`double&` reference or :code:`double*` pointer to the cached +but transient value since it may be evicted from the cache at any point, thus +invalidating the reference or pointer. Instead, |zfp| provides proxy objects +for references and pointers that guarantee persistent access by referencing +elements by array object and index. These classes perform decompression on +demand, much like how Boolean vector references are implemented in the STL. + +As of |zfp| |raiterrelease|, all iterators for 1D-4D arrays support random +access. Iterators ensure that array values are visited one block at a time, +and are the preferred way of looping over array elements. Such block-by-block +access is especially useful when performing write accesses since then complete +blocks are updated one at a time, thus reducing the likelihood of a partially +updated block being evicted from the cache and compressed, perhaps with some +values in the block being uninitialized. Here is an example of initializing +a 3D array:: + + for (zfp::array3d::iterator it = a.begin(); it != a.end(); it++) { + size_t i = it.i(); + size_t j = it.j(); + size_t k = it.k(); + a(i, j, k) = some_function(i, j, k); + } + +Pointers to array elements are available via a special pointer class. 
Such + pointers may be a useful way of passing (flattened) |zfp| arrays to functions + that expect uncompressed arrays, e.g., by using the pointer type as template + argument. For example:: + + template <typename double_ptr> + double sum(double_ptr p, size_t count) + { + double s = 0; + for (size_t i = 0; i < count; i++) + s += p[i]; + return s; + } + +Then the following are equivalent:: + + // sum of STL vector elements (double_ptr == double*) + std::vector<double> vec(nx * ny * nz, 0.0); + double vecsum = sum(&vec[0], nx * ny * nz); + + // sum of zfp array elements (double_ptr == zfp::array3d::pointer) + zfp::array3<double> array(nx, ny, nz, rate); + double zfpsum = sum(&array[0], nx * ny * nz); + +As another example, +:: + + for (zfp::array1d::pointer p = &a[0]; p - &a[0] < a.size(); p++) + *p = 0.0; + +initializes a 1D array to all-zeros. Pointers visit arrays in standard +row-major order, i.e. +:: + + &a(i, j, k) == &a[0] + i + nx * (j + ny * k) + == &a[i + nx * (j + ny * k)] + +where :code:`&a(i, j, k)` and :code:`&a[0]` are both of type +:cpp:class:`array3d::pointer`. Thus, iterators and pointers do not +visit arrays in the same order, except for the special case of 1D arrays. +Like iterators, pointers support random access for arrays of all dimensions +and behave very much like :code:`float*` and :code:`double*` built-in pointers. + +Proxy objects for array element references have been supported since the +first release of |zfp|, and may for instance be used in place of +:code:`double&`. Iterators and pointers are implemented in terms of +references. + +The following table shows the equivalent zfp type to standard types when +working with 1D arrays:: + + double& zfp::array1d::reference + double* zfp::array1d::pointer + std::vector<double>::iterator zfp::array1d::iterator + const double& zfp::array1d::const_reference + const double* zfp::array1d::const_pointer + std::vector<double>::const_iterator zfp::array1d::const_iterator + +.. 
_tut-caching: + +Caching +^^^^^^^ + +As mentioned above, the array class maintains a software write-back cache +of at least one uncompressed block. When a block in this cache is evicted +(e.g., due to a conflict), it is compressed back to permanent storage only +if it was modified while stored in the cache. + +The cache size to use is specified by the user and is an important +parameter that needs careful consideration in order to balance the extra +memory usage, performance, and accuracy (recall that data loss is incurred +only when a block is evicted from the cache and compressed). Although the +best choice varies from one application to another, we suggest allocating +at least two layers of blocks (2 |times| (*nx* / 4) |times| (*ny* / 4) +blocks) for applications that stream through the array and perform stencil +computations such as gathering data from neighboring elements. This allows +limiting the cache misses to compulsory ones. If the *cache_size* parameter +is set to zero bytes, then a default size of |sqrt|\ *n* blocks (rounded +up to the next integer power of two) is used, where *n* is the total number +of blocks in the array. + +The cache size can be set during construction, or can be set at a later +time via +:: + + void set_cache_size(bytes); // change cache size + +Note that if *bytes* = 0, then the array dimensions must have already been +specified for the default size to be computed correctly. When the cache +is resized, it is first flushed if not already empty.
The cache can +also be flushed explicitly if desired by calling +:: + + void flush_cache(); // empty cache by first compressing any modified blocks + +To empty the cache without compressing any cached data, call +:: + + void clear_cache(); // empty cache without compression + +To query the byte size of the cache, use +:: + + size_t cache_size(); // actual cache size in bytes diff --git a/docs/source/versions.rst b/docs/source/versions.rst new file mode 100644 index 00000000..0824e55c --- /dev/null +++ b/docs/source/versions.rst @@ -0,0 +1,456 @@ +.. include:: defs.rst + +Release Notes +============= + +1.0.1 (2023-12-15) +------------------ + +This patch release primarily addresses minor bug fixes and is needed to update +the zfpy Python wheels. + +**Added** + +- A new build macro, ``BUILD_TESTING_FULL``, specifies that all unit tests be + built; ``BUILD_TESTING`` produces a smaller subset of tests. Full tests and + documentation are now included in releases. + +**Fixed** + +- #169: `libm` dependency is not always correctly detected. +- #171: `ptrdiff_t` is not always imported in Cython. +- #176: cfp API is not exposed via CMake configuration file. +- #177: Full test suite is not included in release. +- #181: `rpath` is not set correctly in executables. +- #204: Array strides are not passed by value in zFORp. +- #220: Errors reported with scikit-build when building zfpy. + +---- + +1.0.0 (2022-08-01) +------------------ + +This release is not ABI compatible with prior releases due to numerous changes +to function signatures and data structures like ``zfp_field``. However, few of +the API changes, other than to the |cfp| C API for compressed arrays, should +impact existing code. Note that numerous header files have been renamed or +moved relative to prior versions. + +**Added** + +- ``zfp::const_array``: read-only variable-rate array that supports + fixed-precision, fixed-accuracy, and reversible modes. +- Compressed-array classes for 4D data. 
+- ``const`` versions of array references, pointers, and iterators. +- A more complete API for pointers and iterators. +- |cfp| support for proxy references and pointers, iterators, and + (de)serialization. +- Support for pointers and iterators into array views. +- ``zfp::array::size_bytes()`` allows querying the size of different components + of an array object (e.g., payload, cache, index, metadata, ...). +- Templated C++ wrappers around the low-level C API. +- A generic codec for storing blocks of uncompressed scalars in |zfp|'s + C++ arrays. +- Additional functions for querying ``zfp_field`` and ``zfp_stream`` structs. +- ``zfp_config``: struct that encapsulates compression mode and parameters. +- Rounding modes for reducing bias in compression errors. +- New examples: ``array``, ``iteratorC``, and ``ppm``. + +**Changed** + +- Headers from ``array/``, ``cfp/include/``, and ``include/`` have been renamed + and reorganized into a common ``include/`` directory. + + * The libzfp API is now confined to ``zfp.h``, ``zfp.hpp``, and ``zfp.mod`` + for C, C++, and Fortran bindings, respectively. These all appear in + the top-level ``include/`` directory upon installation. + * C++ headers now use a ``.hpp`` suffix; C headers use a ``.h`` suffix. + * C++ headers like ``array/zfparray.h`` have been renamed ``zfp/array.hpp``. + * C headers like ``cfp/include/cfparrays.h`` have been renamed + ``zfp/array.h``. + +- ``size_t`` and ``ptrdiff_t`` replace ``uint`` and ``int`` for array sizes and + strides in the array classes and C/Fortran APIs. +- ``zfp_bool`` replaces ``int`` as Boolean type in the C API. +- ``bitstream_offset`` and ``bitstream_size`` replace ``size_t`` to ensure + support for 64-bit offsets into and lengths of bit streams. Consequently, + the ``bitstream`` API has changed accordingly. +- All array and view iterators are now random-access iterators. 
+- Array inspectors now return ``const_reference`` rather than a scalar + type like ``float`` to allow obtaining a ``const_pointer`` to an element + of an immutable array. +- ``zfp::array::compressed_data()`` now returns ``void*`` instead of + ``uchar*``. +- The array (de)serialization API has been revised, resulting in new + ``zfp::array::header`` and ``zfp::exception`` classes with new exception + messages. +- The array ``codec`` class is now responsible for all details regarding + compression. +- The compressed-array C++ implementation has been completely refactored to + make it more modular, extensible, and reusable across array types. +- Array block shapes are now computed on the fly rather than stored. +- The |cfp| C API now wraps array objects in structs. +- The |zfpy| Python API now supports the more general ``memoryview`` over + ``bytes`` objects for decompression. +- The zFORp Fortran module name is now ``zfp`` instead of ``zforp_module``. +- Some command-line options for the ``diffusion`` example have changed. +- CMake 3.9 or later is now required for CMake builds. + +**Removed** + +- ``zfp::array::get_header()`` has been replaced with a ``zfp::array::header`` + constructor that accepts an array object. +- ``ZFP_VERSION_RELEASE`` is no longer defined (use ``ZFP_VERSION_PATCH``). + +**Fixed** + +- #66: ``make install`` overwrites googletest. +- #84: Incorrect order of parameters in CUDA ``memset()``. +- #86: C++ compiler warns when ``__STDC_VERSION__`` is undefined. +- #87: ``CXXFLAGS`` is misspelled in ``cfp/src/Makefile``. +- #98: ``zfp_stream_maximum_size()`` underestimates size in reversible mode. +- #99: Incorrect ``private_view`` reads due to missing writeback. +- #109: Unused CPython array is incompatible with PyPy. +- #112: PGI compiler bug causes issues with memory alignment. +- #119: All-subnormal blocks may cause floating-point overflow. +- #121: CUDA bit offsets are limited to 32 bits. 
+- #122: ``make install`` does not install |zfp| command-line utility. +- #125: OpenMP bit offsets are limited to 32 bits. +- #126: ``make install`` does not install Fortran module. +- #127: Reversible mode reports incorrect compressed block size. +- #150: cmocka tests do not build on macOS. +- #154: Thread safety is broken in ``private_view`` and ``private_const_view``. +- ``ZFP_MAX_BITS`` is off by one. +- ``diffusionC``, ``iteratorC`` are not being built with ``gmake``. + +---- + +0.5.5 (2019-05-05) +------------------ + +**Added** + +- Support for reversible (lossless) compression of floating-point and + integer data. +- Methods for serializing and deserializing |zfp|'s compressed arrays. +- Python bindings for compressing NumPy arrays. +- Fortran bindings to |zfp|'s high-level C API. + +**Changed** + +- The default compressed-array cache size is now a function of the total + number of array elements, irrespective of array shape. + +**Fixed** + +- Incorrect handling of execution policy in |zfp| utility. +- Incorrect handling of decompression via header in |zfp| utility. +- Incorrect cleanup of device memory in CUDA decompress. +- Missing tests for failing mallocs. +- CMake does not install CFP when built. +- ``zfp_write_header()`` and ``zfp_field_metadata()`` succeed even if array + dimensions are too large to fit in header. + +---- + +0.5.4 (2018-10-01) +------------------ + +**Added** + +- Support for CUDA fixed-rate compression and decompression. +- Views into compressed arrays for thread safety, nested array indexing, + slicing, and array subsetting. +- C language bindings for compressed arrays. +- Support for compressing and decompressing 4D data. + +**Changed** + +- Execution policy now applies to both compression and decompression. +- Compressed array accessors now return Scalar type instead of + ``const Scalar&`` to avoid stale references to evicted cache lines. + +**Fixed** + +- Incorrect handling of negative strides. 
+- Incorrect handling of arrays with more than 2\ :sup:`32` elements in |zfp| + command-line tool. +- ``bitstream`` is not C++ compatible. +- Minimum cache size request is not respected. + +---- + +0.5.3 (2018-03-28) +------------------ + +**Added** + +- Support for OpenMP multithreaded compression (but not decompression). +- Options for OpenMP execution in |zfp| command-line tool. +- Compressed-array support for copy construction and assignment via deep + copies. +- Virtual destructors to enable inheritance from |zfp| arrays. + +**Changed** + +- ``zfp_decompress()`` now returns the number of compressed bytes processed so + far, i.e., the same value returned by ``zfp_compress()``. + +---- + +0.5.2 (2017-09-28) +------------------ + +**Added** + +- Iterators and proxy objects for pointers and references. +- Example illustrating how to use iterators and pointers. + +**Changed** + +- Diffusion example now optionally uses iterators. +- Moved internal headers under array to ``array/zfp``. +- Modified 64-bit integer typedefs to avoid the C89 non-compliant ``long long`` + and allow for user-supplied types and literal suffixes. +- Renamed compile-time macros that did not have a ``ZFP`` prefix. +- Rewrote documentation in reStructuredText and added complete documentation + of all public functions, classes, types, and macros. + +**Fixed** + +- Issue with setting stream word type via CMake. + +---- + +0.5.1 (2017-03-28) +------------------ + +This release primarily fixes a few minor issues but also includes changes in +anticipation of a large number of planned future additions to the library. +No changes have been made to the compressed format, which is backwards +compatible with version 0.5.0. + +**Added** + +- High-level API support for integer types. +- Example that illustrates in-place compression. +- Support for CMake builds. +- Documentation that discusses common issues with using |zfp|. 
+ +**Changed** + +- Separated library version from CODEC version and added version string. +- Corrected inconsistent naming of ``BIT_STREAM`` macros in code and + documentation. +- Renamed some of the header bit mask macros. +- ``stream_skip()`` and ``stream_flush()`` now return the number of bits + skipped or output. +- Renamed ``stream_block()`` and ``stream_delta()`` to make it clear that they + refer to strided streams. Added missing definition of + ``stream_stride_block()``. +- Changed ``int`` and ``uint`` types in places to use ``ptrdiff_t`` and + ``size_t`` where appropriate. +- Changed API for ``zfp_set_precision()`` and ``zfp_set_accuracy()`` to not + require the scalar type. +- Added missing ``static`` keyword in ``decode_block()``. +- Changed ``testzfp`` to allow specifying which tests to perform on the + command line. +- Modified directory structure. + +**Fixed** + +- Bug that prevented defining uninitialized arrays. +- Incorrect computation of array sizes in ``zfp_field_size()``. +- Minor issues that prevented code from compiling on Windows. +- Issue with fixed-accuracy headers that caused unnecessary storage. + +---- + +0.5.0 (2016-02-29) +------------------ + +This version introduces backwards incompatible changes to the CODEC. + +**Added** + +- Modified CODEC to more efficiently encode blocks whose values are all + zero or are smaller in magnitude than the absolute error tolerance. + This allows representing "empty" blocks using only one bit each. +- Added functions for compactly encoding the compression parameters + and field meta data, e.g., for producing self-contained compressed + streams. Also added functions for reading and writing a header + containing these parameters. + +**Changed** + +- Changed behavior of ``zfp_compress()`` and ``zfp_decompress()`` to not + automatically rewind the bit stream. This makes it easier to concatenate + multiple compressed bit streams, e.g., when compressing vector fields or + multiple scalars together. 
+- Changed the |zfp| example program interface to allow reading and writing
+  compressed streams, optionally with a header. The |zfp| tool can now be
+  used to compress and decompress files as a stand-alone utility.
+
+----
+
+0.4.1 (2015-12-28)
+------------------
+
+**Added**
+
+- Added ``simple.c`` as a minimal example of how to call the compressor.
+
+**Changed**
+
+- Changed compilation of diffusion example to output two executables:
+  one with and one without compression.
+
+**Fixed**
+
+- Bug that caused segmentation fault when compressing 3D arrays whose
+  dimensions are not multiples of four. Specifically, arrays of dimensions
+  *nx* |times| *ny* |times| *nz*, with *ny* not a multiple of four, were not
+  handled correctly.
+- Modified ``examples/fields.h`` to ensure standard compliance. Previously,
+  C99 support was needed to handle the hex float constants, which are
+  not supported in C++98.
+
+----
+
+0.4.0 (2015-12-05)
+------------------
+
+This version contains substantial changes to the compression algorithm that
+improve PSNR by about 6 dB and speed by a factor of 2-3. These changes are
+not backward compatible with previous versions of |zfp|.
+
+**Added**
+
+- Support for 31-bit and 63-bit integer data, as well as shorter integer types.
+- New examples for evaluating the throughput of the (de)compressor and for
+  compressing grayscale images in the PGM format.
+- Frequently asked questions.
+
+**Changed**
+
+- Rewrote compression codec entirely in C to make linking and calling
+  easier from other programming languages, and to expose the low-level
+  interface through C instead of C++. This necessitated significant
+  changes to the API as well.
+- Minor changes to the C++ compressed array API, as well as major
+  implementation changes to support the C library. The namespace and
+  public types are now all in lower case.
+
+**Removed**
+
+- Support for general fixed-point decorrelating transforms.
+ +---- + +0.3.2 (2015-12-03) +------------------ + +**Fixed** + +- Bug in ``Array::get()`` that caused the wrong cached block to be looked up, + thus occasionally copying incorrect values back to parts of the array. + +---- + +0.3.1 (2015-05-06) +------------------ + +**Fixed** + +- Rare bug caused by exponent underflow in blocks with no normal and some + subnormal numbers. + +---- + +0.3.0 (2015-03-03) +------------------ + +This version modifies the default decorrelating transform to one that uses +only additions and bit shifts. This new transform, in addition to being +faster, also has some theoretical optimality properties and tends to improve +rate distortion. This change is not backwards compatible. + +**Added** + +- Compile-time support for parameterized transforms, e.g., to support other + popular transforms like DCT, HCT, and Walsh-Hadamard. +- Floating-point traits to reduce the number of template parameters. It is + now possible to declare a 3D array as ``Array3``, for example. +- Functions for setting the array scalar type and dimensions. +- ``testzfp`` for regression testing. + +**Changed** + +- Made forward transform range preserving: (-1, 1) is mapped to (-1, 1). + Consequently Q1.62 fixed point can be used throughout. +- Changed the order in which bits are emitted within each bit plane to be more + intelligent. Group tests are now deferred until they are needed, i.e., just + before the value bits for the group being tested. This improves the quality + of fixed-rate encodings, but has no impact on compressed size. +- Made several optimizations to improve performance. +- Consolidated several header files. + +---- + +0.2.1 (2014-12-12) +------------------ + +**Added** + +- Win64 support via Microsoft Visual Studio compiler. +- Documentation of the expected output for the diffusion example. + +**Changed** + +- Made several minor changes to suppress compiler warnings. + +**Fixed** + +- Broken support for IBM's ``xlc`` compiler. 
+ +---- + +0.2.0 (2014-12-02) +------------------ + +The compression interface from ``zfpcompress`` was relocated to a separate +library, called ``libzfp``, and modified to be callable from C. This API now +uses a parameter object (``zfp_params``) to specify array type and dimensions +as well as compression parameters. + +**Added** + +- Several utility functions were added to simplify ``libzfp`` usage: + + * Functions for setting the rate, precision, and accuracy. + Corresponding functions were also added to the ``Codec`` class. + * A function for estimating the buffer size needed for compression. + +- The ``Array`` class functionality was expanded: + + * Support for accessing the compressed bit stream stored with an array, + e.g., for offline compressed storage and for initializing an already + compressed array. + * Functions for dynamically specifying the cache size. + * The default cache is now direct-mapped instead of two-way associative. + +**Fixed** + +- Corrected the value of the lowest possible bit plane to account for both + the smallest exponent and the number of bits in the significand. +- Corrected inconsistent use of rate and precision. The rate refers to the + number of compressed bits per floating-point value, while the precision + refers to the number of uncompressed bits. The ``Array`` API was changed + accordingly. + +---- + +0.1.0 (2014-11-12) +------------------ + +Initial beta release. 
diff --git a/docs/source/view-indexing.pdf b/docs/source/view-indexing.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..d6e2edb7f3df939b03385ffeecd7d769c10948b3
GIT binary patch
literal 121307
[binary patch data omitted]
z6YopPF5R{}??7|2{OtgK`{BKmeEimMEHNV~JGuYv2!4CH=Ju7NUh6m)m(*|AXT!s7 zk!#ma*_iR76r!XycKL~DN;$IZ`EdRCr0h*ccbz#9({|VOr)!4x8@Av=_#31?`}d5O zGe7Mwp50rvx@J`VWzS=|o}!pjFI*XO_WiWHM*{7$7f*=uPOrPPVO?%V(}k?`4V%NC zuByq*|Ks7t8Dq~*Ps?j~wd|&xd4Js-SzVKzKVs6@=-|;CtLjI!wyzwN+}^$r|LODK z;R(67^_w-nqGrC74R{lJ2(tn`6 zeEsN^=N>xVx_^be?aG*@^JC7mbq#+hFep%e&l}5gHypYlHha9cw)duKb)QUoH~rX# z6?cdcuN)ugofMt?N^REP-oEqvw?7R(zP|JEaXYts92nNNar=*n?dPuhedoctF_#y3 zx89I*(Z9@dXIpVg<+_#E2Vbi)kF@T2aBuz8KdtRO>ESOXmhOG?r7J(At~zv}fqg?# zt#YeT*#(alRZ7=T6k3NZh?d+1;x7=f;4Plft5awGx=OCFbQA^AhD z6Fiw!7AYw%DT)jmI~*FS&u98MlSC#5D@R3L+phe5UkDw!GVJC4(*^IGL ze7CTM-#|VzMDv?$NDl#?s3VRe0o*M zYs$%`V79cxxiQ$=;Wtt+M1eB~zlWO6l%_2uFZU_t1}vsZIy6n~Q<$WsWz=0>fcw-R zuviuZfKz!v$Ajc94+d1K12#u$FHDOhGpupce=xdHAHYsUJi}i)-5V-L`dF9OnwLDvz zy4~g3KIXI?WG>i2T3_IKlXhpNJWE>UT^N>P8PuR+^1UYobBw?WhFc$$4);L_6EZs1 zofm#EwXXv3V5#E)_nv$SQ^{+<_n~4!Is=q+wse=cu|PmRXrN-WCI)c1gZc_iFRCM8 z(^>-8pnO%xT?afU)K_qnP#u7%ui!$U@qpEx`U&u;2_%xXGO{?jgm#LJz@>BL|I$kGn%8?g8+B_czRXny0|zvy`rSgFZupnY2D*^8$2x97zeAqtJ@qlwjgE1F^L4!C0jJN=2tOjE)z%5Js2;zbl$rmzp7;zyOvKcd& z3%D0Y{D32u_<^SBFyaUNx*CkR01q$m0~m23h~p6T0j?*~SXzD!rs0BxdyXm>EDaYd z!Thi^T)A;w@(ELiF&Bi*n62T0jgeE^VQaWxTg(rexd0zC@dFPd5*OfY)?my9 z_??L#z=#WYTS|j57w}RK@dN&8;z9_)QtEgBBQ8iaK<&R!fVlw2HSq&?b%_h0stzMA zVAg9e<^rzP5I=ws7hpQkV9W&=4Tv8&mJk;(x`Y`4CpgEoe;5dLQqNMr=p1XHo7J-v zY9*Zn>JjyyRGBC*B2?7#sUa6Jg$q!vWC)F0=Ae*q7H$laLp_HXAvA>2@t$npIIhk$ zST{�wy2Nb=PYINv|~wKA$4MK?ZXvc`ec*4AaN`41JK=h9fn}4E2cm&$Q@_ZkPzd zP-U^4h)%bB7BmY=2SXQQGP+F3D@dm^ESqdbh6rddCtS;2o-j!#G4Nsrjjym7v$5Ez z^4U(rl)Dbwd5%!QEYced8~4?f@=TvkgUR?fS=|3@c(80bVe|@S(D-6S)M3OU6S0j{ z2f`3ZKEI*Ei07tXXfT^>GWeQ+1`CqDWg`BOxl$=#>W z{kS|F2GsIEBgUL4Wp{a4WU0K6Az$ZG(g__#TrdqMZrGjIU@n*j+gVM6xnLTsvjZb8 zn9%)5W|O&qFG3J!@T~~q0w5j6T!8LJ{ReXx$pYPvF%1_4uZgD6a6zz*%oG|fK=&j0 zgoX?FKnBTYYq($=%nw_`1>0tR*vthBx}Pc^PA|j-%ZE`{=A`9gE?Ch0s62QbC@;*r zj4>B1KiffYVkIufErrTMv^mKF-H$Qmf(6}=%VREBK_}qd#W!HY1?` zC_U={W~jO@FgRk}vfy-u&SFgUEy`1vYI^`ib^&0vI-g-^!D6S@feT2AT>ve^;YUe_ 
zcmdUi2G62%e`sS`?`#MN#Dv83jfmCeH6U{k;)@6_(F d8%TzKIbZ52sF>x%yr70)$nQx^9Cv@3=f8wj89o32 literal 0 HcmV?d00001 diff --git a/docs/source/views.inc b/docs/source/views.inc new file mode 100644 index 00000000..4a14df29 --- /dev/null +++ b/docs/source/views.inc @@ -0,0 +1,863 @@ +.. index:: + single: Views +.. _views: + +Views +----- + +.. cpp:namespace:: zfp + +|zfp| |viewsrelease| adds array views. +Much like how :ref:`references ` allow indirect access to +single array elements, *views* provide indirect access to whole arrays, +or more generally to rectangular subsets of arrays. A view of an array +does not allocate any storage for the array elements. Rather, the +view accesses shared storage managed by the underlying array. This +allows for multiple entries into an array without the need for expensive +deep copies. In a sense, views can be thought of as *shallow copies* +of arrays. + +When a view exposes a whole array :code:`array`, it provides +similar functionality to a C++ reference :code:`array&` or +pointer :code:`array*` to the array. However, views are more +general in that they also allow restricting access to a user-specified +subset of the array, and unlike pointers also provide for the same +syntax when accessing the array, e.g., :code:`array_view(i, j)` instead +of :code:`(*array_ptr)(i, j)`. + +|zfp|'s *nested views* further provide for multidimensional +array access analogous to the C/C++ nested array syntax :code:`array[i][j]`. +Finally, |zfp|'s *private views* can be used to ensure thread-safe access +to its compressed arrays. + +Access to array elements through a view is via inspectors and mutators +that return a :code:`const_reference` or :code:`reference`, respectively +(see :ref:`references`). As of |zfp| |crpirelease|, it is also possible +to obtain pointers to array elements through views and to iterate over them. 
+View pointers and iterators allow referencing only the elements visible +through the view, e.g., a rectangular subset of an array +(see :numref:`view-indexing`). Those elements are indexed as if the +view were a contiguous array, and pointer arithmetic assumes that the +possibly smaller view and not the underlying array is flattened. +:ref:`Private views ` maintain their own cache +and therefore implement their own proxy references, pointers, and +iterators. + +.. _view-indexing: +.. figure:: view-indexing.pdf + :figwidth: 90 % + :align: center + :alt: "2D view indexing" + + An 11 |times| 9 element view of a 2D array of dimensions 16 |times| 12. + The numbered elements indicate the order in which the view is + traversed using pointers and iterators. We have + :code:`view(10, 7) == (&view(0, 0))[87] == view.begin()[97] == view.end()[-2]`. + +With the |zfp| |carrrelease| release of +:ref:`read-only arrays `, such arrays also support the two +kinds of immutable views (:code:`const_view` and :code:`private_const_view`). +The documentation below applies to views into read-only arrays as well. + +.. note:: + Like iterators and proxy references and pointers, a view is valid only + during the lifetime of the array that it references. **No reference + counting** is done to keep the array alive. It is up to the user to + ensure that the referenced array object is valid when accessed through + a view. + +There are several types of views distinguished by these attributes: + +* Read-only vs. read-write access. +* Shared vs. private access. +* Flat vs. nested indexing. + +Each of these attributes is discussed in detail below in these +sections: + +* :ref:`immutable_view` +* :ref:`mutable_view` +* :ref:`flat_view` +* :ref:`nested_view` +* :ref:`slicing` +* :ref:`private_immutable_view` +* :ref:`private_mutable_view` + + +.. 
_immutable_view: + +Immutable view +^^^^^^^^^^^^^^ + +The most basic view is the immutable :code:`const_view`, which +supports read-only access to the array elements it references. +This view serves primarily as a base class for more specialized +views. Its constructors allow establishing access to a whole +array or to a rectangular subset of an array. Note that like +references, pointers, and iterators, views are types nested within +the arrays that they reference. + +.. + .. cpp:class:: template array1::const_view + .. cpp:class:: template array2::const_view + .. cpp:class:: template array3::const_view + .. cpp:class:: template array4::const_view + +.. cpp:class:: array1::const_view +.. cpp:class:: array2::const_view +.. cpp:class:: array3::const_view +.. cpp:class:: array4::const_view + + Immutable view into 1D, 2D, 3D, and 4D array. + +---- + +.. _view_ctor: +.. cpp:function:: array1::const_view::const_view(array1* array) +.. cpp:function:: array2::const_view::const_view(array2* array) +.. cpp:function:: array3::const_view::const_view(array3* array) +.. cpp:function:: array4::const_view::const_view(array4* array) + + Constructor for read-only access to a whole array. As already + mentioned, these views are valid only during the lifetime of the + underlying array object. + +---- + +.. cpp:function:: array1::const_view::const_view(array1* array, size_t x, size_t nx) +.. cpp:function:: array2::const_view::const_view(array2* array, size_t x, size_t y, size_t nx, size_t ny) +.. cpp:function:: array3::const_view::const_view(array3* array, size_t x, size_t y, size_t z, size_t nx, size_t ny, size_t nz) +.. cpp:function:: array4::const_view::const_view(array4* array, size_t x, size_t y, size_t z, size_t w, size_t nx, size_t ny, size_t nz, size_t nw) + + Constructors for read-only access to a rectangular subset of an + array. The subset is specified by an offset, e.g., + (*x*, *y*, *z*) for a 3D array, and dimensions, e.g., + (*nx*, *ny*, *nz*) for a 3D array. 
The rectangle must fit within + the surrounding array. + +---- + +.. cpp:function:: size_t array1::const_view::global_x(size_t i) const +.. cpp:function:: size_t array2::const_view::global_x(size_t i) const +.. cpp:function:: size_t array2::const_view::global_y(size_t j) const +.. cpp:function:: size_t array3::const_view::global_x(size_t i) const +.. cpp:function:: size_t array3::const_view::global_y(size_t j) const +.. cpp:function:: size_t array3::const_view::global_z(size_t k) const +.. cpp:function:: size_t array4::const_view::global_x(size_t i) const +.. cpp:function:: size_t array4::const_view::global_y(size_t j) const +.. cpp:function:: size_t array4::const_view::global_z(size_t k) const +.. cpp:function:: size_t array4::const_view::global_w(size_t l) const + + Return global array index associated with local view index. For + instance, if a 1D view has been constructed with offset *x*, then + :code:`global_x(i)` returns *x* + *i*. + +---- + +.. cpp:function:: size_t array1::const_view::size_x() const +.. cpp:function:: size_t array2::const_view::size_x() const +.. cpp:function:: size_t array2::const_view::size_y() const +.. cpp:function:: size_t array3::const_view::size_x() const +.. cpp:function:: size_t array3::const_view::size_y() const +.. cpp:function:: size_t array3::const_view::size_z() const +.. cpp:function:: size_t array4::const_view::size_x() const +.. cpp:function:: size_t array4::const_view::size_y() const +.. cpp:function:: size_t array4::const_view::size_z() const +.. cpp:function:: size_t array4::const_view::size_w() const + + Return dimensions of view. + +---- + +.. _view_accessor: +.. cpp:function:: const_reference array1::const_view::operator()(size_t i) const +.. cpp:function:: const_reference array2::const_view::operator()(size_t i, size_t j) const +.. cpp:function:: const_reference array3::const_view::operator()(size_t i, size_t j, size_t k) const +.. 
cpp:function:: const_reference array4::const_view::operator()(size_t i, size_t j, size_t k, size_t l) const + + Return reference to scalar stored at multi-dimensional index given by + *x* + *i*, *y* + *j*, *z* + *k*, and *w* + *l*, where *x*, *y*, *z*, and *w* + specify the offset into the array. + +---- + +.. cpp:function:: const_reference array1::const_view::operator[](size_t index) const + + Alternative inspector for 1D arrays identical to + :cpp:func:`array1::const_view::operator()`. + +---- + +.. cpp:function:: array1::const_view::const_iterator array1::const_view::begin() const +.. cpp:function:: array2::const_view::const_iterator array2::const_view::begin() const +.. cpp:function:: array3::const_view::const_iterator array3::const_view::begin() const +.. cpp:function:: array4::const_view::const_iterator array4::const_view::begin() const +.. cpp:function:: array1::const_view::const_iterator array1::const_view::cbegin() const +.. cpp:function:: array2::const_view::const_iterator array2::const_view::cbegin() const +.. cpp:function:: array3::const_view::const_iterator array3::const_view::cbegin() const +.. cpp:function:: array4::const_view::const_iterator array4::const_view::cbegin() const + + Random-access const iterator to first element of view. + +---- + +.. cpp:function:: array1::const_view::const_iterator array1::const_view::end() const +.. cpp:function:: array2::const_view::const_iterator array2::const_view::end() const +.. cpp:function:: array3::const_view::const_iterator array3::const_view::end() const +.. cpp:function:: array4::const_view::const_iterator array4::const_view::end() const +.. cpp:function:: array1::const_view::const_iterator array1::const_view::cend() const +.. cpp:function:: array2::const_view::const_iterator array2::const_view::cend() const +.. cpp:function:: array3::const_view::const_iterator array3::const_view::cend() const +.. 
cpp:function:: array4::const_view::const_iterator array4::const_view::cend() const + + Random-access const iterator to end of view. + +There are a number of common methods inherited from a base class, +:code:`preview`, further up the class hierarchy. + +.. cpp:function:: double arrayANY::const_view::rate() const + + Return rate in bits per value. Same as :cpp:func:`array::rate`. + +---- + +.. cpp:function:: size_t arrayANY::const_view::size() const + + Total number of elements in view, e.g., *nx* |times| *ny* |times| *nz* for + 3D views. + +With the above definitions, the following example shows how a 2D view is +constructed and accessed:: + + zfp::array2d a(200, 100, rate); // define 200x100 array of doubles + zfp::array2d::const_view v(&a, 10, 5, 20, 20); // v is a 20x20 view into array a + assert(v(2, 1) == a(12, 6)); // v(2, 1) == a(10 + 2, 5 + 1) == a(12, 6) + assert(v.size() == 400); // 20x20 == 400 + + +.. _mutable_view: + +Mutable view +^^^^^^^^^^^^ + +The basic mutable :code:`view` derives from the :code:`const_view` but +adds operators for write-access. Its constructors are similar to those +for the :code:`const_view`. + +.. + .. cpp:class:: template array1::view + .. cpp:class:: template array2::view + .. cpp:class:: template array3::view + .. cpp:class:: template array4::view + +.. cpp:class:: array1::view : public array1::const_view +.. cpp:class:: array2::view : public array2::const_view +.. cpp:class:: array3::view : public array3::const_view +.. cpp:class:: array4::view : public array4::const_view + + Mutable view into 1D, 2D, 3D, and 4D array. + +---- + +.. cpp:function:: array1::view::view(array1* array) +.. cpp:function:: array2::view::view(array2* array) +.. cpp:function:: array3::view::view(array3* array) +.. cpp:function:: array4::view::view(array4* array) +.. cpp:function:: array1::view::view(array1* array, size_t x, size_t nx) +.. cpp:function:: array2::view::view(array2* array, size_t x, size_t y, size_t nx, size_t ny) +.. 
cpp:function:: array3::view::view(array3* array, size_t x, size_t y, size_t z, size_t nx, size_t ny, size_t nz) +.. cpp:function:: array4::view::view(array4* array, size_t x, size_t y, size_t z, size_t w, size_t nx, size_t ny, size_t nz, size_t nw) + + Whole-array and sub-array mutable view constructors. See + :ref:`const_view constructors ` for details. + +---- + +.. cpp:function:: reference array1::view::operator()(size_t i) +.. cpp:function:: reference array2::view::operator()(size_t i, size_t j) +.. cpp:function:: reference array3::view::operator()(size_t i, size_t j, size_t k) +.. cpp:function:: reference array4::view::operator()(size_t i, size_t j, size_t k, size_t l) + + These operators, whose arguments have the same meaning as in the + :ref:`array accessors `, return + :ref:`proxy references ` to individual array elements for + write access. + + +.. _flat_view: + +Flat view +^^^^^^^^^ + +The views discussed so far require multidimensional indexing, e.g., +(*i*, *j*, *k*) for 3D views. Some applications prefer one-dimensional +linear indexing, which is provided by the specialized flat view. For +example, in a 3D view with dimensions (*nx*, *ny*, *nz*), a multidimensional +index (*i*, *j*, *k*) corresponds to the flat view index +:: + + index = i + nx * (j + ny * k) + +This is true regardless of the view offset (*x*, *y*, *z*). + +The flat view derives from the mutable view and adds :code:`operator[]` +for flat indexing. This operator is essentially equivalent to +:cpp:func:`array::operator[]` defined for 2D, 3D, and 4D arrays. Flat +views also provide functions for converting between multidimensional and +flat indices. + +Flat views are available only for 2D, 3D, and 4D arrays. The basic mutable +view, :cpp:class:`array1::view`, for 1D arrays can be thought of as +either a flat or a nested view. + +.. + .. cpp:class:: template array2::flat_view + .. cpp:class:: template array3::flat_view + .. cpp:class:: template array4::flat_view + +.. 
cpp:class:: array2::flat_view : public array2::view +.. cpp:class:: array3::flat_view : public array3::view +.. cpp:class:: array4::flat_view : public array4::view + + Flat, mutable views for 2D, 3D, and 4D arrays. + +---- + +.. cpp:function:: array2::flat_view::flat_view(array2* array) +.. cpp:function:: array3::flat_view::flat_view(array3* array) +.. cpp:function:: array4::flat_view::flat_view(array4* array) +.. cpp:function:: array2::flat_view::flat_view(array2* array, size_t x, size_t y, size_t nx, size_t ny) +.. cpp:function:: array3::flat_view::flat_view(array3* array, size_t x, size_t y, size_t z, size_t nx, size_t ny, size_t nz) +.. cpp:function:: array4::flat_view::flat_view(array4* array, size_t x, size_t y, size_t z, size_t w, size_t nx, size_t ny, size_t nz, size_t nw) + + Whole-array and sub-array flat view constructors. See + :ref:`const_view constructors ` for details. + +---- + +.. cpp:function:: size_t array2::flat_view::index(size_t i, size_t j) const +.. cpp:function:: size_t array3::flat_view::index(size_t i, size_t j, size_t k) const +.. cpp:function:: size_t array4::flat_view::index(size_t i, size_t j, size_t k, size_t l) const + + Return flat index associated with multidimensional index. + +---- + +.. cpp:function:: void array2::flat_view::ij(size_t& i, size_t& j, size_t index) const +.. cpp:function:: void array3::flat_view::ijk(size_t& i, size_t& j, size_t& k, size_t index) const +.. cpp:function:: void array4::flat_view::ijkl(size_t& i, size_t& j, size_t& k, size_t& l, size_t index) const + + Convert flat index to multidimensional index. + +---- + +.. cpp:function:: const_reference array2::flat_view::operator[](size_t index) const +.. cpp:function:: const_reference array3::flat_view::operator[](size_t index) const +.. cpp:function:: const_reference array4::flat_view::operator[](size_t index) const + + Return array element associated with given flat index. + +---- + +.. 
cpp:function:: reference array2::flat_view::operator[](size_t index) +.. cpp:function:: reference array3::flat_view::operator[](size_t index) +.. cpp:function:: reference array4::flat_view::operator[](size_t index) + + Return reference to array element associated with given flat index. + + +.. _nested_view: + +Nested view +^^^^^^^^^^^ + +C and C++ support nested arrays (arrays of arrays), e.g., +:code:`double a[10][20][30]`, which are usually accessed via nested indexing +:code:`a[i][j][k]`. Here :code:`a` is a 3D array, :code:`a[i]` is a 2D array, +and :code:`a[i][j]` is a 1D array. This 3D array can also be accessed +via flat indexing, e.g., +:: + + a[i][j][k] == (&a[0][0][0])[600 * i + 30 * j + k] + +Nested views provide a mechanism to access array elements through +nested indexing and to extract lower-dimensional "slices" of +multidimensional arrays. Nested views are mutable. + +Nested views are associated with a dimensionality. For instance, +if :code:`v` is a 3D nested view of a 3D array, then :code:`v[i]` +is a 2D nested view (of a 3D array), :code:`v[i][j]` is a 1D nested +view (of a 3D array), and :code:`v[i][j][k]` is a (reference to a) scalar +array element. Note that the order of indices is reversed when using +nested indexing compared to multidimensional indexing, e.g., +:code:`v(i, j, k) == v[k][j][i]`. 
+ +Whereas :code:`operator[]` on an array object accesses an element +through flat indexing, the same array can be accessed through a +nested view to in effect provide nested array indexing:: + + zfp::array3d a(30, 20, 10, rate); // define 30x20x10 3D array + assert(a[32] == a(2, 1, 0)); // OK: flat and multidimensional indexing + assert(a[32] == a[0][1][2]); // ERROR: a does not support nested indexing + zfp::array3d::nested_view v(&a); // define a nested view of a + assert(a[32] == v[0][1][2]); // OK: v supports nested indexing + zfp::array2d b(v[5]); // define and deep copy 30x20 2D slice of a + assert(a(2, 1, 5) == b(2, 1)); // OK: multidimensional indexing + +.. + .. cpp:class:: template array2::nested_view1 + +.. cpp:class:: array2::nested_view1 + + View of a 1D slice of a 2D array. + +---- + +.. + .. cpp:class:: template array2::nested_view2 + +.. cpp:class:: array2::nested_view2 + + 2D view of a 2D (sub)array. + +---- + +.. + .. cpp:class:: template array3::nested_view1 + +.. cpp:class:: array3::nested_view1 + + View of a 1D slice of a 3D array. + +---- + +.. + .. cpp:class:: template array3::nested_view2 + +.. cpp:class:: array3::nested_view2 + + View of a 2D slice of a 3D array. + +---- + +.. + .. cpp:class:: template array3::nested_view3 + +.. cpp:class:: array3::nested_view3 + + 3D view of a 3D (sub)array. + +---- + +.. + .. cpp:class:: template array4::nested_view1 + +.. cpp:class:: array4::nested_view1 + + View of a 1D slice of a 4D array. + +---- + +.. + .. cpp:class:: template array4::nested_view2 + +.. cpp:class:: array4::nested_view2 + + View of a 2D slice of a 4D array. + +---- + +.. + .. cpp:class:: template array4::nested_view3 + +.. cpp:class:: array4::nested_view3 + + View of a 3D slice of a 4D array. + +---- + +.. + .. cpp:class:: template array4::nested_view4 + +.. cpp:class:: array4::nested_view4 + + 4D view of a 4D (sub)array. + +---- + +.. cpp:function:: array2::nested_view2::nested_view2(array2* array) +.. 
cpp:function:: array3::nested_view3::nested_view3(array3* array) +.. cpp:function:: array4::nested_view4::nested_view4(array4* array) +.. cpp:function:: array2::nested_view2::nested_view2(array2* array, size_t x, size_t y, size_t nx, size_t ny) +.. cpp:function:: array3::nested_view3::nested_view3(array3* array, size_t x, size_t y, size_t z, size_t nx, size_t ny, size_t nz) +.. cpp:function:: array4::nested_view4::nested_view4(array4* array, size_t x, size_t y, size_t z, size_t w, size_t nx, size_t ny, size_t nz, size_t nw) + + Whole-array and sub-array nested view constructors. See + :ref:`const_view ` constructors for details. + Lower-dimensional view constructors are not accessible to the + user but are invoked when accessing views via nested indexing. + +---- + +.. cpp:function:: size_t array2::nested_view1::size_x() const +.. cpp:function:: size_t array2::nested_view2::size_x() const +.. cpp:function:: size_t array2::nested_view2::size_y() const +.. cpp:function:: size_t array3::nested_view1::size_x() const +.. cpp:function:: size_t array3::nested_view2::size_x() const +.. cpp:function:: size_t array3::nested_view2::size_y() const +.. cpp:function:: size_t array3::nested_view3::size_x() const +.. cpp:function:: size_t array3::nested_view3::size_y() const +.. cpp:function:: size_t array3::nested_view3::size_z() const +.. cpp:function:: size_t array4::nested_view1::size_x() const +.. cpp:function:: size_t array4::nested_view2::size_x() const +.. cpp:function:: size_t array4::nested_view2::size_y() const +.. cpp:function:: size_t array4::nested_view3::size_x() const +.. cpp:function:: size_t array4::nested_view3::size_y() const +.. cpp:function:: size_t array4::nested_view3::size_z() const +.. cpp:function:: size_t array4::nested_view4::size_x() const +.. cpp:function:: size_t array4::nested_view4::size_y() const +.. cpp:function:: size_t array4::nested_view4::size_z() const +.. cpp:function:: size_t array4::nested_view4::size_w() const + + View dimensions. 
+ +---- + +.. cpp:function:: array4::nested_view3 array4::nested_view4::operator[](size_t index) const + + Return view to a 3D slice of 4D array. + +---- + +.. cpp:function:: array3::nested_view2 array3::nested_view3::operator[](size_t index) const +.. cpp:function:: array4::nested_view2 array4::nested_view3::operator[](size_t index) const + + Return view to a 2D slice of a 3D or 4D array. + +---- + +.. cpp:function:: array2::nested_view1 array2::nested_view2::operator[](size_t index) const +.. cpp:function:: array3::nested_view1 array3::nested_view2::operator[](size_t index) const +.. cpp:function:: array4::nested_view1 array4::nested_view2::operator[](size_t index) const + + Return view to a 1D slice of a 2D, 3D, or 4D array. + +---- + +.. cpp:function:: const_reference array2::nested_view1::operator[](size_t index) const +.. cpp:function:: const_reference array3::nested_view1::operator[](size_t index) const +.. cpp:function:: const_reference array4::nested_view1::operator[](size_t index) const + + Return scalar element of a 2D, 3D, or 4D array. + +---- + +.. cpp:function:: reference array2::nested_view1::operator[](size_t index) +.. cpp:function:: reference array3::nested_view1::operator[](size_t index) +.. cpp:function:: reference array4::nested_view1::operator[](size_t index) + + Return reference to a scalar element of a 2D, 3D, or 4D array. + +---- + +.. cpp:function:: const_reference array2::nested_view1::operator()(size_t i) const +.. cpp:function:: const_reference array2::nested_view2::operator()(size_t i, size_t j) const +.. cpp:function:: const_reference array3::nested_view1::operator()(size_t i) const +.. cpp:function:: const_reference array3::nested_view2::operator()(size_t i, size_t j) const +.. cpp:function:: const_reference array3::nested_view3::operator()(size_t i, size_t j, size_t k) const +.. cpp:function:: const_reference array4::nested_view1::operator()(size_t i) const +.. 
cpp:function:: const_reference array4::nested_view2::operator()(size_t i, size_t j) const +.. cpp:function:: const_reference array4::nested_view3::operator()(size_t i, size_t j, size_t k) const +.. cpp:function:: const_reference array4::nested_view4::operator()(size_t i, size_t j, size_t k, size_t l) const + + Return const reference to a scalar element of a 2D, 3D, or 4D array. + +---- + +.. cpp:function:: reference array2::nested_view1::operator()(size_t i) +.. cpp:function:: reference array2::nested_view2::operator()(size_t i, size_t j) +.. cpp:function:: reference array3::nested_view1::operator()(size_t i) +.. cpp:function:: reference array3::nested_view2::operator()(size_t i, size_t j) +.. cpp:function:: reference array3::nested_view3::operator()(size_t i, size_t j, size_t k) +.. cpp:function:: reference array4::nested_view1::operator()(size_t i) +.. cpp:function:: reference array4::nested_view2::operator()(size_t i, size_t j) +.. cpp:function:: reference array4::nested_view3::operator()(size_t i, size_t j, size_t k) +.. cpp:function:: reference array4::nested_view4::operator()(size_t i, size_t j, size_t k, size_t l) + + Return reference to a scalar element of a 2D, 3D, or 4D array. + + +.. _slicing: + +Slicing +^^^^^^^ + +Arrays can be constructed as deep copies of slices of higher-dimensional +arrays, as the code example above shows (i.e., +:code:`zfp::array2d b(v[5]);`). Unlike views, which have reference +semantics, such array *slicing* has value semantics. In this example, +2D array *b* is initialized as a (deep) copy of a slice of 3D array *a* +via nested view *v*. Subsequent modifications of *b* have no effect on +*a*. + +Slicing is implemented as array constructors templated on views. +Upon initialization, elements are copied one at a time from the view +via multidimensional indexing, e.g., :code:`v(i, j, k)`. Note that +view and array dimensionalities must match, but aside from this an +array may be constructed from any view. 
+ +Slicing need not change the dimensionality, but can be used to copy +an equidimensional subset of one array to another array, as in this +example:: + + zfp::array3d a(30, 20, 10, rate); + zfp::array3d::const_view v(&a, 1, 2, 3, 4, 5, 6); + zfp::array3d b(v); + assert(b(0, 0, 0) == a(1, 2, 3)); + assert(b.size_x() == 4); + assert(b.size_y() == 5); + assert(b.size_z() == 6); + +Slicing adds the following templated array constructors. + +.. cpp:function:: template<class View> array1::array1(const View& v) +.. cpp:function:: template<class View> array2::array2(const View& v) +.. cpp:function:: template<class View> array3::array3(const View& v) +.. cpp:function:: template<class View> array4::array4(const View& v) + + Construct array from a view via a deep copy. The view, *v*, must support + :ref:`multidimensional indexing `. + The rate for the constructed array is initialized to the rate of the array + associated with the view. Note that the actual rate may differ if the + constructed array is a lower-dimensional slice of a higher-dimensional + array due to lower rate granularity (see FAQ :ref:`#12 `). + The cache size of the constructed array is set to the default size. + + +.. _private_immutable_view: + +Private immutable view +^^^^^^^^^^^^^^^^^^^^^^ + +|zfp|'s compressed arrays are in general not thread-safe. The main +reason for this is that each array maintains its own cache of +uncompressed blocks. Race conditions on the cache would occur unless +it were locked upon each and every array access, which would have a +prohibitive performance cost. + +To ensure thread-safe access, |zfp| provides private mutable and +immutable views of arrays that maintain their own private caches. +The :code:`private_const_view` immutable view +provides read-only access to the underlying array. It is similar +to a :ref:`const_view ` in this sense, but differs in +that it maintains its own private cache rather than sharing the +cache owned by the array.
Multiple threads may thus access the +same array in parallel through their own private views. + +.. note:: + Thread safety is ensured only for OpenMP threads, and the |zfp| + views must be compiled by an OpenMP compliant compiler. As the + |zfp| compressed-array class implementation is defined in headers, + the application code using |zfp| must also be compiled with OpenMP + enabled if multithreaded access to |zfp| arrays is desired. + +.. note:: + Private views **do not guarantee cache coherence**. If, for example, + the array is modified, then already cached data in a private view is + not automatically updated. It is up to the user to ensure cache + coherence by flushing (compressing modified blocks) or clearing + (emptying) caches when appropriate. + +The cache associated with a private view can be manipulated in the +same way an array's cache can. For instance, the user may set the +cache size on a per-view basis. + +Unlike with :ref:`private mutable views `, +private immutable views may freely access any element in the +array visible through the view, i.e., multiple threads may +read the same array element simultaneously. For an example of how +to use private views for both read and write multithreaded access, +see the :ref:`diffusion ` code example. + +Private views support only multidimensional indexing, i.e., they +are neither flat nor nested. + +.. + .. cpp:class:: template array1::private_const_view + .. cpp:class:: template array2::private_const_view + .. cpp:class:: template array3::private_const_view + .. cpp:class:: template array4::private_const_view + +.. _private_const_view: +.. cpp:class:: array1::private_const_view +.. cpp:class:: array2::private_const_view +.. cpp:class:: array3::private_const_view +.. cpp:class:: array4::private_const_view + + Immutable views of 1D, 2D, 3D, and 4D arrays with private caches. + +---- + +.. cpp:function:: array1::private_const_view::private_const_view(array1* array) +.. 
cpp:function:: array2::private_const_view::private_const_view(array2* array) +.. cpp:function:: array3::private_const_view::private_const_view(array3* array) +.. cpp:function:: array4::private_const_view::private_const_view(array4* array) +.. cpp:function:: array1::private_const_view::private_const_view(array1* array, size_t x, size_t nx) +.. cpp:function:: array2::private_const_view::private_const_view(array2* array, size_t x, size_t y, size_t nx, size_t ny) +.. cpp:function:: array3::private_const_view::private_const_view(array3* array, size_t x, size_t y, size_t z, size_t nx, size_t ny, size_t nz) +.. cpp:function:: array4::private_const_view::private_const_view(array4* array, size_t x, size_t y, size_t z, size_t w, size_t nx, size_t ny, size_t nz, size_t nw) + + Whole-array and sub-array private immutable view constructors. See + :ref:`const_view constructors ` for details. + +---- + +.. cpp:function:: size_t array1::private_const_view::size_x() const +.. cpp:function:: size_t array2::private_const_view::size_x() const +.. cpp:function:: size_t array2::private_const_view::size_y() const +.. cpp:function:: size_t array3::private_const_view::size_x() const +.. cpp:function:: size_t array3::private_const_view::size_y() const +.. cpp:function:: size_t array3::private_const_view::size_z() const +.. cpp:function:: size_t array4::private_const_view::size_x() const +.. cpp:function:: size_t array4::private_const_view::size_y() const +.. cpp:function:: size_t array4::private_const_view::size_z() const +.. cpp:function:: size_t array4::private_const_view::size_w() const + + View dimensions. + +---- + +.. cpp:function:: const_reference array1::private_const_view::operator()(size_t i) const +.. cpp:function:: const_reference array2::private_const_view::operator()(size_t i, size_t j) const +.. cpp:function:: const_reference array3::private_const_view::operator()(size_t i, size_t j, size_t k) const +.. 
cpp:function:: const_reference array4::private_const_view::operator()(size_t i, size_t j, size_t k, size_t l) const + + Return const reference to scalar element of a 1D, 2D, 3D, or 4D array. + +The following functions are common among all dimensionalities: + +.. cpp:function:: size_t arrayANY::private_const_view::cache_size() const +.. cpp:function:: void arrayANY::private_const_view::set_cache_size(size_t csize) +.. cpp:function:: void arrayANY::private_const_view::clear_cache() const + + Cache manipulation. See :ref:`caching` for details. + + +.. _private_mutable_view: + +Private mutable view +^^^^^^^^^^^^^^^^^^^^ + +The mutable :code:`private_view` supports both read and write access +and is backed by a private cache. Because block compression, as needed +to support write access, is not an atomic operation, mutable views +and multithreading imply potential race conditions on the compressed +blocks stored by an array. Although locking the array or individual +blocks upon compression would be a potential solution, this would either +serialize compression, thus hurting performance, or add a possibly large +memory overhead by maintaining a lock with each block. + +.. note:: + To avoid multiple threads simultaneously compressing the same block, + **private mutable views of an array must reference disjoint, + block-aligned subarrays** for thread-safe access. Each block of |4powd| + array elements must be associated with at most one private mutable view, + and therefore these views must reference non-overlapping rectangular + subsets that are aligned on block boundaries, except possibly for partial + blocks on the array boundary. (Expert users may alternatively ensure + serialization of block compression calls and cache coherence in other + ways, in which case overlapping private views may be permitted.) + +Aside from this requirement, the user may partition the array into +disjoint views in whatever manner is suitable for the application. 
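The block-alignment requirement can be illustrated with a standalone helper. This is a hypothetical sketch, not the partitioner supplied by the :code:`private_view` API; it assumes only the fact stated above that zfp blocks span 4 elements per dimension, and splits one dimension of length *n* into *count* disjoint pieces whose interior boundaries land on multiples of 4:

```cpp
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <utility>

// Hypothetical helper (not part of the zfp API): return the half-open
// index range [begin, end) of piece `index` out of `count` pieces of the
// range [0, n), with every interior boundary on a multiple of 4 so each
// piece covers whole 4-element blocks, except possibly a partial block
// at the array boundary.
std::pair<size_t, size_t>
block_aligned_piece(size_t n, size_t count, size_t index) {
  const size_t blocks = (n + 3) / 4;            // count partial blocks too
  const size_t lo_block = blocks * index / count;
  const size_t hi_block = blocks * (index + 1) / count;
  const size_t begin = 4 * lo_block;
  const size_t end = std::min(4 * hi_block, n); // clamp the edge block
  return {begin, end};
}
```

Note that, just as with the real partitioner described below, some pieces may come out empty when there are more pieces than blocks along the dimension.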
+The :code:`private_view` API supplies a very basic partitioner to +facilitate this task, but it may not result in optimal partitions or +good load balance. + +When multithreaded write access is desired, any direct accesses to the +array itself (i.e., not through a view) could invoke compression. Even +a read access may trigger compression if a modified block is evicted +from the cache. Hence, such direct array accesses should be confined +to serial code sections when private views are used. + +As with private immutable views, **cache coherence is not enforced**. +Although this is less of an issue for private mutable views due to +the requirement that views may not overlap, each private mutable view +overlaps an index space with the underlying array whose cache is not +automatically synchronized with the view's private cache. See +the :ref:`diffusion ` code example for how to enforce +cache coherence with mutable and immutable private views. + +The :code:`private_view` class inherits all public functions from +:code:`private_const_view`. + +.. + .. cpp:class:: template array1::private_view + .. cpp:class:: template array2::private_view + .. cpp:class:: template array3::private_view + .. cpp:class:: template array4::private_view + +.. cpp:class:: array1::private_view : public array1::private_const_view +.. cpp:class:: array2::private_view : public array2::private_const_view +.. cpp:class:: array3::private_view : public array3::private_const_view +.. cpp:class:: array4::private_view : public array4::private_const_view + + Mutable views of 1D, 2D, 3D, and 4D arrays with private caches. + +---- + +.. + .. cpp:class:: template array1::private_view::view_reference + .. cpp:class:: template array2::private_view::view_reference + .. cpp:class:: template array3::private_view::view_reference + .. cpp:class:: template array4::private_view::view_reference + +.. cpp:class:: array1::private_view::view_reference +.. cpp:class:: array2::private_view::view_reference +..
cpp:class:: array3::private_view::view_reference +.. cpp:class:: array4::private_view::view_reference + + Proxy references to array elements specialized for mutable + private views. + +---- + +.. cpp:function:: array1::private_view::private_view(array1* array) +.. cpp:function:: array2::private_view::private_view(array2* array) +.. cpp:function:: array3::private_view::private_view(array3* array) +.. cpp:function:: array4::private_view::private_view(array4* array) +.. cpp:function:: array1::private_view::private_view(array1* array, size_t x, size_t nx) +.. cpp:function:: array2::private_view::private_view(array2* array, size_t x, size_t y, size_t nx, size_t ny) +.. cpp:function:: array3::private_view::private_view(array3* array, size_t x, size_t y, size_t z, size_t nx, size_t ny, size_t nz) +.. cpp:function:: array4::private_view::private_view(array4* array, size_t x, size_t y, size_t z, size_t w, size_t nx, size_t ny, size_t nz, size_t nw) + + Whole-array and sub-array private mutable view constructors. See + :ref:`const_view constructors ` for details. + +---- + +.. cpp:function:: array1::private_view::view_reference array1::private_view::operator()(size_t i) const +.. cpp:function:: array2::private_view::view_reference array2::private_view::operator()(size_t i, size_t j) const +.. cpp:function:: array3::private_view::view_reference array3::private_view::operator()(size_t i, size_t j, size_t k) const +.. cpp:function:: array4::private_view::view_reference array4::private_view::operator()(size_t i, size_t j, size_t k, size_t l) const + + Return reference to a scalar element of a 1D, 2D, 3D, or 4D array. + +The following functions are common among all dimensionalities: + +.. cpp:function:: void arrayANY::private_view::partition(size_t index, size_t count) + + Partition the current view into *count* roughly equal-size pieces along the + view's longest dimension and modify the view's extents to match the piece + indexed by *index*, with 0 |leq| *index* < *count*. 
+ These functions may be called multiple times, e.g., to recursively + partition along different dimensions. The partitioner does not generate + new views; it merely modifies the current values of the view's offsets + and dimensions. Note that this may result in empty views whose dimensions + are zero, e.g., if there are more pieces than blocks along a dimension. + +---- + +.. cpp:function:: void arrayANY::private_view::flush_cache() const + + Flush cache by compressing any modified blocks and emptying the cache. diff --git a/docs/source/zforp.rst b/docs/source/zforp.rst new file mode 100644 index 00000000..aee51715 --- /dev/null +++ b/docs/source/zforp.rst @@ -0,0 +1,884 @@ +.. include:: defs.rst + +.. index:: + single: zFORp +.. _zforp: + +Fortran Bindings +================ + +|zfp| |zforprelease| adds |zforp|: a Fortran API providing wrappers around +the :ref:`high-level C API `. Wrappers for +:ref:`compressed-array classes ` will arrive in a future release. +The |zforp| implementation is based on the standard :code:`iso_c_binding` +module available since Fortran 2003. The use of :code:`ptrdiff_t` in +the |zfp| |fieldrelease| C API, however, requires the corresponding +:code:`c_ptrdiff_t` available only since Fortran 2018. + +Every high-level C API function can be called from a Fortran wrapper function. +C structs are wrapped as Fortran derived types, each containing a single C +pointer to the C struct in memory. The wrapper functions accept and return +these Fortran types, so users should never need to touch the C pointers. +In addition to the high-level C API, two essential functions from the +:ref:`bit stream API ` for opening and closing bit streams are +available. + +See example code :file:`tests/fortran/testFortran.f` (on the GitHub +`develop branch `_) +for how the Fortran API is used to compress and decompress data. + +.. _zforp_changes: +.. note:: + + |zfp| |fieldrelease| simplifies the |zforp| module name from + ``zforp_module`` to ``zfp``. 
This will likely require changing + associated use statements within existing code when updating + from prior versions of zFORp. + + Furthermore, as outlined above, the |zfp| |fieldrelease| API requires + a Fortran 2018 compiler. + + +Types +----- + +.. f:type:: zFORp_bitstream + + :f c_ptr object: A C pointer to the instance of :c:type:`bitstream` + +---- + +.. f:type:: zFORp_stream + + :f c_ptr object: A C pointer to the instance of :c:type:`zfp_stream` + +---- + +.. f:type:: zFORp_field + + :f c_ptr object: A C pointer to the instance of :c:type:`zfp_field` + +Constants +--------- + +Enumerations +^^^^^^^^^^^^ + +.. _zforp_type: +.. f:variable:: integer zFORp_type_none +.. f:variable:: integer zFORp_type_int32 +.. f:variable:: integer zFORp_type_int64 +.. f:variable:: integer zFORp_type_float +.. f:variable:: integer zFORp_type_double + + Enums wrapping :c:type:`zfp_type` + +---- + +.. _zforp_mode: +.. f:variable:: integer zFORp_mode_null +.. f:variable:: integer zFORp_mode_expert +.. f:variable:: integer zFORp_mode_fixed_rate +.. f:variable:: integer zFORp_mode_fixed_precision +.. f:variable:: integer zFORp_mode_fixed_accuracy +.. f:variable:: integer zFORp_mode_reversible + + Enums wrapping :c:type:`zfp_mode` + +---- + +.. _zforp_exec: +.. f:variable:: integer zFORp_exec_serial +.. f:variable:: integer zFORp_exec_omp +.. f:variable:: integer zFORp_exec_cuda + + Enums wrapping :c:type:`zfp_exec_policy` + +Non-Enum Constants +^^^^^^^^^^^^^^^^^^ + +.. f:variable:: integer zFORp_version_major + + Wraps :c:macro:`ZFP_VERSION_MAJOR` + +---- + +.. f:variable:: integer zFORp_version_minor + + Wraps :c:macro:`ZFP_VERSION_MINOR` + +---- + +.. f:variable:: integer zFORp_version_patch + + Wraps :c:macro:`ZFP_VERSION_PATCH` + +---- + +.. f:variable:: integer zFORp_version_tweak + + Wraps :c:macro:`ZFP_VERSION_TWEAK` + +---- + +.. f:variable:: integer zFORp_codec_version + + Wraps :c:data:`zfp_codec_version` + +---- + +.. 
f:variable:: integer zFORp_library_version + + Wraps :c:data:`zfp_library_version` + +---- + +.. f:variable:: character(len=36) zFORp_version_string + + Wraps :c:data:`zfp_version_string` + +---- + +.. f:variable:: integer zFORp_min_bits + + Wraps :c:macro:`ZFP_MIN_BITS` + +---- + +.. f:variable:: integer zFORp_max_bits + + Wraps :c:macro:`ZFP_MAX_BITS` + +---- + +.. f:variable:: integer zFORp_max_prec + + Wraps :c:macro:`ZFP_MAX_PREC` + +---- + +.. f:variable:: integer zFORp_min_exp + + Wraps :c:macro:`ZFP_MIN_EXP` + +---- + +.. _zforp_header: +.. f:variable:: integer zFORp_header_magic + + Wraps :c:macro:`ZFP_HEADER_MAGIC` + +---- + +.. f:variable:: integer zFORp_header_meta + + Wraps :c:macro:`ZFP_HEADER_META` + +---- + +.. f:variable:: integer zFORp_header_mode + + Wraps :c:macro:`ZFP_HEADER_MODE` + +---- + +.. f:variable:: integer zFORp_header_full + + Wraps :c:macro:`ZFP_HEADER_FULL` + +---- + +.. f:variable:: integer zFORp_meta_null + + Wraps :c:macro:`ZFP_META_NULL` + +---- + +.. f:variable:: integer zFORp_magic_bits + + Wraps :c:macro:`ZFP_MAGIC_BITS` + +---- + +.. f:variable:: integer zFORp_meta_bits + + Wraps :c:macro:`ZFP_META_BITS` + +---- + +.. f:variable:: integer zFORp_mode_short_bits + + Wraps :c:macro:`ZFP_MODE_SHORT_BITS` + +---- + +.. f:variable:: integer zFORp_mode_long_bits + + Wraps :c:macro:`ZFP_MODE_LONG_BITS` + +---- + +.. f:variable:: integer zFORp_header_max_bits + + Wraps :c:macro:`ZFP_HEADER_MAX_BITS` + +---- + +.. f:variable:: integer zFORp_mode_short_max + + Wraps :c:macro:`ZFP_MODE_SHORT_MAX` + +Functions and Subroutines +------------------------- + +Each of the functions included here wraps a corresponding C function. Please +consult the C documentation for detailed descriptions of the functions, their +parameters, and their return values. + +Bit Stream +^^^^^^^^^^ + +.. 
f:function:: zFORp_bitstream_stream_open(buffer, bytes) + + Wrapper for :c:func:`stream_open` + + :p c_ptr buffer [in]: Memory buffer + :p bytes [in]: Buffer size in bytes + :ptype bytes: integer (kind=8) + :r bs: Bit stream + :rtype bs: zFORp_bitstream + +---- + +.. f:subroutine:: zFORp_bitstream_stream_close(bs) + + Wrapper for :c:func:`stream_close` + + :p zFORp_bitstream bs [inout]: Bit stream + +Utility Functions +^^^^^^^^^^^^^^^^^ + +.. f:function:: zFORp_type_size(scalar_type) + + Wrapper for :c:func:`zfp_type_size` + + :p integer scalar_type [in]: :ref:`zFORp_type ` enum + :r type_size: Size of described :c:type:`zfp_type`, in bytes, from C-language perspective + :rtype type_size: integer (kind=8) + +Compressed Stream +^^^^^^^^^^^^^^^^^ + +.. f:function:: zFORp_stream_open(bs) + + Wrapper for :c:func:`zfp_stream_open` + + :p zFORp_bitstream bs [in]: Bit stream + :r stream: Newly allocated compressed stream + :rtype stream: zFORp_stream + +---- + +.. f:subroutine:: zFORp_stream_close(stream) + + Wrapper for :c:func:`zfp_stream_close` + + :p zFORp_stream stream [inout]: Compressed stream + +---- + +.. f:function:: zFORp_stream_bit_stream(stream) + + Wrapper for :c:func:`zfp_stream_bit_stream` + + :p zFORp_stream stream [in]: Compressed stream + :r bs: Bit stream + :rtype bs: zFORp_bitstream + +---- + +.. f:function:: zFORp_stream_compression_mode(stream) + + Wrapper for :c:func:`zfp_stream_compression_mode` + + :p zFORp_stream stream [in]: Compressed stream + :r mode: :ref:`zFORp_mode ` enum + :rtype mode: integer + +---- + +.. f:function:: zFORp_stream_rate(stream, dims) + + Wrapper for :c:func:`zfp_stream_rate` + + :p zFORp_stream stream [in]: Compressed stream + :p integer dims [in]: Number of dimensions + :r rate_result: Rate in compressed bits/scalar + :rtype rate_result: real (kind=8) + +---- + +.. 
f:function:: zFORp_stream_precision(stream) + + Wrapper for :c:func:`zfp_stream_precision` + + :p zFORp_stream stream [in]: Compressed stream + :r prec_result: Precision in uncompressed bits/scalar + :rtype prec_result: integer + +---- + +.. f:function:: zFORp_stream_accuracy(stream) + + Wrapper for :c:func:`zfp_stream_accuracy` + + :p zFORp_stream stream [in]: Compressed stream + :r tol_result: Absolute error tolerance + :rtype tol_result: real (kind=8) + +---- + +.. f:function:: zFORp_stream_mode(stream) + + Wrapper for :c:func:`zfp_stream_mode` + + :p zFORp_stream stream [in]: Compressed stream + :r mode: 64-bit encoded mode + :rtype mode: integer (kind=8) + +---- + +.. f:subroutine:: zFORp_stream_params(stream, minbits, maxbits, maxprec, minexp) + + Wrapper for :c:func:`zfp_stream_params` + + :p zFORp_stream stream [in]: Compressed stream + :p integer minbits [inout]: Minimum number of bits per block + :p integer maxbits [inout]: Maximum number of bits per block + :p integer maxprec [inout]: Maximum precision + :p integer minexp [inout]: Minimum bit plane number encoded + +---- + +.. f:function:: zFORp_stream_compressed_size(stream) + + Wrapper for :c:func:`zfp_stream_compressed_size` + + :p zFORp_stream stream [in]: Compressed stream + :r compressed_size: Compressed size in bytes + :rtype compressed_size: integer (kind=8) + +---- + +.. f:function:: zFORp_stream_maximum_size(stream, field) + + Wrapper for :c:func:`zfp_stream_maximum_size` + + :p zFORp_stream stream [in]: Compressed stream + :p zFORp_field field [in]: Field metadata + :r max_size: Maximum possible compressed size in bytes + :rtype max_size: integer (kind=8) + +---- + +.. f:subroutine:: zFORp_stream_rewind(stream) + + Wrapper for :c:func:`zfp_stream_rewind` + + :p zFORp_stream stream [in]: Compressed stream + +---- + +.. 
f:subroutine:: zFORp_stream_set_bit_stream(stream, bs) + + Wrapper for :c:func:`zfp_stream_set_bit_stream` + + :p zFORp_stream stream [in]: Compressed stream + :p zFORp_bitstream bs [in]: Bit stream + + +Compression Parameters +^^^^^^^^^^^^^^^^^^^^^^ + +.. f:subroutine:: zFORp_stream_set_reversible(stream) + + Wrapper for :c:func:`zfp_stream_set_reversible` + + :p zFORp_stream stream [in]: Compressed stream + +---- + +.. f:function:: zFORp_stream_set_rate(stream, rate, scalar_type, dims, align) + + Wrapper for :c:func:`zfp_stream_set_rate` + + :p zFORp_stream stream [in]: Compressed stream + :p real rate [in]: Desired rate + :p integer scalar_type [in]: :ref:`zFORp_type ` enum + :p integer dims [in]: Number of dimensions + :p integer align [in]: Align blocks on words for write random access? + :r rate_result: Actual set rate in bits/scalar + :rtype rate_result: real (kind=8) + +---- + +.. f:function:: zFORp_stream_set_precision(stream, prec) + + Wrapper for :c:func:`zfp_stream_set_precision` + + :p zFORp_stream stream [in]: Compressed stream + :p integer prec [in]: Desired precision + :r prec_result: Actual set precision + :rtype prec_result: integer + +---- + +.. f:function:: zFORp_stream_set_accuracy(stream, tolerance) + + Wrapper for :c:func:`zfp_stream_set_accuracy()` + + :p zFORp_stream stream [in]: Compressed stream + :p tolerance [in]: Desired error tolerance + :ptype tolerance: real (kind=8) + :r tol_result: Actual set tolerance + :rtype tol_result: real (kind=8) + +---- + +.. f:function:: zFORp_stream_set_mode(stream, mode) + + Wrapper for :c:func:`zfp_stream_set_mode` + + :p zFORp_stream stream [in]: Compressed stream + :p mode [in]: Compact encoding of compression parameters + :ptype mode: integer (kind=8) + :r mode_result: Newly set :ref:`zFORp_mode ` enum + :rtype mode_result: integer + +---- + +.. 
f:function:: zFORp_stream_set_params(stream, minbits, maxbits, maxprec, minexp) + + Wrapper for :c:func:`zfp_stream_set_params` + + :p zFORp_stream stream [in]: Compressed stream + :p integer minbits [in]: Minimum number of bits per block + :p integer maxbits [in]: Maximum number of bits per block + :p integer maxprec [in]: Maximum precision + :p integer minexp [in]: Minimum bit plane number encoded + :r is_success: Indicate whether parameters were successfully set (1) or not (0) + :rtype is_success: integer + +Execution Policy +^^^^^^^^^^^^^^^^ + +.. f:function:: zFORp_stream_execution(stream) + + Wrapper for :c:func:`zfp_stream_execution` + + :p zFORp_stream stream [in]: Compressed stream + :r execution_policy: :ref:`zFORp_exec ` enum indicating active execution policy + :rtype execution_policy: integer + +---- + +.. f:function:: zFORp_stream_omp_threads(stream) + + Wrapper for :c:func:`zfp_stream_omp_threads` + + :p zFORp_stream stream [in]: Compressed stream + :r thread_count: Number of OpenMP threads to use upon execution + :rtype thread_count: integer + +---- + +.. f:function:: zFORp_stream_omp_chunk_size(stream) + + Wrapper for :c:func:`zfp_stream_omp_chunk_size` + + :p zFORp_stream stream [in]: Compressed stream + :r chunk_size_blocks: Specified chunk size, in blocks + :rtype chunk_size_blocks: integer (kind=8) + +---- + +.. f:function:: zFORp_stream_set_execution(stream, execution_policy) + + Wrapper for :c:func:`zfp_stream_set_execution` + + :p zFORp_stream stream [in]: Compressed stream + :p integer execution_policy [in]: :ref:`zFORp_exec ` enum indicating desired execution policy + :r is_success: Indicate whether execution policy was successfully set (1) or not (0) + :rtype is_success: integer + +---- + +.. 
f:function:: zFORp_stream_set_omp_threads(stream, thread_count) + + Wrapper for :c:func:`zfp_stream_set_omp_threads` + + :p zFORp_stream stream [in]: Compressed stream + :p integer thread_count [in]: Desired number of OpenMP threads + :r is_success: Indicate whether number of threads was successfully set (1) or not (0) + :rtype is_success: integer + +---- + +.. f:function:: zFORp_stream_set_omp_chunk_size(stream, chunk_size) + + Wrapper for :c:func:`zfp_stream_set_omp_chunk_size` + + :p zFORp_stream stream [in]: Compressed stream + :p integer chunk_size [in]: Desired chunk size, in blocks + :r is_success: Indicate whether chunk size was successfully set (1) or not (0) + :rtype is_success: integer + +Array Metadata +^^^^^^^^^^^^^^ + +.. f:function:: zFORp_field_alloc() + + Wrapper for :c:func:`zfp_field_alloc` + + :r field: Newly allocated field + :rtype field: zFORp_field + +---- + +.. f:function:: zFORp_field_1d(uncompressed_ptr, scalar_type, nx) + + Wrapper for :c:func:`zfp_field_1d` + + :p c_ptr uncompressed_ptr [in]: Pointer to uncompressed data + :p integer scalar_type [in]: :ref:`zFORp_type ` enum describing uncompressed scalar type + :p integer nx [in]: Number of array elements + :r field: Newly allocated field + :rtype field: zFORp_field + +---- + +.. f:function:: zFORp_field_2d(uncompressed_ptr, scalar_type, nx, ny) + + Wrapper for :c:func:`zfp_field_2d` + + :p c_ptr uncompressed_ptr [in]: Pointer to uncompressed data + :p integer scalar_type [in]: :ref:`zFORp_type ` enum describing uncompressed scalar type + :p integer nx [in]: Number of array elements in *x* dimension + :p integer ny [in]: Number of array elements in *y* dimension + :r field: Newly allocated field + :rtype field: zFORp_field + +---- + +.. 
f:function:: zFORp_field_3d(uncompressed_ptr, scalar_type, nx, ny, nz) + + Wrapper for :c:func:`zfp_field_3d` + + :p c_ptr uncompressed_ptr [in]: Pointer to uncompressed data + :p integer scalar_type [in]: :ref:`zFORp_type ` enum describing uncompressed scalar type + :p integer nx [in]: Number of array elements in *x* dimension + :p integer ny [in]: Number of array elements in *y* dimension + :p integer nz [in]: Number of array elements in *z* dimension + :r field: Newly allocated field + :rtype field: zFORp_field + +---- + +.. f:function:: zFORp_field_4d(uncompressed_ptr, scalar_type, nx, ny, nz, nw) + + Wrapper for :c:func:`zfp_field_4d` + + :p c_ptr uncompressed_ptr [in]: Pointer to uncompressed data + :p integer scalar_type [in]: :ref:`zFORp_type ` enum describing uncompressed scalar type + :p integer nx [in]: Number of array elements in *x* dimension + :p integer ny [in]: Number of array elements in *y* dimension + :p integer nz [in]: Number of array elements in *z* dimension + :p integer nw [in]: Number of array elements in *w* dimension + :r field: Newly allocated field + :rtype field: zFORp_field + +---- + +.. f:subroutine:: zFORp_field_free(field) + + Wrapper for :c:func:`zfp_field_free` + + :p zFORp_field field [inout]: Field metadata + +---- + +.. f:function:: zFORp_field_pointer(field) + + Wrapper for :c:func:`zfp_field_pointer` + + :p zFORp_field field [in]: Field metadata + :r arr_ptr: Pointer to raw (uncompressed/decompressed) array + :rtype arr_ptr: c_ptr + +---- + +.. f:function:: zFORp_field_begin(field) + + Wrapper for :c:func:`zfp_field_begin` + + :p zFORp_field field [in]: Field metadata + :r begin_ptr: Pointer to lowest memory address spanned by field + :rtype begin_ptr: c_ptr + +---- + +.. f:function:: zFORp_field_type(field) + + Wrapper for :c:func:`zfp_field_type` + + :p zFORp_field field [in]: Field metadata + :r scalar_type: :ref:`zFORp_type ` enum describing uncompressed scalar type + :rtype scalar_type: integer + +---- + +.. 
f:function:: zFORp_field_precision(field) + + Wrapper for :c:func:`zfp_field_precision` + + :p zFORp_field field [in]: Field metadata + :r prec: Scalar type precision in number of bits + :rtype prec: integer + +---- + +.. f:function:: zFORp_field_dimensionality(field) + + Wrapper for :c:func:`zfp_field_dimensionality` + + :p zFORp_field field [in]: Field metadata + :r dims: Dimensionality of array + :rtype dims: integer + +---- + +.. f:function:: zFORp_field_size(field, size_arr) + + Wrapper for :c:func:`zfp_field_size` + + :p zFORp_field field [in]: Field metadata + :p size_arr [inout]: Integer array to write field dimensions into + :ptype size_arr: integer,dimension(4),target + :r total_size: Total number of array elements + :rtype total_size: integer (kind=8) + +---- + +.. f:function:: zFORp_field_size_bytes(field) + + Wrapper for :c:func:`zfp_field_size_bytes` + + :p zFORp_field field [in]: Field metadata + :r byte_size: Number of bytes spanned by field data including gaps (if any) + :rtype byte_size: integer (kind=8) + +---- + +.. f:function:: zFORp_field_blocks(field) + + Wrapper for :c:func:`zfp_field_blocks` + + :p zFORp_field field [in]: Field metadata + :r blocks: Total number of blocks spanned by field + :rtype blocks: integer (kind=8) + +---- + +.. f:function:: zFORp_field_stride(field, stride_arr) + + Wrapper for :c:func:`zfp_field_stride` + + :p zFORp_field field [in]: Field metadata + :p stride_arr [inout]: Integer array to write strides into + :ptype stride_arr: integer,dimension(4),target + :r is_strided: Indicate whether field is strided (1) or not (0) + :rtype is_strided: integer + +---- + +.. f:function:: zFORp_field_is_contiguous(field) + + Wrapper for :c:func:`zfp_field_is_contiguous` + + :p zFORp_field field [in]: Field metadata + :r is_contiguous: Indicate whether field is contiguous (1) or not (0) + :rtype is_contiguous: integer + +---- + +.. 
f:function:: zFORp_field_metadata(field) + + Wrapper for :c:func:`zfp_field_metadata` + + :p zFORp_field field [in]: Field metadata + :r encoded_metadata: Compact encoding of metadata + :rtype encoded_metadata: integer (kind=8) + +---- + +.. f:subroutine:: zFORp_field_set_pointer(field, arr_ptr) + + Wrapper for :c:func:`zfp_field_set_pointer` + + :p zFORp_field field [in]: Field metadata + :p c_ptr arr_ptr [in]: Pointer to beginning of uncompressed array + +---- + +.. f:function:: zFORp_field_set_type(field, scalar_type) + + Wrapper for :c:func:`zfp_field_set_type` + + :p zFORp_field field [in]: Field metadata + :p integer scalar_type: :ref:`zFORp_type ` enum indicating desired scalar type + :r type_result: :ref:`zFORp_type ` enum indicating actual scalar type + :rtype type_result: integer + +---- + +.. f:subroutine:: zFORp_field_set_size_1d(field, nx) + + Wrapper for :c:func:`zfp_field_set_size_1d` + + :p zFORp_field field [in]: Field metadata + :p integer nx [in]: Number of array elements + +---- + +.. f:subroutine:: zFORp_field_set_size_2d(field, nx, ny) + + Wrapper for :c:func:`zfp_field_set_size_2d` + + :p zFORp_field field [in]: Field metadata + :p integer nx [in]: Number of array elements in *x* dimension + :p integer ny [in]: Number of array elements in *y* dimension + +---- + +.. f:subroutine:: zFORp_field_set_size_3d(field, nx, ny, nz) + + Wrapper for :c:func:`zfp_field_set_size_3d` + + :p zFORp_field field [in]: Field metadata + :p integer nx [in]: Number of array elements in *x* dimension + :p integer ny [in]: Number of array elements in *y* dimension + :p integer nz [in]: Number of array elements in *z* dimension + +---- + +.. 
f:subroutine:: zFORp_field_set_size_4d(field, nx, ny, nz, nw) + + Wrapper for :c:func:`zfp_field_set_size_4d` + + :p zFORp_field field [in]: Field metadata + :p integer nx [in]: Number of array elements in *x* dimension + :p integer ny [in]: Number of array elements in *y* dimension + :p integer nz [in]: Number of array elements in *z* dimension + :p integer nw [in]: Number of array elements in *w* dimension + +---- + +.. f:subroutine:: zFORp_field_set_stride_1d(field, sx) + + Wrapper for :c:func:`zfp_field_set_stride_1d` + + :p zFORp_field field [in]: Field metadata + :p integer sx [in]: Stride in number of scalars + +---- + +.. f:subroutine:: zFORp_field_set_stride_2d(field, sx, sy) + + Wrapper for :c:func:`zfp_field_set_stride_2d` + + :p zFORp_field field [in]: Field metadata + :p integer sx [in]: Stride in *x* dimension + :p integer sy [in]: Stride in *y* dimension + +---- + +.. f:subroutine:: zFORp_field_set_stride_3d(field, sx, sy, sz) + + Wrapper for :c:func:`zfp_field_set_stride_3d` + + :p zFORp_field field [in]: Field metadata + :p integer sx [in]: Stride in *x* dimension + :p integer sy [in]: Stride in *y* dimension + :p integer sz [in]: Stride in *z* dimension + +---- + +.. f:subroutine:: zFORp_field_set_stride_4d(field, sx, sy, sz, sw) + + Wrapper for :c:func:`zfp_field_set_stride_4d` + + :p zFORp_field field [in]: Field metadata + :p integer sx [in]: Stride in *x* dimension + :p integer sy [in]: Stride in *y* dimension + :p integer sz [in]: Stride in *z* dimension + :p integer sw [in]: Stride in *w* dimension + +---- + +.. f:function:: zFORp_field_set_metadata(field, encoded_metadata) + + Wrapper for :c:func:`zfp_field_set_metadata` + + :p zFORp_field field [in]: Field metadata + :p encoded_metadata [in]: Compact encoding of metadata + :ptype encoded_metadata: integer (kind=8) + :r is_success: Indicate whether metadata was successfully set (1) or not (0) + :rtype is_success: integer + +Compression and Decompression +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. 
f:function:: zFORp_compress(stream, field)
+
+  Wrapper for :c:func:`zfp_compress`
+
+  :p zFORp_stream stream [in]: Compressed stream
+  :p zFORp_field field [in]: Field metadata
+  :r bitstream_offset_bytes: Bit stream offset after compression, in bytes, or zero on failure
+  :rtype bitstream_offset_bytes: integer (kind=8)
+
+----
+
+.. f:function:: zFORp_decompress(stream, field)
+
+  Wrapper for :c:func:`zfp_decompress`
+
+  :p zFORp_stream stream [in]: Compressed stream
+  :p zFORp_field field [in]: Field metadata
+  :r bitstream_offset_bytes: Bit stream offset after decompression, in bytes, or zero on failure
+  :rtype bitstream_offset_bytes: integer (kind=8)
+
+----
+
+.. f:function:: zFORp_write_header(stream, field, mask)
+
+  Wrapper for :c:func:`zfp_write_header`
+
+  :p zFORp_stream stream [in]: Compressed stream
+  :p zFORp_field field [in]: Field metadata
+  :p integer mask [in]: :ref:`Bit mask ` indicating which parts of header to write
+  :r num_bits_written: Number of header bits written or zero on failure
+  :rtype num_bits_written: integer (kind=8)
+
+----
+
+..
f:function:: zFORp_read_header(stream, field, mask)
+
+  Wrapper for :c:func:`zfp_read_header`
+
+  :p zFORp_stream stream [in]: Compressed stream
+  :p zFORp_field field [in]: Field metadata
+  :p integer mask [in]: :ref:`Bit mask ` indicating which parts of header to read
+  :r num_bits_read: Number of header bits read or zero on failure
+  :rtype num_bits_read: integer (kind=8)
diff --git a/docs/source/zfp-rounding.pdf b/docs/source/zfp-rounding.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..6cda53b3f4c8f1747ccb3e6705c814edab5aa92d
GIT binary patch
literal 48460
[48460 bytes of binary PDF data omitted]
z%`lq2SRca7E;7Ki`OC_4F`S)My5PvN!(fj-!86UED6*Xu6?xt-J)sQsH5B_AWUZC0 zJfvHOQ<*5mCzvhJHxy&d)fe68(_UMomZn8X^tHTA*8Bl`X=PNslgxTQXfombw|kLV z6IT}fL^>VodUa0RUrbzKtxURyE^!a-D!+r;TF4xK7#~Ph;p%Yq59N{VIdDrD58@RORwDv zxiDmx7@?prR$ZgU9Ev2rl)-KM*lxlvq!53~X`{kpa9Pd6wqir{QjE~cA=##;! zX8F){r*j`V6}-~!IJ^6N3?9W#_Qq?f(5ML8?u#feR3t^T=<-h$}TLPTr&LPM3=Fb*>HzR=E28 zsln;&IMe2I?GR@-2JQ*o*A#*mtNF{lbG~0K+3WOyY_Z?3yJFn*_=E*d$R_x-ecC!K z!Z#agp>zEW_H(P+DOMTh5@7*g%9yQF5w!ZUu!2pD*Yl~NV}!;r=5)~B^+ACwTH{~) z>AMJ)tes*XuVwN88IWx(&vnHWJ@^aG@kvhMI;f1ed{*=A%$AHx7w=(1E#8CHZ^rAa z?r!R@u36oafOn>IlR=l zjGJT@AE)-}^k#cuiUlWnU7cEo!AqN#ih`OMN<{&^c9kN3OnsmI{xU%7orr)9Mvpa& zzG$Vz3*fUG4qZ}w;Yr>6Kyh>_(_C0@vP~D^dQZN#t8hN&yT`0*f`$Zc1{L0+kkFd; z4uY4d#v}Q=7F?Q+^Qte~%xt)6o!Mjn1r^&(eJ<8@e%RupLO>9lJ}$-G5ts3M#?+hWzYefSHq*P97PzpU7jnj9M$< z3%dbp`hLphNxS0XH_QHPdOVOi@GV0aMgE z&+P8rs*!}fN{`Wst-bb-bHiVI2lT>=nnEvM)1-TGL&llL_6oML>MEUHl#o;}lfKj)-n^-E62dZG8(ag_91?VWOba_&$732I zC#lze?NWZL>EukiO-&v0cug8~!sMO#tk#V|vW@`r@KlSO($!vq$CA&d9YV`arP0jO zVih-zGt4JE37@3wVeN9t5l@j?XND*F*N)>=~*Nh1$9TRak^ZVI>&jQG^vBCt z{pYO6algXrBRuf6Aq-e88ieqmcO|sxjJ( zJLbV$&(=qDTF1;-S3OvUV_Te7t1$d(I|i6pa3PJCqDL|+a-^$BUo2levtD53eD4GF zQ~J2w1{hQD`gUOR>pmI%on6NIq|#&rHsf;aXZnK5Q$-hlNxD1SQF$76tIj9!!ebxK zw~0|EsWC51y|dfPJSS3~bf87U+JxH$=RS1witT+jgmJj*7!}|P!`Cmaqet}g zb_tsTdQzI)ZC))2fwCqkgnb+62%Qx^(dhVcn5Np3Bm_I#NSnlLw{Np!`BuwG*^@sw z}dM6e^QT(-2d*X9=|GWAYXY+as*c<})xctdLBBy1^r_o(@LGe~a z(c)Zvu4~CNNg+~8QUOyXFT(c*j=D|F_meK2^C zN~3@o6#aDBWU+BNBC%}InD`@|VwueM(bsOzD%Z=9i-P3P!-(K@3 zO9VyJldi_U|K0cRFmN6T2_6w;TOiz+n4QZ5q!@_+bbAwZGbez~E&?#?d48u3|Nag8 zuh8pTM&@RYM98i)z+M4DlvRKtp**^OrurVbkTecm)(F6PVR zPa7~85(B*DBN_@IzaQ1m#3BTT?cfk3sRpnrN$e1!08Hh`wJ0E;{iue<01@W1i%GC^WH1z~TF$&_F?pe>5itBLI};KttR~)Ggi3 zi0hTe9URUHC;~y;REP`. Options :option:`-r`, :option:`-p`, +and :option:`-a` provide a simpler interface to setting all of the above +parameters by invoking +:ref:`fixed-rate ` (:option:`-r`), +:ref:`-precision ` (:option:`-p`), and +:ref:`-accuracy ` (:option:`-a`) mode. 
+:ref:`Reversible mode ` for lossless compression is +specified using :option:`-R`. + +Usage +----- + +Below is a description of each command-line option accepted by |zfpcmd|. + +General options +^^^^^^^^^^^^^^^ + +.. option:: -h + + Read/write array and compression parameters from/to compressed header. + +.. option:: -q + + Quiet mode; suppress diagnostic output. + +.. option:: -s + + Evaluate and print the following error statistics: + + * rmse: The root mean square error. + * nrmse: The root mean square error normalized to the range. + * maxe: The maximum absolute pointwise error. + * psnr: The peak signal to noise ratio in decibels. + +Input and output +^^^^^^^^^^^^^^^^ + +.. option:: -i + + Name of uncompressed binary input file. Use "-" for standard input. + +.. option:: -o + + Name of decompressed binary output file. Use "-" for standard output. + May be used with either :option:`-i` or :option:`-z`, or both. + +.. option:: -z + + Name of compressed input (without :option:`-i`) or output file (with + :option:`-i`). Use "-" for standard input or output. + +When :option:`-i` is specified, data is read from the corresponding +uncompressed file, compressed, and written to the compressed file +specified by :option:`-z` (when present). Without :option:`-i`, +compressed data is read from the file specified by :option:`-z` +and decompressed. In either case, the reconstructed data can be +written to the file specified by :option:`-o`. + +Array type and dimensions +^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. option:: -f + + Single precision (float type). Shorthand for :code:`-t f32`. + +.. option:: -d + + Double precision (double type). Shorthand for :code:`-t f64`. + +.. option:: -t + + Specify scalar type as one of i32, i64, f32, f64 for 32- or 64-bit + integer or floating-point scalar type. + +.. option:: -1 + + Dimensions of 1D C array :code:`a[nx]`. + +.. option:: -2 + + Dimensions of 2D C array :code:`a[ny][nx]`. + +.. option:: -3 + + Dimensions of 3D C array :code:`a[nz][ny][nx]`.
+ +.. option:: -4 + + Dimensions of 4D C array :code:`a[nw][nz][ny][nx]`. + +When :option:`-i` is used, the scalar type and array dimensions must be +specified. One of :option:`-f`, :option:`-d`, or :option:`-t` specifies +the input scalar type. :option:`-1`, :option:`-2`, :option:`-3`, or +:option:`-4` specifies the array dimensions. The same parameters must +be given when decompressing data (without :option:`-i`), unless a header +was stored using :option:`-h` during compression. + +Compression parameters +^^^^^^^^^^^^^^^^^^^^^^ + +One of the following :ref:`compression modes ` must be selected. + +.. option:: -r + + Specify fixed rate in terms of number of compressed bits per + integer or floating-point value. + +.. option:: -p + + Specify fixed precision in terms of number of uncompressed bits per + value. + +.. option:: -a + + Specify fixed accuracy in terms of absolute error tolerance. + +.. option:: -R + + Reversible (lossless) mode. + +.. option:: -c + + Specify expert mode parameters. + +When :option:`-i` is used, the compression parameters must be specified. +The same parameters must be given when decompressing data (without +:option:`-i`), unless a header was stored using :option:`-h` when +compressing. See the section on :ref:`compression modes ` for a +discussion of these parameters. + +Execution parameters +^^^^^^^^^^^^^^^^^^^^ + +.. option:: -x + + Specify execution policy and parameters. The default policy is + :code:`-x serial` for sequential execution. To enable OpenMP parallel + compression, use the :code:`omp` policy. Without parameters, + :code:`-x omp` selects OpenMP with default settings, which typically + implies maximum concurrency available. Use :code:`-x omp=threads` to + request a specific number of threads (see also + :c:func:`zfp_stream_set_omp_threads`). A thread count of zero is + ignored and results in the default number of threads. 
Use + :code:`-x omp=threads,chunk_size` to specify the chunk size in number + of blocks (see also :c:func:`zfp_stream_set_omp_chunk_size`). A + chunk size of zero is ignored and results in the default size. + Use :code:`-x cuda` for parallel CUDA compression and decompression. + +As of |cudarelease|, the execution policy applies to both compression +and decompression. If the execution policy is not supported for +decompression, then |zfp| will attempt to fall back on serial +decompression. This is done only when both compression and decompression +are performed as part of a single execution, e.g., when specifying both +:option:`-i` and :option:`-o`. + +Examples +^^^^^^^^ + + * :code:`-i file` : read uncompressed file and compress to memory + * :code:`-z file` : read compressed file and decompress to memory + * :code:`-i ifile -z zfile` : read uncompressed ifile, write compressed zfile + * :code:`-z zfile -o ofile` : read compressed zfile, write decompressed ofile + * :code:`-i ifile -o ofile` : read ifile, compress, decompress, write ofile + * :code:`-i file -s` : read uncompressed file, compress to memory, print stats + * :code:`-i - -o - -s` : read stdin, compress, decompress, write stdout, print stats + * :code:`-f -3 100 100 100 -r 16` : 2x fixed-rate compression of 100 |times| 100 |times| 100 floats + * :code:`-d -1 1000000 -r 32` : 2x fixed-rate compression of 1,000,000 doubles + * :code:`-d -2 1000 1000 -p 32` : 32-bit precision compression of 1000 |times| 1000 doubles + * :code:`-d -1 1000000 -a 1e-9` : compression of 1,000,000 doubles with < 10\ :sup:`-9` max error + * :code:`-d -1 1000000 -c 64 64 0 -1074` : 4x fixed-rate compression of 1,000,000 doubles + * :code:`-x omp=16,256` : parallel compression with 16 threads, 256-block chunks diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 7cc76068..73137223 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -1,6 +1,14 @@ +add_executable(array array.cpp)
+target_compile_definitions(array PRIVATE ${zfp_compressed_array_defs}) +target_link_libraries(array zfp) + add_executable(diffusion diffusion.cpp) -target_link_libraries(diffusion zfp) target_compile_definitions(diffusion PRIVATE ${zfp_compressed_array_defs}) +if(ZFP_WITH_OPENMP) + target_link_libraries(diffusion zfp OpenMP::OpenMP_CXX) +else() + target_link_libraries(diffusion zfp) +endif() if(BUILD_CFP) add_executable(diffusionC diffusionC.c) @@ -14,9 +22,18 @@ add_executable(iterator iterator.cpp) target_link_libraries(iterator zfp) target_compile_definitions(iterator PRIVATE ${zfp_compressed_array_defs}) +if(BUILD_CFP) + add_executable(iteratorC iteratorC.c) + target_link_libraries(iteratorC cfp) +endif() + add_executable(pgm pgm.c) target_link_libraries(pgm zfp) +add_executable(ppm ppm.c) +target_link_libraries(ppm zfp) +target_compile_definitions(ppm PRIVATE ${ppm_private_defs}) + add_executable(simple simple.c) target_link_libraries(simple zfp) @@ -24,6 +41,7 @@ add_executable(speed speed.c) target_link_libraries(speed zfp) if(HAVE_LIBM_MATH) + target_link_libraries(array m) target_link_libraries(diffusion m) if(BUILD_CFP) @@ -32,5 +50,6 @@ if(HAVE_LIBM_MATH) target_link_libraries(inplace m) target_link_libraries(pgm m) + target_link_libraries(ppm m) target_link_libraries(simple m) endif() diff --git a/examples/Makefile b/examples/Makefile index bb44b1e1..0e288544 100644 --- a/examples/Makefile +++ b/examples/Makefile @@ -1,35 +1,62 @@ include ../Config BINDIR = ../bin -TARGETS = $(BINDIR)/diffusion\ +TARGETS = $(BINDIR)/array\ + $(BINDIR)/diffusion\ $(BINDIR)/inplace\ $(BINDIR)/iterator\ $(BINDIR)/pgm\ + $(BINDIR)/ppm\ $(BINDIR)/simple\ $(BINDIR)/speed +INCS = -I../include LIBS = -L../lib -lzfp -CLIBS = $(LIBS) -lm -CXXLIBS = $(LIBS) +CLIBS = $(LIBS) $(LDFLAGS) -lm +CXXLIBS = $(LIBS) $(LDFLAGS) + +# add cfp examples when BUILD_CFP is enabled +ifneq ($(BUILD_CFP),0) + TARGETS += $(BINDIR)/diffusionC $(BINDIR)/iteratorC +endif + all: $(TARGETS) 
+$(BINDIR)/array: array.cpp ../lib/$(LIBZFP) + $(CXX) $(CXXFLAGS) $(INCS) array.cpp $(CXXLIBS) -o $@ + $(BINDIR)/diffusion: diffusion.cpp ../lib/$(LIBZFP) - $(CXX) $(CXXFLAGS) -I../array diffusion.cpp $(CXXLIBS) -o $@ + $(CXX) $(CXXFLAGS) $(INCS) diffusion.cpp $(CXXLIBS) -o $@ + +$(BINDIR)/diffusionC: diffusionC.o ../lib/$(LIBZFP) ../lib/$(LIBCFP) + $(CXX) $(CXXFLAGS) diffusionC.o -lcfp $(CLIBS) -o $@ + +diffusionC.o: diffusionC.c + $(CC) $(CFLAGS) $(INCS) -c diffusionC.c $(BINDIR)/inplace: inplace.c ../lib/$(LIBZFP) - $(CC) $(CFLAGS) inplace.c $(CLIBS) -o $@ + $(CC) $(CFLAGS) $(INCS) inplace.c $(CLIBS) -o $@ $(BINDIR)/iterator: iterator.cpp ../lib/$(LIBZFP) - $(CXX) $(CXXFLAGS) -I../array iterator.cpp $(CXXLIBS) -o $@ + $(CXX) $(CXXFLAGS) $(INCS) iterator.cpp $(CXXLIBS) -o $@ + +$(BINDIR)/iteratorC: iteratorC.o ../lib/$(LIBZFP) ../lib/$(LIBCFP) + $(CXX) $(CXXFLAGS) iteratorC.o -lcfp $(CLIBS) -o $@ + +iteratorC.o: iteratorC.c + $(CC) $(CFLAGS) $(INCS) -c iteratorC.c $(BINDIR)/pgm: pgm.c ../lib/$(LIBZFP) - $(CC) $(CFLAGS) pgm.c $(CLIBS) -o $@ + $(CC) $(CFLAGS) $(INCS) pgm.c $(CLIBS) -o $@ + +$(BINDIR)/ppm: ppm.c ../lib/$(LIBZFP) + $(CC) $(CFLAGS) $(PPM_FLAGS) $(INCS) ppm.c $(CLIBS) -o $@ $(BINDIR)/simple: simple.c ../lib/$(LIBZFP) - $(CC) $(CFLAGS) simple.c $(CLIBS) -o $@ + $(CC) $(CFLAGS) $(INCS) simple.c $(CLIBS) -o $@ $(BINDIR)/speed: speed.c ../lib/$(LIBZFP) - $(CC) $(CFLAGS) speed.c $(CLIBS) -o $@ + $(CC) $(CFLAGS) $(INCS) speed.c $(CLIBS) -o $@ clean: - rm -f $(TARGETS) + rm -f $(TARGETS) $(BINDIR)/diffusionC $(BINDIR)/iteratorC diffusionC.o iteratorC.o diff --git a/examples/array.cpp b/examples/array.cpp new file mode 100644 index 00000000..233cb36e --- /dev/null +++ b/examples/array.cpp @@ -0,0 +1,42 @@ +// simple example that shows how to work with zfp's compressed-array classes + +#include <iostream> +#include <vector> +#include "zfp/array2.hpp" + +int main() +{ + // array dimensions (can be arbitrary) and zfp memory footprint + const size_t nx = 12; + const size_t ny = 8; +
const double bits_per_value = 4.0; + + // declare 2D arrays using STL and zfp + std::vector<double> vec(nx * ny); + zfp::array2<double> arr(nx, ny, bits_per_value); + + // initialize arrays to linear ramp + for (size_t y = 0; y < ny; y++) + for (size_t x = 0; x < nx; x++) + arr(x, y) = vec[x + nx * y] = x + nx * y; + + // alternative initialization of entire array, arr: + // arr.set(&vec[0]); + + // optional: force compression of cached data + arr.flush_cache(); + + // print values + for (size_t y = 0; y < ny; y++) + for (size_t x = 0; x < nx; x++) + std::cout << vec[x + nx * y] << " " << arr(x, y) << std::endl; + + // alternative using printf(); note the necessary cast: + // printf("%g %g\n", vec[x + nx * y], (double)arr(x, y)); + + // print storage size of payload data + std::cout << "vec bytes = " << vec.capacity() * sizeof(vec[0]) << std::endl; + std::cout << "zfp bytes = " << arr.size_bytes(ZFP_DATA_PAYLOAD) << std::endl; + + return 0; +} diff --git a/examples/array2d.h b/examples/array2d.h deleted file mode 100644 index 8ba7291c..00000000 --- a/examples/array2d.h +++ /dev/null @@ -1,58 +0,0 @@ -#ifndef ARRAY2D_H -#define ARRAY2D_H - -#include <climits> -#include <vector> - -#define unused_(x) ((void)(x)) - -typedef unsigned int uint; - -// uncompressed 2D double-precision array (for comparison) -namespace raw { -class array2d { -public: - array2d() : nx(0), ny(0) {} - array2d(uint nx, uint ny, double rate = 0.0, const double* p = 0, size_t csize = 0) : nx(nx), ny(ny), data(nx * ny, 0.0) - { - unused_(rate); - unused_(p); - unused_(csize); - } - void resize(uint nx, uint ny) { this->nx = nx; this->ny = ny; data.resize(nx * ny, 0.0); } - size_t size() const { return data.size(); } - size_t size_x() const { return nx; } - size_t size_y() const { return ny; } - double rate() const { return CHAR_BIT * sizeof(double); } - size_t cache_size() const { return 0; } - double& operator()(uint x, uint y) { return data[x + nx * y]; } - const double& operator()(uint x, uint y) const { return data[x + nx *
y]; } - double& operator[](uint i) { return data[i]; } - const double& operator[](uint i) const { return data[i]; } - class iterator { - public: - double& operator*() const { return array->operator[](index); } - iterator& operator++() { index++; return *this; } - iterator operator++(int) { iterator p = *this; index++; return p; } - bool operator==(const iterator& it) const { return array == it.array && index == it.index; } - bool operator!=(const iterator& it) const { return !operator==(it); } - uint i() const { return index % array->nx; } - uint j() const { return index / array->nx; } - protected: - friend class array2d; - iterator(array2d* array, uint index) : array(array), index(index) {} - array2d* array; - uint index; - }; - iterator begin() { return iterator(this, 0); } - iterator end() { return iterator(this, nx * ny); } -protected: - uint nx; - uint ny; - std::vector<double> data; -}; -} - -#undef unused_ - -#endif diff --git a/examples/array2d.hpp b/examples/array2d.hpp new file mode 100644 index 00000000..c349328b --- /dev/null +++ b/examples/array2d.hpp @@ -0,0 +1,72 @@ +#ifndef ARRAY2D_HPP +#define ARRAY2D_HPP + +#include <climits> +#include <vector> + +typedef unsigned int uint; + +// uncompressed 2D double-precision array (for comparison) +namespace raw { +class array2d { +public: + // constructors + array2d() : nx(0), ny(0) {} + array2d(size_t nx, size_t ny, double = 0.0, const double* = 0, size_t = 0) : nx(nx), ny(ny), data(nx * ny, 0.0) {} + + // array size + size_t size() const { return data.size(); } + size_t size_x() const { return nx; } + size_t size_y() const { return ny; } + void resize(size_t nx, size_t ny) { this->nx = nx; this->ny = ny; data.resize(nx * ny, 0.0); } + + // rate in bits/value + double rate() const { return CHAR_BIT * sizeof(double); } + + // cache size in bytes + size_t cache_size() const { return 0; } + + // byte size of data structures + size_t size_bytes(uint mask = ZFP_DATA_ALL) const + { + size_t size = 0; + if (mask & ZFP_DATA_META) + size +=
sizeof(*this); + if (mask & ZFP_DATA_PAYLOAD) + size += data.size() * sizeof(double); + return size; + } + + // accessors + double& operator()(size_t x, size_t y) { return data[x + nx * y]; } + const double& operator()(size_t x, size_t y) const { return data[x + nx * y]; } + double& operator[](size_t index) { return data[index]; } + const double& operator[](size_t index) const { return data[index]; } + + // minimal-functionality forward iterator + class iterator { + public: + double& operator*() const { return array->operator[](index); } + iterator& operator++() { index++; return *this; } + iterator operator++(int) { iterator p = *this; index++; return p; } + bool operator==(const iterator& it) const { return array == it.array && index == it.index; } + bool operator!=(const iterator& it) const { return !operator==(it); } + size_t i() const { return index % array->nx; } + size_t j() const { return index / array->nx; } + protected: + friend class array2d; + iterator(array2d* array, size_t index) : array(array), index(index) {} + array2d* array; + size_t index; + }; + + iterator begin() { return iterator(this, 0); } + iterator end() { return iterator(this, nx * ny); } + +protected: + size_t nx, ny; + std::vector<double> data; +}; +} + +#endif diff --git a/examples/diffusion.cpp b/examples/diffusion.cpp index 82cc109d..a62f191e 100644 --- a/examples/diffusion.cpp +++ b/examples/diffusion.cpp @@ -6,19 +6,46 @@ #include #include #include -#include "zfparray2.h" -#include "array2d.h" +#include +#include "zfp/array2.hpp" +#include "zfp/constarray2.hpp" +#include "zfp/codec/gencodec.hpp" +#include "array2d.hpp" + +// add half precision if compiler supports it +#define __STDC_WANT_IEC_60559_TYPES_EXT__ +#include <float.h> +#ifdef FLT16_MAX + #define WITH_HALF 1 +#else + #undef WITH_HALF +#endif #ifdef _OPENMP #include <omp.h> #endif -#define unused_(x) ((void)(x)) +// uncompressed tiled arrays based on zfp generic codec +namespace tiled { +#if WITH_HALF + typedef zfp::array2< double, zfp::codec::generic2<double, __fp16> > array2h; +#endif + typedef zfp::array2< double, zfp::codec::generic2<double, float> > array2f; + typedef zfp::array2< double, zfp::codec::generic2<double, double> > array2d; +} + +// enumeration of uncompressed storage types +enum storage_type { + type_none = 0, + type_half = 1, + type_float = 2, + type_double = 3 +}; // constants used in the solution class Constants { public: - Constants(int nx, int ny, int nt) : + Constants(size_t nx, size_t ny, size_t nt) : nx(nx), ny(ny), nt(nt), @@ -32,11 +59,11 @@ class Constants { pi(3.14159265358979323846) {} - int nx; // grid points in x - int ny; // grid points in y - int nt; // number of time steps (0 for default) - int x0; // x location of heat source - int y0; // y location of heat source + size_t nx; // grid points in x + size_t ny; // grid points in y + size_t nt; // number of time steps (0 for default) + size_t x0; // x location of heat source + size_t y0; // y location of heat source double k; // diffusion constant double dx; // grid spacing in x double dy; // grid spacing in y @@ -45,20 +72,31 @@ class Constants { double pi; // 3.141...
}; +// compute Laplacian uxx + uyy at (x, y) +template <class array2d> +inline double +laplacian(const array2d& u, size_t x, size_t y, const Constants& c) +{ + double uxx = (u(x - 1, y) - 2 * u(x, y) + u(x + 1, y)) / (c.dx * c.dx); + double uyy = (u(x, y - 1) - 2 * u(x, y) + u(x, y + 1)) / (c.dy * c.dy); + return uxx + uyy; +} + +template <class state, class scratch> inline void -time_step_parallel(array2d& u, const Constants& c); +time_step_parallel(state& u, scratch& v, const Constants& c); +#ifdef _OPENMP // advance solution in parallel via thread-safe views template <> inline void -time_step_parallel(zfp::array2d& u, const Constants& c) +time_step_parallel(zfp::array2d& u, zfp::array2d& du, const Constants& c) { -#ifdef _OPENMP // flush shared cache to ensure cache consistency across threads u.flush_cache(); + // zero-initialize du + du.set(0); // compute du/dt in parallel - zfp::array2d du(c.nx, c.ny, u.rate(), 0, u.cache_size()); #pragma omp parallel { // create read-only private view of entire array u @@ -67,127 +105,177 @@ time_step_parallel(zfp::array2d& u, const Constants& c) zfp::array2d::private_view mydu(&du); mydu.partition(omp_get_thread_num(), omp_get_num_threads()); // process rectangular region owned by this thread - for (uint j = 0; j < mydu.size_y(); j++) { - int y = mydu.global_y(j); + for (size_t j = 0; j < mydu.size_y(); j++) { + size_t y = mydu.global_y(j); if (1 <= y && y <= c.ny - 2) - for (uint i = 0; i < mydu.size_x(); i++) { - int x = mydu.global_x(i); - if (1 <= x && x <= c.nx - 2) { - double uxx = (myu(x - 1, y) - 2 * myu(x, y) + myu(x + 1, y)) / (c.dx * c.dx); - double uyy = (myu(x, y - 1) - 2 * myu(x, y) + myu(x, y + 1)) / (c.dy * c.dy); - mydu(i, j) = c.dt * c.k * (uxx + uyy); - } + for (size_t i = 0; i < mydu.size_x(); i++) { + size_t x = mydu.global_x(i); + if (1 <= x && x <= c.nx - 2) + mydu(i, j) = c.dt * c.k * laplacian(myu, x, y, c); } } // compress all private cached blocks to shared storage mydu.flush_cache(); } // take forward Euler step in serial - for (uint i = 0; i
< u.size(); i++) + for (size_t i = 0; i < u.size(); i++) u[i] += du[i]; +} #else - unused_(u); - unused_(c); +// dummy template instantiation when OpenMP support is not available +template <> +inline void time_step_parallel(zfp::array2d&, zfp::array2d&, const Constants&) {} +#endif + +// dummy template instantiations; never executed +template <> +inline void time_step_parallel(zfp::const_array2d&, raw::array2d&, const Constants&) {} +template <> +inline void time_step_parallel(raw::array2d&, raw::array2d&, const Constants&) {} +template <> +inline void time_step_parallel(tiled::array2d&, tiled::array2d&, const Constants&) {} +template <> +inline void time_step_parallel(tiled::array2f&, tiled::array2f&, const Constants&) {} +#if WITH_HALF +template <> +inline void time_step_parallel(tiled::array2h&, tiled::array2h&, const Constants&) {} #endif + +// advance solution using integer array indices (generic implementation) +template <class state, class scratch> +inline void +time_step_indexed(state& u, scratch& du, const Constants& c) +{ + // compute du/dt + for (size_t y = 1; y < c.ny - 1; y++) + for (size_t x = 1; x < c.nx - 1; x++) + du(x, y) = c.dt * c.k * laplacian(u, x, y, c); + // take forward Euler step + for (uint i = 0; i < u.size(); i++) + u[i] += du[i]; } -// dummy template instantiation; never executed +// advance solution using integer array indices (read-only arrays) template <> inline void -time_step_parallel(raw::array2d& u, const Constants& c) +time_step_indexed(zfp::const_array2d& u, raw::array2d& v, const Constants& c) { - unused_(u); - unused_(c); + // initialize v as uncompressed copy of u + u.get(&v[0]); + // take forward Euler step v += (du/dt) dt + for (size_t y = 1; y < c.ny - 1; y++) + for (size_t x = 1; x < c.nx - 1; x++) + v(x, y) += c.dt * c.k * laplacian(u, x, y, c); + // update u with uncompressed copy v + u.set(&v[0]); } -// advance solution using array iterators -template <class array2d> +// advance solution using array iterators (generic implementation) +template <class state, class scratch> inline void
-time_step_indexed(array2d& u, const Constants& c) +time_step_iterated(state& u, scratch& du, const Constants& c) { // compute du/dt - array2d du(c.nx, c.ny, u.rate(), 0, u.cache_size()); - for (int y = 1; y < c.ny - 1; y++) { - for (int x = 1; x < c.nx - 1; x++) { - double uxx = (u(x - 1, y) - 2 * u(x, y) + u(x + 1, y)) / (c.dx * c.dx); - double uyy = (u(x, y - 1) - 2 * u(x, y) + u(x, y + 1)) / (c.dy * c.dy); - du(x, y) = c.dt * c.k * (uxx + uyy); - } + for (typename scratch::iterator q = du.begin(); q != du.end(); q++) { + size_t x = q.i(); + size_t y = q.j(); + if (1 <= x && x <= c.nx - 2 && + 1 <= y && y <= c.ny - 2) + *q = c.dt * c.k * laplacian(u, x, y, c); } // take forward Euler step - for (uint i = 0; i < u.size(); i++) - u[i] += du[i]; + for (typename state::iterator p = u.begin(); p != u.end(); p++) + *p += du(p.i(), p.j()); } -// advance solution using array iterators -template +// advance solution using array iterators (read-only arrays) +template <> inline void -time_step_iterated(array2d& u, const Constants& c) +time_step_iterated(zfp::const_array2d& u, raw::array2d& v, const Constants& c) { - // compute du/dt - array2d du(c.nx, c.ny, u.rate(), 0, u.cache_size()); - for (typename array2d::iterator p = du.begin(); p != du.end(); p++) { - int x = p.i(); - int y = p.j(); + // initialize v as uncompressed copy of u + u.get(&v[0]); + // take forward Euler step v += (du/dt) dt + for (raw::array2d::iterator q = v.begin(); q != v.end(); q++) { + size_t x = q.i(); + size_t y = q.j(); if (1 <= x && x <= c.nx - 2 && - 1 <= y && y <= c.ny - 2) { - double uxx = (u(x - 1, y) - 2 * u(x, y) + u(x + 1, y)) / (c.dx * c.dx); - double uyy = (u(x, y - 1) - 2 * u(x, y) + u(x, y + 1)) / (c.dy * c.dy); - *p = c.dt * c.k * (uxx + uyy); - } + 1 <= y && y <= c.ny - 2) + *q += c.dt * c.k * laplacian(u, x, y, c); } - // take forward Euler step - for (typename array2d::iterator p = u.begin(), q = du.begin(); p != u.end(); p++, q++) - *p += *q; + // update u with uncompressed copy 
v + u.set(&v[0]); } -// solve heat equation using -template <class array2d> -inline double -solve(array2d& u, const Constants& c, bool iterator, bool parallel) +// set initial conditions with a point heat source (u is assumed zero-initialized) +template <class state, class scratch> +inline void +initialize(state& u, scratch&, const Constants& c) { - // initialize u with point heat source (u is assumed to be zero initialized) u(c.x0, c.y0) = 1; +} + +// set initial conditions for const_array; requires updating the whole array +template <> +inline void +initialize(zfp::const_array2d& u, raw::array2d& v, const Constants& c) +{ + v(c.x0, c.y0) = 1; + u.set(&v[0]); +} + +// solve heat equation +template <class state, class scratch> +inline double +solve(state& u, scratch& v, const Constants& c, bool iterator, bool parallel) +{ + // initialize u with point heat source + initialize(u, v, c); // iterate until final time double t; for (t = 0; t < c.tfinal; t += c.dt) { - std::cerr << "t=" << std::setprecision(6) << std::fixed << t << std::endl; + // print time and effective rate + double rate = double(u.size_bytes(ZFP_DATA_PAYLOAD)) * CHAR_BIT / u.size(); + double rest = double(u.size_bytes(ZFP_DATA_ALL ^ ZFP_DATA_PAYLOAD)) * CHAR_BIT / u.size(); + std::cerr << "time=" << std::setprecision(6) << std::fixed << t << " "; + std::cerr << "rate=" << std::setprecision(3) << std::fixed << rate << " (+" << rest << ")" << std::endl; + // advance solution one time step if (parallel) - time_step_parallel(u, c); + time_step_parallel(u, v, c); else if (iterator) - time_step_iterated(u, c); + time_step_iterated(u, v, c); else - time_step_indexed(u, c); + time_step_indexed(u, v, c); } return t; } // compute sum of array values -template <class array2d> +template <class state> inline double -total(const array2d& u) +total(const state& u) { double s = 0; - const int nx = u.size_x(); - const int ny = u.size_y(); - for (int y = 1; y < ny - 1; y++) - for (int x = 1; x < nx - 1; x++) + const size_t nx = u.size_x(); + const size_t ny = u.size_y(); + for (size_t y = 1; y < ny - 1; y++) + for
(size_t x = 1; x < nx - 1; x++) s += u(x, y); return s; } // compute root mean square error with respect to exact solution -template <class array2d> +template <class state> inline double -error(const array2d& u, const Constants& c, double t) +error(const state& u, const Constants& c, double t) { double e = 0; - for (int y = 1; y < c.ny - 1; y++) { - double py = c.dy * (y - c.y0); - for (int x = 1; x < c.nx - 1; x++) { - double px = c.dx * (x - c.x0); + for (size_t y = 1; y < c.ny - 1; y++) { + double py = c.dy * ((int)y - (int)c.y0); + for (size_t x = 1; x < c.nx - 1; x++) { + double px = c.dx * ((int)x - (int)c.x0); double f = u(x, y); double g = c.dx * c.dy * std::exp(-(px * px + py * py) / (4 * c.k * t)) / (4 * c.pi * c.k * t); e += (f - g) * (f - g); @@ -196,93 +284,195 @@ error(const array2d& u, const Constants& c, double t) return std::sqrt(e / ((c.nx - 2) * (c.ny - 2))); } +// execute solver and evaluate error +template <class state, class scratch> +inline void +execute(state& u, scratch& v, size_t nt, bool iterator, bool parallel) +{ + Constants c(u.size_x(), u.size_y(), nt); + double t = solve(u, v, c, iterator, parallel); + double sum = total(u); + double err = error(u, c, t); + std::cerr.unsetf(std::ios::fixed); + std::cerr << "sum=" << std::setprecision(6) << std::fixed << sum << " error=" << std::setprecision(6) << std::scientific << err << std::endl; +} + +// print usage information inline int usage() { std::cerr << "Usage: diffusion [options]" << std::endl; std::cerr << "Options:" << std::endl; + std::cerr << "-a <tolerance> : use compressed arrays with given absolute error tolerance" << std::endl; + std::cerr << "-b <blocks> : use 'blocks' 4x4 blocks of cache" << std::endl; + std::cerr << "-c : use read-only compressed arrays" << std::endl; + std::cerr << "-d : use double-precision tiled arrays" << std::endl; + std::cerr << "-f : use single-precision tiled arrays" << std::endl; +#if WITH_HALF + std::cerr << "-h : use half-precision tiled arrays" << std::endl; +#endif std::cerr << "-i : traverse arrays using iterators" <<
std::endl; - std::cerr << "-n <nx> <ny> : number of grid points" << std::endl; #ifdef _OPENMP - std::cerr << "-p : use multithreading (only with compressed arrays)" << std::endl; + std::cerr << "-j : use multithreading (only with compressed arrays)" << std::endl; #endif + std::cerr << "-n <nx> <ny> : number of grid points" << std::endl; + std::cerr << "-p <precision> : use compressed arrays with given precision" << std::endl; + std::cerr << "-r <rate> : use compressed arrays with given compressed bits/value" << std::endl; + std::cerr << "-R : use compressed arrays with lossless compression" << std::endl; std::cerr << "-t <nt> : number of time steps" << std::endl; - std::cerr << "-r <rate> : use compressed arrays with 'rate' bits/value" << std::endl; - std::cerr << "-c <blocks> : use 'blocks' 4x4 blocks of cache" << std::endl; return EXIT_FAILURE; } int main(int argc, char* argv[]) { - int nx = 100; - int ny = 100; - int nt = 0; - double rate = 64; + size_t nx = 128; + size_t ny = 128; + size_t nt = 0; + size_t cache_size = 0; + zfp_config config = zfp_config_none(); bool iterator = false; - bool compression = false; bool parallel = false; - int cache = 0; + bool writable = true; + storage_type type = type_none; // parse command-line options for (int i = 1; i < argc; i++) - if (std::string(argv[i]) == "-i") - iterator = true; - else if (std::string(argv[i]) == "-n") { - if (++i == argc || sscanf(argv[i], "%i", &nx) != 1 || - ++i == argc || sscanf(argv[i], "%i", &ny) != 1) + if (std::string(argv[i]) == "-a") { + double tolerance; + if (++i == argc || sscanf(argv[i], "%lf", &tolerance) != 1) return usage(); + config = zfp_config_accuracy(tolerance); } + else if (std::string(argv[i]) == "-b") { + if (++i == argc || (std::istringstream(argv[i]) >> cache_size).fail()) + return usage(); + cache_size *= 4 * 4 * sizeof(double); + } + else if (std::string(argv[i]) == "-c") + writable = false; + else if (std::string(argv[i]) == "-d") + type = type_double; + else if (std::string(argv[i]) == "-f") + type = type_float; +#if WITH_HALF +
else if (std::string(argv[i]) == "-h") + type = type_half; +#endif + else if (std::string(argv[i]) == "-i") + iterator = true; #ifdef _OPENMP - else if (std::string(argv[i]) == "-p") + else if (std::string(argv[i]) == "-j") parallel = true; #endif - else if (std::string(argv[i]) == "-t") { - if (++i == argc || sscanf(argv[i], "%i", &nt) != 1) + else if (std::string(argv[i]) == "-n") { + if (++i == argc || (std::istringstream(argv[i]) >> nx).fail() || + ++i == argc || (std::istringstream(argv[i]) >> ny).fail()) + return usage(); + } + else if (std::string(argv[i]) == "-p") { + uint precision; + if (++i == argc || sscanf(argv[i], "%u", &precision) != 1) return usage(); + config = zfp_config_precision(precision); } else if (std::string(argv[i]) == "-r") { + double rate; if (++i == argc || sscanf(argv[i], "%lf", &rate) != 1) return usage(); - compression = true; + config = zfp_config_rate(rate, false); } - else if (std::string(argv[i]) == "-c") { - if (++i == argc || sscanf(argv[i], "%i", &cache) != 1) + else if (std::string(argv[i]) == "-R") + config = zfp_config_reversible(); + else if (std::string(argv[i]) == "-t") { + if (++i == argc || (std::istringstream(argv[i]) >> nt).fail()) return usage(); } else return usage(); + bool compression = (config.mode != zfp_mode_null); + + // sanity check command-line arguments if (parallel && !compression) { fprintf(stderr, "multithreading requires compressed arrays\n"); return EXIT_FAILURE; } + if (parallel && !writable) { + fprintf(stderr, "multithreading requires read-write arrays\n"); + return EXIT_FAILURE; + } if (parallel && iterator) { fprintf(stderr, "multithreading does not support iterators\n"); return EXIT_FAILURE; } + if (compression && writable && config.mode != zfp_mode_fixed_rate) { + fprintf(stderr, "compression mode requires read-only arrays (-c)\n"); + return EXIT_FAILURE; + } + if (!writable && !compression) { + fprintf(stderr, "read-only arrays require compression parameters\n"); + return EXIT_FAILURE; + } + 
if (compression && type != type_none) { + fprintf(stderr, "tiled arrays do not support compression parameters\n"); + return EXIT_FAILURE; + } - Constants c(nx, ny, nt); + // if unspecified, set cache size to two layers of blocks + if (!cache_size) + cache_size = 2 * 4 * nx * sizeof(double); - double sum; - double err; + // solve problem if (compression) { - // solve problem using compressed arrays - zfp::array2d u(nx, ny, rate, 0, cache * 4 * 4 * sizeof(double)); - rate = u.rate(); - double t = solve(u, c, iterator, parallel); - sum = total(u); - err = error(u, c, t); + // use compressed arrays + if (writable) { + // use read-write fixed-rate arrays + zfp::array2d u(nx, ny, config.arg.rate, 0, cache_size); + zfp::array2d v(nx, ny, config.arg.rate, 0, cache_size); + execute(u, v, nt, iterator, parallel); + } + else { + // use read-only variable-rate arrays + zfp::const_array2d u(nx, ny, config, 0, cache_size); + raw::array2d v(nx, ny); + execute(u, v, nt, iterator, parallel); + } } else { - // solve problem using uncompressed arrays - raw::array2d u(nx, ny); - double t = solve(u, c, iterator, parallel); - sum = total(u); - err = error(u, c, t); + // use uncompressed arrays + switch (type) { +#if WITH_HALF + case type_half: { + // use zfp generic codec with tiled half-precision storage + tiled::array2h u(nx, ny, sizeof(__fp16) * CHAR_BIT, 0, cache_size); + tiled::array2h v(nx, ny, sizeof(__fp16) * CHAR_BIT, 0, cache_size); + execute(u, v, nt, iterator, parallel); + } + break; +#endif + case type_float: { + // use zfp generic codec with tiled single-precision storage + tiled::array2f u(nx, ny, sizeof(float) * CHAR_BIT, 0, cache_size); + tiled::array2f v(nx, ny, sizeof(float) * CHAR_BIT, 0, cache_size); + execute(u, v, nt, iterator, parallel); + } + break; + case type_double: { + // use zfp generic codec with tiled double-precision storage + tiled::array2d u(nx, ny, sizeof(double) * CHAR_BIT, 0, cache_size); + tiled::array2d v(nx, ny, sizeof(double) * CHAR_BIT, 0, 
cache_size); + execute(u, v, nt, iterator, parallel); + } + break; + default: { + // use uncompressed array with row-major double-precision storage + raw::array2d u(nx, ny, sizeof(double) * CHAR_BIT); + raw::array2d v(nx, ny, sizeof(double) * CHAR_BIT); + execute(u, v, nt, iterator, parallel); + } + break; + } } - std::cerr.unsetf(std::ios::fixed); - std::cerr << "rate=" << rate << " sum=" << std::fixed << sum << " error=" << std::setprecision(6) << std::scientific << err << std::endl; - return 0; } diff --git a/examples/diffusionC.c b/examples/diffusionC.c index 99a5c3db..3a2ac6ab 100644 --- a/examples/diffusionC.c +++ b/examples/diffusionC.c @@ -1,28 +1,30 @@ -// forward Euler finite difference solution to the heat equation on a 2D grid -// (ported to C, from diffusion.cpp) +/* +forward Euler finite difference solution to the heat equation on a 2D grid +(ported to C, from diffusion.cpp) +*/ #include <math.h> #include <stdio.h> #include <stdlib.h> +#include "zfp/array.h" -#include "cfparrays.h" #define _ (CFP_NAMESPACE.array2d) #define MAX(x, y) (((nx) > (ny)) ? (nx) : (ny)) -// constants used in the solution +/* constants used in the solution */ typedef struct { - int nx; // grid points in x - int ny; // grid points in y - int nt; // number of time steps (0 for default) - int x0; // x location of heat source - int y0; // y location of heat source - double k; // diffusion constant - double dx; // grid spacing in x - double dy; // grid spacing in y - double dt; // time step - double tfinal; // minimum time to run solution to - double pi; // 3.141... + size_t nx; /* grid points in x */ + size_t ny; /* grid points in y */ + int nt; /* number of time steps (0 for default) */ + int x0; /* x location of heat source */ + int y0; /* y location of heat source */ + double k; /* diffusion constant */ + double dx; /* grid spacing in x */ + double dy; /* grid spacing in y */ + double dt; /* time step */ + double tfinal; /* minimum time to run solution to */ + double pi; /* 3.141... 
*/ } constants; void @@ -41,13 +43,13 @@ init_constants(constants* c, int nx, int ny, int nt) c->pi = 3.14159265358979323846; } -// advance solution using integer array indices +/* advance solution using integer array indices */ static void -time_step_indexed_compressed(cfp_array2d* u, const constants* c) +time_step_indexed_compressed(cfp_array2d u, const constants* c) { - // compute du/dt - cfp_array2d* du = _.ctor(c->nx, c->ny, _.rate(u), 0, _.cache_size(u)); - int x, y; + /* compute du/dt */ + cfp_array2d du = _.ctor(c->nx, c->ny, _.rate(u), 0, _.cache_size(u)); + size_t i, x, y; for (y = 1; y < c->ny - 1; y++) { for (x = 1; x < c->nx - 1; x++) { double uxx = (_.get(u, x - 1, y) - 2 * _.get(u, x, y) + _.get(u, x + 1, y)) / (c->dx * c->dx); @@ -55,10 +57,9 @@ time_step_indexed_compressed(cfp_array2d* u, const constants* c) _.set(du, x, y, c->dt * c->k * (uxx + uyy)); } } - // take forward Euler step - uint i; + /* take forward Euler step */ for (i = 0; i < _.size(u); i++) { - // u[i] += du[i] + /* u[i] += du[i] */ double val = _.get_flat(u, i) + _.get_flat(du, i); _.set_flat(u, i, val); } @@ -66,55 +67,84 @@ time_step_indexed_compressed(cfp_array2d* u, const constants* c) _.dtor(du); } -// advance solution using integer array indices +/* advance solution using array iterators */ +static void +time_step_iterated_compressed(cfp_array2d u, const constants* c) +{ + /* compute du/dt */ + cfp_array2d du = _.ctor(c->nx, c->ny, _.rate(u), 0, _.cache_size(u)); + cfp_iter2d p, q; + for (q = _.begin(du); _.iterator.neq(q, _.end(du)); q = _.iterator.inc(q)) { + size_t x = _.iterator.i(q); + size_t y = _.iterator.j(q); + if (1 <= x && x <= c->nx - 2 && + 1 <= y && y <= c->ny - 2) { + double uxx = (_.get(u, x - 1, y) - 2 * _.get(u, x, y) + _.get(u, x + 1, y)) / (c->dx * c->dx); + double uyy = (_.get(u, x, y - 1) - 2 * _.get(u, x, y) + _.get(u, x, y + 1)) / (c->dy * c->dy); + _.iterator.set(q, c->dt * c->k * (uxx + uyy)); + } + } + /* take forward Euler step */ + for (p = 
_.begin(u), q = _.begin(du); _.iterator.neq(p, _.end(u)); p = _.iterator.inc(p), q = _.iterator.inc(q)) { + /* u[i] += du[i] */ + double val = _.iterator.get(p) + _.iterator.get(q); + _.iterator.set(p, val); + } + + _.dtor(du); +} + +/* advance solution using integer array indices */ static void time_step_indexed(double* u, const constants* c) { - // compute du/dt + /* compute du/dt */ double* du = calloc(c->nx * c->ny, sizeof(double)); - int x, y; - for (y = 1; y < c->ny - 1; y++) { + size_t i, x, y; + for (y = 1; y < c->ny - 1; y++) for (x = 1; x < c->nx - 1; x++) { - double uxx = (u[y*c->nx + (x - 1)] - 2 * u[y*c->nx + x] + u[y*c->nx + (x + 1)]) / (c->dx * c->dx); - double uyy = (u[(y - 1)*c->nx + x] - 2 * u[y*c->nx + x] + u[(y + 1)*c->nx + x]) / (c->dy * c->dy); - du[y*c->nx + x] = c->dt * c->k * (uxx + uyy); + double uxx = (u[(x - 1) + c->nx * y] - 2 * u[x + c->nx * y] + u[(x + 1) + c->nx * y]) / (c->dx * c->dx); + double uyy = (u[x + c->nx * (y - 1)] - 2 * u[x + c->nx * y] + u[x + c->nx * (y + 1)]) / (c->dy * c->dy); + du[x + c->nx * y] = c->dt * c->k * (uxx + uyy); } - } - // take forward Euler step - uint i; - for (i = 0; i < (c->nx * c->ny); i++) { - // u[i] += du[i] + /* take forward Euler step */ + for (i = 0; i < c->nx * c->ny; i++) u[i] += du[i]; - } free(du); } -// solve heat equation using +/* solve heat equation using compressed arrays */ static double -solve_compressed(cfp_array2d* u, const constants* c) +solve_compressed(cfp_array2d u, const constants* c, zfp_bool iterator) { - // initialize u with point heat source (u is assumed to be zero initialized) + double t; + + /* initialize u with point heat source (u is assumed to be zero initialized) */ _.set(u, c->x0, c->y0, 1); - // iterate until final time - double t; + /* iterate until final time */ for (t = 0; t < c->tfinal; t += c->dt) { fprintf(stderr, "t=%lf\n", t); - time_step_indexed_compressed(u, c); + if (iterator) + time_step_iterated_compressed(u, c); + else + 
time_step_indexed_compressed(u, c); } return t; } +/* solve heat equation using uncompressed arrays */ static double solve(double* u, const constants* c) { - // initialize u with point heat source (u is assumed to be zero initialized) - u[c->y0*c->nx + c->x0] = 1; - - // iterate until final time double t; + + /* initialize u with point heat source (u is assumed to be zero initialized) */ + u[c->x0 + c->nx * c->y0] = 1; + + /* iterate until final time */ for (t = 0; t < c->tfinal; t += c->dt) { fprintf(stderr, "t=%lf\n", t); time_step_indexed(u, c); @@ -123,42 +153,42 @@ solve(double* u, const constants* c) return t; } -// compute sum of array values +/* compute sum of array values */ static double -total_compressed(const cfp_array2d* u) +total_compressed(const cfp_array2d u) { double s = 0; - const int nx = _.size_x(u); - const int ny = _.size_y(u); - int x, y; + const size_t nx = _.size_x(u); + const size_t ny = _.size_y(u); + size_t x, y; for (y = 1; y < ny - 1; y++) for (x = 1; x < nx - 1; x++) s += _.get(u, x, y); return s; } -// compute sum of array values +/* compute sum of array values */ static double -total(const double* u, const int nx, const int ny) +total(const double* u, size_t nx, size_t ny) { double s = 0; - int x, y; + size_t x, y; for (y = 1; y < ny - 1; y++) for (x = 1; x < nx - 1; x++) - s += u[y*nx + x]; + s += u[x + nx * y]; return s; } -// compute root mean square error with respect to exact solution +/* compute root mean square error with respect to exact solution */ static double -error_compressed(const cfp_array2d* u, const constants* c, double t) +error_compressed(const cfp_array2d u, const constants* c, double t) { double e = 0; - int x, y; + size_t x, y; for (y = 1; y < c->ny - 1; y++) { - double py = c->dy * (y - c->y0); + double py = c->dy * ((int)y - (int)c->y0); for (x = 1; x < c->nx - 1; x++) { - double px = c->dx * (x - c->x0); + double px = c->dx * ((int)x - (int)c->x0); double f = _.get(u, x, y); double g = c->dx * c->dy * 
exp(-(px * px + py * py) / (4 * c->k * t)) / (4 * c->pi * c->k * t); e += (f - g) * (f - g); @@ -167,17 +197,17 @@ error_compressed(const cfp_array2d* u, const constants* c, double t) return sqrt(e / ((c->nx - 2) * (c->ny - 2))); } -// compute root mean square error with respect to exact solution +/* compute root mean square error with respect to exact solution */ static double error(const double* u, const constants* c, double t) { double e = 0; - int x, y; + size_t x, y; for (y = 1; y < c->ny - 1; y++) { - double py = c->dy * (y - c->y0); + double py = c->dy * ((int)y - (int)c->y0); for (x = 1; x < c->nx - 1; x++) { - double px = c->dx * (x - c->x0); - double f = u[y*c->nx + x]; + double px = c->dx * ((int)x - (int)c->x0); + double f = u[x + c->nx * y]; double g = c->dx * c->dy * exp(-(px * px + py * py) / (4 * c->k * t)) / (4 * c->pi * c->k * t); e += (f - g) * (f - g); } @@ -186,76 +216,82 @@ error(const double* u, const constants* c, double t) } static int -usage() +usage(void) { fprintf(stderr, "Usage: diffusionC [options]\n"); fprintf(stderr, "Options:\n"); + fprintf(stderr, "-b <blocks> : use 'blocks' 4x4 blocks of cache\n"); + fprintf(stderr, "-i : traverse arrays using iterators\n"); fprintf(stderr, "-n <nx> <ny> : number of grid points\n"); + fprintf(stderr, "-r <rate> : use compressed arrays with given compressed bits/value\n"); fprintf(stderr, "-t <nt> : number of time steps\n"); - fprintf(stderr, "-r <rate> : use compressed arrays with 'rate' bits/value\n"); - fprintf(stderr, "-c <blocks> : use 'blocks' 4x4 blocks of cache\n"); return EXIT_FAILURE; } int main(int argc, char* argv[]) { - int nx = 100; - int ny = 100; + int nx = 128; + int ny = 128; int nt = 0; + int cache_size = 0; double rate = 64; - int compression = 0; - int cache = 0; + zfp_bool iterator = zfp_false; + zfp_bool compression = zfp_false; + constants* c = 0; + double sum; + double err; - // parse command-line options + /* parse command-line options */ int i; for (i = 1; i < argc; i++) { if (argv[i][0] != '-' || argv[i][2]) return 
usage(); switch(argv[i][1]) { + case 'b': + if (++i == argc || sscanf(argv[i], "%d", &cache_size) != 1) + return usage(); + cache_size *= (int)(4 * 4 * sizeof(double)); + break; + case 'i': + iterator = zfp_true; + break; case 'n': if (++i == argc || sscanf(argv[i], "%d", &nx) != 1 || ++i == argc || sscanf(argv[i], "%d", &ny) != 1) return usage(); break; - case 't': - if (++i == argc || sscanf(argv[i], "%d", &nt) != 1) - return usage(); - break; case 'r': if (++i == argc || sscanf(argv[i], "%lf", &rate) != 1) return usage(); - compression = 1; + compression = zfp_true; break; - case 'c': - if (++i == argc || sscanf(argv[i], "%d", &cache) != 1) + case 't': + if (++i == argc || sscanf(argv[i], "%d", &nt) != 1) return usage(); + break; + default: + return usage(); } } - constants* c = malloc(sizeof(constants)); + c = malloc(sizeof(constants)); init_constants(c, nx, ny, nt); - double sum; - double err; if (compression) { - // solve problem using compressed arrays - cfp_array2d* u = _.ctor(nx, ny, rate, 0, cache * 4 * 4 * sizeof(double)); - - rate = _.rate(u); - double t = solve_compressed(u, c); + /* solve problem using compressed arrays */ + cfp_array2d u = _.ctor(nx, ny, rate, 0, cache_size); + double t = solve_compressed(u, c, iterator); sum = total_compressed(u); err = error_compressed(u, c, t); - + rate = _.rate(u); _.dtor(u); } else { - // solve problem using primitive arrays + /* solve problem using primitive arrays */ double* u = calloc(nx * ny, sizeof(double)); - double t = solve(u, c); sum = total(u, nx, ny); err = error(u, c, t); - free(u); } diff --git a/examples/inplace.c b/examples/inplace.c index 3764166b..9516240d 100644 --- a/examples/inplace.c +++ b/examples/inplace.c @@ -46,7 +46,7 @@ process(double* buffer, uint blocks, double tolerance) ptr = buffer; for (i = 0; i < blocks; i++) { offset[i] = stream_wtell(stream); - bits = zfp_encode_block_double_2(zfp, ptr); + bits = (uint)zfp_encode_block_double_2(zfp, ptr); if (!bits) { fprintf(stderr, 
"compression failed\n"); return 0; diff --git a/examples/iterator.cpp b/examples/iterator.cpp index 698692ff..94f907de 100644 --- a/examples/iterator.cpp +++ b/examples/iterator.cpp @@ -1,9 +1,9 @@ #include #include #include -#include "zfparray1.h" -#include "zfparray2.h" -#include "zfparray3.h" +#include "zfp/array1.hpp" +#include "zfp/array2.hpp" +#include "zfp/array3.hpp" void print1(zfp::array1<double>::pointer p, size_t n) { @@ -17,9 +17,9 @@ void print2(zfp::array2<double>::pointer p, size_t n) std::cout << *p++ << std::endl; } -void print3(zfp::array1<double>::iterator begin, zfp::array1<double>::iterator end) +void print3(zfp::array1<double>::const_iterator begin, zfp::array1<double>::const_iterator end) { - for (zfp::array1<double>::iterator p = begin; p != end; p++) + for (zfp::array1<double>::const_iterator p = begin; p != end; p++) std::cout << *p << std::endl; } diff --git a/examples/iteratorC.c b/examples/iteratorC.c new file mode 100644 index 00000000..93ef4725 --- /dev/null +++ b/examples/iteratorC.c @@ -0,0 +1,97 @@ +#include <stdio.h> +#include <stdlib.h> +#include "zfp/array.h" + +void print1(cfp_ptr1d p, size_t n) +{ + size_t i; + const cfp_array1d_api _ = cfp.array1d; + + for (i = 0; i < n; i++) + printf("%g\n", _.reference.get(_.pointer.ref_at(p, i))); +} + +void print2(cfp_ptr2d p, size_t n) +{ + const cfp_array2d_api _ = cfp.array2d; + + while (n--) { + printf("%g\n", _.reference.get(_.pointer.ref(p))); + p = _.pointer.inc(p); + } +} + +void print3(cfp_iter1d begin, cfp_iter1d end) +{ + const cfp_array1d_api _ = cfp.array1d; + cfp_iter1d p; + + for (p = begin; !_.iterator.eq(p, end); p = _.iterator.inc(p)) + printf("%g\n", _.reference.get(_.iterator.ref(p))); +} + +int main(void) +{ + const cfp_array1d_api _1d = cfp.array1d; + const cfp_array2d_api _2d = cfp.array2d; + const cfp_array3d_api _3d = cfp.array3d; + cfp_array1d v; + cfp_iter1d it1; + cfp_array2d a; + cfp_iter2d it2; + cfp_ptr2d pb2; + cfp_ptr2d pe2; + cfp_array3d b; + cfp_iter3d it3; + cfp_ptr3d pb3; + cfp_ptr3d pe3; + size_t i, j, k; + + /* some fun with 1D 
arrays */ + v = _1d.ctor(10, 64.0, 0, 0); + /* initialize and print array of random values */ + for (it1 = _1d.begin(v); !_1d.iterator.eq(it1, _1d.end(v)); it1 = _1d.iterator.inc(it1)) + _1d.reference.set(_1d.iterator.ref(it1), rand()); + printf("random array\n"); + print1(_1d.ptr(v, 0), _1d.size(v)); + printf("\n"); + + /* some fun with 2D arrays */ + a = _2d.ctor(5, 7, 64.0, 0, 0); + /* print array indices visited in block-order traversal*/ + printf("block order (x, y) indices\n"); + for (it2 = _2d.begin(a); !_2d.iterator.eq(it2, _2d.end(a)); it2 = _2d.iterator.inc(it2)) { + i = _2d.iterator.i(it2); + j = _2d.iterator.j(it2); + printf("(%lu, %lu)\n", (unsigned long)i, (unsigned long)j); + _2d.reference.set(_2d.iterator.ref(it2), i + 10 * j); + } + printf("\n"); + + /* print array contents in row-major order */ + printf("row-major order yx indices\n"); + print2(_2d.ptr_flat(a, 0), _2d.size(a)); + printf("\n"); + /* pointer arithmetic */ + pb2 = _2d.reference.ptr(_2d.iterator.ref(_2d.begin(a))); + pe2 = _2d.reference.ptr(_2d.iterator.ref(_2d.end(a))); + printf("%lu * %lu = %ld\n", (unsigned long)_2d.size_x(a), (unsigned long)_2d.size_y(a), (long)_2d.pointer.distance(pb2, pe2)); + + /* some fun with 3D arrays */ + b = _3d.ctor(7, 2, 5, 64.0, 0, 0); + /* print array indices visited in block-order traversal */ + printf("block order (x, y, z) indices\n"); + for (it3 = _3d.begin(b); !_3d.iterator.eq(it3, _3d.end(b)); it3 = _3d.iterator.inc(it3)) { + i = _3d.iterator.i(it3); + j = _3d.iterator.j(it3); + k = _3d.iterator.k(it3); + printf("(%lu, %lu, %lu)\n", (unsigned long)i, (unsigned long)j, (unsigned long)k); + } + printf("\n"); + /* pointer arithmetic */ + pb3 = _3d.reference.ptr(_3d.iterator.ref(_3d.begin(b))); + pe3 = _3d.reference.ptr(_3d.iterator.ref(_3d.end(b))); + printf("%lu * %lu * %lu = %ld\n", (unsigned long)_3d.size_x(b), (unsigned long)_3d.size_y(b), (unsigned long)_3d.size_z(b), (long)_3d.pointer.distance(pb3, pe3)); + + return 0; +} diff --git 
a/examples/pgm.c b/examples/pgm.c index c23ecb2d..ce580dc7 100644 --- a/examples/pgm.c +++ b/examples/pgm.c @@ -60,7 +60,7 @@ int main(int argc, char* argv[]) if (rate < 0) zfp_stream_set_precision(zfp, (uint)floor(0.5 - rate)); else - zfp_stream_set_rate(zfp, rate, zfp_type_int32, 2, 0); + zfp_stream_set_rate(zfp, rate, zfp_type_int32, 2, zfp_false); bytes = zfp_stream_maximum_size(zfp, field); buffer = malloc(bytes); stream = stream_open(buffer, bytes); diff --git a/examples/ppm.c b/examples/ppm.c new file mode 100644 index 00000000..4b989a30 --- /dev/null +++ b/examples/ppm.c @@ -0,0 +1,390 @@ +/* +This simple example shows how zfp can be used to compress 8-bit color images +stored in the PPM image format. This lossy compressor employs two common image +compression strategies: (1) transformation to the YCoCg color space, which +decorrelates color bands, and (2) chroma subsampling, which reduces spatial +resolution in the Co and Cg chrominance bands. The single command-line argument +selects one of two compression modes: if a positive rate (in bits/pixel) is +specified, fixed-rate mode is selected; a negative integer argument, -p, sets +the precision to p in fixed-precision mode. Rate allocation in fixed-rate mode +assigns more bits to luma than to chroma components due to the relatively higher +information content in luma after chroma subsampling. + +The YCoCg transform employed here has been adapted to avoid range expansion and +potential overflow. Chroma subsampling is achieved by performing zfp's forward +decorrelating transform and then zeroing all but the four lowest-sequency +coefficients, effectively reducing each chroma block to a bilinear approximation. + +Because only four chroma coefficients per 4x4 pixel block are retained, an +alternative to zeroing and then encoding the remaining twelve zero-valued +coefficients is to treat the chroma block as being one-dimensional, with only +four values, and then compressing it using zfp's 1D codec. 
The dimensionality +of chroma blocks (1 or 2) is specified at compile time via the PPM_CHROMA macro. + +NOTE: To keep this example simple, only images whose dimensions are multiples +of four are supported. +*/ + +#ifdef PPM_CHROMA + #if PPM_CHROMA != 1 && PPM_CHROMA != 2 + #error "compile with PPM_CHROMA=1 or PPM_CHROMA=2" + #endif +#else + /* default */ + #define PPM_CHROMA 2 +#endif + +#include <limits.h> +#include <math.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "zfp.h" + +/* clamp values to 31-bit range */ +static void +clamp(int32* block, uint n) +{ + uint i; + for (i = 0; i < n; i++) { + if (block[i] < 1 - (1 << 30)) + block[i] = 1 - (1 << 30); + if (block[i] > (1 << 30) - 1) + block[i] = (1 << 30) - 1; + } +} + +/* convert 2D block from RGB to YCoCg color space */ +static void +rgb2ycocg(int32 ycocg[3][16], /*const*/ int32 rgb[3][16]) +{ + uint i; + for (i = 0; i < 16; i++) { + int32 r, g, b; + int32 y, co, cg, t; + /* fetch RGB values */ + r = rgb[0][i]; + g = rgb[1][i]; + b = rgb[2][i]; + /* perform range-preserving YCoCg forward transform */ + co = (r - b) >> 1; + t = b + co; + cg = (g - t) >> 1; + y = t + cg; + /* store YCoCg values */ + ycocg[0][i] = y; + ycocg[1][i] = co; + ycocg[2][i] = cg; + } +} + +/* convert 2D block from YCoCg to RGB color space */ +static void +ycocg2rgb(int32 rgb[3][16], /*const*/ int32 ycocg[3][16]) +{ + uint i; + for (i = 0; i < 16; i++) { + int32 r, g, b; + int32 y, co, cg, t; + /* fetch YCoCg values */ + y = ycocg[0][i]; + co = ycocg[1][i]; + cg = ycocg[2][i]; + /* perform range-preserving YCoCg inverse transform */ + t = y - cg; + g = (cg << 1) + t; + b = t - co; + r = (co << 1) + b; + /* store RGB values */ + rgb[0][i] = r; + rgb[1][i] = g; + rgb[2][i] = b; + } +} + +/* perform partial forward decorrelating transform */ +static void +fwd_lift(int32* p, uint s) +{ + int32 x, y, z, w; + x = *p; p += s; + y = *p; p += s; + z = *p; p += s; + w = *p; p += s; + + x += w; x >>= 1; w -= x; + z += y; z >>= 1; y -= z; + x += z; x >>= 1; z -= x; + w += 
y; w >>= 1; y -= w; + w += y >> 1; y -= w >> 1; + + p -= s; *p = w; + p -= s; *p = z; + p -= s; *p = y; + p -= s; *p = x; +} + +/* perform partial inverse decorrelating transform */ +static void +inv_lift(int32* p, uint s) +{ + int32 x, y, z, w; + x = *p; p += s; + y = *p; p += s; + z = *p; p += s; + w = *p; p += s; + + y += w >> 1; w -= y >> 1; + y += w; w <<= 1; w -= y; + z += x; x <<= 1; x -= z; + y += z; z <<= 1; z -= y; + w += x; x <<= 1; x -= w; + + p -= s; *p = w; + p -= s; *p = z; + p -= s; *p = y; + p -= s; *p = x; +} + +/* perform chroma subsampling by discarding high-frequency components */ +static void +chroma_downsample(int32* block) +{ + uint i, j; + /* perform forward decorrelating transform */ + for (j = 0; j < 4; j++) + fwd_lift(block + 4 * j, 1); + for (i = 0; i < 4; i++) + fwd_lift(block + 1 * i, 4); +#if PPM_CHROMA == 1 + /* keep only the four lowest-sequency coefficients */ + block[2] = block[4]; + block[3] = block[5]; + for (i = 4; i < 16; i++) + block[i] = 0; + /* reconstruct as 1D block */ + inv_lift(block, 1); + /* clamp values to 31 bits to avoid overflow */ + clamp(block, 4); +#else + /* zero out all but four lowest-sequency coefficients */ + for (j = 0; j < 4; j++) + for (i = 0; i < 4; i++) + if (i >= 2 || j >= 2) + block[i + 4 * j] = 0; + /* perform inverse decorrelating transform */ + for (i = 0; i < 4; i++) + inv_lift(block + 1 * i, 4); + for (j = 0; j < 4; j++) + inv_lift(block + 4 * j, 1); + /* clamp values to 31 bits to avoid overflow */ + clamp(block, 16); +#endif +} + +/* reconstruct 2D chroma block */ +static void +chroma_upsample(int32* block) +{ +#if PPM_CHROMA == 1 + uint i, j; + /* obtain 1D block coefficients */ + fwd_lift(block, 1); + /* reorganize and initialize remaining 2D block coefficients */ + block[4] = block[2]; + block[5] = block[3]; + block[2] = 0; + block[3] = 0; + for (i = 6; i < 16; i++) + block[i] = 0; + /* perform inverse decorrelating transform */ + for (i = 0; i < 4; i++) + inv_lift(block + 1 * i, 4); + 
for (j = 0; j < 4; j++) + inv_lift(block + 4 * j, 1); + /* clamp values to 31 bits to avoid overflow */ + clamp(block, 16); +#else + /* clamp values to 31 bits to avoid overflow */ + clamp(block, 16); +#endif +} + +int main(int argc, char* argv[]) +{ + double rate = 0; + uint nx, ny; + uint x, y; + uint k; + char line[0x100]; + uchar* image; + zfp_field* field; + zfp_stream* zfp[3]; + bitstream* stream; + void* buffer; + size_t bytes; + size_t size; + + switch (argc) { + case 2: + if (sscanf(argv[1], "%lf", &rate) != 1) + goto usage; + break; + default: + usage: + fprintf(stderr, "Usage: ppm <rate> <input.ppm >output.ppm\n"); + return EXIT_FAILURE; + } + + /* read ppm header */ + if (!fgets(line, sizeof(line), stdin) || strcmp(line, "P6\n") || + !fgets(line, sizeof(line), stdin) || sscanf(line, "%u%u", &nx, &ny) != 2 || + !fgets(line, sizeof(line), stdin) || strcmp(line, "255\n")) { + fprintf(stderr, "error opening image\n"); + return EXIT_FAILURE; + } + if ((nx & 3u) || (ny & 3u)) { + fprintf(stderr, "image dimensions must be multiples of four\n"); + return EXIT_FAILURE; + } + + /* read image data */ + image = malloc(3 * nx * ny); + if (!image) { + fprintf(stderr, "error allocating memory\n"); + return EXIT_FAILURE; + } + if (fread(image, sizeof(*image), 3 * nx * ny, stdin) != 3 * nx * ny) { + fprintf(stderr, "error reading image\n"); + return EXIT_FAILURE; + } + + /* initialize compressed streams */ + for (k = 0; k < 3; k++) + zfp[k] = zfp_stream_open(NULL); + if (rate < 0) { + /* use fixed-precision mode */ + for (k = 0; k < 3; k++) + zfp_stream_set_precision(zfp[k], (uint)floor(0.5 - rate)); + } + else { + /* assign higher rate to luminance than to chrominance components */ +#if PPM_CHROMA == 1 + double chroma_rate = floor(8 * rate / 3 + 0.5) / 4; + double luma_rate = rate - chroma_rate / 2; + zfp_stream_set_rate(zfp[0], luma_rate, zfp_type_int32, 2, zfp_false); + zfp_stream_set_rate(zfp[1], chroma_rate, zfp_type_int32, 1, zfp_false); + zfp_stream_set_rate(zfp[2], chroma_rate, 
zfp_type_int32, 1, zfp_false); +#else + double chroma_rate = floor(8 * rate / 3 + 0.5) / 16; + double luma_rate = rate - 2 * chroma_rate; + zfp_stream_set_rate(zfp[0], luma_rate, zfp_type_int32, 2, zfp_false); + zfp_stream_set_rate(zfp[1], chroma_rate, zfp_type_int32, 2, zfp_false); + zfp_stream_set_rate(zfp[2], chroma_rate, zfp_type_int32, 2, zfp_false); +#endif + } + + /* determine size of compressed buffer */ + bytes = 0; + field = zfp_field_2d(image, zfp_type_int32, nx, ny); + for (k = 0; k < 3; k++) + bytes += zfp_stream_maximum_size(zfp[k], field); + zfp_field_free(field); + + /* allocate buffer and initialize bit stream */ + buffer = malloc(bytes); + if (!buffer) { + fprintf(stderr, "error allocating memory\n"); + return EXIT_FAILURE; + } + stream = stream_open(buffer, bytes); + + /* the three zfp streams share a single bit stream */ + for (k = 0; k < 3; k++) + zfp_stream_set_bit_stream(zfp[k], stream); + + /* compress image */ + for (y = 0; y < ny; y += 4) + for (x = 0; x < nx; x += 4) { + uchar block[3][16]; + int32 rgb[3][16]; + int32 ycocg[3][16]; + uint i, j, k; + /* fetch R, G, and B blocks */ + for (k = 0; k < 3; k++) + for (j = 0; j < 4; j++) + for (i = 0; i < 4; i++) + block[k][i + 4 * j] = image[k + 3 * (x + i + nx * (y + j))]; + /* promote to 32-bit integers */ + for (k = 0; k < 3; k++) + zfp_promote_uint8_to_int32(rgb[k], block[k], 2); + /* perform color space transform */ + rgb2ycocg(ycocg, rgb); + /* chroma subsample the Co and Cg bands */ + for (k = 1; k < 3; k++) + chroma_downsample(ycocg[k]); + /* compress the Y, Co, and Cg blocks */ +#if PPM_CHROMA == 1 + zfp_encode_block_int32_2(zfp[0], ycocg[0]); + zfp_encode_block_int32_1(zfp[1], ycocg[1]); + zfp_encode_block_int32_1(zfp[2], ycocg[2]); +#else + for (k = 0; k < 3; k++) + zfp_encode_block_int32_2(zfp[k], ycocg[k]); +#endif + } + + zfp_stream_flush(zfp[0]); + size = zfp_stream_compressed_size(zfp[0]); + fprintf(stderr, "%u compressed bytes (%.2f bits/pixel)\n", (uint)size, (double)size * 
CHAR_BIT / (nx * ny)); + + /* decompress image */ + zfp_stream_rewind(zfp[0]); + for (y = 0; y < ny; y += 4) + for (x = 0; x < nx; x += 4) { + uchar block[3][16]; + int32 rgb[3][16]; + int32 ycocg[3][16]; + uint i, j, k; + /* decompress the Y, Co, and Cg blocks */ +#if PPM_CHROMA == 1 + zfp_decode_block_int32_2(zfp[0], ycocg[0]); + zfp_decode_block_int32_1(zfp[1], ycocg[1]); + zfp_decode_block_int32_1(zfp[2], ycocg[2]); +#else + for (k = 0; k < 3; k++) + zfp_decode_block_int32_2(zfp[k], ycocg[k]); +#endif + /* reconstruct Co and Cg chroma bands */ + for (k = 1; k < 3; k++) + chroma_upsample(ycocg[k]); + /* perform color space transform */ + ycocg2rgb(rgb, ycocg); + /* demote to 8-bit integers */ + for (k = 0; k < 3; k++) + zfp_demote_int32_to_uint8(block[k], rgb[k], 2); + /* store R, G, and B blocks */ + for (k = 0; k < 3; k++) + for (j = 0; j < 4; j++) + for (i = 0; i < 4; i++) + image[k + 3 * (x + i + nx * (y + j))] = block[k][i + 4 * j]; + } + + /* clean up */ + for (k = 0; k < 3; k++) + zfp_stream_close(zfp[k]); + stream_close(stream); + free(buffer); + + /* output reconstructed image */ + printf("P6\n"); + printf("%u %u\n", nx, ny); + printf("255\n"); + if (fwrite(image, sizeof(*image), 3 * nx * ny, stdout) != 3 * nx * ny) { + fprintf(stderr, "error writing image\n"); + return EXIT_FAILURE; + } + free(image); + + return 0; +} diff --git a/examples/simple.c b/examples/simple.c index 2ccb5977..d2261301 100644 --- a/examples/simple.c +++ b/examples/simple.c @@ -8,7 +8,7 @@ /* compress or decompress array */ static int -compress(double* array, int nx, int ny, int nz, double tolerance, int decompress) +compress(double* array, size_t nx, size_t ny, size_t nz, double tolerance, zfp_bool decompress) { int status = 0; /* return value: 0 = success */ zfp_type type; /* array scalar type */ @@ -26,8 +26,9 @@ compress(double* array, int nx, int ny, int nz, double tolerance, int decompress /* allocate meta data for a compressed stream */ zfp = zfp_stream_open(NULL); - /* 
set compression mode and parameters via one of three functions */ -/* zfp_stream_set_rate(zfp, rate, type, 3, 0); */ + /* set compression mode and parameters via one of four functions */ +/* zfp_stream_set_reversible(zfp); */ +/* zfp_stream_set_rate(zfp, rate, type, zfp_field_dimensionality(field), zfp_false); */ /* zfp_stream_set_precision(zfp, precision); */ zfp_stream_set_accuracy(zfp, tolerance); @@ -42,19 +43,21 @@ compress(double* array, int nx, int ny, int nz, double tolerance, int decompress /* compress or decompress entire array */ if (decompress) { - /* read compressed stream and decompress array */ + /* read compressed stream and decompress and output array */ zfpsize = fread(buffer, 1, bufsize, stdin); if (!zfp_decompress(zfp, field)) { fprintf(stderr, "decompression failed\n"); - status = 1; + status = EXIT_FAILURE; } + else + fwrite(array, sizeof(double), zfp_field_size(field, NULL), stdout); } else { /* compress array and output compressed stream */ zfpsize = zfp_compress(zfp, field); if (!zfpsize) { fprintf(stderr, "compression failed\n"); - status = 1; + status = EXIT_FAILURE; } else fwrite(buffer, 1, zfpsize, stdout); @@ -73,17 +76,17 @@ compress(double* array, int nx, int ny, int nz, double tolerance, int decompress int main(int argc, char* argv[]) { /* use -d to decompress rather than compress data */ - int decompress = (argc == 2 && !strcmp(argv[1], "-d")); + zfp_bool decompress = (argc == 2 && !strcmp(argv[1], "-d")); /* allocate 100x100x100 array of doubles */ - int nx = 100; - int ny = 100; - int nz = 100; + size_t nx = 100; + size_t ny = 100; + size_t nz = 100; double* array = malloc(nx * ny * nz * sizeof(double)); if (!decompress) { /* initialize array to be compressed */ - int i, j, k; + size_t i, j, k; for (k = 0; k < nz; k++) for (j = 0; j < ny; j++) for (i = 0; i < nx; i++) { diff --git a/examples/speed.c b/examples/speed.c index 9332605d..e75f4285 100644 --- a/examples/speed.c +++ b/examples/speed.c @@ -103,7 +103,7 @@ int main(int 
argc, char* argv[]) /* allocate storage for compressed bit stream */ zfp = zfp_stream_open(NULL); - zfp_stream_set_rate(zfp, rate, zfp_field_type(field), zfp_field_dimensionality(field), 0); + zfp_stream_set_rate(zfp, rate, zfp_field_type(field), zfp_field_dimensionality(field), zfp_false); bytes = zfp_stream_maximum_size(zfp, field); buffer = malloc(bytes); stream = stream_open(buffer, bytes); diff --git a/fortran/CMakeLists.txt b/fortran/CMakeLists.txt index 22381df4..9c376348 100644 --- a/fortran/CMakeLists.txt +++ b/fortran/CMakeLists.txt @@ -13,5 +13,19 @@ set(CMAKE_Fortran_MODULE_DIRECTORY ${CMAKE_BINARY_DIR}/modules) set(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG} ${bounds}") set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} ${dialect}") -add_library(zFORp zfp.f) +add_library(zFORp zfp.f90) target_link_libraries(zFORp PRIVATE zfp) + +set_property(TARGET zFORp PROPERTY VERSION ${ZFP_VERSION}) +set_property(TARGET zFORp PROPERTY SOVERSION ${ZFP_VERSION_MAJOR}) +set_property(TARGET zFORp PROPERTY OUTPUT_NAME ${ZFP_LIBRARY_PREFIX}zFORp) + +# install location for module file +install(FILES ${CMAKE_Fortran_MODULE_DIRECTORY}/zfp.mod + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) + +# install location for library +install(TARGETS zFORp EXPORT cFORp-targets + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) diff --git a/fortran/Makefile b/fortran/Makefile index 229bf42c..9e514868 100644 --- a/fortran/Makefile +++ b/fortran/Makefile @@ -1,14 +1,16 @@ include ../Config +.SUFFIXES: .f90 + LIBDIR = ../lib MODDIR = ../modules -TARGETS = $(LIBDIR)/libzFORp.a $(LIBDIR)/libzFORp.so $(MODDIR)/zforp_module.mod +TARGETS = $(LIBDIR)/libzFORp.a $(LIBDIR)/libzFORp.so $(MODDIR)/zfp.mod OBJECTS = zfp.o -MODULES = zforp_module.mod +MODULES = zfp.mod -static: $(LIBDIR)/libzFORp.a $(MODDIR)/zforp_module.mod +static: $(LIBDIR)/libzFORp.a $(MODDIR)/zfp.mod -shared: $(LIBDIR)/libzFORp.so $(MODDIR)/zforp_module.mod +shared: $(LIBDIR)/libzFORp.so $(MODDIR)/zfp.mod clean: rm -f $(TARGETS) $(OBJECTS) @@ -22,9 +24,9 @@ $(LIBDIR)/libzFORp.so: $(OBJECTS) mkdir -p $(LIBDIR) $(FC) $(FFLAGS) -shared $^ -o $@ -$(MODDIR)/zforp_module.mod: $(OBJECTS) +$(MODDIR)/zfp.mod: $(OBJECTS) mkdir -p $(MODDIR) - mv zforp_module.mod $(MODDIR) + mv $(MODULES) $(MODDIR) -.f.o: +.f90.o: $(FC) $(FFLAGS) -c $< diff --git a/fortran/zfp.f b/fortran/zfp.f90 similarity index 78% rename from fortran/zfp.f rename to fortran/zfp.f90 index 3ce9563c..ae571468 100644 --- a/fortran/zfp.f +++ b/fortran/zfp.f90 @@ -1,6 +1,6 @@ -module zFORp_module +module zfp - use, intrinsic :: iso_c_binding, only: c_int, c_int64_t, c_size_t, c_double, c_ptr, c_null_ptr, c_loc + use, intrinsic :: iso_c_binding, only: c_int, c_int64_t, c_size_t, c_ptrdiff_t, c_double, c_ptr, c_null_ptr, c_loc implicit none private @@ -20,6 +20,11 @@ module zFORp_module type(c_ptr) :: object = c_null_ptr end type zFORp_field + type, bind(c) :: zFORp_config + private + type(c_ptr) :: object = c_null_ptr + end type zFORp_config + enum, bind(c) enumerator :: zFORp_type_none = 0, & zFORp_type_int32 = 1, & @@ -46,28 +51,31 @@ module zFORp_module ! constants are hardcoded ! 
const_xyz holds value, but xyz is the public constant - integer, parameter :: const_zFORp_version_major = 0 - integer, parameter :: const_zFORp_version_minor = 5 - integer, parameter :: const_zFORp_version_patch = 5 + integer, parameter :: const_zFORp_version_major = 1 + integer, parameter :: const_zFORp_version_minor = 0 + integer, parameter :: const_zFORp_version_patch = 1 + integer, parameter :: const_zFORp_version_tweak = 0 integer, protected, bind(c, name="zFORp_version_major") :: zFORp_version_major integer, protected, bind(c, name="zFORp_version_minor") :: zFORp_version_minor integer, protected, bind(c, name="zFORp_version_patch") :: zFORp_version_patch + integer, protected, bind(c, name="zFORp_version_tweak") :: zFORp_version_tweak data zFORp_version_major/const_zFORp_version_major/, & zFORp_version_minor/const_zFORp_version_minor/, & - zFORp_version_patch/const_zFORp_version_patch/ + zFORp_version_patch/const_zFORp_version_patch/, & + zFORp_version_tweak/const_zFORp_version_tweak/ integer, parameter :: const_zFORp_codec_version = 5 integer, protected, bind(c, name="zFORp_codec_version") :: zFORp_codec_version data zFORp_codec_version/const_zFORp_codec_version/ - integer, parameter :: const_zFORp_library_version = 85 ! 0x55 + integer, parameter :: const_zFORp_library_version = 4112 ! 
0x1010 integer, protected, bind(c, name="zFORp_library_version") :: zFORp_library_version data zFORp_library_version/const_zFORp_library_version/ - character(len = 36), parameter :: zFORp_version_string = 'zfp version 0.5.5 (May 5, 2019)' + character(len = 36), parameter :: zFORp_version_string = 'zfp version 1.0.1 (December 15, 2023)' integer, parameter :: const_zFORp_min_bits = 1 - integer, parameter :: const_zFORp_max_bits = 16657 + integer, parameter :: const_zFORp_max_bits = 16658 integer, parameter :: const_zFORp_max_prec = 64 integer, parameter :: const_zFORp_min_exp = -1074 integer, protected, bind(c, name="zFORp_min_bits") :: zFORp_min_bits @@ -135,8 +143,8 @@ subroutine zfp_bitstream_stream_close(bs) bind(c, name="stream_close") function zfp_type_size(scalar_type) result(type_size) bind(c, name="zfp_type_size") import - integer(c_int) scalar_type - integer(c_size_t) type_size + integer(c_int), value :: scalar_type + integer(c_size_t) :: type_size end function ! high-level API: zfp_stream functions @@ -164,10 +172,29 @@ function zfp_stream_compression_mode(stream) result(zfp_mode) bind(c, name="zfp_ integer(c_int) :: zfp_mode end function + function zfp_stream_rate(stream, dims) result(rate_result) bind(c, name="zfp_stream_rate") + import + type(c_ptr), value :: stream + integer(c_int), value :: dims + real(c_double) :: rate_result + end function + + function zfp_stream_precision(stream) result(prec_result) bind(c, name="zfp_stream_precision") + import + type(c_ptr), value :: stream + integer(c_int) :: prec_result + end function + + function zfp_stream_accuracy(stream) result(acc_result) bind(c, name="zfp_stream_accuracy") + import + type(c_ptr), value :: stream + real(c_double) :: acc_result + end function + function zfp_stream_mode(stream) result(encoded_mode) bind(c, name="zfp_stream_mode") import type(c_ptr), value :: stream - integer(c_int64_t) encoded_mode + integer(c_int64_t) :: encoded_mode end function subroutine zfp_stream_params(stream, minbits, 
maxbits, maxprec, minexp) bind(c, name="zfp_stream_params") @@ -188,6 +215,11 @@ function zfp_stream_maximum_size(stream, field) result(max_size) bind(c, name="z integer(c_size_t) max_size end function + subroutine zfp_stream_rewind(stream) bind(c, name="zfp_stream_rewind") + import + type(c_ptr), value :: stream + end subroutine + subroutine zfp_stream_set_bit_stream(stream, bs) bind(c, name="zfp_stream_set_bit_stream") import type(c_ptr), value :: stream, bs @@ -198,13 +230,13 @@ subroutine zfp_stream_set_reversible(stream) bind(c, name="zfp_stream_set_revers type(c_ptr), value :: stream end subroutine - function zfp_stream_set_rate(stream, rate, scalar_type, dims, wra) result(rate_result) bind(c, name="zfp_stream_set_rate") + function zfp_stream_set_rate(stream, rate, scalar_type, dims, align) result(rate_result) bind(c, name="zfp_stream_set_rate") import type(c_ptr), value :: stream real(c_double), value :: rate integer(c_int), value :: scalar_type ! no unsigned int in Fortran - integer(c_int), value :: dims, wra + integer(c_int), value :: dims, align real(c_double) :: rate_result end function @@ -275,6 +307,15 @@ function zfp_stream_set_omp_chunk_size(stream, chunk_size) result(is_success) bi integer(c_int) chunk_size, is_success end function + ! TODO: high-level API: zfp_config functions (resolve Fortran's lack of unions) + + ! zfp_config_none + ! zfp_config_rate + ! zfp_config_precision + ! zfp_config_accuracy + ! zfp_config_reversible + ! zfp_config_expert + ! 
high-level API: zfp_field functions function zfp_field_alloc() result(field) bind(c, name="zfp_field_alloc") @@ -286,28 +327,32 @@ function zfp_field_1d(uncompressed_ptr, scalar_type, nx) result(field) bind(c, n import type(c_ptr), value :: uncompressed_ptr type(c_ptr) :: field - integer(c_int), value :: scalar_type, nx + integer(c_int), value :: scalar_type + integer(c_size_t), value :: nx end function function zfp_field_2d(uncompressed_ptr, scalar_type, nx, ny) result(field) bind(c, name="zfp_field_2d") import type(c_ptr), value :: uncompressed_ptr type(c_ptr) :: field - integer(c_int), value :: scalar_type, nx, ny + integer(c_int), value :: scalar_type + integer(c_size_t), value :: nx, ny end function function zfp_field_3d(uncompressed_ptr, scalar_type, nx, ny, nz) result(field) bind(c, name="zfp_field_3d") import type(c_ptr), value :: uncompressed_ptr type(c_ptr) :: field - integer(c_int), value :: scalar_type, nx, ny, nz + integer(c_int), value :: scalar_type + integer(c_size_t), value :: nx, ny, nz end function function zfp_field_4d(uncompressed_ptr, scalar_type, nx, ny, nz, nw) result(field) bind(c, name="zfp_field_4d") import type(c_ptr), value :: uncompressed_ptr type(c_ptr) :: field - integer(c_int), value :: scalar_type, nx, ny, nz, nw + integer(c_int), value :: scalar_type + integer(c_size_t), value :: nx, ny, nz, nw end function subroutine zfp_field_free(field) bind(c, name="zfp_field_free") @@ -321,40 +366,64 @@ function zfp_field_pointer(field) result(arr_ptr) bind(c, name="zfp_field_pointe type(c_ptr) :: arr_ptr end function + function zfp_field_begin(field) result(begin_ptr) bind(c, name="zfp_field_begin") + import + type(c_ptr), value :: field + type(c_ptr) :: begin_ptr + end function + function zfp_field_type(field) result(scalar_type) bind(c, name="zfp_field_type") import type(c_ptr), value :: field - integer(c_int) scalar_type + integer(c_int) :: scalar_type end function function zfp_field_precision(field) result(prec) bind(c, 
name="zfp_field_precision") import type(c_ptr), value :: field - integer(c_int) prec + integer(c_int) :: prec end function function zfp_field_dimensionality(field) result(dims) bind(c, name="zfp_field_dimensionality") import type(c_ptr), value :: field - integer(c_int) dims + integer(c_int) :: dims end function function zfp_field_size(field, size_arr) result(total_size) bind(c, name="zfp_field_size") import type(c_ptr), value :: field, size_arr - integer(c_size_t) total_size + integer(c_size_t) :: total_size + end function + + function zfp_field_size_bytes(field) result(byte_size) bind(c, name="zfp_field_size_bytes") + import + type(c_ptr), value :: field + integer(c_size_t) :: byte_size + end function + + function zfp_field_blocks(field) result(blocks) bind(c, name="zfp_field_blocks") + import + type(c_ptr), value :: field + integer(c_size_t) :: blocks end function function zfp_field_stride(field, stride_arr) result(is_strided) bind(c, name="zfp_field_stride") import type(c_ptr), value :: field, stride_arr - integer(c_int) is_strided + integer(c_int) :: is_strided + end function + + function zfp_field_is_contiguous(field) result(is_contiguous) bind(c, name="zfp_field_is_contiguous") + import + type(c_ptr), value :: field + integer(c_int) :: is_contiguous end function function zfp_field_metadata(field) result(encoded_metadata) bind(c, name="zfp_field_metadata") import type(c_ptr), value :: field - integer(c_int64_t) encoded_metadata + integer(c_int64_t) :: encoded_metadata end function subroutine zfp_field_set_pointer(field, arr_ptr) bind(c, name="zfp_field_set_pointer") @@ -371,56 +440,56 @@ function zfp_field_set_type(field, scalar_type) result(scalar_type_result) bind( subroutine zfp_field_set_size_1d(field, nx) bind(c, name="zfp_field_set_size_1d") import type(c_ptr), value :: field - integer(c_int) nx + integer(c_size_t) :: nx end subroutine subroutine zfp_field_set_size_2d(field, nx, ny) bind(c, name="zfp_field_set_size_2d") import type(c_ptr), value :: field 
-     integer(c_int) nx, ny
+     integer(c_size_t) :: nx, ny
    end subroutine
 
    subroutine zfp_field_set_size_3d(field, nx, ny, nz) bind(c, name="zfp_field_set_size_3d")
      import
      type(c_ptr), value :: field
-     integer(c_int) nx, ny, nz
+     integer(c_size_t) :: nx, ny, nz
    end subroutine
 
    subroutine zfp_field_set_size_4d(field, nx, ny, nz, nw) bind(c, name="zfp_field_set_size_4d")
      import
      type(c_ptr), value :: field
-     integer(c_int) nx, ny, nz, nw
+     integer(c_size_t) :: nx, ny, nz, nw
    end subroutine
 
    subroutine zfp_field_set_stride_1d(field, sx) bind(c, name="zfp_field_set_stride_1d")
      import
      type(c_ptr), value :: field
-     integer(c_int) sx
+     integer(c_ptrdiff_t), value :: sx
    end subroutine
 
    subroutine zfp_field_set_stride_2d(field, sx, sy) bind(c, name="zfp_field_set_stride_2d")
      import
      type(c_ptr), value :: field
-     integer(c_int) sx, sy
+     integer(c_ptrdiff_t), value :: sx, sy
    end subroutine
 
    subroutine zfp_field_set_stride_3d(field, sx, sy, sz) bind(c, name="zfp_field_set_stride_3d")
      import
      type(c_ptr), value :: field
-     integer(c_int) sx, sy, sz
+     integer(c_ptrdiff_t), value :: sx, sy, sz
    end subroutine
 
    subroutine zfp_field_set_stride_4d(field, sx, sy, sz, sw) bind(c, name="zfp_field_set_stride_4d")
      import
      type(c_ptr), value :: field
-     integer(c_int) sx, sy, sz, sw
+     integer(c_ptrdiff_t), value :: sx, sy, sz, sw
    end subroutine
 
    function zfp_field_set_metadata(field, encoded_metadata) result(is_success) bind(c, name="zfp_field_set_metadata")
      import
      type(c_ptr), value :: field
      integer(c_int64_t) :: encoded_metadata
-     integer(c_int) is_success
+     integer(c_int) :: is_success
    end function
 
  ! high-level API: compression and decompression
@@ -440,30 +509,25 @@ function zfp_decompress(stream, field) result(bitstream_offset_bytes) bind(c, na
 
    function zfp_write_header(stream, field, mask) result(num_bits_written) bind(c, name="zfp_write_header")
      import
      type(c_ptr), value :: stream, field
-     integer(c_int) mask
-     integer(c_size_t) num_bits_written
+     integer(c_int) :: mask
+     integer(c_size_t) :: num_bits_written
    end function
 
    function zfp_read_header(stream, field, mask) result(num_bits_read) bind(c, name="zfp_read_header")
      import
      type(c_ptr), value :: stream, field
-     integer(c_int) mask
-     integer(c_size_t) num_bits_read
+     integer(c_int) :: mask
+     integer(c_size_t) :: num_bits_read
    end function
 
-  ! low-level API: stream manipulation
-
-   subroutine zfp_stream_rewind(stream) bind(c, name="zfp_stream_rewind")
-     import
-     type(c_ptr), value :: stream
-   end subroutine
-
  end interface
 
  ! types
 
  public :: zFORp_bitstream, &
            zFORp_stream, &
-           zFORp_field
+           zFORp_field, &
+           zFORp_config
 
  ! enums
 
@@ -477,7 +541,8 @@ subroutine zfp_stream_rewind(stream) bind(c, name="zfp_stream_rewind")
            zFORp_mode_expert, &
            zFORp_mode_fixed_rate, &
            zFORp_mode_fixed_precision, &
-           zFORp_mode_fixed_accuracy
+           zFORp_mode_fixed_accuracy, &
+           zFORp_mode_reversible
 
  public :: zFORp_exec_serial, &
            zFORp_exec_omp, &
@@ -486,7 +551,8 @@ subroutine zfp_stream_rewind(stream) bind(c, name="zfp_stream_rewind")
 
  ! C macros -> constants
 
  public :: zFORp_version_major, &
            zFORp_version_minor, &
-           zFORp_version_patch
+           zFORp_version_patch, &
+           zFORp_version_tweak
 
  public :: zFORp_codec_version, &
            zFORp_library_version, &
@@ -524,10 +590,14 @@ subroutine zfp_stream_rewind(stream) bind(c, name="zfp_stream_rewind")
            zFORp_stream_close, &
            zFORp_stream_bit_stream, &
            zFORp_stream_compression_mode, &
+           zFORp_stream_rate, &
+           zFORp_stream_precision, &
+           zFORp_stream_accuracy, &
            zFORp_stream_mode, &
            zFORp_stream_params, &
            zFORp_stream_compressed_size, &
            zFORp_stream_maximum_size, &
+           zFORp_stream_rewind, &
            zFORp_stream_set_bit_stream, &
            zFORp_stream_set_reversible, &
            zFORp_stream_set_rate, &
@@ -537,6 +607,7 @@ subroutine zfp_stream_rewind(stream) bind(c, name="zfp_stream_rewind")
            zFORp_stream_set_params
 
  ! high-level API: execution policy functions
+
  public :: zFORp_stream_execution, &
            zFORp_stream_omp_threads, &
            zFORp_stream_omp_chunk_size, &
@@ -544,6 +615,15 @@ subroutine zfp_stream_rewind(stream) bind(c, name="zfp_stream_rewind")
            zFORp_stream_set_omp_threads, &
            zFORp_stream_set_omp_chunk_size
 
+  ! TODO: high-level API: compression mode and parameter settings
+
+  ! public :: zFORp_config_none, &
+  !           zFORp_config_rate, &
+  !           zFORp_config_precision, &
+  !           zFORp_config_accuracy, &
+  !           zFORp_config_reversible, &
+  !           zFORp_config_expert
+
  ! high-level API: zfp_field functions
 
  public :: zFORp_field_alloc, &
@@ -553,11 +633,15 @@ subroutine zfp_stream_rewind(stream) bind(c, name="zfp_stream_rewind")
            zFORp_field_4d, &
            zFORp_field_free, &
            zFORp_field_pointer, &
+           zFORp_field_begin, &
            zFORp_field_type, &
            zFORp_field_precision, &
            zFORp_field_dimensionality, &
            zFORp_field_size, &
+           zFORp_field_size_bytes, &
+           zFORp_field_blocks, &
            zFORp_field_stride, &
+           zFORp_field_is_contiguous, &
            zFORp_field_metadata, &
            zFORp_field_set_pointer, &
            zFORp_field_set_type, &
@@ -578,10 +662,6 @@ subroutine zfp_stream_rewind(stream) bind(c, name="zfp_stream_rewind")
            zFORp_write_header, &
            zFORp_read_header
 
-  ! low-level API: stream manipulation
-
-  public :: zFORp_stream_rewind
-
 contains
 
  ! minimal bitstream API
@@ -605,7 +685,7 @@ end subroutine zFORp_bitstream_stream_close
  function zFORp_type_size(scalar_type) result(type_size) bind(c, name="zforp_type_size")
    implicit none
    integer, intent(in) :: scalar_type
-    integer (kind=8) type_size
+    integer (kind=8) :: type_size
    type_size = zfp_type_size(int(scalar_type, c_int))
  end function zFORp_type_size
@@ -634,14 +714,36 @@ end function zFORp_stream_bit_stream
  function zFORp_stream_compression_mode(stream) result(zfp_mode) bind(c, name="zforp_stream_compression_mode")
    implicit none
    type(zFORp_stream), intent(in) :: stream
-    integer zfp_mode
+    integer :: zfp_mode
    zfp_mode = zfp_stream_compression_mode(stream%object)
  end function zFORp_stream_compression_mode
 
+  function zFORp_stream_rate(stream, dims) result(rate_result) bind(c, name="zforp_stream_rate")
+    implicit none
+    type(zFORp_stream), intent(in) :: stream
+    integer, intent(in) :: dims
+    real (kind=8) :: rate_result
+    rate_result = zfp_stream_rate(stream%object, int(dims, c_int))
+  end function zFORp_stream_rate
+
+  function zFORp_stream_precision(stream) result(prec_result) bind(c, name="zforp_stream_precision")
+    implicit none
+    type(zFORp_stream), intent(in) :: stream
+    integer :: prec_result
+    prec_result = zfp_stream_precision(stream%object)
+  end function zFORp_stream_precision
+
+  function zFORp_stream_accuracy(stream) result(acc_result) bind(c, name="zforp_stream_accuracy")
+    implicit none
+    type(zFORp_stream), intent(in) :: stream
+    real (kind=8) :: acc_result
+    acc_result = zfp_stream_accuracy(stream%object)
+  end function zFORp_stream_accuracy
+
  function zFORp_stream_mode(stream) result(encoded_mode) bind(c, name="zforp_stream_mode")
    implicit none
    type(zFORp_stream), intent(in) :: stream
-    integer (kind=8) encoded_mode
+    integer (kind=8) :: encoded_mode
    encoded_mode = zfp_stream_mode(stream%object)
  end function zFORp_stream_mode
@@ -658,7 +760,7 @@ end subroutine zFORp_stream_params
 
  function zFORp_stream_compressed_size(stream) result(compressed_size) bind(c, name="zforp_stream_compressed_size")
    implicit none
    type(zFORp_stream), intent(in) :: stream
-    integer (kind=8) compressed_size
+    integer (kind=8) :: compressed_size
    compressed_size = zfp_stream_compressed_size(stream%object)
  end function zFORp_stream_compressed_size
@@ -666,10 +768,15 @@ function zFORp_stream_maximum_size(stream, field) result(max_size) bind(c, name=
    implicit none
    type(zFORp_stream), intent(in) :: stream
    type(zFORp_field), intent(in) :: field
-    integer (kind=8) max_size
+    integer (kind=8) :: max_size
    max_size = zfp_stream_maximum_size(stream%object, field%object)
  end function zFORp_stream_maximum_size
 
+  subroutine zFORp_stream_rewind(stream) bind(c, name="zforp_stream_rewind")
+    type(zFORp_stream), intent(in) :: stream
+    call zfp_stream_rewind(stream%object)
+  end subroutine zFORp_stream_rewind
+
  subroutine zFORp_stream_set_bit_stream(stream, bs) bind(c, name="zforp_stream_set_bit_stream")
    type(zFORp_stream), intent(in) :: stream
    type(zFORp_bitstream), intent(in) :: bs
@@ -681,38 +788,38 @@ subroutine zFORp_stream_set_reversible(stream) bind(c, name="zforp_stream_set_re
    call zfp_stream_set_reversible(stream%object)
  end subroutine zFORp_stream_set_reversible
 
-  function zFORp_stream_set_rate(stream, rate, scalar_type, dims, wra) result(rate_result) bind(c, name="zforp_stream_set_rate")
+  function zFORp_stream_set_rate(stream, rate, scalar_type, dims, align) result(rate_result) bind(c, name="zforp_stream_set_rate")
    implicit none
    type(zFORp_stream), intent(in) :: stream
    real (kind=8), intent(in) :: rate
    integer, intent(in) :: scalar_type
-    integer, intent(in) :: dims, wra
+    integer, intent(in) :: dims, align
    real (kind=8) :: rate_result
    rate_result = zfp_stream_set_rate(stream%object, real(rate, c_double), &
-      int(scalar_type, c_int), int(dims, c_int), int(wra, c_int))
+      int(scalar_type, c_int), int(dims, c_int), int(align, c_int))
  end function zFORp_stream_set_rate
 
  function zFORp_stream_set_precision(stream, prec) result(prec_result) bind(c, name="zforp_stream_set_precision")
    implicit none
    type(zFORp_stream), intent(in) :: stream
    integer, intent(in) :: prec
-    integer prec_result
+    integer :: prec_result
    prec_result = zfp_stream_set_precision(stream%object, int(prec, c_int))
  end function zFORp_stream_set_precision
 
-  function zFORp_stream_set_accuracy(stream, acc) result(acc_result) bind(c, name="zforp_stream_set_accuracy")
+  function zFORp_stream_set_accuracy(stream, tolerance) result(acc_result) bind(c, name="zforp_stream_set_accuracy")
    implicit none
    type(zFORp_stream), intent(in) :: stream
-    real (kind=8), intent(in) :: acc
-    real (kind=8) acc_result
-    acc_result = zfp_stream_set_accuracy(stream%object, real(acc, c_double))
+    real (kind=8), intent(in) :: tolerance
+    real (kind=8) :: acc_result
+    acc_result = zfp_stream_set_accuracy(stream%object, real(tolerance, c_double))
  end function zFORp_stream_set_accuracy
 
  function zFORp_stream_set_mode(stream, encoded_mode) result(mode_result) bind(c, name="zforp_stream_set_mode")
    implicit none
    type(zFORp_stream), intent(in) :: stream
    integer (kind=8), intent(in) :: encoded_mode
-    integer mode_result
+    integer :: mode_result
    mode_result = zfp_stream_set_mode(stream%object, int(encoded_mode, c_int64_t))
  end function zFORp_stream_set_mode
 
@@ -721,7 +828,7 @@ function zFORp_stream_set_params(stream, minbits, maxbits, maxprec, minexp) resu
    implicit none
    type(zFORp_stream), intent(in) :: stream
    integer, intent(in) :: minbits, maxbits, maxprec, minexp
-    integer is_success
+    integer :: is_success
    is_success = zfp_stream_set_params(stream%object, &
      int(minbits, c_int), &
      int(maxbits, c_int), &
@@ -734,21 +841,21 @@ end function zFORp_stream_set_params
 
  function zFORp_stream_execution(stream) result(execution_policy) bind(c, name="zforp_stream_execution")
    implicit none
    type(zFORp_stream), intent(in) :: stream
-    integer execution_policy
+    integer :: execution_policy
    execution_policy = zfp_stream_execution(stream%object)
  end function zFORp_stream_execution
 
  function zFORp_stream_omp_threads(stream) result(thread_count) bind(c, name="zforp_stream_omp_threads")
    implicit none
    type(zFORp_stream), intent(in) :: stream
-    integer thread_count
+    integer :: thread_count
    thread_count = zfp_stream_omp_threads(stream%object)
  end function zFORp_stream_omp_threads
 
  function zFORp_stream_omp_chunk_size(stream) result(chunk_size_blocks) bind(c, name="zforp_stream_omp_chunk_size")
    implicit none
    type(zFORp_stream), intent(in) :: stream
-    integer (kind=8) chunk_size_blocks
+    integer (kind=8) :: chunk_size_blocks
    chunk_size_blocks = zfp_stream_omp_chunk_size(stream%object)
  end function zFORp_stream_omp_chunk_size
 
@@ -756,7 +863,7 @@ function zFORp_stream_set_execution(stream, execution_policy) result(is_success)
    implicit none
    type(zFORp_stream), intent(in) :: stream
    integer, intent(in) :: execution_policy
-    integer is_success
+    integer :: is_success
    is_success = zfp_stream_set_execution(stream%object, int(execution_policy, c_int))
  end function zFORp_stream_set_execution
 
@@ -764,7 +871,7 @@ function zFORp_stream_set_omp_threads(stream, thread_count) result(is_success) b
    implicit none
    type(zFORp_stream), intent(in) :: stream
    integer, intent(in) :: thread_count
-    integer is_success
+    integer :: is_success
    is_success = zfp_stream_set_omp_threads(stream%object, int(thread_count, c_int))
  end function zFORp_stream_set_omp_threads
 
@@ -773,15 +880,24 @@ function zFORp_stream_set_omp_chunk_size(stream, chunk_size) result(is_success) 
    implicit none
    type(zFORp_stream), intent(in) :: stream
    integer, intent(in) :: chunk_size
-    integer is_success
+    integer :: is_success
    is_success = zfp_stream_set_omp_chunk_size(stream%object, int(chunk_size, c_int))
  end function zFORp_stream_set_omp_chunk_size
 
+  ! TODO: high-level API: compression mode and parameter settings
+
+  ! zfp_config_none
+  ! zfp_config_rate
+  ! zfp_config_precision
+  ! zfp_config_accuracy
+  ! zfp_config_reversible
+  ! zfp_config_expert
+
  ! high-level API: zfp_field functions
 
  function zFORp_field_alloc() result(field) bind(c, name="zforp_field_alloc")
    implicit none
-    type(zFORp_field) field
+    type(zFORp_field) :: field
    field%object = zfp_field_alloc()
  end function zFORp_field_alloc
 
@@ -789,38 +905,38 @@ function zFORp_field_1d(uncompressed_ptr, scalar_type, nx) result(field) bind(c,
    implicit none
    type(c_ptr), intent(in) :: uncompressed_ptr
    integer, intent(in) :: scalar_type, nx
-    type(zFORp_field) field
+    type(zFORp_field) :: field
    field%object = zfp_field_1d(uncompressed_ptr, int(scalar_type, c_int), &
-      int(nx, c_int))
+      int(nx, c_size_t))
  end function zFORp_field_1d
 
  function zFORp_field_2d(uncompressed_ptr, scalar_type, nx, ny) result(field) bind(c, name="zforp_field_2d")
    implicit none
    type(c_ptr), intent(in) :: uncompressed_ptr
    integer, intent(in) :: scalar_type, nx, ny
-    type(zFORp_field) field
+    type(zFORp_field) :: field
    field%object = zfp_field_2d(uncompressed_ptr, int(scalar_type, c_int), &
-      int(nx, c_int), int(ny, c_int))
+      int(nx, c_size_t), int(ny, c_size_t))
  end function zFORp_field_2d
 
  function zFORp_field_3d(uncompressed_ptr, scalar_type, nx, ny, nz) result(field) bind(c, name="zforp_field_3d")
    implicit none
    type(c_ptr), intent(in) :: uncompressed_ptr
    integer, intent(in) :: scalar_type, nx, ny, nz
-    type(zFORp_field) field
+    type(zFORp_field) :: field
    field%object = zfp_field_3d(uncompressed_ptr, int(scalar_type, c_int), &
-      int(nx, c_int), int(ny, c_int), &
-      int(nz, c_int))
+      int(nx, c_size_t), int(ny, c_size_t), &
+      int(nz, c_size_t))
  end function zFORp_field_3d
 
  function zFORp_field_4d(uncompressed_ptr, scalar_type, nx, ny, nz, nw) result(field) bind(c, name="zforp_field_4d")
    implicit none
    type(c_ptr), intent(in) :: uncompressed_ptr
    integer, intent(in) :: scalar_type, nx, ny, nz, nw
-    type(zFORp_field) field
+    type(zFORp_field) :: field
    field%object = zfp_field_4d(uncompressed_ptr, int(scalar_type, c_int), &
-      int(nx, c_int), int(ny, c_int), &
-      int(nz, c_int), int(nw, c_int))
+      int(nx, c_size_t), int(ny, c_size_t), &
+      int(nz, c_size_t), int(nw, c_size_t))
  end function zFORp_field_4d
 
  subroutine zFORp_field_free(field) bind(c, name="zforp_field_free")
@@ -832,28 +948,35 @@ end subroutine zFORp_field_free
 
  function zFORp_field_pointer(field) result(arr_ptr) bind(c, name="zforp_field_pointer")
    implicit none
    type(zFORp_field), intent(in) :: field
-    type(c_ptr) arr_ptr
+    type(c_ptr) :: arr_ptr
    arr_ptr = zfp_field_pointer(field%object)
  end function zFORp_field_pointer
 
+  function zFORp_field_begin(field) result(begin_ptr) bind(c, name="zforp_field_begin")
+    implicit none
+    type(zFORp_field), intent(in) :: field
+    type(c_ptr) :: begin_ptr
+    begin_ptr = zfp_field_begin(field%object)
+  end function zFORp_field_begin
+
  function zFORp_field_type(field) result(scalar_type) bind(c, name="zforp_field_type")
    implicit none
    type(zFORp_field), intent(in) :: field
-    integer scalar_type
+    integer :: scalar_type
    scalar_type = zfp_field_type(field%object)
  end function zFORp_field_type
 
  function zFORp_field_precision(field) result(prec) bind(c, name="zforp_field_precision")
    implicit none
    type(zFORp_field), intent(in) :: field
-    integer prec
+    integer :: prec
    prec = zfp_field_precision(field%object)
  end function zFORp_field_precision
 
  function zFORp_field_dimensionality(field) result(dims) bind(c, name="zforp_field_dimensionality")
    implicit none
    type(zFORp_field), intent(in) :: field
-    integer dims
+    integer :: dims
    dims = zfp_field_dimensionality(field%object)
  end function zFORp_field_dimensionality
 
@@ -861,22 +984,43 @@ function zFORp_field_size(field, size_arr) result(total_size) bind(c, name="zfor
    implicit none
    type(zFORp_field), intent(in) :: field
    integer, dimension(4), target, intent(inout) :: size_arr
-    integer (kind=8) total_size
+    integer (kind=8) :: total_size
    total_size = zfp_field_size(field%object, c_loc(size_arr))
  end function zFORp_field_size
 
+  function zFORp_field_size_bytes(field) result(byte_size) bind(c, name="zforp_field_size_bytes")
+    implicit none
+    type(zFORp_field), intent(in) :: field
+    integer (kind=8) :: byte_size
+    byte_size = zfp_field_size_bytes(field%object)
+  end function zFORp_field_size_bytes
+
+  function zFORp_field_blocks(field) result(blocks) bind(c, name="zforp_field_blocks")
+    implicit none
+    type(zFORp_field), intent(in) :: field
+    integer (kind=8) :: blocks
+    blocks = zfp_field_blocks(field%object)
+  end function zFORp_field_blocks
+
  function zFORp_field_stride(field, stride_arr) result(is_strided) bind(c, name="zforp_field_stride")
    implicit none
    type(zFORp_field), intent(in) :: field
    integer, dimension(4), target, intent(inout) :: stride_arr
-    integer is_strided
+    integer :: is_strided
    is_strided = zfp_field_stride(field%object, c_loc(stride_arr))
  end function zFORp_field_stride
 
+  function zFORp_field_is_contiguous(field) result(is_contiguous) bind(c, name="zforp_field_is_contiguous")
+    implicit none
+    type(zFORp_field), intent(in) :: field
+    integer :: is_contiguous
+    is_contiguous = zfp_field_is_contiguous(field%object)
+  end function zFORp_field_is_contiguous
+
  function zFORp_field_metadata(field) result(encoded_metadata) bind(c, name="zforp_field_metadata")
    implicit none
    type(zFORp_field), intent(in) :: field
-    integer (kind=8) encoded_metadata
+    integer (kind=8) :: encoded_metadata
    encoded_metadata = zfp_field_metadata(field%object)
  end function zFORp_field_metadata
 
@@ -890,63 +1034,64 @@ function zFORp_field_set_type(field, scalar_type) result(scalar_type_result) bin
    implicit none
    type(zFORp_field), intent(in) :: field
    integer, intent(in) :: scalar_type
-    integer scalar_type_result
+    integer :: scalar_type_result
    scalar_type_result = zfp_field_set_type(field%object, int(scalar_type, c_int))
  end function zFORp_field_set_type
 
  subroutine zFORp_field_set_size_1d(field, nx) bind(c, name="zforp_field_set_size_1d")
    type(zFORp_field), intent(in) :: field
    integer, intent(in) :: nx
-    call zfp_field_set_size_1d(field%object, int(nx, c_int))
+    call zfp_field_set_size_1d(field%object, int(nx, c_size_t))
  end subroutine zFORp_field_set_size_1d
 
  subroutine zFORp_field_set_size_2d(field, nx, ny) bind(c, name="zforp_field_set_size_2d")
    type(zFORp_field), intent(in) :: field
    integer, intent(in) :: nx, ny
-    call zfp_field_set_size_2d(field%object, int(nx, c_int), int(ny, c_int))
+    call zfp_field_set_size_2d(field%object, int(nx, c_size_t), int(ny, c_size_t))
  end subroutine zFORp_field_set_size_2d
 
  subroutine zFORp_field_set_size_3d(field, nx, ny, nz) bind(c, name="zforp_field_set_size_3d")
    type(zFORp_field), intent(in) :: field
    integer, intent(in) :: nx, ny, nz
-    call zfp_field_set_size_3d(field%object, int(nx, c_int), int(ny, c_int), int(nz, c_int))
+    call zfp_field_set_size_3d(field%object, int(nx, c_size_t), int(ny, c_size_t), int(nz, c_size_t))
  end subroutine zFORp_field_set_size_3d
 
  subroutine zFORp_field_set_size_4d(field, nx, ny, nz, nw) bind(c, name="zforp_field_set_size_4d")
    type(zFORp_field), intent(in) :: field
    integer, intent(in) :: nx, ny, nz, nw
-    call zfp_field_set_size_4d(field%object, int(nx, c_int), int(ny, c_int), int(nz, c_int), int(nw, c_int))
+    call zfp_field_set_size_4d(field%object, int(nx, c_size_t), int(ny, c_size_t), int(nz, c_size_t), int(nw, c_size_t))
  end subroutine zFORp_field_set_size_4d
 
  subroutine zFORp_field_set_stride_1d(field, sx) bind(c, name="zforp_field_set_stride_1d")
    type(zFORp_field), intent(in) :: field
    integer, intent(in) :: sx
-    call zfp_field_set_stride_1d(field%object, int(sx, c_int))
+    call zfp_field_set_stride_1d(field%object, int(sx, c_ptrdiff_t))
  end subroutine zFORp_field_set_stride_1d
 
  subroutine zFORp_field_set_stride_2d(field, sx, sy) bind(c, name="zforp_field_set_stride_2d")
    type(zFORp_field), intent(in) :: field
    integer, intent(in) :: sx, sy
-    call zfp_field_set_stride_2d(field%object, int(sx, c_int), int(sy, c_int))
+    call zfp_field_set_stride_2d(field%object, int(sx, c_ptrdiff_t), int(sy, c_ptrdiff_t))
  end subroutine zFORp_field_set_stride_2d
subroutine zFORp_field_set_stride_3d(field, sx, sy, sz) bind(c, name="zforp_field_set_stride_3d") type(zFORp_field), intent(in) :: field integer, intent(in) :: sx, sy, sz - call zfp_field_set_stride_3d(field%object, int(sx, c_int), int(sy, c_int), int(sz, c_int)) + call zfp_field_set_stride_3d(field%object, int(sx, c_ptrdiff_t), int(sy, c_ptrdiff_t), int(sz, c_ptrdiff_t)) end subroutine zFORp_field_set_stride_3d subroutine zFORp_field_set_stride_4d(field, sx, sy, sz, sw) bind(c, name="zforp_field_set_stride_4d") type(zFORp_field), intent(in) :: field integer, intent(in) :: sx, sy, sz, sw - call zfp_field_set_stride_4d(field%object, int(sx, c_int), int(sy, c_int), int(sz, c_int), int(sw, c_int)) + call zfp_field_set_stride_4d(field%object, int(sx, c_ptrdiff_t), int(sy, c_ptrdiff_t), & + int(sz, c_ptrdiff_t), int(sw, c_ptrdiff_t)) end subroutine zFORp_field_set_stride_4d function zFORp_field_set_metadata(field, encoded_metadata) result(is_success) bind(c, name="zforp_field_set_metadata") implicit none type(zFORp_field), intent(in) :: field integer (kind=8), intent(in) :: encoded_metadata - integer is_success + integer :: is_success is_success = zfp_field_set_metadata(field%object, int(encoded_metadata, c_int64_t)) end function zFORp_field_set_metadata @@ -956,7 +1101,7 @@ function zFORp_compress(stream, field) result(bitstream_offset_bytes) bind(c, na implicit none type(zFORp_stream), intent(in) :: stream type(zFORp_field), intent(in) :: field - integer (kind=8) bitstream_offset_bytes + integer (kind=8) :: bitstream_offset_bytes bitstream_offset_bytes = zfp_compress(stream%object, field%object) end function zFORp_compress @@ -964,7 +1109,7 @@ function zFORp_decompress(stream, field) result(bitstream_offset_bytes) bind(c, implicit none type(zFORp_stream), intent(in) :: stream type(zFORp_field), intent(in) :: field - integer (kind=8) bitstream_offset_bytes + integer (kind=8) :: bitstream_offset_bytes bitstream_offset_bytes = zfp_decompress(stream%object, field%object) 
 end function zFORp_decompress

@@ -973,7 +1118,7 @@ function zFORp_write_header(stream, field, mask) result(num_bits_written) bind(c
     type(zFORp_stream), intent(in) :: stream
     type(zFORp_field), intent(in) :: field
     integer, intent(in) :: mask
-    integer (kind=8) num_bits_written
+    integer (kind=8) :: num_bits_written
     num_bits_written = zfp_write_header(stream%object, field%object, int(mask, c_int))
   end function zFORp_write_header

@@ -982,15 +1127,8 @@ function zFORp_read_header(stream, field, mask) result(num_bits_read) bind(c, na
     type(zFORp_stream), intent(in) :: stream
     type(zFORp_field), intent(in) :: field
     integer, intent(in) :: mask
-    integer (kind=8) num_bits_read
+    integer (kind=8) :: num_bits_read
     num_bits_read = zfp_read_header(stream%object, field%object, int(mask, c_int))
   end function zFORp_read_header

-  ! low-level API: stream manipulation
-
-  subroutine zFORp_stream_rewind(stream) bind(c, name="zforp_stream_rewind")
-    type(zFORp_stream), intent(in) :: stream
-    call zfp_stream_rewind(stream%object)
-  end subroutine zFORp_stream_rewind
-
-end module zFORp_module
+end module zfp
diff --git a/include/zfp.h b/include/zfp.h
index b2cc25d0..5e87a324 100644
--- a/include/zfp.h
+++ b/include/zfp.h
@@ -1,109 +1,43 @@
 /*
-** Copyright (c) 2014-2019, Lawrence Livermore National Security, LLC.
-** Produced at the Lawrence Livermore National Laboratory.
-** Authors: Peter Lindstrom, Markus Salasoo, Matt Larsen, Stephen Herbein.
-** LLNL-CODE-663824.
-** All rights reserved.
-**
-** This file is part of the zfp library.
-** For details, see http://computation.llnl.gov/casc/zfp/.
-**
-** Redistribution and use in source and binary forms, with or without
-** modification, are permitted provided that the following conditions are met:
-**
-** 1. Redistributions of source code must retain the above copyright notice,
-** this list of conditions and the disclaimer below.
-**
-** 2. Redistributions in binary form must reproduce the above copyright notice,
-** this list of conditions and the disclaimer (as noted below) in the
-** documentation and/or other materials provided with the distribution.
-**
-** 3. Neither the name of the LLNS/LLNL nor the names of its contributors may
-** be used to endorse or promote products derived from this software without
-** specific prior written permission.
-**
-** THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-** AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-** IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-** ARE DISCLAIMED. IN NO EVENT SHALL LAWRENCE LIVERMORE NATIONAL SECURITY,
-** LLC, THE U.S. DEPARTMENT OF ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
-** INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-** (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-** LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-** ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
-** THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-**
-**
-** Additional BSD Notice
-**
-** 1. This notice is required to be provided under our contract with the U.S.
-** Department of Energy (DOE). This work was produced at Lawrence Livermore
-** National Laboratory under Contract No. DE-AC52-07NA27344 with the DOE.
-
-** 2. Neither the United States Government nor Lawrence Livermore National
-** Security, LLC nor any of their employees, makes any warranty, express or
-** implied, or assumes any liability or responsibility for the accuracy,
-** completeness, or usefulness of any information, apparatus, product, or
-** process disclosed, or represents that its use would not infringe
-** privately-owned rights.
-**
-** 3. Also, reference herein to any specific commercial products, process, or
-** services by trade name, trademark, manufacturer or otherwise does not
-** necessarily constitute or imply its endorsement, recommendation, or
-** favoring by the United States Government or Lawrence Livermore National
-** Security, LLC. The views and opinions of authors expressed herein do not
-** necessarily state or reflect those of the United States Government or
-** Lawrence Livermore National Security, LLC, and shall not be used for
-** advertising or product endorsement purposes.
+** Copyright (c) 2014-2023, Lawrence Livermore National Security, LLC and
+** other zfp project contributors. See the top-level LICENSE file for details.
+** SPDX-License-Identifier: BSD-3-Clause
 */

 #ifndef ZFP_H
 #define ZFP_H

-#include "zfp/types.h"
-#include "zfp/system.h"
-#include "bitstream.h"
+#include "zfp/bitstream.h"
+#include "zfp/version.h"
+#include "zfp/internal/zfp/system.h"
+#include "zfp/internal/zfp/types.h"

 /* macros ------------------------------------------------------------------ */

-/* stringification */
-#define _zfp_str_(x) # x
-#define _zfp_str(x) _zfp_str_(x)
-
-/* library version information */
-#define ZFP_VERSION_MAJOR 0 /* library major version number */
-#define ZFP_VERSION_MINOR 5 /* library minor version number */
-#define ZFP_VERSION_PATCH 5 /* library patch version number */
-#define ZFP_VERSION_RELEASE ZFP_VERSION_PATCH
-
-/* codec version number (see also zfp_codec_version) */
-#define ZFP_CODEC 5
-
-/* library version number (see also zfp_library_version) */
-#define ZFP_VERSION \
-  ((ZFP_VERSION_MAJOR << 8) + \
-   (ZFP_VERSION_MINOR << 4) + \
-   (ZFP_VERSION_PATCH << 0))
-
-/* library version string (see also zfp_version_string) */
-#define ZFP_VERSION_STRING \
-  _zfp_str(ZFP_VERSION_MAJOR) "." \
-  _zfp_str(ZFP_VERSION_MINOR) "." \
-  _zfp_str(ZFP_VERSION_PATCH)
-
 /* default compression parameters */
 #define ZFP_MIN_BITS     1 /* minimum number of bits per block */
-#define ZFP_MAX_BITS 16657 /* maximum number of bits per block */
+#define ZFP_MAX_BITS 16658 /* maximum number of bits per block */
 #define ZFP_MAX_PREC    64 /* maximum precision supported */
 #define ZFP_MIN_EXP  -1074 /* minimum floating-point base-2 exponent */

 /* header masks (enable via bitwise or; reader must use same mask) */
+#define ZFP_HEADER_NONE  0x0u /* no header */
 #define ZFP_HEADER_MAGIC 0x1u /* embed 64-bit magic */
 #define ZFP_HEADER_META  0x2u /* embed 52-bit field metadata */
 #define ZFP_HEADER_MODE  0x4u /* embed 12- or 64-bit compression mode */
 #define ZFP_HEADER_FULL  0x7u /* embed all of the above */

+/* bit masks for specifying storage class */
+#define ZFP_DATA_UNUSED  0x01u /* allocated but unused storage */
+#define ZFP_DATA_PADDING 0x02u /* padding for alignment purposes */
+#define ZFP_DATA_META    0x04u /* class members and other fixed-size storage */
+#define ZFP_DATA_MISC    0x08u /* miscellaneous uncategorized storage */
+#define ZFP_DATA_PAYLOAD 0x10u /* compressed data */
+#define ZFP_DATA_INDEX   0x20u /* variable-rate block index information */
+#define ZFP_DATA_CACHE   0x40u /* uncompressed cached data */
+#define ZFP_DATA_HEADER  0x80u /* header information */
+#define ZFP_DATA_ALL     0xffu /* all storage */
+
 /* field metadata indeterminate state and error code */
 #define ZFP_META_NULL (UINT64C(-1))

@@ -115,8 +49,21 @@
 #define ZFP_HEADER_MAX_BITS 148 /* max number of header bits */
 #define ZFP_MODE_SHORT_MAX ((1u << ZFP_MODE_SHORT_BITS) - 2)

+/* rounding mode for reducing bias; see build option ZFP_ROUNDING_MODE */
+#define ZFP_ROUND_FIRST (-1) /* round during compression */
+#define ZFP_ROUND_NEVER   0  /* never round */
+#define ZFP_ROUND_LAST    1  /* round during decompression */
+
 /* types ------------------------------------------------------------------- */

+/* Boolean constants */
+enum {
+  zfp_false = 0,         /* false */
+  zfp_true = !zfp_false  /* true */
+};
+
+typedef int zfp_bool; /* Boolean type */
+
 /* execution policy */
 typedef enum {
   zfp_exec_serial = 0, /* serial execution (default) */
@@ -130,14 +77,9 @@ typedef struct {
   uint chunk_size; /* number of blocks per chunk (1D only) */
 } zfp_exec_params_omp;

-/* execution parameters */
-typedef union {
-  zfp_exec_params_omp omp; /* OpenMP parameters */
-} zfp_exec_params;
-
 typedef struct {
   zfp_exec_policy policy; /* execution policy (serial, omp, ...) */
-  zfp_exec_params params; /* execution parameters */
+  void* params;           /* execution parameters */
 } zfp_execution;

 /* compressed stream; use accessors to get/set members */
@@ -160,6 +102,22 @@ typedef enum {
   zfp_mode_reversible = 5 /* reversible (lossless) mode */
 } zfp_mode;

+/* compression mode and parameter settings */
+typedef struct {
+  zfp_mode mode;      /* compression mode */
+  union {
+    double rate;      /* compressed bits/value (negative for word alignment) */
+    uint precision;   /* uncompressed bits/value */
+    double tolerance; /* absolute error tolerance */
+    struct {
+      uint minbits;   /* min number of compressed bits/block */
+      uint maxbits;   /* max number of compressed bits/block */
+      uint maxprec;   /* max number of uncompressed bits/value */
+      int minexp;     /* min floating point bit plane number to store */
+    } expert;         /* expert mode arguments */
+  } arg;              /* arguments corresponding to compression mode */
+} zfp_config;
+
 /* scalar type */
 typedef enum {
   zfp_type_none = 0, /* unspecified type */
@@ -171,10 +129,10 @@ typedef enum {

 /* uncompressed array; use accessors to get/set members */
 typedef struct {
-  zfp_type type;       /* scalar type (e.g. int32, double) */
-  uint nx, ny, nz, nw; /* sizes (zero for unused dimensions) */
-  int sx, sy, sz, sw;  /* strides (zero for contiguous array a[nw][nz][ny][nx]) */
-  void* data;          /* pointer to array data */
+  zfp_type type;            /* scalar type (e.g. int32, double) */
+  size_t nx, ny, nz, nw;    /* sizes (zero for unused dimensions) */
+  ptrdiff_t sx, sy, sz, sw; /* strides (zero for contiguous array a[nw][nz][ny][nx]) */
+  void* data;               /* pointer to array data */
 } zfp_field;

 #ifdef __cplusplus
@@ -216,12 +174,31 @@ zfp_stream_bit_stream(
   const zfp_stream* stream /* compressed stream */
 );

-/* returns enum of compression mode */
-zfp_mode /* enum for compression mode */
+/* enumerated compression mode */
+zfp_mode /* compression mode or zfp_mode_null if not set */
 zfp_stream_compression_mode(
   const zfp_stream* stream /* compressed stream */
 );

+/* rate in compressed bits/scalar (when in fixed-rate mode) */
+double /* rate or zero upon failure */
+zfp_stream_rate(
+  const zfp_stream* stream, /* compressed stream */
+  uint dims                 /* array dimensionality (1, 2, 3, or 4) */
+);
+
+/* precision in uncompressed bits/scalar (when in fixed-precision mode) */
+uint /* precision or zero upon failure */
+zfp_stream_precision(
+  const zfp_stream* stream /* compressed stream */
+);
+
+/* accuracy as absolute error tolerance (when in fixed-accuracy mode) */
+double /* tolerance or zero upon failure */
+zfp_stream_accuracy(
+  const zfp_stream* stream /* compressed stream */
+);
+
 /* get all compression parameters in a compact representation */
 uint64 /* 12- or 64-bit encoding of parameters */
 zfp_stream_mode(
@@ -279,7 +256,7 @@ zfp_stream_set_rate(
   double rate,   /* desired rate in compressed bits/scalar */
   zfp_type type, /* scalar type to compress */
   uint dims,     /* array dimensionality (1, 2, 3, or 4) */
-  int wra        /* nonzero if write random access is needed */
+  zfp_bool align /* word-aligned blocks, e.g., for write random access */
 );

 /* set precision in uncompressed bits/scalar (fixed-precision mode) */
@@ -304,7 +281,7 @@ zfp_stream_set_mode(
 );

 /* set all parameters (expert mode); leaves stream intact on failure */
-int      /* nonzero upon success */
+zfp_bool /* true upon success */
 zfp_stream_set_params(
   zfp_stream* stream, /* compressed stream */
   uint minbits,       /* minimum number of bits per 4^d block */
@@ -334,38 +311,76 @@ zfp_stream_omp_chunk_size(
 );

 /* set execution policy */
-int      /* nonzero upon success */
+zfp_bool /* true upon success */
 zfp_stream_set_execution(
   zfp_stream* stream,    /* compressed stream */
   zfp_exec_policy policy /* execution policy */
 );

 /* set OpenMP execution policy and number of threads */
-int      /* nonzero upon success */
+zfp_bool /* true upon success */
 zfp_stream_set_omp_threads(
   zfp_stream* stream, /* compressed stream */
   uint threads        /* number of OpenMP threads to use (0 for default) */
 );

 /* set OpenMP execution policy and number of blocks per chunk (1D only) */
-int      /* nonzero upon success */
+zfp_bool /* true upon success */
 zfp_stream_set_omp_chunk_size(
   zfp_stream* stream, /* compressed stream */
   uint chunk_size     /* number of blocks per chunk (0 for default) */
 );

+/* high-level API: compression mode and parameter settings ----------------- */
+
+/* unspecified configuration */
+zfp_config /* compression mode and parameter settings */
+zfp_config_none(void);
+
+/* fixed-rate configuration */
+zfp_config /* compression mode and parameter settings */
+zfp_config_rate(
+  double rate,   /* desired rate in compressed bits/scalar */
+  zfp_bool align /* word-aligned blocks, e.g., for write random access */
+);
+
+/* fixed-precision configuration */
+zfp_config /* compression mode and parameter settings */
+zfp_config_precision(
+  uint precision /* desired precision in uncompressed bits/scalar */
+);
+
+/* fixed-accuracy configuration */
+zfp_config /* compression mode and parameter settings */
+zfp_config_accuracy(
+  double tolerance /* desired error tolerance */
+);
+
+/* reversible (lossless) configuration */
+zfp_config /* compression mode and parameter settings */
+zfp_config_reversible(void);
+
+/* expert configuration */
+zfp_config /* compression mode and parameter settings */
+zfp_config_expert(
+  uint minbits, /* minimum number of bits per 4^d block */
+  uint maxbits, /* maximum number of bits per 4^d block */
+  uint maxprec, /* maximum precision (# bit planes coded) */
+  int minexp    /* minimum base-2 exponent; error <= 2^minexp */
+);
+
 /* high-level API: uncompressed array construction/destruction ------------- */

 /* allocate field struct */
 zfp_field* /* pointer to default initialized field */
-zfp_field_alloc();
+zfp_field_alloc(void);

 /* allocate metadata for 1D field f[nx] */
 zfp_field* /* allocated field metadata */
 zfp_field_1d(
   void* pointer, /* pointer to uncompressed scalars (may be NULL) */
   zfp_type type, /* scalar type */
-  uint nx        /* number of scalars */
+  size_t nx      /* number of scalars */
 );

 /* allocate metadata for 2D field f[ny][nx] */
@@ -373,8 +388,8 @@ zfp_field* /* allocated field metadata */
 zfp_field_2d(
   void* pointer, /* pointer to uncompressed scalars (may be NULL) */
   zfp_type type, /* scalar type */
-  uint nx,       /* number of scalars in x dimension */
-  uint ny        /* number of scalars in y dimension */
+  size_t nx,     /* number of scalars in x dimension */
+  size_t ny      /* number of scalars in y dimension */
 );

 /* allocate metadata for 3D field f[nz][ny][nx] */
@@ -382,9 +397,9 @@ zfp_field* /* allocated field metadata */
 zfp_field_3d(
   void* pointer, /* pointer to uncompressed scalars (may be NULL) */
   zfp_type type, /* scalar type */
-  uint nx,       /* number of scalars in x dimension */
-  uint ny,       /* number of scalars in y dimension */
-  uint nz        /* number of scalars in z dimension */
+  size_t nx,     /* number of scalars in x dimension */
+  size_t ny,     /* number of scalars in y dimension */
+  size_t nz      /* number of scalars in z dimension */
 );

 /* allocate metadata for 4D field f[nw][nz][ny][nx] */
@@ -392,10 +407,10 @@ zfp_field* /* allocated field metadata */
 zfp_field_4d(
   void* pointer, /* pointer to uncompressed scalars (may be NULL) */
   zfp_type type, /* scalar type */
-  uint nx,       /* number of scalars in x dimension */
-  uint ny,       /* number of scalars in y dimension */
-  uint nz,       /* number of scalars in z dimension */
-  uint nw        /* number of scalars in w dimension */
+  size_t nx,     /* number of scalars in x dimension */
+  size_t ny,     /* number of scalars in y dimension */
+  size_t nz,     /* number of scalars in z dimension */
+  size_t nw      /* number of scalars in w dimension */
 );

 /* deallocate field metadata */
@@ -412,6 +427,12 @@ zfp_field_pointer(
   const zfp_field* field /* field metadata */
 );

+/* pointer to lowest memory address spanned by field */
+void*
+zfp_field_begin(
+  const zfp_field* field /* field metadata */
+);
+
 /* field scalar type */
 zfp_type /* scalar type */
 zfp_field_type(
@@ -424,7 +445,7 @@ zfp_field_precision(
   const zfp_field* field /* field metadata */
 );

-/* field dimensionality (1, 2, or 3) */
+/* field dimensionality (1, 2, 3, or 4) */
 uint /* number of dimensions */
 zfp_field_dimensionality(
   const zfp_field* field /* field metadata */
@@ -434,14 +455,32 @@ zfp_field_dimensionality(
 size_t /* total number of scalars */
 zfp_field_size(
   const zfp_field* field, /* field metadata */
-  uint* size              /* number of scalars per dimension (may be NULL) */
+  size_t* size            /* number of scalars per dimension (may be NULL) */
+);
+
+/* number of bytes spanned by field data including gaps (if any) */
+size_t
+zfp_field_size_bytes(
+  const zfp_field* field /* field metadata */
+);
+
+/* field size in number of blocks */
+size_t /* total number of blocks */
+zfp_field_blocks(
+  const zfp_field* field /* field metadata */
 );

 /* field strides per dimension */
-int      /* zero if array is contiguous */
+zfp_bool /* true if array is not contiguous */
 zfp_field_stride(
   const zfp_field* field, /* field metadata */
-  int* stride             /* stride in scalars per dimension (may be NULL) */
+  ptrdiff_t* stride       /* stride in scalars per dimension (may be NULL) */
+);
+
+/* field contiguity test */
+zfp_bool /* true if field layout is contiguous */
+zfp_field_is_contiguous(
+  const zfp_field* field /* field metadata */
 );

 /* field scalar type and dimensions */
@@ -470,72 +509,72 @@ zfp_field_set_type(

 void
 zfp_field_set_size_1d(
   zfp_field* field, /* field metadata */
-  uint nx           /* number of scalars */
+  size_t nx         /* number of scalars */
 );

 /* set 2D field size */
 void
 zfp_field_set_size_2d(
   zfp_field* field, /* field metadata */
-  uint nx,          /* number of scalars in x dimension */
-  uint ny           /* number of scalars in y dimension */
+  size_t nx,        /* number of scalars in x dimension */
+  size_t ny         /* number of scalars in y dimension */
 );

 /* set 3D field size */
 void
 zfp_field_set_size_3d(
   zfp_field* field, /* field metadata */
-  uint nx,          /* number of scalars in x dimension */
-  uint ny,          /* number of scalars in y dimension */
-  uint nz           /* number of scalars in z dimension */
+  size_t nx,        /* number of scalars in x dimension */
+  size_t ny,        /* number of scalars in y dimension */
+  size_t nz         /* number of scalars in z dimension */
 );

 /* set 4D field size */
 void
 zfp_field_set_size_4d(
   zfp_field* field, /* field metadata */
-  uint nx,          /* number of scalars in x dimension */
-  uint ny,          /* number of scalars in y dimension */
-  uint nz,          /* number of scalars in z dimension */
-  uint nw           /* number of scalars in w dimension */
+  size_t nx,        /* number of scalars in x dimension */
+  size_t ny,        /* number of scalars in y dimension */
+  size_t nz,        /* number of scalars in z dimension */
+  size_t nw         /* number of scalars in w dimension */
 );

 /* set 1D field stride in number of scalars */
 void
 zfp_field_set_stride_1d(
   zfp_field* field, /* field metadata */
-  int sx            /* stride in number of scalars: &f[1] - &f[0] */
+  ptrdiff_t sx      /* stride in number of scalars: &f[1] - &f[0] */
 );

 /* set 2D field strides in number of scalars */
 void
 zfp_field_set_stride_2d(
   zfp_field* field, /* field metadata */
-  int sx,           /* stride in x dimension: &f[0][1] - &f[0][0] */
-  int sy            /* stride in y dimension: &f[1][0] - &f[0][0] */
+  ptrdiff_t sx,     /* stride in x dimension: &f[0][1] - &f[0][0] */
+  ptrdiff_t sy      /* stride in y dimension: &f[1][0] - &f[0][0] */
 );

 /* set 3D field strides in number of scalars */
 void
 zfp_field_set_stride_3d(
   zfp_field* field, /* field metadata */
-  int sx,           /* stride in x dimension: &f[0][0][1] - &f[0][0][0] */
-  int sy,           /* stride in y dimension: &f[0][1][0] - &f[0][0][0] */
-  int sz            /* stride in z dimension: &f[1][0][0] - &f[0][0][0] */
+  ptrdiff_t sx,     /* stride in x dimension: &f[0][0][1] - &f[0][0][0] */
+  ptrdiff_t sy,     /* stride in y dimension: &f[0][1][0] - &f[0][0][0] */
+  ptrdiff_t sz      /* stride in z dimension: &f[1][0][0] - &f[0][0][0] */
 );

 /* set 4D field strides in number of scalars */
 void
 zfp_field_set_stride_4d(
   zfp_field* field, /* field metadata */
-  int sx,           /* stride in x dimension: &f[0][0][0][1] - &f[0][0][0][0] */
-  int sy,           /* stride in y dimension: &f[0][0][1][0] - &f[0][0][0][0] */
-  int sz,           /* stride in z dimension: &f[0][1][0][0] - &f[0][0][0][0] */
-  int sw            /* stride in w dimension: &f[1][0][0][0] - &f[0][0][0][0] */
+  ptrdiff_t sx,     /* stride in x dimension: &f[0][0][0][1] - &f[0][0][0][0] */
+  ptrdiff_t sy,     /* stride in y dimension: &f[0][0][1][0] - &f[0][0][0][0] */
+  ptrdiff_t sz,     /* stride in z dimension: &f[0][1][0][0] - &f[0][0][0][0] */
+  ptrdiff_t sw      /* stride in w dimension: &f[1][0][0][0] - &f[0][0][0][0] */
 );

 /* set field scalar type and dimensions */
-int      /* nonzero upon success */
+zfp_bool /* true upon success */
 zfp_field_set_metadata(
   zfp_field* field, /* field metadata */
   uint64 meta       /* compact 52-bit encoding of metadata */
@@ -600,68 +639,68 @@ needed for the compressed block.
 */

 /* encode 1D contiguous block of 4 values */
-uint zfp_encode_block_int32_1(zfp_stream* stream, const int32* block);
-uint zfp_encode_block_int64_1(zfp_stream* stream, const int64* block);
-uint zfp_encode_block_float_1(zfp_stream* stream, const float* block);
-uint zfp_encode_block_double_1(zfp_stream* stream, const double* block);
+size_t zfp_encode_block_int32_1(zfp_stream* stream, const int32* block);
+size_t zfp_encode_block_int64_1(zfp_stream* stream, const int64* block);
+size_t zfp_encode_block_float_1(zfp_stream* stream, const float* block);
+size_t zfp_encode_block_double_1(zfp_stream* stream, const double* block);

 /* encode 1D complete or partial block from strided array */
-uint zfp_encode_block_strided_int32_1(zfp_stream* stream, const int32* p, int sx);
-uint zfp_encode_block_strided_int64_1(zfp_stream* stream, const int64* p, int sx);
-uint zfp_encode_block_strided_float_1(zfp_stream* stream, const float* p, int sx);
-uint zfp_encode_block_strided_double_1(zfp_stream* stream, const double* p, int sx);
-uint zfp_encode_partial_block_strided_int32_1(zfp_stream* stream, const int32* p, uint nx, int sx);
-uint zfp_encode_partial_block_strided_int64_1(zfp_stream* stream, const int64* p, uint nx, int sx);
-uint zfp_encode_partial_block_strided_float_1(zfp_stream* stream, const float* p, uint nx, int sx);
-uint zfp_encode_partial_block_strided_double_1(zfp_stream* stream, const double* p, uint nx, int sx);
+size_t zfp_encode_block_strided_int32_1(zfp_stream* stream, const int32* p, ptrdiff_t sx);
+size_t zfp_encode_block_strided_int64_1(zfp_stream* stream, const int64* p, ptrdiff_t sx);
+size_t zfp_encode_block_strided_float_1(zfp_stream* stream, const float* p, ptrdiff_t sx);
+size_t zfp_encode_block_strided_double_1(zfp_stream* stream, const double* p, ptrdiff_t sx);
+size_t zfp_encode_partial_block_strided_int32_1(zfp_stream* stream, const int32* p, size_t nx, ptrdiff_t sx);
+size_t zfp_encode_partial_block_strided_int64_1(zfp_stream* stream, const int64* p, size_t nx, ptrdiff_t sx);
+size_t zfp_encode_partial_block_strided_float_1(zfp_stream* stream, const float* p, size_t nx, ptrdiff_t sx);
+size_t zfp_encode_partial_block_strided_double_1(zfp_stream* stream, const double* p, size_t nx, ptrdiff_t sx);

 /* encode 2D contiguous block of 4x4 values */
-uint zfp_encode_block_int32_2(zfp_stream* stream, const int32* block);
-uint zfp_encode_block_int64_2(zfp_stream* stream, const int64* block);
-uint zfp_encode_block_float_2(zfp_stream* stream, const float* block);
-uint zfp_encode_block_double_2(zfp_stream* stream, const double* block);
+size_t zfp_encode_block_int32_2(zfp_stream* stream, const int32* block);
+size_t zfp_encode_block_int64_2(zfp_stream* stream, const int64* block);
+size_t zfp_encode_block_float_2(zfp_stream* stream, const float* block);
+size_t zfp_encode_block_double_2(zfp_stream* stream, const double* block);

 /* encode 2D complete or partial block from strided array */
-uint zfp_encode_partial_block_strided_int32_2(zfp_stream* stream, const int32* p, uint nx, uint ny, int sx, int sy);
-uint zfp_encode_partial_block_strided_int64_2(zfp_stream* stream, const int64* p, uint nx, uint ny, int sx, int sy);
-uint zfp_encode_partial_block_strided_float_2(zfp_stream* stream, const float* p, uint nx, uint ny, int sx, int sy);
-uint zfp_encode_partial_block_strided_double_2(zfp_stream* stream, const double* p, uint nx, uint ny, int sx, int sy);
-uint zfp_encode_block_strided_int32_2(zfp_stream* stream, const int32* p, int sx, int sy);
-uint zfp_encode_block_strided_int64_2(zfp_stream* stream, const int64* p, int sx, int sy);
-uint zfp_encode_block_strided_float_2(zfp_stream* stream, const float* p, int sx, int sy);
-uint zfp_encode_block_strided_double_2(zfp_stream* stream, const double* p, int sx, int sy);
+size_t zfp_encode_partial_block_strided_int32_2(zfp_stream* stream, const int32* p, size_t nx, size_t ny, ptrdiff_t sx, ptrdiff_t sy);
+size_t zfp_encode_partial_block_strided_int64_2(zfp_stream* stream, const int64* p, size_t nx, size_t ny, ptrdiff_t sx, ptrdiff_t sy);
+size_t zfp_encode_partial_block_strided_float_2(zfp_stream* stream, const float* p, size_t nx, size_t ny, ptrdiff_t sx, ptrdiff_t sy);
+size_t zfp_encode_partial_block_strided_double_2(zfp_stream* stream, const double* p, size_t nx, size_t ny, ptrdiff_t sx, ptrdiff_t sy);
+size_t zfp_encode_block_strided_int32_2(zfp_stream* stream, const int32* p, ptrdiff_t sx, ptrdiff_t sy);
+size_t zfp_encode_block_strided_int64_2(zfp_stream* stream, const int64* p, ptrdiff_t sx, ptrdiff_t sy);
+size_t zfp_encode_block_strided_float_2(zfp_stream* stream, const float* p, ptrdiff_t sx, ptrdiff_t sy);
+size_t zfp_encode_block_strided_double_2(zfp_stream* stream, const double* p, ptrdiff_t sx, ptrdiff_t sy);

 /* encode 3D contiguous block of 4x4x4 values */
-uint zfp_encode_block_int32_3(zfp_stream* stream, const int32* block);
-uint zfp_encode_block_int64_3(zfp_stream* stream, const int64* block);
-uint zfp_encode_block_float_3(zfp_stream* stream, const float* block);
-uint zfp_encode_block_double_3(zfp_stream* stream, const double* block);
+size_t zfp_encode_block_int32_3(zfp_stream* stream, const int32* block);
+size_t zfp_encode_block_int64_3(zfp_stream* stream, const int64* block);
+size_t zfp_encode_block_float_3(zfp_stream* stream, const float* block);
+size_t zfp_encode_block_double_3(zfp_stream* stream, const double* block);

 /* encode 3D complete or partial block from strided array */
-uint zfp_encode_block_strided_int32_3(zfp_stream* stream, const int32* p, int sx, int sy, int sz);
-uint zfp_encode_block_strided_int64_3(zfp_stream* stream, const int64* p, int sx, int sy, int sz);
-uint zfp_encode_block_strided_float_3(zfp_stream* stream, const float* p, int sx, int sy, int sz);
-uint zfp_encode_block_strided_double_3(zfp_stream* stream, const double* p, int sx, int sy, int sz);
-uint zfp_encode_partial_block_strided_int32_3(zfp_stream* stream, const int32* p, uint nx, uint ny, uint nz, int sx, int sy, int sz);
-uint zfp_encode_partial_block_strided_int64_3(zfp_stream* stream, const int64* p, uint nx, uint ny, uint nz, int sx, int sy, int sz);
-uint zfp_encode_partial_block_strided_float_3(zfp_stream* stream, const float* p, uint nx, uint ny, uint nz, int sx, int sy, int sz);
-uint zfp_encode_partial_block_strided_double_3(zfp_stream* stream, const double* p, uint nx, uint ny, uint nz, int sx, int sy, int sz);
+size_t zfp_encode_block_strided_int32_3(zfp_stream* stream, const int32* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz);
+size_t zfp_encode_block_strided_int64_3(zfp_stream* stream, const int64* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz);
+size_t zfp_encode_block_strided_float_3(zfp_stream* stream, const float* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz);
+size_t zfp_encode_block_strided_double_3(zfp_stream* stream, const double* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz);
+size_t zfp_encode_partial_block_strided_int32_3(zfp_stream* stream, const int32* p, size_t nx, size_t ny, size_t nz, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz);
+size_t zfp_encode_partial_block_strided_int64_3(zfp_stream* stream, const int64* p, size_t nx, size_t ny, size_t nz, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz);
+size_t zfp_encode_partial_block_strided_float_3(zfp_stream* stream, const float* p, size_t nx, size_t ny, size_t nz, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz);
+size_t zfp_encode_partial_block_strided_double_3(zfp_stream* stream, const double* p, size_t nx, size_t ny, size_t nz, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz);

 /* encode 4D contiguous block of 4x4x4x4 values */
-uint zfp_encode_block_int32_4(zfp_stream* stream, const int32* block);
-uint zfp_encode_block_int64_4(zfp_stream* stream, const int64* block);
-uint zfp_encode_block_float_4(zfp_stream* stream, const float* block);
-uint zfp_encode_block_double_4(zfp_stream* stream, const double* block);
+size_t zfp_encode_block_int32_4(zfp_stream* stream, const int32* block);
+size_t zfp_encode_block_int64_4(zfp_stream* stream, const int64* block);
+size_t zfp_encode_block_float_4(zfp_stream* stream, const float* block);
+size_t zfp_encode_block_double_4(zfp_stream* stream, const double* block);

 /* encode 4D complete or partial block from strided array */
-uint zfp_encode_block_strided_int32_4(zfp_stream* stream, const int32* p, int sx, int sy, int sz, int sw);
-uint zfp_encode_block_strided_int64_4(zfp_stream* stream, const int64* p, int sx, int sy, int sz, int sw);
-uint zfp_encode_block_strided_float_4(zfp_stream* stream, const float* p, int sx, int sy, int sz, int sw);
-uint zfp_encode_block_strided_double_4(zfp_stream* stream, const double* p, int sx, int sy, int sz, int sw);
-uint zfp_encode_partial_block_strided_int32_4(zfp_stream* stream, const int32* p, uint nx, uint ny, uint nz, uint nw, int sx, int sy, int sz, int sw);
-uint zfp_encode_partial_block_strided_int64_4(zfp_stream* stream, const int64* p, uint nx, uint ny, uint nz, uint nw, int sx, int sy, int sz, int sw);
-uint zfp_encode_partial_block_strided_float_4(zfp_stream* stream, const float* p, uint nx, uint ny, uint nz, uint nw, int sx, int sy, int sz, int sw);
-uint zfp_encode_partial_block_strided_double_4(zfp_stream* stream, const double* p, uint nx, uint ny, uint nz, uint nw, int sx, int sy, int sz, int sw);
+size_t zfp_encode_block_strided_int32_4(zfp_stream* stream, const int32* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw);
+size_t zfp_encode_block_strided_int64_4(zfp_stream* stream, const int64* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw);
+size_t zfp_encode_block_strided_float_4(zfp_stream* stream, const float* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw);
+size_t zfp_encode_block_strided_double_4(zfp_stream* stream, const double* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw);
+size_t zfp_encode_partial_block_strided_int32_4(zfp_stream* stream, const int32* p, size_t nx, size_t ny, size_t nz, size_t nw, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw);
+size_t zfp_encode_partial_block_strided_int64_4(zfp_stream* stream, const int64* p, size_t nx, size_t ny, size_t nz, size_t nw, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw);
+size_t zfp_encode_partial_block_strided_float_4(zfp_stream* stream, const float* p, size_t nx, size_t ny, size_t nz, size_t nw, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw);
+size_t zfp_encode_partial_block_strided_double_4(zfp_stream* stream, const double* p, size_t nx, size_t ny, size_t nz, size_t nw, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw);

 /* low-level API: decoder -------------------------------------------------- */

@@ -672,68 +711,68 @@ further details.
 */

 /* decode 1D contiguous block of 4 values */
-uint zfp_decode_block_int32_1(zfp_stream* stream, int32* block);
-uint zfp_decode_block_int64_1(zfp_stream* stream, int64* block);
-uint zfp_decode_block_float_1(zfp_stream* stream, float* block);
-uint zfp_decode_block_double_1(zfp_stream* stream, double* block);
+size_t zfp_decode_block_int32_1(zfp_stream* stream, int32* block);
+size_t zfp_decode_block_int64_1(zfp_stream* stream, int64* block);
+size_t zfp_decode_block_float_1(zfp_stream* stream, float* block);
+size_t zfp_decode_block_double_1(zfp_stream* stream, double* block);

 /* decode 1D complete or partial block from strided array */
-uint zfp_decode_block_strided_int32_1(zfp_stream* stream, int32* p, int sx);
-uint zfp_decode_block_strided_int64_1(zfp_stream* stream, int64* p, int sx);
-uint zfp_decode_block_strided_float_1(zfp_stream* stream, float* p, int sx);
-uint zfp_decode_block_strided_double_1(zfp_stream* stream, double* p, int sx);
-uint zfp_decode_partial_block_strided_int32_1(zfp_stream* stream, int32* p, uint nx, int sx);
-uint zfp_decode_partial_block_strided_int64_1(zfp_stream* stream, int64* p, uint nx, int sx);
-uint zfp_decode_partial_block_strided_float_1(zfp_stream* stream, float* p, uint nx, int sx);
-uint zfp_decode_partial_block_strided_double_1(zfp_stream* stream, double* p, uint nx, int sx);
+size_t zfp_decode_block_strided_int32_1(zfp_stream* stream, int32* p, ptrdiff_t sx);
+size_t zfp_decode_block_strided_int64_1(zfp_stream* stream, int64* p, ptrdiff_t sx);
+size_t zfp_decode_block_strided_float_1(zfp_stream* stream, float* p, ptrdiff_t sx);
+size_t zfp_decode_block_strided_double_1(zfp_stream* stream, double* p, ptrdiff_t sx);
+size_t zfp_decode_partial_block_strided_int32_1(zfp_stream* stream, int32* p, size_t nx, ptrdiff_t sx);
+size_t zfp_decode_partial_block_strided_int64_1(zfp_stream* stream, int64* p, size_t nx, ptrdiff_t sx);
+size_t zfp_decode_partial_block_strided_float_1(zfp_stream* stream, float* p, size_t nx, ptrdiff_t sx);
+size_t zfp_decode_partial_block_strided_double_1(zfp_stream* stream, double* p, size_t nx, ptrdiff_t sx);

 /* decode 2D contiguous block of 4x4 values */
-uint zfp_decode_block_int32_2(zfp_stream* stream, int32* block);
-uint zfp_decode_block_int64_2(zfp_stream* stream, int64* block);
-uint zfp_decode_block_float_2(zfp_stream* stream, float* block);
-uint zfp_decode_block_double_2(zfp_stream* stream, double* block);
+size_t zfp_decode_block_int32_2(zfp_stream* stream, int32* block);
+size_t zfp_decode_block_int64_2(zfp_stream* stream, int64* block);
+size_t zfp_decode_block_float_2(zfp_stream* stream, float* block);
+size_t zfp_decode_block_double_2(zfp_stream* stream, double* block);

 /* decode 2D complete or partial block from strided array */
-uint zfp_decode_block_strided_int32_2(zfp_stream* stream, int32* p, int sx, int sy);
-uint zfp_decode_block_strided_int64_2(zfp_stream* stream, int64* p, int sx, int sy);
-uint zfp_decode_block_strided_float_2(zfp_stream* stream, float* p, int sx, int sy);
-uint zfp_decode_block_strided_double_2(zfp_stream* stream, double* p, int sx, int sy);
-uint zfp_decode_partial_block_strided_int32_2(zfp_stream* stream, int32* p, uint nx, uint ny, int sx, int sy);
-uint zfp_decode_partial_block_strided_int64_2(zfp_stream* stream, int64* p, uint nx, uint ny, int sx, int sy);
-uint zfp_decode_partial_block_strided_float_2(zfp_stream* stream, float* p, uint nx, uint ny, int sx, int sy);
-uint zfp_decode_partial_block_strided_double_2(zfp_stream* stream, double* p, uint nx, uint ny, int sx, int sy);
+size_t zfp_decode_block_strided_int32_2(zfp_stream* stream, int32* p, ptrdiff_t sx, ptrdiff_t sy);
+size_t zfp_decode_block_strided_int64_2(zfp_stream* stream, int64* p, ptrdiff_t sx, ptrdiff_t sy);
+size_t zfp_decode_block_strided_float_2(zfp_stream* stream, float* p, ptrdiff_t sx, ptrdiff_t sy);
+size_t zfp_decode_block_strided_double_2(zfp_stream* stream, double* p, ptrdiff_t sx, ptrdiff_t sy);
+size_t zfp_decode_partial_block_strided_int32_2(zfp_stream* stream, int32* p, size_t nx, size_t ny, ptrdiff_t sx, ptrdiff_t sy);
+size_t zfp_decode_partial_block_strided_int64_2(zfp_stream* stream, int64* p, size_t nx, size_t ny, ptrdiff_t sx, ptrdiff_t sy);
+size_t zfp_decode_partial_block_strided_float_2(zfp_stream* stream, float* p, size_t nx, size_t ny, ptrdiff_t sx, ptrdiff_t sy);
+size_t zfp_decode_partial_block_strided_double_2(zfp_stream* stream, double* p, size_t nx, size_t ny, ptrdiff_t sx, ptrdiff_t sy);

 /* decode 3D contiguous block of 4x4x4 values */
-uint zfp_decode_block_int32_3(zfp_stream* stream, int32* block);
-uint zfp_decode_block_int64_3(zfp_stream* stream, int64* block);
-uint zfp_decode_block_float_3(zfp_stream* stream, float* block);
-uint zfp_decode_block_double_3(zfp_stream* stream, double* block);
+size_t zfp_decode_block_int32_3(zfp_stream* stream, int32* block);
+size_t zfp_decode_block_int64_3(zfp_stream* stream, int64* block);
+size_t zfp_decode_block_float_3(zfp_stream* stream, float* block);
+size_t zfp_decode_block_double_3(zfp_stream* stream, double* block);

 /* decode 3D complete or partial block from strided array */
-uint zfp_decode_block_strided_int32_3(zfp_stream* stream, int32* p, int sx, int sy, int sz);
-uint zfp_decode_block_strided_int64_3(zfp_stream* stream, int64* p, int sx, int sy, int sz); -uint zfp_decode_block_strided_float_3(zfp_stream* stream, float* p, int sx, int sy, int sz); -uint zfp_decode_block_strided_double_3(zfp_stream* stream, double* p, int sx, int sy, int sz); -uint zfp_decode_partial_block_strided_int32_3(zfp_stream* stream, int32* p, uint nx, uint ny, uint nz, int sx, int sy, int sz); -uint zfp_decode_partial_block_strided_int64_3(zfp_stream* stream, int64* p, uint nx, uint ny, uint nz, int sx, int sy, int sz); -uint zfp_decode_partial_block_strided_float_3(zfp_stream* stream, float* p, uint nx, uint ny, uint nz, int sx, int sy, int sz); -uint zfp_decode_partial_block_strided_double_3(zfp_stream* stream, double* p, uint nx, uint ny, uint nz, int sx, int sy, int sz); +size_t zfp_decode_block_strided_int32_3(zfp_stream* stream, int32* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz); +size_t zfp_decode_block_strided_int64_3(zfp_stream* stream, int64* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz); +size_t zfp_decode_block_strided_float_3(zfp_stream* stream, float* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz); +size_t zfp_decode_block_strided_double_3(zfp_stream* stream, double* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz); +size_t zfp_decode_partial_block_strided_int32_3(zfp_stream* stream, int32* p, size_t nx, size_t ny, size_t nz, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz); +size_t zfp_decode_partial_block_strided_int64_3(zfp_stream* stream, int64* p, size_t nx, size_t ny, size_t nz, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz); +size_t zfp_decode_partial_block_strided_float_3(zfp_stream* stream, float* p, size_t nx, size_t ny, size_t nz, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz); +size_t zfp_decode_partial_block_strided_double_3(zfp_stream* stream, double* p, size_t nx, size_t ny, size_t nz, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz); /* decode 4D contiguous block of 4x4x4x4 values */ -uint zfp_decode_block_int32_4(zfp_stream* stream, int32* block); 
-uint zfp_decode_block_int64_4(zfp_stream* stream, int64* block); -uint zfp_decode_block_float_4(zfp_stream* stream, float* block); -uint zfp_decode_block_double_4(zfp_stream* stream, double* block); +size_t zfp_decode_block_int32_4(zfp_stream* stream, int32* block); +size_t zfp_decode_block_int64_4(zfp_stream* stream, int64* block); +size_t zfp_decode_block_float_4(zfp_stream* stream, float* block); +size_t zfp_decode_block_double_4(zfp_stream* stream, double* block); /* decode 4D complete or partial block from strided array */ -uint zfp_decode_block_strided_int32_4(zfp_stream* stream, int32* p, int sx, int sy, int sz, int sw); -uint zfp_decode_block_strided_int64_4(zfp_stream* stream, int64* p, int sx, int sy, int sz, int sw); -uint zfp_decode_block_strided_float_4(zfp_stream* stream, float* p, int sx, int sy, int sz, int sw); -uint zfp_decode_block_strided_double_4(zfp_stream* stream, double* p, int sx, int sy, int sz, int sw); -uint zfp_decode_partial_block_strided_int32_4(zfp_stream* stream, int32* p, uint nx, uint ny, uint nz, uint nw, int sx, int sy, int sz, int sw); -uint zfp_decode_partial_block_strided_int64_4(zfp_stream* stream, int64* p, uint nx, uint ny, uint nz, uint nw, int sx, int sy, int sz, int sw); -uint zfp_decode_partial_block_strided_float_4(zfp_stream* stream, float* p, uint nx, uint ny, uint nz, uint nw, int sx, int sy, int sz, int sw); -uint zfp_decode_partial_block_strided_double_4(zfp_stream* stream, double* p, uint nx, uint ny, uint nz, uint nw, int sx, int sy, int sz, int sw); +size_t zfp_decode_block_strided_int32_4(zfp_stream* stream, int32* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw); +size_t zfp_decode_block_strided_int64_4(zfp_stream* stream, int64* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw); +size_t zfp_decode_block_strided_float_4(zfp_stream* stream, float* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw); +size_t zfp_decode_block_strided_double_4(zfp_stream* stream, double* p, ptrdiff_t 
sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw); +size_t zfp_decode_partial_block_strided_int32_4(zfp_stream* stream, int32* p, size_t nx, size_t ny, size_t nz, size_t nw, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw); +size_t zfp_decode_partial_block_strided_int64_4(zfp_stream* stream, int64* p, size_t nx, size_t ny, size_t nz, size_t nw, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw); +size_t zfp_decode_partial_block_strided_float_4(zfp_stream* stream, float* p, size_t nx, size_t ny, size_t nz, size_t nw, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw); +size_t zfp_decode_partial_block_strided_double_4(zfp_stream* stream, double* p, size_t nx, size_t ny, size_t nz, size_t nw, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw); /* low-level API: utility functions ---------------------------------------- */ diff --git a/include/zfp.hpp b/include/zfp.hpp new file mode 100644 index 00000000..5ec93fd4 --- /dev/null +++ b/include/zfp.hpp @@ -0,0 +1,289 @@ +#ifndef ZFP_HPP +#define ZFP_HPP + +// Copyright (c) 2014-2023, Lawrence Livermore National Security, LLC and +// other zfp project contributors. See the top-level LICENSE file for details. 
+// SPDX-License-Identifier: BSD-3-Clause + +#include "zfp.h" + +// templated C++ wrappers around libzfp low-level C functions +namespace zfp { + +// encoder declarations ------------------------------------------------------- + +template<typename Scalar, uint dims> +inline size_t +encode_block(zfp_stream* zfp, const Scalar* block); + +template<typename Scalar> +inline size_t +encode_block_strided(zfp_stream* zfp, const Scalar* p, ptrdiff_t sx); + +template<typename Scalar> +inline size_t +encode_block_strided(zfp_stream* zfp, const Scalar* p, ptrdiff_t sx, ptrdiff_t sy); + +template<typename Scalar> +inline size_t +encode_block_strided(zfp_stream* zfp, const Scalar* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz); + +template<typename Scalar> +inline size_t +encode_block_strided(zfp_stream* zfp, const Scalar* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw); + +template<typename Scalar> +inline size_t +encode_partial_block_strided(zfp_stream* zfp, const Scalar* p, size_t nx, ptrdiff_t sx); + +template<typename Scalar> +inline size_t +encode_partial_block_strided(zfp_stream* zfp, const Scalar* p, size_t nx, size_t ny, ptrdiff_t sx, ptrdiff_t sy); + +template<typename Scalar> +inline size_t +encode_partial_block_strided(zfp_stream* zfp, const Scalar* p, size_t nx, size_t ny, size_t nz, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz); + +template<typename Scalar> +inline size_t +encode_partial_block_strided(zfp_stream* zfp, const Scalar* p, size_t nx, size_t ny, size_t nz, size_t nw, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw); + +// encoder specializations ---------------------------------------------------- + +template<> +inline size_t +encode_block<float, 1>(zfp_stream* zfp, const float* block) { return zfp_encode_block_float_1(zfp, block); } + +template<> +inline size_t +encode_block<float, 2>(zfp_stream* zfp, const float* block) { return zfp_encode_block_float_2(zfp, block); } + +template<> +inline size_t +encode_block<float, 3>(zfp_stream* zfp, const float* block) { return zfp_encode_block_float_3(zfp, block); } + +template<> +inline size_t +encode_block<float, 4>(zfp_stream* zfp, const float* block) { return zfp_encode_block_float_4(zfp, block); }
+ +template<> +inline size_t +encode_block<double, 1>(zfp_stream* zfp, const double* block) { return zfp_encode_block_double_1(zfp, block); } + +template<> +inline size_t +encode_block<double, 2>(zfp_stream* zfp, const double* block) { return zfp_encode_block_double_2(zfp, block); } + +template<> +inline size_t +encode_block<double, 3>(zfp_stream* zfp, const double* block) { return zfp_encode_block_double_3(zfp, block); } + +template<> +inline size_t +encode_block<double, 4>(zfp_stream* zfp, const double* block) { return zfp_encode_block_double_4(zfp, block); } + +template <> +inline size_t +encode_block_strided<float>(zfp_stream* zfp, const float* p, ptrdiff_t sx) { return zfp_encode_block_strided_float_1(zfp, p, sx); } + +template <> +inline size_t +encode_block_strided<float>(zfp_stream* zfp, const float* p, ptrdiff_t sx, ptrdiff_t sy) { return zfp_encode_block_strided_float_2(zfp, p, sx, sy); } + +template <> +inline size_t +encode_block_strided<float>(zfp_stream* zfp, const float* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz) { return zfp_encode_block_strided_float_3(zfp, p, sx, sy, sz); } + +template <> +inline size_t +encode_block_strided<float>(zfp_stream* zfp, const float* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw) { return zfp_encode_block_strided_float_4(zfp, p, sx, sy, sz, sw); } + +template <> +inline size_t +encode_block_strided<double>(zfp_stream* zfp, const double* p, ptrdiff_t sx) { return zfp_encode_block_strided_double_1(zfp, p, sx); } + +template <> +inline size_t +encode_block_strided<double>(zfp_stream* zfp, const double* p, ptrdiff_t sx, ptrdiff_t sy) { return zfp_encode_block_strided_double_2(zfp, p, sx, sy); } + +template <> +inline size_t +encode_block_strided<double>(zfp_stream* zfp, const double* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz) { return zfp_encode_block_strided_double_3(zfp, p, sx, sy, sz); } + +template <> +inline size_t +encode_block_strided<double>(zfp_stream* zfp, const double* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw) { return zfp_encode_block_strided_double_4(zfp, p, sx, sy, sz, sw); } + 
+template <> +inline size_t +encode_partial_block_strided<float>(zfp_stream* zfp, const float* p, size_t nx, ptrdiff_t sx) +{ return zfp_encode_partial_block_strided_float_1(zfp, p, nx, sx); } + +template <> +inline size_t +encode_partial_block_strided<float>(zfp_stream* zfp, const float* p, size_t nx, size_t ny, ptrdiff_t sx, ptrdiff_t sy) { return zfp_encode_partial_block_strided_float_2(zfp, p, nx, ny, sx, sy); } + +template <> +inline size_t +encode_partial_block_strided<float>(zfp_stream* zfp, const float* p, size_t nx, size_t ny, size_t nz, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz) { return zfp_encode_partial_block_strided_float_3(zfp, p, nx, ny, nz, sx, sy, sz); } + +template <> +inline size_t +encode_partial_block_strided<float>(zfp_stream* zfp, const float* p, size_t nx, size_t ny, size_t nz, size_t nw, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw) { return zfp_encode_partial_block_strided_float_4(zfp, p, nx, ny, nz, nw, sx, sy, sz, sw); } + +template <> +inline size_t +encode_partial_block_strided<double>(zfp_stream* zfp, const double* p, size_t nx, ptrdiff_t sx) +{ return zfp_encode_partial_block_strided_double_1(zfp, p, nx, sx); } + +template <> +inline size_t +encode_partial_block_strided<double>(zfp_stream* zfp, const double* p, size_t nx, size_t ny, ptrdiff_t sx, ptrdiff_t sy) { return zfp_encode_partial_block_strided_double_2(zfp, p, nx, ny, sx, sy); } + +template <> +inline size_t +encode_partial_block_strided<double>(zfp_stream* zfp, const double* p, size_t nx, size_t ny, size_t nz, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz) { return zfp_encode_partial_block_strided_double_3(zfp, p, nx, ny, nz, sx, sy, sz); } + +template <> +inline size_t +encode_partial_block_strided<double>(zfp_stream* zfp, const double* p, size_t nx, size_t ny, size_t nz, size_t nw, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw) { return zfp_encode_partial_block_strided_double_4(zfp, p, nx, ny, nz, nw, sx, sy, sz, sw); } + +// decoder declarations ------------------------------------------------------- + +template<typename Scalar, uint dims>
+inline size_t +decode_block(zfp_stream* zfp, Scalar* block); + +template<typename Scalar> +inline size_t +decode_block_strided(zfp_stream* zfp, Scalar* p, ptrdiff_t sx); + +template<typename Scalar> +inline size_t +decode_block_strided(zfp_stream* zfp, Scalar* p, ptrdiff_t sx, ptrdiff_t sy); + +template<typename Scalar> +inline size_t +decode_block_strided(zfp_stream* zfp, Scalar* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz); + +template<typename Scalar> +inline size_t +decode_block_strided(zfp_stream* zfp, Scalar* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw); + +template<typename Scalar> +inline size_t +decode_partial_block_strided(zfp_stream* zfp, Scalar* p, size_t nx, ptrdiff_t sx); + +template<typename Scalar> +inline size_t +decode_partial_block_strided(zfp_stream* zfp, Scalar* p, size_t nx, size_t ny, ptrdiff_t sx, ptrdiff_t sy); + +template<typename Scalar> +inline size_t +decode_partial_block_strided(zfp_stream* zfp, Scalar* p, size_t nx, size_t ny, size_t nz, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz); + +template<typename Scalar> +inline size_t +decode_partial_block_strided(zfp_stream* zfp, Scalar* p, size_t nx, size_t ny, size_t nz, size_t nw, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw); + +// decoder specializations ---------------------------------------------------- + +template<> +inline size_t +decode_block<float, 1>(zfp_stream* zfp, float* block) { return zfp_decode_block_float_1(zfp, block); } + +template<> +inline size_t +decode_block<float, 2>(zfp_stream* zfp, float* block) { return zfp_decode_block_float_2(zfp, block); } + +template<> +inline size_t +decode_block<float, 3>(zfp_stream* zfp, float* block) { return zfp_decode_block_float_3(zfp, block); } + +template<> +inline size_t +decode_block<float, 4>(zfp_stream* zfp, float* block) { return zfp_decode_block_float_4(zfp, block); } + +template<> +inline size_t +decode_block<double, 1>(zfp_stream* zfp, double* block) { return zfp_decode_block_double_1(zfp, block); } + +template<> +inline size_t +decode_block<double, 2>(zfp_stream* zfp, double* block) { return zfp_decode_block_double_2(zfp, block); } + +template<> +inline size_t +decode_block<double, 3>(zfp_stream* zfp,
double* block) { return zfp_decode_block_double_3(zfp, block); } + +template<> +inline size_t +decode_block<double, 4>(zfp_stream* zfp, double* block) { return zfp_decode_block_double_4(zfp, block); } + +template <> +inline size_t +decode_block_strided<float>(zfp_stream* zfp, float* p, ptrdiff_t sx) { return zfp_decode_block_strided_float_1(zfp, p, sx); } + +template <> +inline size_t +decode_block_strided<float>(zfp_stream* zfp, float* p, ptrdiff_t sx, ptrdiff_t sy) { return zfp_decode_block_strided_float_2(zfp, p, sx, sy); } + +template <> +inline size_t +decode_block_strided<float>(zfp_stream* zfp, float* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz) { return zfp_decode_block_strided_float_3(zfp, p, sx, sy, sz); } + +template <> +inline size_t +decode_block_strided<float>(zfp_stream* zfp, float* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw) { return zfp_decode_block_strided_float_4(zfp, p, sx, sy, sz, sw); } + +template <> +inline size_t +decode_block_strided<double>(zfp_stream* zfp, double* p, ptrdiff_t sx) { return zfp_decode_block_strided_double_1(zfp, p, sx); } + +template <> +inline size_t +decode_block_strided<double>(zfp_stream* zfp, double* p, ptrdiff_t sx, ptrdiff_t sy) { return zfp_decode_block_strided_double_2(zfp, p, sx, sy); } + +template <> +inline size_t +decode_block_strided<double>(zfp_stream* zfp, double* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz) { return zfp_decode_block_strided_double_3(zfp, p, sx, sy, sz); } + +template <> +inline size_t +decode_block_strided<double>(zfp_stream* zfp, double* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw) { return zfp_decode_block_strided_double_4(zfp, p, sx, sy, sz, sw); } + +template <> +inline size_t +decode_partial_block_strided<float>(zfp_stream* zfp, float* p, size_t nx, ptrdiff_t sx) { return zfp_decode_partial_block_strided_float_1(zfp, p, nx, sx); } + +template <> +inline size_t +decode_partial_block_strided<float>(zfp_stream* zfp, float* p, size_t nx, size_t ny, ptrdiff_t sx, ptrdiff_t sy) { return zfp_decode_partial_block_strided_float_2(zfp, p, nx, ny,
sx, sy); } + +template <> +inline size_t +decode_partial_block_strided<float>(zfp_stream* zfp, float* p, size_t nx, size_t ny, size_t nz, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz) { return zfp_decode_partial_block_strided_float_3(zfp, p, nx, ny, nz, sx, sy, sz); } + +template <> +inline size_t +decode_partial_block_strided<float>(zfp_stream* zfp, float* p, size_t nx, size_t ny, size_t nz, size_t nw, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw) { return zfp_decode_partial_block_strided_float_4(zfp, p, nx, ny, nz, nw, sx, sy, sz, sw); } + +template <> +inline size_t +decode_partial_block_strided<double>(zfp_stream* zfp, double* p, size_t nx, ptrdiff_t sx) { return zfp_decode_partial_block_strided_double_1(zfp, p, nx, sx); } + +template <> +inline size_t +decode_partial_block_strided<double>(zfp_stream* zfp, double* p, size_t nx, size_t ny, ptrdiff_t sx, ptrdiff_t sy) { return zfp_decode_partial_block_strided_double_2(zfp, p, nx, ny, sx, sy); } + +template <> +inline size_t +decode_partial_block_strided<double>(zfp_stream* zfp, double* p, size_t nx, size_t ny, size_t nz, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz) { return zfp_decode_partial_block_strided_double_3(zfp, p, nx, ny, nz, sx, sy, sz); } + +template <> +inline size_t +decode_partial_block_strided<double>(zfp_stream* zfp, double* p, size_t nx, size_t ny, size_t nz, size_t nw, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw) { return zfp_decode_partial_block_strided_double_4(zfp, p, nx, ny, nz, nw, sx, sy, sz, sw); } + +} + +#endif diff --git a/include/zfp/array.h b/include/zfp/array.h new file mode 100644 index 00000000..b503abc4 --- /dev/null +++ b/include/zfp/array.h @@ -0,0 +1,32 @@ +#ifndef CFP_ARRAY_H +#define CFP_ARRAY_H + +#include +#include "zfp/internal/cfp/header.h" +#include "zfp/internal/cfp/array1f.h" +#include "zfp/internal/cfp/array1d.h" +#include "zfp/internal/cfp/array2f.h" +#include "zfp/internal/cfp/array2d.h" +#include "zfp/internal/cfp/array3f.h" +#include "zfp/internal/cfp/array3d.h" +#include
"zfp/internal/cfp/array4f.h" +#include "zfp/internal/cfp/array4d.h" + +typedef struct { + cfp_array1f_api array1f; + cfp_array1d_api array1d; + cfp_array2f_api array2f; + cfp_array2d_api array2d; + cfp_array3f_api array3f; + cfp_array3d_api array3d; + cfp_array4f_api array4f; + cfp_array4d_api array4d; +} cfp_api; + +#ifndef CFP_NAMESPACE + #define CFP_NAMESPACE cfp +#endif + +extern const cfp_api CFP_NAMESPACE; + +#endif diff --git a/include/zfp/array.hpp b/include/zfp/array.hpp new file mode 100644 index 00000000..07d5b08b --- /dev/null +++ b/include/zfp/array.hpp @@ -0,0 +1,95 @@ +#ifndef ZFP_ARRAY_HPP +#define ZFP_ARRAY_HPP + +#include +#include +#include +#include "zfp.h" +#include "zfp/internal/array/exception.hpp" + +namespace zfp { + +// abstract base class for compressed array of scalars +class array { +public: + #include "zfp/internal/array/header.hpp" + + // factory function (see zfpfactory.h) + static zfp::array* construct(const zfp::array::header& header, const void* buffer = 0, size_t buffer_size_bytes = 0); + + // public virtual destructor (can delete array through base class pointer) + virtual ~array() {} + + // underlying scalar type + zfp_type scalar_type() const { return type; } + + // dimensionality + uint dimensionality() const { return dims; } + + // rate in bits per value + virtual double rate() const = 0; + + // compressed data size and buffer + virtual size_t compressed_size() const = 0; + virtual void* compressed_data() const = 0; + +protected: + // default constructor + array() : + type(zfp_type_none), + dims(0), + nx(0), ny(0), nz(0), nw(0) + {} + + // generic array with 'dims' dimensions and scalar type 'type' + explicit array(uint dims, zfp_type type) : + type(type), + dims(dims), + nx(0), ny(0), nz(0), nw(0) + {} + + // constructor from previously-serialized compressed array + explicit array(uint dims, zfp_type type, const zfp::array::header& header) : + type(type), + dims(dims), + nx(header.size_x()), ny(header.size_y()),
nz(header.size_z()), nw(header.size_w()) + { + if (header.scalar_type() != type) + throw zfp::exception("zfp array scalar type does not match header"); + if (header.dimensionality() != dims) + throw zfp::exception("zfp array dimensionality does not match header"); + } + + // copy constructor--performs a deep copy + array(const array& a) + { + deep_copy(a); + } + + // assignment operator--performs a deep copy + array& operator=(const array& a) + { + deep_copy(a); + return *this; + } + + // perform a deep copy + void deep_copy(const array& a) + { + // copy metadata + type = a.type; + dims = a.dims; + nx = a.nx; + ny = a.ny; + nz = a.nz; + nw = a.nw; + } + + zfp_type type; // scalar type + uint dims; // array dimensionality (1, 2, 3, or 4) + size_t nx, ny, nz, nw; // array dimensions +}; + +} + +#endif diff --git a/include/zfp/array1.hpp b/include/zfp/array1.hpp new file mode 100644 index 00000000..6b89fefa --- /dev/null +++ b/include/zfp/array1.hpp @@ -0,0 +1,265 @@ +#ifndef ZFP_ARRAY1_HPP +#define ZFP_ARRAY1_HPP + +#include +#include +#include +#include "zfp/array.hpp" +#include "zfp/index.hpp" +#include "zfp/codec/zfpcodec.hpp" +#include "zfp/internal/array/cache1.hpp" +#include "zfp/internal/array/handle1.hpp" +#include "zfp/internal/array/iterator1.hpp" +#include "zfp/internal/array/pointer1.hpp" +#include "zfp/internal/array/reference1.hpp" +#include "zfp/internal/array/store1.hpp" +#include "zfp/internal/array/view1.hpp" + +namespace zfp { + +// compressed 1D array of scalars +template < + typename Scalar, + class Codec = zfp::codec::zfp1<Scalar>, + class Index = zfp::index::implicit +> +class array1 : public array { +public: + // types utilized by nested classes + typedef array1 container_type; + typedef Scalar value_type; + typedef Codec codec_type; + typedef Index index_type; + typedef zfp::internal::BlockStore1<value_type, codec_type, index_type> store_type; + typedef zfp::internal::BlockCache1<value_type, store_type> cache_type; + typedef typename Codec::header header; + + // accessor classes + typedef
zfp::internal::dim1::const_reference<container_type> const_reference; + typedef zfp::internal::dim1::const_pointer<container_type> const_pointer; + typedef zfp::internal::dim1::const_iterator<container_type> const_iterator; + typedef zfp::internal::dim1::const_view<container_type> const_view; + typedef zfp::internal::dim1::private_const_view<container_type> private_const_view; + typedef zfp::internal::dim1::reference<container_type> reference; + typedef zfp::internal::dim1::pointer<container_type> pointer; + typedef zfp::internal::dim1::iterator<container_type> iterator; + typedef zfp::internal::dim1::view<container_type> view; + typedef zfp::internal::dim1::private_view<container_type> private_view; + + // default constructor + array1() : + array(1, Codec::type), + cache(store) + {} + + // constructor of nx-element array using rate bits per value, at least + // cache_size bytes of cache, and optionally initialized from flat array p + array1(size_t nx, double rate, const value_type* p = 0, size_t cache_size = 0) : + array(1, Codec::type), + store(nx, zfp_config_rate(rate, true)), + cache(store, cache_size) + { + this->nx = nx; + if (p) + set(p); + } + + // constructor, from previously-serialized compressed array + array1(const zfp::array::header& header, const void* buffer = 0, size_t buffer_size_bytes = 0) : + array(1, Codec::type, header), + store(header.size_x(), zfp_config_rate(header.rate(), true)), + cache(store) + { + if (buffer) { + if (buffer_size_bytes && buffer_size_bytes < store.compressed_size()) + throw zfp::exception("buffer size is smaller than required"); + std::memcpy(store.compressed_data(), buffer, store.compressed_size()); + } + } + + // copy constructor--performs a deep copy + array1(const array1& a) : + array(), + cache(store) + { + deep_copy(a); + } + + // construction from view--perform deep copy of (sub)array + template <class View> + array1(const View& v) : + array(1, Codec::type), + store(v.size_x(), zfp_config_rate(v.rate(), true)), + cache(store) + { + this->nx = v.size_x(); + // initialize array in its preferred order + for (iterator it = begin(); it != end(); ++it) + *it = v(it.i()); + } + + // virtual
destructor + virtual ~array1() {} + + // assignment operator--performs a deep copy + array1& operator=(const array1& a) + { + if (this != &a) + deep_copy(a); + return *this; + } + + // total number of elements in array + size_t size() const { return nx; } + + // array dimensions + size_t size_x() const { return nx; } + + // resize the array (all previously stored data will be lost) + void resize(size_t nx, bool clear = true) + { + cache.clear(); + this->nx = nx; + store.resize(nx, clear); + } + + // rate in bits per value + double rate() const { return store.rate(); } + + // set rate in bits per value + double set_rate(double rate) + { + cache.clear(); + return store.set_rate(rate, true); + } + + // byte size of array data structure components indicated by mask + size_t size_bytes(uint mask = ZFP_DATA_ALL) const + { + size_t size = 0; + size += store.size_bytes(mask); + size += cache.size_bytes(mask); + if (mask & ZFP_DATA_META) + size += sizeof(*this); + return size; + } + + // number of bytes of compressed data + size_t compressed_size() const { return store.compressed_size(); } + + // pointer to compressed data for read or write access + void* compressed_data() const + { + cache.flush(); + return store.compressed_data(); + } + + // cache size in number of bytes + size_t cache_size() const { return cache.size(); } + + // set minimum cache size in bytes (array dimensions must be known) + void set_cache_size(size_t bytes) + { + cache.flush(); + cache.resize(bytes); + } + + // empty cache without compressing modified cached blocks + void clear_cache() const { cache.clear(); } + + // flush cache by compressing all modified cached blocks + void flush_cache() const { cache.flush(); } + + // decompress array and store at p + void get(value_type* p) const + { + const size_t bx = store.block_size_x(); + const ptrdiff_t sx = 1; + size_t block_index = 0; + for (size_t i = 0; i < bx; i++, p += 4) + cache.get_block(block_index++, p, sx); + } + + // initialize array by copying 
and compressing data stored at p + void set(const value_type* p) + { + const size_t bx = store.block_size_x(); + size_t block_index = 0; + if (p) { + // compress data stored at p + const ptrdiff_t sx = 1; + for (size_t i = 0; i < bx; i++, p += 4) + cache.put_block(block_index++, p, sx); + } + else { + // zero-initialize array + const value_type block[4] = {}; + while (block_index < bx) + cache.put_block(block_index++, block, 1); + } + } + + // accessors + const_reference operator()(size_t i) const { return const_reference(const_cast<container_type*>(this), i); } + reference operator()(size_t i) { return reference(this, i); } + + // flat index accessors + const_reference operator[](size_t index) const { return const_reference(const_cast<container_type*>(this), index); } + reference operator[](size_t index) { return reference(this, index); } + + // random access iterators + const_iterator cbegin() const { return const_iterator(this, 0); } + const_iterator cend() const { return const_iterator(this, nx); } + const_iterator begin() const { return cbegin(); } + const_iterator end() const { return cend(); } + iterator begin() { return iterator(this, 0); } + iterator end() { return iterator(this, nx); } + +protected: + friend class zfp::internal::dim1::const_handle<array1>; + friend class zfp::internal::dim1::const_reference<array1>; + friend class zfp::internal::dim1::const_pointer<array1>; + friend class zfp::internal::dim1::const_iterator<array1>; + friend class zfp::internal::dim1::const_view<array1>; + friend class zfp::internal::dim1::private_const_view<array1>; + friend class zfp::internal::dim1::reference<array1>; + friend class zfp::internal::dim1::pointer<array1>; + friend class zfp::internal::dim1::iterator<array1>; + friend class zfp::internal::dim1::view<array1>; + friend class zfp::internal::dim1::private_view<array1>; + + // perform a deep copy + void deep_copy(const array1& a) + { + // copy base class members + array::deep_copy(a); + // copy persistent storage + store.deep_copy(a.store); + // copy cached data + cache.deep_copy(a.cache); + } + + // global index bounds + size_t
min_x() const { return 0; } + size_t max_x() const { return nx; } + + // inspector + value_type get(size_t i) const { return cache.get(i); } + + // mutators (called from proxy reference) + void set(size_t i, value_type val) { cache.set(i, val); } + void add(size_t i, value_type val) { cache.ref(i) += val; } + void sub(size_t i, value_type val) { cache.ref(i) -= val; } + void mul(size_t i, value_type val) { cache.ref(i) *= val; } + void div(size_t i, value_type val) { cache.ref(i) /= val; } + + store_type store; // persistent storage of compressed blocks + cache_type cache; // cache of decompressed blocks +}; + +typedef array1<float> array1f; +typedef array1<double> array1d; + +} + +#endif diff --git a/include/zfp/array2.hpp b/include/zfp/array2.hpp new file mode 100644 index 00000000..d669f7c0 --- /dev/null +++ b/include/zfp/array2.hpp @@ -0,0 +1,301 @@ +#ifndef ZFP_ARRAY2_HPP +#define ZFP_ARRAY2_HPP + +#include +#include +#include +#include "zfp/array.hpp" +#include "zfp/index.hpp" +#include "zfp/codec/zfpcodec.hpp" +#include "zfp/internal/array/cache2.hpp" +#include "zfp/internal/array/handle2.hpp" +#include "zfp/internal/array/iterator2.hpp" +#include "zfp/internal/array/pointer2.hpp" +#include "zfp/internal/array/reference2.hpp" +#include "zfp/internal/array/store2.hpp" +#include "zfp/internal/array/view2.hpp" + +namespace zfp { + +// compressed 2D array of scalars +template < + typename Scalar, + class Codec = zfp::codec::zfp2<Scalar>, + class Index = zfp::index::implicit +> +class array2 : public array { +public: + // types utilized by nested classes + typedef array2 container_type; + typedef Scalar value_type; + typedef Codec codec_type; + typedef Index index_type; + typedef zfp::internal::BlockStore2<value_type, codec_type, index_type> store_type; + typedef zfp::internal::BlockCache2<value_type, store_type> cache_type; + typedef typename Codec::header header; + + // accessor classes + typedef zfp::internal::dim2::const_reference<container_type> const_reference; + typedef zfp::internal::dim2::const_pointer<container_type> const_pointer; + typedef
zfp::internal::dim2::const_iterator<container_type> const_iterator; + typedef zfp::internal::dim2::const_view<container_type> const_view; + typedef zfp::internal::dim2::private_const_view<container_type> private_const_view; + typedef zfp::internal::dim2::reference<container_type> reference; + typedef zfp::internal::dim2::pointer<container_type> pointer; + typedef zfp::internal::dim2::iterator<container_type> iterator; + typedef zfp::internal::dim2::view<container_type> view; + typedef zfp::internal::dim2::flat_view<container_type> flat_view; + typedef zfp::internal::dim2::nested_view1<container_type> nested_view1; + typedef zfp::internal::dim2::nested_view2<container_type> nested_view2; + typedef zfp::internal::dim2::nested_view2<container_type> nested_view; + typedef zfp::internal::dim2::private_view<container_type> private_view; + + // default constructor + array2() : + array(2, Codec::type), + cache(store) + {} + + // constructor of nx * ny array using rate bits per value, at least + // cache_size bytes of cache, and optionally initialized from flat array p + array2(size_t nx, size_t ny, double rate, const value_type* p = 0, size_t cache_size = 0) : + array(2, Codec::type), + store(nx, ny, zfp_config_rate(rate, true)), + cache(store, cache_size) + { + this->nx = nx; + this->ny = ny; + if (p) + set(p); + } + + // constructor, from previously-serialized compressed array + array2(const zfp::array::header& header, const void* buffer = 0, size_t buffer_size_bytes = 0) : + array(2, Codec::type, header), + store(header.size_x(), header.size_y(), zfp_config_rate(header.rate(), true)), + cache(store) + { + if (buffer) { + if (buffer_size_bytes && buffer_size_bytes < store.compressed_size()) + throw zfp::exception("buffer size is smaller than required"); + std::memcpy(store.compressed_data(), buffer, store.compressed_size()); + } + } + + // copy constructor--performs a deep copy + array2(const array2& a) : + array(), + cache(store) + { + deep_copy(a); + } + + // construction from view--perform deep copy of (sub)array + template <class View> + array2(const View& v) : + array(2, Codec::type), + store(v.size_x(), v.size_y(), zfp_config_rate(v.rate(), true)), + cache(store) + 
{ + this->nx = v.size_x(); + this->ny = v.size_y(); + // initialize array in its preferred order + for (iterator it = begin(); it != end(); ++it) + *it = v(it.i(), it.j()); + } + + // virtual destructor + virtual ~array2() {} + + // assignment operator--performs a deep copy + array2& operator=(const array2& a) + { + if (this != &a) + deep_copy(a); + return *this; + } + + // total number of elements in array + size_t size() const { return nx * ny; } + + // array dimensions + size_t size_x() const { return nx; } + size_t size_y() const { return ny; } + + // resize the array (all previously stored data will be lost) + void resize(size_t nx, size_t ny, bool clear = true) + { + cache.clear(); + this->nx = nx; + this->ny = ny; + store.resize(nx, ny, clear); + } + + // rate in bits per value + double rate() const { return store.rate(); } + + // set rate in bits per value + double set_rate(double rate) + { + cache.clear(); + return store.set_rate(rate, true); + } + + // byte size of array data structure components indicated by mask + size_t size_bytes(uint mask = ZFP_DATA_ALL) const + { + size_t size = 0; + size += store.size_bytes(mask); + size += cache.size_bytes(mask); + if (mask & ZFP_DATA_META) + size += sizeof(*this); + return size; + } + + // number of bytes of compressed data + size_t compressed_size() const { return store.compressed_size(); } + + // pointer to compressed data for read or write access + void* compressed_data() const + { + cache.flush(); + return store.compressed_data(); + } + + // cache size in number of bytes + size_t cache_size() const { return cache.size(); } + + // set minimum cache size in bytes (array dimensions must be known) + void set_cache_size(size_t bytes) + { + cache.flush(); + cache.resize(bytes); + } + + // empty cache without compressing modified cached blocks + void clear_cache() const { cache.clear(); } + + // flush cache by compressing all modified cached blocks + void flush_cache() const { cache.flush(); } + + // decompress 
array and store at p + void get(value_type* p) const + { + const size_t bx = store.block_size_x(); + const size_t by = store.block_size_y(); + const ptrdiff_t sx = 1; + const ptrdiff_t sy = static_cast(nx); + size_t block_index = 0; + for (size_t j = 0; j < by; j++, p += 4 * sx * ptrdiff_t(nx - bx)) + for (size_t i = 0; i < bx; i++, p += 4) + cache.get_block(block_index++, p, sx, sy); + } + + // initialize array by copying and compressing data stored at p + void set(const value_type* p) + { + const size_t bx = store.block_size_x(); + const size_t by = store.block_size_y(); + size_t block_index = 0; + if (p) { + // compress data stored at p + const ptrdiff_t sx = 1; + const ptrdiff_t sy = static_cast(nx); + for (size_t j = 0; j < by; j++, p += 4 * sx * ptrdiff_t(nx - bx)) + for (size_t i = 0; i < bx; i++, p += 4) + cache.put_block(block_index++, p, sx, sy); + } + else { + // zero-initialize array + const value_type block[4 * 4] = {}; + while (block_index < bx * by) + cache.put_block(block_index++, block, 1, 4); + } + } + + // (i, j) accessors + const_reference operator()(size_t i, size_t j) const { return const_reference(const_cast(this), i, j); } + reference operator()(size_t i, size_t j) { return reference(this, i, j); } + + // flat index accessors + const_reference operator[](size_t index) const + { + size_t i, j; + ij(i, j, index); + return const_reference(const_cast(this), i, j); + } + reference operator[](size_t index) + { + size_t i, j; + ij(i, j, index); + return reference(this, i, j); + } + + // random access iterators + const_iterator cbegin() const { return const_iterator(this, 0, 0); } + const_iterator cend() const { return const_iterator(this, 0, ny); } + const_iterator begin() const { return cbegin(); } + const_iterator end() const { return cend(); } + iterator begin() { return iterator(this, 0, 0); } + iterator end() { return iterator(this, 0, ny); } + +protected: + friend class zfp::internal::dim2::const_handle; + friend class 
zfp::internal::dim2::const_reference; + friend class zfp::internal::dim2::const_pointer; + friend class zfp::internal::dim2::const_iterator; + friend class zfp::internal::dim2::const_view; + friend class zfp::internal::dim2::private_const_view; + friend class zfp::internal::dim2::reference; + friend class zfp::internal::dim2::pointer; + friend class zfp::internal::dim2::iterator; + friend class zfp::internal::dim2::view; + friend class zfp::internal::dim2::flat_view; + friend class zfp::internal::dim2::nested_view1; + friend class zfp::internal::dim2::nested_view2; + friend class zfp::internal::dim2::private_view; + + // perform a deep copy + void deep_copy(const array2& a) + { + // copy base class members + array::deep_copy(a); + // copy persistent storage + store.deep_copy(a.store); + // copy cached data + cache.deep_copy(a.cache); + } + + // global index bounds + size_t min_x() const { return 0; } + size_t max_x() const { return nx; } + size_t min_y() const { return 0; } + size_t max_y() const { return ny; } + + // inspector + value_type get(size_t i, size_t j) const { return cache.get(i, j); } + + // mutators (called from proxy reference) + void set(size_t i, size_t j, value_type val) { cache.set(i, j, val); } + void add(size_t i, size_t j, value_type val) { cache.ref(i, j) += val; } + void sub(size_t i, size_t j, value_type val) { cache.ref(i, j) -= val; } + void mul(size_t i, size_t j, value_type val) { cache.ref(i, j) *= val; } + void div(size_t i, size_t j, value_type val) { cache.ref(i, j) /= val; } + + // convert flat index to (i, j) + void ij(size_t& i, size_t& j, size_t index) const + { + i = index % nx; index /= nx; + j = index; + } + + store_type store; // persistent storage of compressed blocks + cache_type cache; // cache of decompressed blocks +}; + +typedef array2<float> array2f; +typedef array2<double> array2d; + +} + +#endif diff --git a/include/zfp/array3.hpp b/include/zfp/array3.hpp new file mode 100644 index 00000000..7e60fade --- /dev/null +++
b/include/zfp/array3.hpp @@ -0,0 +1,316 @@ +#ifndef ZFP_ARRAY3_HPP +#define ZFP_ARRAY3_HPP + +#include +#include +#include +#include "zfp/array.hpp" +#include "zfp/index.hpp" +#include "zfp/codec/zfpcodec.hpp" +#include "zfp/internal/array/cache3.hpp" +#include "zfp/internal/array/handle3.hpp" +#include "zfp/internal/array/iterator3.hpp" +#include "zfp/internal/array/pointer3.hpp" +#include "zfp/internal/array/reference3.hpp" +#include "zfp/internal/array/store3.hpp" +#include "zfp/internal/array/view3.hpp" + +namespace zfp { + +// compressed 3D array of scalars +template < + typename Scalar, + class Codec = zfp::codec::zfp3, + class Index = zfp::index::implicit +> +class array3 : public array { +public: + // types utilized by nested classes + typedef array3 container_type; + typedef Scalar value_type; + typedef Codec codec_type; + typedef Index index_type; + typedef zfp::internal::BlockStore3 store_type; + typedef zfp::internal::BlockCache3 cache_type; + typedef typename Codec::header header; + + // accessor classes + typedef zfp::internal::dim3::const_reference const_reference; + typedef zfp::internal::dim3::const_pointer const_pointer; + typedef zfp::internal::dim3::const_iterator const_iterator; + typedef zfp::internal::dim3::const_view const_view; + typedef zfp::internal::dim3::private_const_view private_const_view; + typedef zfp::internal::dim3::reference reference; + typedef zfp::internal::dim3::pointer pointer; + typedef zfp::internal::dim3::iterator iterator; + typedef zfp::internal::dim3::view view; + typedef zfp::internal::dim3::flat_view flat_view; + typedef zfp::internal::dim3::nested_view1 nested_view1; + typedef zfp::internal::dim3::nested_view2 nested_view2; + typedef zfp::internal::dim3::nested_view3 nested_view3; + typedef zfp::internal::dim3::nested_view3 nested_view; + typedef zfp::internal::dim3::private_view private_view; + + // default constructor + array3() : + array(3, Codec::type), + cache(store) + {} + + // constructor of nx * ny * nz
array using rate bits per value, at least + // cache_size bytes of cache, and optionally initialized from flat array p + array3(size_t nx, size_t ny, size_t nz, double rate, const value_type* p = 0, size_t cache_size = 0) : + array(3, Codec::type), + store(nx, ny, nz, zfp_config_rate(rate, true)), + cache(store, cache_size) + { + this->nx = nx; + this->ny = ny; + this->nz = nz; + if (p) + set(p); + } + + // constructor, from previously-serialized compressed array + array3(const zfp::array::header& header, const void* buffer = 0, size_t buffer_size_bytes = 0) : + array(3, Codec::type, header), + store(header.size_x(), header.size_y(), header.size_z(), zfp_config_rate(header.rate(), true)), + cache(store) + { + if (buffer) { + if (buffer_size_bytes && buffer_size_bytes < store.compressed_size()) + throw zfp::exception("buffer size is smaller than required"); + std::memcpy(store.compressed_data(), buffer, store.compressed_size()); + } + } + + // copy constructor--performs a deep copy + array3(const array3& a) : + array(), + cache(store) + { + deep_copy(a); + } + + // construction from view--perform deep copy of (sub)array + template + array3(const View& v) : + array(3, Codec::type), + store(v.size_x(), v.size_y(), v.size_z(), zfp_config_rate(v.rate(), true)), + cache(store) + { + this->nx = v.size_x(); + this->ny = v.size_y(); + this->nz = v.size_z(); + // initialize array in its preferred order + for (iterator it = begin(); it != end(); ++it) + *it = v(it.i(), it.j(), it.k()); + } + + // virtual destructor + virtual ~array3() {} + + // assignment operator--performs a deep copy + array3& operator=(const array3& a) + { + if (this != &a) + deep_copy(a); + return *this; + } + + // total number of elements in array + size_t size() const { return nx * ny * nz; } + + // array dimensions + size_t size_x() const { return nx; } + size_t size_y() const { return ny; } + size_t size_z() const { return nz; } + + // resize the array (all previously stored data will be lost) + void 
resize(size_t nx, size_t ny, size_t nz, bool clear = true) + { + cache.clear(); + this->nx = nx; + this->ny = ny; + this->nz = nz; + store.resize(nx, ny, nz, clear); + } + + // rate in bits per value + double rate() const { return store.rate(); } + + // set rate in bits per value + double set_rate(double rate) + { + cache.clear(); + return store.set_rate(rate, true); + } + + // byte size of array data structure components indicated by mask + size_t size_bytes(uint mask = ZFP_DATA_ALL) const + { + size_t size = 0; + size += store.size_bytes(mask); + size += cache.size_bytes(mask); + if (mask & ZFP_DATA_META) + size += sizeof(*this); + return size; + } + + // number of bytes of compressed data + size_t compressed_size() const { return store.compressed_size(); } + + // pointer to compressed data for read or write access + void* compressed_data() const + { + cache.flush(); + return store.compressed_data(); + } + + // cache size in number of bytes + size_t cache_size() const { return cache.size(); } + + // set minimum cache size in bytes (array dimensions must be known) + void set_cache_size(size_t bytes) + { + cache.flush(); + cache.resize(bytes); + } + + // empty cache without compressing modified cached blocks + void clear_cache() const { cache.clear(); } + + // flush cache by compressing all modified cached blocks + void flush_cache() const { cache.flush(); } + + // decompress array and store at p + void get(value_type* p) const + { + const size_t bx = store.block_size_x(); + const size_t by = store.block_size_y(); + const size_t bz = store.block_size_z(); + const ptrdiff_t sx = 1; + const ptrdiff_t sy = static_cast(nx); + const ptrdiff_t sz = static_cast(nx * ny); + size_t block_index = 0; + for (size_t k = 0; k < bz; k++, p += 4 * sy * ptrdiff_t(ny - by)) + for (size_t j = 0; j < by; j++, p += 4 * sx * ptrdiff_t(nx - bx)) + for (size_t i = 0; i < bx; i++, p += 4) + cache.get_block(block_index++, p, sx, sy, sz); + } + + // initialize array by copying and 
compressing data stored at p + void set(const value_type* p) + { + const size_t bx = store.block_size_x(); + const size_t by = store.block_size_y(); + const size_t bz = store.block_size_z(); + size_t block_index = 0; + if (p) { + // compress data stored at p + const ptrdiff_t sx = 1; + const ptrdiff_t sy = static_cast(nx); + const ptrdiff_t sz = static_cast(nx * ny); + for (size_t k = 0; k < bz; k++, p += 4 * sy * ptrdiff_t(ny - by)) + for (size_t j = 0; j < by; j++, p += 4 * sx * ptrdiff_t(nx - bx)) + for (size_t i = 0; i < bx; i++, p += 4) + cache.put_block(block_index++, p, sx, sy, sz); + } + else { + // zero-initialize array + const value_type block[4 * 4 * 4] = {}; + while (block_index < bx * by * bz) + cache.put_block(block_index++, block, 1, 4, 16); + } + } + + // (i, j, k) accessors + const_reference operator()(size_t i, size_t j, size_t k) const { return const_reference(const_cast(this), i, j, k); } + reference operator()(size_t i, size_t j, size_t k) { return reference(this, i, j, k); } + + // flat index accessors + const_reference operator[](size_t index) const + { + size_t i, j, k; + ijk(i, j, k, index); + return const_reference(const_cast(this), i, j, k); + } + reference operator[](size_t index) + { + size_t i, j, k; + ijk(i, j, k, index); + return reference(this, i, j, k); + } + + // random access iterators + const_iterator cbegin() const { return const_iterator(this, 0, 0, 0); } + const_iterator cend() const { return const_iterator(this, 0, 0, nz); } + const_iterator begin() const { return cbegin(); } + const_iterator end() const { return cend(); } + iterator begin() { return iterator(this, 0, 0, 0); } + iterator end() { return iterator(this, 0, 0, nz); } + +protected: + friend class zfp::internal::dim3::const_handle; + friend class zfp::internal::dim3::const_reference; + friend class zfp::internal::dim3::const_pointer; + friend class zfp::internal::dim3::const_iterator; + friend class zfp::internal::dim3::const_view; + friend class 
zfp::internal::dim3::private_const_view; + friend class zfp::internal::dim3::reference; + friend class zfp::internal::dim3::pointer; + friend class zfp::internal::dim3::iterator; + friend class zfp::internal::dim3::view; + friend class zfp::internal::dim3::flat_view; + friend class zfp::internal::dim3::nested_view1; + friend class zfp::internal::dim3::nested_view2; + friend class zfp::internal::dim3::nested_view3; + friend class zfp::internal::dim3::private_view; + + // perform a deep copy + void deep_copy(const array3& a) + { + // copy base class members + array::deep_copy(a); + // copy persistent storage + store.deep_copy(a.store); + // copy cached data + cache.deep_copy(a.cache); + } + + // global index bounds + size_t min_x() const { return 0; } + size_t max_x() const { return nx; } + size_t min_y() const { return 0; } + size_t max_y() const { return ny; } + size_t min_z() const { return 0; } + size_t max_z() const { return nz; } + + // inspector + value_type get(size_t i, size_t j, size_t k) const { return cache.get(i, j, k); } + + // mutators (called from proxy reference) + void set(size_t i, size_t j, size_t k, value_type val) { cache.set(i, j, k, val); } + void add(size_t i, size_t j, size_t k, value_type val) { cache.ref(i, j, k) += val; } + void sub(size_t i, size_t j, size_t k, value_type val) { cache.ref(i, j, k) -= val; } + void mul(size_t i, size_t j, size_t k, value_type val) { cache.ref(i, j, k) *= val; } + void div(size_t i, size_t j, size_t k, value_type val) { cache.ref(i, j, k) /= val; } + + // convert flat index to (i, j, k) + void ijk(size_t& i, size_t& j, size_t& k, size_t index) const + { + i = index % nx; index /= nx; + j = index % ny; index /= ny; + k = index; + } + + store_type store; // persistent storage of compressed blocks + cache_type cache; // cache of decompressed blocks +}; + +typedef array3<float> array3f; +typedef array3<double> array3d; + +} + +#endif diff --git a/include/zfp/array4.hpp b/include/zfp/array4.hpp new file mode 100644 index
00000000..19c1d811 --- /dev/null +++ b/include/zfp/array4.hpp @@ -0,0 +1,331 @@ +#ifndef ZFP_ARRAY4_HPP +#define ZFP_ARRAY4_HPP + +#include +#include +#include +#include "zfp/array.hpp" +#include "zfp/index.hpp" +#include "zfp/codec/zfpcodec.hpp" +#include "zfp/internal/array/cache4.hpp" +#include "zfp/internal/array/handle4.hpp" +#include "zfp/internal/array/iterator4.hpp" +#include "zfp/internal/array/pointer4.hpp" +#include "zfp/internal/array/reference4.hpp" +#include "zfp/internal/array/store4.hpp" +#include "zfp/internal/array/view4.hpp" + +namespace zfp { + +// compressed 4D array of scalars +template < + typename Scalar, + class Codec = zfp::codec::zfp4, + class Index = zfp::index::implicit +> +class array4 : public array { +public: + // types utilized by nested classes + typedef array4 container_type; + typedef Scalar value_type; + typedef Codec codec_type; + typedef Index index_type; + typedef zfp::internal::BlockStore4 store_type; + typedef zfp::internal::BlockCache4 cache_type; + typedef typename Codec::header header; + + // accessor classes + typedef zfp::internal::dim4::const_reference const_reference; + typedef zfp::internal::dim4::const_pointer const_pointer; + typedef zfp::internal::dim4::const_iterator const_iterator; + typedef zfp::internal::dim4::const_view const_view; + typedef zfp::internal::dim4::private_const_view private_const_view; + typedef zfp::internal::dim4::reference reference; + typedef zfp::internal::dim4::pointer pointer; + typedef zfp::internal::dim4::iterator iterator; + typedef zfp::internal::dim4::view view; + typedef zfp::internal::dim4::flat_view flat_view; + typedef zfp::internal::dim4::nested_view1 nested_view1; + typedef zfp::internal::dim4::nested_view2 nested_view2; + typedef zfp::internal::dim4::nested_view3 nested_view3; + typedef zfp::internal::dim4::nested_view4 nested_view4; + typedef zfp::internal::dim4::nested_view4 nested_view; + typedef zfp::internal::dim4::private_view private_view; + + // default constructor +
array4() : + array(4, Codec::type), + cache(store) + {} + + // constructor of nx * ny * nz * nw array using rate bits per value, at least + // cache_size bytes of cache, and optionally initialized from flat array p + array4(size_t nx, size_t ny, size_t nz, size_t nw, double rate, const value_type* p = 0, size_t cache_size = 0) : + array(4, Codec::type), + store(nx, ny, nz, nw, zfp_config_rate(rate, true)), + cache(store, cache_size) + { + this->nx = nx; + this->ny = ny; + this->nz = nz; + this->nw = nw; + if (p) + set(p); + } + + // constructor, from previously-serialized compressed array + array4(const zfp::array::header& header, const void* buffer = 0, size_t buffer_size_bytes = 0) : + array(4, Codec::type, header), + store(header.size_x(), header.size_y(), header.size_z(), header.size_w(), zfp_config_rate(header.rate(), true)), + cache(store) + { + if (buffer) { + if (buffer_size_bytes && buffer_size_bytes < store.compressed_size()) + throw zfp::exception("buffer size is smaller than required"); + std::memcpy(store.compressed_data(), buffer, store.compressed_size()); + } + } + + // copy constructor--performs a deep copy + array4(const array4& a) : + array(), + cache(store) + { + deep_copy(a); + } + + // construction from view--perform deep copy of (sub)array + template + array4(const View& v) : + array(4, Codec::type), + store(v.size_x(), v.size_y(), v.size_z(), v.size_w(), zfp_config_rate(v.rate(), true)), + cache(store) + { + this->nx = v.size_x(); + this->ny = v.size_y(); + this->nz = v.size_z(); + this->nw = v.size_w(); + // initialize array in its preferred order + for (iterator it = begin(); it != end(); ++it) + *it = v(it.i(), it.j(), it.k(), it.l()); + } + + // virtual destructor + virtual ~array4() {} + + // assignment operator--performs a deep copy + array4& operator=(const array4& a) + { + if (this != &a) + deep_copy(a); + return *this; + } + + // total number of elements in array + size_t size() const { return nx * ny * nz * nw; } + + // array 
dimensions + size_t size_x() const { return nx; } + size_t size_y() const { return ny; } + size_t size_z() const { return nz; } + size_t size_w() const { return nw; } + + // resize the array (all previously stored data will be lost) + void resize(size_t nx, size_t ny, size_t nz, size_t nw, bool clear = true) + { + cache.clear(); + this->nx = nx; + this->ny = ny; + this->nz = nz; + this->nw = nw; + store.resize(nx, ny, nz, nw, clear); + } + + // rate in bits per value + double rate() const { return store.rate(); } + + // set rate in bits per value + double set_rate(double rate) + { + cache.clear(); + return store.set_rate(rate, true); + } + + // byte size of array data structure components indicated by mask + size_t size_bytes(uint mask = ZFP_DATA_ALL) const + { + size_t size = 0; + size += store.size_bytes(mask); + size += cache.size_bytes(mask); + if (mask & ZFP_DATA_META) + size += sizeof(*this); + return size; + } + + // number of bytes of compressed data + size_t compressed_size() const { return store.compressed_size(); } + + // pointer to compressed data for read or write access + void* compressed_data() const + { + cache.flush(); + return store.compressed_data(); + } + + // cache size in number of bytes + size_t cache_size() const { return cache.size(); } + + // set minimum cache size in bytes (array dimensions must be known) + void set_cache_size(size_t bytes) + { + cache.flush(); + cache.resize(bytes); + } + + // empty cache without compressing modified cached blocks + void clear_cache() const { cache.clear(); } + + // flush cache by compressing all modified cached blocks + void flush_cache() const { cache.flush(); } + + // decompress array and store at p + void get(value_type* p) const + { + const size_t bx = store.block_size_x(); + const size_t by = store.block_size_y(); + const size_t bz = store.block_size_z(); + const size_t bw = store.block_size_w(); + const ptrdiff_t sx = 1; + const ptrdiff_t sy = static_cast(nx); + const ptrdiff_t sz = static_cast(nx 
* ny); + const ptrdiff_t sw = static_cast(nx * ny * nz); + size_t block_index = 0; + for (size_t l = 0; l < bw; l++, p += 4 * sz * ptrdiff_t(nz - bz)) + for (size_t k = 0; k < bz; k++, p += 4 * sy * ptrdiff_t(ny - by)) + for (size_t j = 0; j < by; j++, p += 4 * sx * ptrdiff_t(nx - bx)) + for (size_t i = 0; i < bx; i++, p += 4) + cache.get_block(block_index++, p, sx, sy, sz, sw); + } + + // initialize array by copying and compressing data stored at p + void set(const value_type* p) + { + const size_t bx = store.block_size_x(); + const size_t by = store.block_size_y(); + const size_t bz = store.block_size_z(); + const size_t bw = store.block_size_w(); + size_t block_index = 0; + if (p) { + // compress data stored at p + const ptrdiff_t sx = 1; + const ptrdiff_t sy = static_cast(nx); + const ptrdiff_t sz = static_cast(nx * ny); + const ptrdiff_t sw = static_cast(nx * ny * nz); + for (size_t l = 0; l < bw; l++, p += 4 * sz * ptrdiff_t(nz - bz)) + for (size_t k = 0; k < bz; k++, p += 4 * sy * ptrdiff_t(ny - by)) + for (size_t j = 0; j < by; j++, p += 4 * sx * ptrdiff_t(nx - bx)) + for (size_t i = 0; i < bx; i++, p += 4) + cache.put_block(block_index++, p, sx, sy, sz, sw); + } + else { + // zero-initialize array + const value_type block[4 * 4 * 4 * 4] = {}; + while (block_index < bx * by * bz * bw) + cache.put_block(block_index++, block, 1, 4, 16, 64); + } + } + + // (i, j, k, l) accessors + const_reference operator()(size_t i, size_t j, size_t k, size_t l) const { return const_reference(const_cast(this), i, j, k, l); } + reference operator()(size_t i, size_t j, size_t k, size_t l) { return reference(this, i, j, k, l); } + + // flat index accessors + const_reference operator[](size_t index) const + { + size_t i, j, k, l; + ijkl(i, j, k, l, index); + return const_reference(const_cast(this), i, j, k, l); + } + reference operator[](size_t index) + { + size_t i, j, k, l; + ijkl(i, j, k, l, index); + return reference(this, i, j, k, l); + } + + // random access iterators +
const_iterator cbegin() const { return const_iterator(this, 0, 0, 0, 0); } + const_iterator cend() const { return const_iterator(this, 0, 0, 0, nw); } + const_iterator begin() const { return cbegin(); } + const_iterator end() const { return cend(); } + iterator begin() { return iterator(this, 0, 0, 0, 0); } + iterator end() { return iterator(this, 0, 0, 0, nw); } + +protected: + friend class zfp::internal::dim4::const_handle; + friend class zfp::internal::dim4::const_reference; + friend class zfp::internal::dim4::const_pointer; + friend class zfp::internal::dim4::const_iterator; + friend class zfp::internal::dim4::const_view; + friend class zfp::internal::dim4::private_const_view; + friend class zfp::internal::dim4::reference; + friend class zfp::internal::dim4::pointer; + friend class zfp::internal::dim4::iterator; + friend class zfp::internal::dim4::view; + friend class zfp::internal::dim4::flat_view; + friend class zfp::internal::dim4::nested_view1; + friend class zfp::internal::dim4::nested_view2; + friend class zfp::internal::dim4::nested_view3; + friend class zfp::internal::dim4::nested_view4; + friend class zfp::internal::dim4::private_view; + + // perform a deep copy + void deep_copy(const array4& a) + { + // copy base class members + array::deep_copy(a); + // copy persistent storage + store.deep_copy(a.store); + // copy cached data + cache.deep_copy(a.cache); + } + + // global index bounds + size_t min_x() const { return 0; } + size_t max_x() const { return nx; } + size_t min_y() const { return 0; } + size_t max_y() const { return ny; } + size_t min_z() const { return 0; } + size_t max_z() const { return nz; } + size_t min_w() const { return 0; } + size_t max_w() const { return nw; } + + // inspector + value_type get(size_t i, size_t j, size_t k, size_t l) const { return cache.get(i, j, k, l); } + + // mutators (called from proxy reference) + void set(size_t i, size_t j, size_t k, size_t l, value_type val) { cache.set(i, j, k, l, val); } + void add(size_t 
i, size_t j, size_t k, size_t l, value_type val) { cache.ref(i, j, k, l) += val; } + void sub(size_t i, size_t j, size_t k, size_t l, value_type val) { cache.ref(i, j, k, l) -= val; } + void mul(size_t i, size_t j, size_t k, size_t l, value_type val) { cache.ref(i, j, k, l) *= val; } + void div(size_t i, size_t j, size_t k, size_t l, value_type val) { cache.ref(i, j, k, l) /= val; } + + // convert flat index to (i, j, k, l) + void ijkl(size_t& i, size_t& j, size_t& k, size_t& l, size_t index) const + { + i = index % nx; index /= nx; + j = index % ny; index /= ny; + k = index % nz; index /= nz; + l = index; + } + + store_type store; // persistent storage of compressed blocks + cache_type cache; // cache of decompressed blocks +}; + +typedef array4<float> array4f; +typedef array4<double> array4d; + +} + +#endif diff --git a/include/bitstream.h b/include/zfp/bitstream.h similarity index 64% rename from include/bitstream.h rename to include/zfp/bitstream.h index ad5475fe..44598227 100644 --- a/include/bitstream.h +++ b/include/zfp/bitstream.h @@ -2,12 +2,21 @@ #define ZFP_BITSTREAM_H #include -#include "zfp/types.h" -#include "zfp/system.h" +#include "zfp/internal/zfp/types.h" +#include "zfp/internal/zfp/system.h" /* forward declaration of opaque type */ typedef struct bitstream bitstream; +/* bit offset into stream where bits are read/written */ +typedef uint64 bitstream_offset; + +/* type for counting number of bits in a stream */ +typedef bitstream_offset bitstream_size; + +/* type for counting a small number of bits in a stream */ +typedef size_t bitstream_count; + extern_ const size_t stream_word_bits; /* bit stream granularity */ #ifndef inline_ @@ -24,6 +33,9 @@ void stream_close(bitstream* stream); /* make a copy of bit stream to shared memory buffer */ bitstream* stream_clone(const bitstream* stream); +/* word size in bits (equal to stream_word_bits) */ +bitstream_count stream_alignment(void); + /* pointer to beginning of stream */ void* stream_data(const bitstream* stream); @@
-46,40 +58,40 @@ uint stream_read_bit(bitstream* stream); uint stream_write_bit(bitstream* stream, uint bit); /* read 0 <= n <= 64 bits */ -uint64 stream_read_bits(bitstream* stream, uint n); +uint64 stream_read_bits(bitstream* stream, bitstream_count n); /* write 0 <= n <= 64 low bits of value and return remaining bits */ -uint64 stream_write_bits(bitstream* stream, uint64 value, uint n); +uint64 stream_write_bits(bitstream* stream, uint64 value, bitstream_count n); /* return bit offset to next bit to be read */ -size_t stream_rtell(const bitstream* stream); +bitstream_offset stream_rtell(const bitstream* stream); /* return bit offset to next bit to be written */ -size_t stream_wtell(const bitstream* stream); +bitstream_offset stream_wtell(const bitstream* stream); /* rewind stream to beginning */ void stream_rewind(bitstream* stream); /* position stream for reading at given bit offset */ -void stream_rseek(bitstream* stream, size_t offset); +void stream_rseek(bitstream* stream, bitstream_offset offset); /* position stream for writing at given bit offset */ -void stream_wseek(bitstream* stream, size_t offset); +void stream_wseek(bitstream* stream, bitstream_offset offset); /* skip over the next n bits */ -void stream_skip(bitstream* stream, uint n); +void stream_skip(bitstream* stream, bitstream_size n); /* append n zero-bits to stream */ -void stream_pad(bitstream* stream, uint n); +void stream_pad(bitstream* stream, bitstream_size n); /* align stream on next word boundary */ -size_t stream_align(bitstream* stream); +bitstream_count stream_align(bitstream* stream); /* flush out any remaining buffered bits */ -size_t stream_flush(bitstream* stream); +bitstream_count stream_flush(bitstream* stream); /* copy n bits from one bit stream to another */ -void stream_copy(bitstream* dst, bitstream* src, size_t n); +void stream_copy(bitstream* dst, bitstream* src, bitstream_size n); #ifdef BIT_STREAM_STRIDED /* set block size in number of words and spacing in number of 
blocks */ diff --git a/src/inline/bitstream.c b/include/zfp/bitstream.inl similarity index 73% rename from src/inline/bitstream.c rename to include/zfp/bitstream.inl index aa58b73f..80294ee9 100644 --- a/src/inline/bitstream.c +++ b/include/zfp/bitstream.inl @@ -22,35 +22,36 @@ The following assumptions and restrictions apply: stream for writing. In read mode, the following functions may be called: size_t stream_size(stream); - size_t stream_rtell(stream); + bitstream_offset stream_rtell(stream); void stream_rewind(stream); void stream_rseek(stream, offset); - void stream_skip(stream, uint n); - size_t stream_align(stream); + void stream_skip(stream, n); + bitstream_count stream_align(stream); uint stream_read_bit(stream); uint64 stream_read_bits(stream, n); Each of the above read calls has a corresponding write call: size_t stream_size(stream); - size_t stream_wtell(stream); + bitstream_offset stream_wtell(stream); void stream_rewind(stream); void stream_wseek(stream, offset); void stream_pad(stream, n); - size_t stream_flush(stream); + bitstream_count stream_flush(stream); uint stream_write_bit(stream, bit); uint64 stream_write_bits(stream, value, n); 3. The stream buffer is an unsigned integer of a user-specified type given by the BIT_STREAM_WORD_TYPE macro. Bits are read and written in units of this integer word type. Supported types are 8, 16, 32, or 64 bits wide. - The bit width of the buffer is denoted by 'wsize' and can be accessed via - the global constant stream_word_bits. A small wsize allows for fine - granularity reads and writes, and may be preferable when working with many - small blocks of data that require non-sequential access. The default - maximum size of 64 bits ensures maximum speed. Note that even when - wsize < 64, it is still possible to read and write up to 64 bits at a time - using stream_read_bits() and stream_write_bits(). 
+ The bit width of the buffer is denoted by 'wsize' and can be accessed + either via the global constant stream_word_bits or stream_alignment(). + A small wsize allows for fine granularity reads and writes, and may be + preferable when working with many small blocks of data that require + non-sequential access. The default maximum size of 64 bits ensures maximum + speed. Note that even when wsize < 64, it is still possible to read and + write up to 64 bits at a time using stream_read_bits() and + stream_write_bits(). 4. If BIT_STREAM_STRIDED is defined, words read from or written to the stream may be accessed noncontiguously by setting a power-of-two block size (which @@ -58,7 +59,7 @@ The following assumptions and restrictions apply: word pointer is always incremented by one word each time a word is accessed. Once advanced past a block boundary, the word pointer is also advanced by the stride to the next block. This feature may be used to store blocks of - data interleaved, e.g. for progressive coding or for noncontiguous parallel + data interleaved, e.g., for progressive coding or for noncontiguous parallel access to the bit stream Note that the block size is measured in words, while the stride is measured in multiples of the block size. Strided access can have a significant performance penalty. @@ -71,7 +72,7 @@ The following assumptions and restrictions apply: is essentially equivalent to (but faster than) for (i = 0; i < n; i++, value >>= 1) - stream_write_bit(value & 1); + stream_write_bit(stream, value & 1); when 0 <= n <= 64. The same holds for read calls, and thus @@ -80,11 +81,15 @@ The following assumptions and restrictions apply: is essentially equivalent to for (i = 0, value = 0; i < n; i++) - value += (uint64)stream_read_bit() << i; + value += (uint64)stream_read_bit(stream) << i; Note that it is possible to write fewer bits than the argument 'value' holds (possibly even no bits), in which case any unwritten bits are - returned. 
+ shifted right to the least significant position and returned. That is, + value = stream_write_bits(stream, value, n); is equivalent to + + for (i = 0; i < n; i++) + value = stream_write_bits(stream, value, 1); 6. Although the stream_wseek(stream, offset) call allows positioning the stream for writing at any bit offset without any data loss (i.e. all @@ -107,41 +112,43 @@ The following assumptions and restrictions apply: #define inline_ #endif +#include "zfp/bitstream.h" + /* satisfy compiler when args unused */ #define unused_(x) ((void)(x)) /* bit stream word/buffer type; granularity of stream I/O operations */ #ifdef BIT_STREAM_WORD_TYPE /* may be 8-, 16-, 32-, or 64-bit unsigned integer type */ - typedef BIT_STREAM_WORD_TYPE word; + typedef BIT_STREAM_WORD_TYPE bitstream_word; #else /* use maximum word size by default for highest speed */ - typedef uint64 word; + typedef uint64 bitstream_word; #endif /* number of bits in a buffered word */ -#define wsize ((uint)(CHAR_BIT * sizeof(word))) +#define wsize ((bitstream_count)(sizeof(bitstream_word) * CHAR_BIT)) /* bit stream structure (opaque to caller) */ struct bitstream { - uint bits; /* number of buffered bits (0 <= bits < wsize) */ - word buffer; /* buffer for incoming/outgoing bits (buffer < 2^bits) */ - word* ptr; /* pointer to next word to be read/written */ - word* begin; /* beginning of stream */ - word* end; /* end of stream (currently unused) */ + bitstream_count bits; /* number of buffered bits (0 <= bits < wsize) */ + bitstream_word buffer; /* incoming/outgoing bits (buffer < 2^bits) */ + bitstream_word* ptr; /* pointer to next word to be read/written */ + bitstream_word* begin; /* beginning of stream */ + bitstream_word* end; /* end of stream (not enforced) */ #ifdef BIT_STREAM_STRIDED - size_t mask; /* one less the block size in number of words */ - ptrdiff_t delta; /* number of words between consecutive blocks */ + size_t mask; /* one less the block size in number of words */ + ptrdiff_t delta; /* 
number of words between consecutive blocks */ #endif }; /* private functions ------------------------------------------------------- */ /* read a single word from memory */ -static word +static bitstream_word stream_read_word(bitstream* s) { - word w = *s->ptr++; + bitstream_word w = *s->ptr++; #ifdef BIT_STREAM_STRIDED if (!((s->ptr - s->begin) & s->mask)) s->ptr += s->delta; @@ -151,7 +158,7 @@ stream_read_word(bitstream* s) /* write a single word to memory */ static void -stream_write_word(bitstream* s, word value) +stream_write_word(bitstream* s, bitstream_word value) { *s->ptr++ = value; #ifdef BIT_STREAM_STRIDED @@ -162,6 +169,13 @@ stream_write_word(bitstream* s, word value) /* public functions -------------------------------------------------------- */ +/* word size in bits (equals bitstream_word_bits) */ +inline_ bitstream_count +stream_alignment(void) +{ + return wsize; +} + /* pointer to beginning of stream */ inline_ void* stream_data(const bitstream* s) @@ -173,14 +187,14 @@ stream_data(const bitstream* s) inline_ size_t stream_size(const bitstream* s) { - return sizeof(word) * (s->ptr - s->begin); + return (size_t)(s->ptr - s->begin) * sizeof(bitstream_word); } /* byte capacity of stream */ inline_ size_t stream_capacity(const bitstream* s) { - return sizeof(word) * (s->end - s->begin); + return (size_t)(s->end - s->begin) * sizeof(bitstream_word); } /* number of words per block */ @@ -226,7 +240,7 @@ stream_read_bit(bitstream* s) inline_ uint stream_write_bit(bitstream* s, uint bit) { - s->buffer += (word)bit << s->bits; + s->buffer += (bitstream_word)bit << s->bits; if (++s->bits == wsize) { stream_write_word(s, s->buffer); s->buffer = 0; @@ -237,7 +251,7 @@ stream_write_bit(bitstream* s, uint bit) /* read 0 <= n <= 64 bits */ inline_ uint64 -stream_read_bits(bitstream* s, uint n) +stream_read_bits(bitstream* s, bitstream_count n) { uint64 value = s->buffer; if (s->bits < n) { @@ -272,10 +286,10 @@ stream_read_bits(bitstream* s, uint n) /* write 0 
<= n <= 64 low bits of value and return remaining bits */ inline_ uint64 -stream_write_bits(bitstream* s, uint64 value, uint n) +stream_write_bits(bitstream* s, uint64 value, bitstream_count n) { /* append bit string to buffer */ - s->buffer += (word)(value << s->bits); + s->buffer += (bitstream_word)(value << s->bits); s->bits += n; /* is buffer full? */ if (s->bits >= wsize) { @@ -289,27 +303,27 @@ stream_write_bits(bitstream* s, uint64 value, uint n) /* assert: 0 <= s->bits <= n */ stream_write_word(s, s->buffer); /* assert: 0 <= n - s->bits < 64 */ - s->buffer = (word)(value >> (n - s->bits)); + s->buffer = (bitstream_word)(value >> (n - s->bits)); } while (sizeof(s->buffer) < sizeof(value) && s->bits >= wsize); } /* assert: 0 <= s->bits < wsize */ - s->buffer &= ((word)1 << s->bits) - 1; + s->buffer &= ((bitstream_word)1 << s->bits) - 1; /* assert: 0 <= n < 64 */ return value >> n; } /* return bit offset to next bit to be read */ -inline_ size_t +inline_ bitstream_offset stream_rtell(const bitstream* s) { - return wsize * (s->ptr - s->begin) - s->bits; + return (bitstream_offset)(s->ptr - s->begin) * wsize - s->bits; } /* return bit offset to next bit to be written */ -inline_ size_t +inline_ bitstream_offset stream_wtell(const bitstream* s) { - return wsize * (s->ptr - s->begin) + s->bits; + return (bitstream_offset)(s->ptr - s->begin) * wsize + s->bits; } /* position stream for reading or writing at beginning */ @@ -323,10 +337,10 @@ stream_rewind(bitstream* s) /* position stream for reading at given bit offset */ inline_ void -stream_rseek(bitstream* s, size_t offset) +stream_rseek(bitstream* s, bitstream_offset offset) { - uint n = offset % wsize; - s->ptr = s->begin + offset / wsize; + bitstream_count n = (bitstream_count)(offset % wsize); + s->ptr = s->begin + (size_t)(offset / wsize); if (n) { s->buffer = stream_read_word(s) >> n; s->bits = wsize - n; @@ -339,13 +353,13 @@ stream_rseek(bitstream* s, size_t offset) /* position stream for writing at given 
bit offset */ inline_ void -stream_wseek(bitstream* s, size_t offset) +stream_wseek(bitstream* s, bitstream_offset offset) { - uint n = offset % wsize; - s->ptr = s->begin + offset / wsize; + bitstream_count n = (bitstream_count)(offset % wsize); + s->ptr = s->begin + (size_t)(offset / wsize); if (n) { - word buffer = *s->ptr; - buffer &= ((word)1 << n) - 1; + bitstream_word buffer = *s->ptr; + buffer &= ((bitstream_word)1 << n) - 1; s->buffer = buffer; s->bits = n; } @@ -357,36 +371,38 @@ stream_wseek(bitstream* s, size_t offset) /* skip over the next n bits (n >= 0) */ inline_ void -stream_skip(bitstream* s, uint n) +stream_skip(bitstream* s, bitstream_size n) { stream_rseek(s, stream_rtell(s) + n); } /* append n zero-bits to stream (n >= 0) */ inline_ void -stream_pad(bitstream* s, uint n) +stream_pad(bitstream* s, bitstream_size n) { - for (s->bits += n; s->bits >= wsize; s->bits -= wsize) { + bitstream_offset bits = s->bits; + for (bits += n; bits >= wsize; bits -= wsize) { stream_write_word(s, s->buffer); s->buffer = 0; } + s->bits = (bitstream_count)bits; } /* align stream on next word boundary */ -inline_ size_t +inline_ bitstream_count stream_align(bitstream* s) { - uint bits = s->bits; + bitstream_count bits = s->bits; if (bits) stream_skip(s, bits); return bits; } /* write any remaining buffered bits and align stream on next word boundary */ -inline_ size_t +inline_ bitstream_count stream_flush(bitstream* s) { - uint bits = (wsize - s->bits) % wsize; + bitstream_count bits = (wsize - s->bits) % wsize; if (bits) stream_pad(s, bits); return bits; @@ -394,16 +410,16 @@ stream_flush(bitstream* s) /* copy n bits from one bit stream to another */ inline_ void -stream_copy(bitstream* dst, bitstream* src, size_t n) +stream_copy(bitstream* dst, bitstream* src, bitstream_size n) { while (n > wsize) { - word w = (word)stream_read_bits(src, wsize); + bitstream_word w = (bitstream_word)stream_read_bits(src, wsize); stream_write_bits(dst, w, wsize); n -= wsize; } if 
(n) { - word w = (word)stream_read_bits(src, (uint)n); - stream_write_bits(dst, w, (uint)n); + bitstream_word w = (bitstream_word)stream_read_bits(src, (bitstream_count)n); + stream_write_bits(dst, w, (bitstream_count)n); } } @@ -427,8 +443,8 @@ stream_open(void* buffer, size_t bytes) { bitstream* s = (bitstream*)malloc(sizeof(bitstream)); if (s) { - s->begin = (word*)buffer; - s->end = s->begin + bytes / sizeof(word); + s->begin = (bitstream_word*)buffer; + s->end = s->begin + bytes / sizeof(bitstream_word); #ifdef BIT_STREAM_STRIDED stream_set_stride(s, 0, 0); #endif diff --git a/include/zfp/codec/gencodec.hpp b/include/zfp/codec/gencodec.hpp new file mode 100644 index 00000000..b0eb3230 --- /dev/null +++ b/include/zfp/codec/gencodec.hpp @@ -0,0 +1,421 @@ +#ifndef ZFP_GENERIC_CODEC_HPP +#define ZFP_GENERIC_CODEC_HPP + +// This CODEC allows interfacing with the zfp::array classes via a user-facing +// scalar type, ExternalType (e.g., double), while storing data in memory using +// a possibly less precise scalar type, InternalType (e.g., float). Using +// zfp's caching mechanism, blocks of data may reside for some time in cache +// as ExternalType. This potentially allows a sequence of more precise +// operations to be performed on the data before it is down-converted to +// InternalType and stored to memory. When ExternalType = InternalType, this +// CODEC allows defining arrays that support the full zfp array API but use +// uncompressed storage. To use this CODEC, pass it as the Codec template +// parameter to a zfp::array class of matching dimensionality. 
+ +#include +#include +#include +#include "zfp.h" +#include "zfp/internal/array/memory.hpp" +#include "zfp/internal/array/traits.hpp" + +namespace zfp { +namespace codec { + +// abstract base class for storing 1D-4D uncompressed blocks of scalars +template < + uint dims, // data dimensionality (1-4) + typename ExternalType, // scalar type exposed through array API + typename InternalType = ExternalType // scalar type used for storage +> +class generic_base { +protected: + // default constructor + generic_base() : + bytes(0), + buffer(0) + {} + +public: + // conservative buffer size for current codec settings + size_t buffer_size(const zfp_field* field) const + { + return zfp_field_blocks(field) * block_size * sizeof(InternalType); + } + + // open + void open(void* data, size_t size) + { + bytes = size; + buffer = static_cast<InternalType*>(data); + } + + // close bit stream + void close() + { + bytes = 0; + buffer = 0; + } + + // pointer to beginning of bit stream + void* data() const { return static_cast<void*>(buffer); } + + // compression mode + zfp_mode mode() const { return zfp_mode_fixed_rate; } + + // rate in compressed bits/value (equals precision) + double rate() const { return static_cast<double>(precision()); } + + // precision in uncompressed bits/value + uint precision() const { return internal_size_bits; } + + // accuracy as absolute error tolerance (unsupported) + double accuracy() const { return -1; } + + // compression parameters (all compression modes) + void params(uint* minbits, uint* maxbits, uint* maxprec, int* minexp) const + { + if (minbits) + *minbits = block_size_bits; + if (maxbits) + *maxbits = block_size_bits; + if (maxprec) + *maxprec = precision(); + if (minexp) + *minexp = ZFP_MIN_EXP; + } + + // enable reversible (lossless) mode + void set_reversible() + { + throw zfp::exception("zfp generic codec does not support reversible mode"); + } + + // set rate in compressed bits/value (equals precision) + double set_rate(double rate, bool) + { + return
static_cast<double>(set_precision(static_cast<uint>(rate))); + } + + // set precision in uncompressed bits/value (must equal InternalType width) + uint set_precision(uint precision) + { + if (precision != internal_size_bits) + throw zfp::exception("zfp generic codec precision mismatch"); + return precision; + } + + // set accuracy as absolute error tolerance + double set_accuracy(double) + { + throw zfp::exception("zfp generic codec does not support fixed-accuracy mode"); + return -1; + } + + // set expert mode parameters + bool set_params(uint, uint, uint, int) + { + throw zfp::exception("zfp generic codec does not support expert mode"); + return false; + } + + // set thread safety mode (not required by this codec) + void set_thread_safety(bool) {} + + // byte size of codec data structure components indicated by mask + size_t size_bytes(uint mask = ZFP_DATA_ALL) const + { + size_t size = 0; + if (mask & ZFP_DATA_META) + size += sizeof(*this); + return size; + } + + // unit of allocated data in bytes + static size_t alignment() { return sizeof(InternalType); } + + static const zfp_type type = zfp::internal::trait<ExternalType>::type; // scalar type + + // zfp::codec::generic_base::header class for array (de)serialization + #include "zfp/internal/codec/genheader.hpp" + +protected: + // pointer to beginning of block + InternalType* begin(bitstream_offset offset) const + { + if (offset % internal_size_bits) + throw zfp::exception("zfp generic codec bit offset alignment error"); + return buffer + offset / internal_size_bits; + } + + // store full contiguous block to memory + size_t encode_block(bitstream_offset offset, const ExternalType* block) const + { + InternalType* ptr = begin(offset); + for (size_t n = block_size; n--;) + *ptr++ = static_cast<InternalType>(*block++); + return block_size_bits; + } + + // load full contiguous block from memory + size_t decode_block(bitstream_offset offset, ExternalType* block) const + { + const InternalType* ptr = begin(offset); + for (size_t n = block_size; n--;) +
*block++ = static_cast<ExternalType>(*ptr++); + return block_size_bits; + } + + // constants associated with template arguments + static const size_t internal_size_bits = sizeof(InternalType) * CHAR_BIT; + static const size_t block_size = 1u << (2 * dims); + static const size_t block_size_bits = block_size * internal_size_bits; + + size_t bytes; // number of bytes of storage + InternalType* buffer; // pointer to storage managed by block store +}; + +// 1D codec +template <typename ExternalType, typename InternalType = ExternalType> +class generic1 : public generic_base<1, ExternalType, InternalType> { +public: + // encode contiguous 1D block + size_t encode_block(bitstream_offset offset, uint shape, const ExternalType* block) const + { + return shape ? encode_block_strided(offset, shape, block, 1) + : encode_block(offset, block); + } + + // decode contiguous 1D block + size_t decode_block(bitstream_offset offset, uint shape, ExternalType* block) const + { + return shape ? decode_block_strided(offset, shape, block, 1) + : decode_block(offset, block); + } + + // encode 1D block from strided storage + size_t encode_block_strided(bitstream_offset offset, uint shape, const ExternalType* p, ptrdiff_t sx) const + { + InternalType* q = begin(offset); + size_t nx = 4; + if (shape) { + nx -= shape & 3u; shape >>= 2; + } + for (size_t x = 0; x < nx; x++, p += sx, q++) + *q = static_cast<InternalType>(*p); + return block_size_bits; + } + + // decode 1D block to strided storage + size_t decode_block_strided(bitstream_offset offset, uint shape, ExternalType* p, ptrdiff_t sx) const + { + const InternalType* q = begin(offset); + size_t nx = 4; + if (shape) { + nx -= shape & 3u; shape >>= 2; + } + for (size_t x = 0; x < nx; x++, p += sx, q++) + *p = static_cast<ExternalType>(*q); + return block_size_bits; + } + +protected: + using generic_base<1, ExternalType, InternalType>::begin; + using generic_base<1, ExternalType, InternalType>::encode_block; + using generic_base<1, ExternalType, InternalType>::decode_block; + using generic_base<1, ExternalType, InternalType>::block_size_bits;
+}; + +// 2D codec +template <typename ExternalType, typename InternalType = ExternalType> +class generic2 : public generic_base<2, ExternalType, InternalType> { +public: + // encode contiguous 2D block + size_t encode_block(bitstream_offset offset, uint shape, const ExternalType* block) const + { + return shape ? encode_block_strided(offset, shape, block, 1, 4) + : encode_block(offset, block); + } + + // decode contiguous 2D block + size_t decode_block(bitstream_offset offset, uint shape, ExternalType* block) const + { + return shape ? decode_block_strided(offset, shape, block, 1, 4) + : decode_block(offset, block); + } + + // encode 2D block from strided storage + size_t encode_block_strided(bitstream_offset offset, uint shape, const ExternalType* p, ptrdiff_t sx, ptrdiff_t sy) const + { + InternalType* q = begin(offset); + size_t nx = 4; + size_t ny = 4; + if (shape) { + nx -= shape & 3u; shape >>= 2; + ny -= shape & 3u; shape >>= 2; + } + for (size_t y = 0; y < ny; y++, p += sy - (ptrdiff_t)nx * sx, q += 4 - nx) + for (size_t x = 0; x < nx; x++, p += sx, q++) + *q = static_cast<InternalType>(*p); + return block_size_bits; + } + + // decode 2D block to strided storage + size_t decode_block_strided(bitstream_offset offset, uint shape, ExternalType* p, ptrdiff_t sx, ptrdiff_t sy) const + { + const InternalType* q = begin(offset); + size_t nx = 4; + size_t ny = 4; + if (shape) { + nx -= shape & 3u; shape >>= 2; + ny -= shape & 3u; shape >>= 2; + } + for (size_t y = 0; y < ny; y++, p += sy - (ptrdiff_t)nx * sx, q += 4 - nx) + for (size_t x = 0; x < nx; x++, p += sx, q++) + *p = static_cast<ExternalType>(*q); + return block_size_bits; + } + +protected: + using generic_base<2, ExternalType, InternalType>::begin; + using generic_base<2, ExternalType, InternalType>::encode_block; + using generic_base<2, ExternalType, InternalType>::decode_block; + using generic_base<2, ExternalType, InternalType>::block_size_bits; +}; + +// 3D codec +template <typename ExternalType, typename InternalType = ExternalType> +class generic3 : public generic_base<3, ExternalType, InternalType> { +public: + // encode contiguous 3D block +
size_t encode_block(bitstream_offset offset, uint shape, const ExternalType* block) const + { + return shape ? encode_block_strided(offset, shape, block, 1, 4, 16) + : encode_block(offset, block); + } + + // decode contiguous 3D block + size_t decode_block(bitstream_offset offset, uint shape, ExternalType* block) const + { + return shape ? decode_block_strided(offset, shape, block, 1, 4, 16) + : decode_block(offset, block); + } + + // encode 3D block from strided storage + size_t encode_block_strided(bitstream_offset offset, uint shape, const ExternalType* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz) const + { + InternalType* q = begin(offset); + size_t nx = 4; + size_t ny = 4; + size_t nz = 4; + if (shape) { + nx -= shape & 3u; shape >>= 2; + ny -= shape & 3u; shape >>= 2; + nz -= shape & 3u; shape >>= 2; + } + for (size_t z = 0; z < nz; z++, p += sz - (ptrdiff_t)ny * sy, q += 16 - 4 * ny) + for (size_t y = 0; y < ny; y++, p += sy - (ptrdiff_t)nx * sx, q += 4 - nx) + for (size_t x = 0; x < nx; x++, p += sx, q++) + *q = static_cast<InternalType>(*p); + return block_size_bits; + } + + // decode 3D block to strided storage + size_t decode_block_strided(bitstream_offset offset, uint shape, ExternalType* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz) const + { + const InternalType* q = begin(offset); + size_t nx = 4; + size_t ny = 4; + size_t nz = 4; + if (shape) { + nx -= shape & 3u; shape >>= 2; + ny -= shape & 3u; shape >>= 2; + nz -= shape & 3u; shape >>= 2; + } + for (size_t z = 0; z < nz; z++, p += sz - (ptrdiff_t)ny * sy, q += 16 - 4 * ny) + for (size_t y = 0; y < ny; y++, p += sy - (ptrdiff_t)nx * sx, q += 4 - nx) + for (size_t x = 0; x < nx; x++, p += sx, q++) + *p = static_cast<ExternalType>(*q); + return block_size_bits; + } + +protected: + using generic_base<3, ExternalType, InternalType>::begin; + using generic_base<3, ExternalType, InternalType>::encode_block; + using generic_base<3, ExternalType, InternalType>::decode_block; + using generic_base<3, ExternalType,
InternalType>::block_size_bits; +}; + +// 4D codec +template <typename ExternalType, typename InternalType = ExternalType> +class generic4 : public generic_base<4, ExternalType, InternalType> { +public: + // encode contiguous 4D block + size_t encode_block(bitstream_offset offset, uint shape, const ExternalType* block) const + { + return shape ? encode_block_strided(offset, shape, block, 1, 4, 16, 64) + : encode_block(offset, block); + } + + // decode contiguous 4D block + size_t decode_block(bitstream_offset offset, uint shape, ExternalType* block) const + { + return shape ? decode_block_strided(offset, shape, block, 1, 4, 16, 64) + : decode_block(offset, block); + } + + // encode 4D block from strided storage + size_t encode_block_strided(bitstream_offset offset, uint shape, const ExternalType* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw) const + { + InternalType* q = begin(offset); + size_t nx = 4; + size_t ny = 4; + size_t nz = 4; + size_t nw = 4; + if (shape) { + nx -= shape & 3u; shape >>= 2; + ny -= shape & 3u; shape >>= 2; + nz -= shape & 3u; shape >>= 2; + nw -= shape & 3u; shape >>= 2; + } + for (size_t w = 0; w < nw; w++, p += sw - (ptrdiff_t)nz * sz, q += 64 - 16 * nz) + for (size_t z = 0; z < nz; z++, p += sz - (ptrdiff_t)ny * sy, q += 16 - 4 * ny) + for (size_t y = 0; y < ny; y++, p += sy - (ptrdiff_t)nx * sx, q += 4 - nx) + for (size_t x = 0; x < nx; x++, p += sx, q++) + *q = static_cast<InternalType>(*p); + return block_size_bits; + } + + // decode 4D block to strided storage + size_t decode_block_strided(bitstream_offset offset, uint shape, ExternalType* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw) const + { + const InternalType* q = begin(offset); + size_t nx = 4; + size_t ny = 4; + size_t nz = 4; + size_t nw = 4; + if (shape) { + nx -= shape & 3u; shape >>= 2; + ny -= shape & 3u; shape >>= 2; + nz -= shape & 3u; shape >>= 2; + nw -= shape & 3u; shape >>= 2; + } + for (size_t w = 0; w < nw; w++, p += sw - (ptrdiff_t)nz * sz, q += 64 - 16 * nz) + for (size_t z = 0; z < nz; z++, p += sz -
(ptrdiff_t)ny * sy, q += 16 - 4 * ny) + for (size_t y = 0; y < ny; y++, p += sy - (ptrdiff_t)nx * sx, q += 4 - nx) + for (size_t x = 0; x < nx; x++, p += sx, q++) + *p = static_cast<ExternalType>(*q); + return block_size_bits; + } + +protected: + using generic_base<4, ExternalType, InternalType>::begin; + using generic_base<4, ExternalType, InternalType>::encode_block; + using generic_base<4, ExternalType, InternalType>::decode_block; + using generic_base<4, ExternalType, InternalType>::block_size_bits; +}; + +} // codec +} // zfp + +#endif diff --git a/include/zfp/codec/zfpcodec.hpp b/include/zfp/codec/zfpcodec.hpp new file mode 100644 index 00000000..5a880cdd --- /dev/null +++ b/include/zfp/codec/zfpcodec.hpp @@ -0,0 +1,551 @@ +#ifndef ZFP_ZFP_CODEC_HPP +#define ZFP_ZFP_CODEC_HPP + +#include +#include +#include +#include "zfp.h" +#include "zfp.hpp" +#include "zfp/internal/array/memory.hpp" +#include "zfp/internal/array/traits.hpp" + +namespace zfp { +namespace codec { + +// abstract base class for zfp coding of {float, double} x {1D, 2D, 3D, 4D} data +template <uint dims, typename Scalar> +class zfp_base { +protected: + // default constructor + zfp_base() : + stream(zfp_stream_open(0)) +#ifdef _OPENMP + , thread_safety(false) +#endif + {} + + // destructor + ~zfp_base() + { + close(); + zfp_stream_close(stream); + } + +public: + // assignment operator--performs deep copy + zfp_base& operator=(const zfp_base& codec) + { + if (this != &codec) + deep_copy(codec); + return *this; + } + + // conservative buffer size for current codec settings + size_t buffer_size(const zfp_field* field) const + { + // empty field case + if (!field->nx && !field->ny && !field->nz && !field->nw) + return 0; + // variable-rate case + if (zfp_stream_compression_mode(stream) != zfp_mode_fixed_rate) + return zfp_stream_maximum_size(stream, field); + // fixed-rate case: exclude header + size_t blocks = zfp_field_blocks(field); + return zfp::internal::round_up(blocks * stream->maxbits, stream_alignment()) / CHAR_BIT; + } + + // open
bit stream + void open(void* data, size_t size) + { + zfp_stream_set_bit_stream(stream, stream_open(data, size)); + } + + // close bit stream + void close() + { + stream_close(zfp_stream_bit_stream(stream)); + zfp_stream_set_bit_stream(stream, 0); + } + + // compression mode + zfp_mode mode() const { return zfp_stream_compression_mode(stream); } + + // rate in compressed bits/value (fixed-rate mode only) + double rate() const { return zfp_stream_rate(stream, dims); } + + // precision in uncompressed bits/value (fixed-precision mode only) + uint precision() const { return zfp_stream_precision(stream); } + + // accuracy as absolute error tolerance (fixed-accuracy mode only) + double accuracy() const { return zfp_stream_accuracy(stream); } + + // compression parameters (all compression modes) + void params(uint* minbits, uint* maxbits, uint* maxprec, int* minexp) const { zfp_stream_params(stream, minbits, maxbits, maxprec, minexp); } + + // enable reversible (lossless) mode + void set_reversible() { zfp_stream_set_reversible(stream); } + + // set rate in compressed bits/value + double set_rate(double rate, bool align) { return zfp_stream_set_rate(stream, rate, type, dims, align); } + + // set precision in uncompressed bits/value + uint set_precision(uint precision) { return zfp_stream_set_precision(stream, precision); } + + // set accuracy as absolute error tolerance + double set_accuracy(double tolerance) { return zfp_stream_set_accuracy(stream, tolerance); } + + // set expert mode parameters + bool set_params(uint minbits, uint maxbits, uint maxprec, int maxexp) { return zfp_stream_set_params(stream, minbits, maxbits, maxprec, maxexp) == zfp_true; } + + // set thread safety mode +#ifdef _OPENMP + void set_thread_safety(bool safety) { thread_safety = safety; } +#else + void set_thread_safety(bool) {} +#endif + + // byte size of codec data structure components indicated by mask + size_t size_bytes(uint mask = ZFP_DATA_ALL) const + { + size_t size = 0; + if (mask & 
ZFP_DATA_META) { + size += sizeof(*stream); + size += sizeof(*this); + } + return size; + } + + // unit of allocated data in bytes + static size_t alignment() { return stream_alignment() / CHAR_BIT; } + + static const zfp_type type = zfp::internal::trait<Scalar>::type; // scalar type + + // zfp::codec::zfp_base::header class for array (de)serialization + #include "zfp/internal/codec/zfpheader.hpp" + +protected: + // deep copy + void deep_copy(const zfp_base& codec) + { + stream = zfp_stream_open(0); + *stream = *codec.stream; + stream->stream = 0; +#ifdef _OPENMP + thread_safety = codec.thread_safety; +#endif + } + + // make a thread-local copy of zfp stream and bit stream + zfp_stream clone_stream() const + { + zfp_stream zfp = *stream; + zfp.stream = stream_clone(zfp.stream); + return zfp; + } + + // encode full contiguous block + size_t encode_block(bitstream_offset offset, const Scalar* block) const + { + if (thread_safety) { + // make a thread-local copy of zfp stream and bit stream + zfp_stream zfp = clone_stream(); + size_t size = encode_block(&zfp, offset, block); + stream_close(zfp.stream); + return size; + } + else + return encode_block(stream, offset, block); + } + + // decode full contiguous block + size_t decode_block(bitstream_offset offset, Scalar* block) const + { + if (thread_safety) { + // make a thread-local copy of zfp stream and bit stream + zfp_stream zfp = clone_stream(); + size_t size = decode_block(&zfp, offset, block); + stream_close(zfp.stream); + return size; + } + else + return decode_block(stream, offset, block); + } + + // encode full contiguous block + static size_t encode_block(zfp_stream* zfp, bitstream_offset offset, const Scalar* block) + { + stream_wseek(zfp->stream, offset); + size_t size = zfp::encode_block(zfp, block); + stream_flush(zfp->stream); + return size; + } + + // decode full contiguous block + static size_t decode_block(zfp_stream* zfp, bitstream_offset offset, Scalar* block) + { + stream_rseek(zfp->stream, offset); +
size_t size = zfp::decode_block(zfp, block); + stream_align(zfp->stream); + return size; + } + + zfp_stream* stream; // compressed zfp stream +#ifdef _OPENMP + bool thread_safety; // thread safety state +#else + static const bool thread_safety = false; // not needed without OpenMP +#endif +}; + +// 1D codec +template <typename Scalar> +class zfp1 : public zfp_base<1, Scalar> { +public: + // encode contiguous 1D block + size_t encode_block(bitstream_offset offset, uint shape, const Scalar* block) const + { + return shape ? encode_block_strided(offset, shape, block, 1) + : encode_block(offset, block); + } + + // decode contiguous 1D block + size_t decode_block(bitstream_offset offset, uint shape, Scalar* block) const + { + return shape ? decode_block_strided(offset, shape, block, 1) + : decode_block(offset, block); + } + + // encode 1D block from strided storage + size_t encode_block_strided(bitstream_offset offset, uint shape, const Scalar* p, ptrdiff_t sx) const + { + if (thread_safety) { + // thread-safe implementation + zfp_stream zfp = clone_stream(); + size_t size = encode_block_strided(&zfp, offset, shape, p, sx); + stream_close(zfp.stream); + return size; + } + else + return encode_block_strided(stream, offset, shape, p, sx); + } + + // decode 1D block to strided storage + size_t decode_block_strided(bitstream_offset offset, uint shape, Scalar* p, ptrdiff_t sx) const + { + if (thread_safety) { + // thread-safe implementation + zfp_stream zfp = clone_stream(); + size_t size = decode_block_strided(&zfp, offset, shape, p, sx); + stream_close(zfp.stream); + return size; + } + else + return decode_block_strided(stream, offset, shape, p, sx); + } + +protected: + using zfp_base<1, Scalar>::clone_stream; + using zfp_base<1, Scalar>::encode_block; + using zfp_base<1, Scalar>::decode_block; + using zfp_base<1, Scalar>::stream; + using zfp_base<1, Scalar>::thread_safety; + + // encode 1D block from strided storage + static size_t encode_block_strided(zfp_stream* zfp, bitstream_offset
offset, uint shape, const Scalar* p, ptrdiff_t sx) + { + size_t size; + stream_wseek(zfp->stream, offset); + if (shape) { + uint nx = 4 - (shape & 3u); shape >>= 2; + size = zfp::encode_partial_block_strided(zfp, p, nx, sx); + } + else + size = zfp::encode_block_strided(zfp, p, sx); + stream_flush(zfp->stream); + return size; + } + + // decode 1D block to strided storage + static size_t decode_block_strided(zfp_stream* zfp, bitstream_offset offset, uint shape, Scalar* p, ptrdiff_t sx) + { + size_t size; + stream_rseek(zfp->stream, offset); + if (shape) { + uint nx = 4 - (shape & 3u); shape >>= 2; + size = zfp::decode_partial_block_strided(zfp, p, nx, sx); + } + else + size = zfp::decode_block_strided(zfp, p, sx); + stream_align(zfp->stream); + return size; + } +}; + +// 2D codec +template <typename Scalar> +class zfp2 : public zfp_base<2, Scalar> { +public: + // encode contiguous 2D block + size_t encode_block(bitstream_offset offset, uint shape, const Scalar* block) const + { + return shape ? encode_block_strided(offset, shape, block, 1, 4) + : encode_block(offset, block); + } + + // decode contiguous 2D block + size_t decode_block(bitstream_offset offset, uint shape, Scalar* block) const + { + return shape ?
decode_block_strided(offset, shape, block, 1, 4) + : decode_block(offset, block); + } + + // encode 2D block from strided storage + size_t encode_block_strided(bitstream_offset offset, uint shape, const Scalar* p, ptrdiff_t sx, ptrdiff_t sy) const + { + if (thread_safety) { + // thread-safe implementation + zfp_stream zfp = clone_stream(); + size_t size = encode_block_strided(&zfp, offset, shape, p, sx, sy); + stream_close(zfp.stream); + return size; + } + else + return encode_block_strided(stream, offset, shape, p, sx, sy); + } + + // decode 2D block to strided storage + size_t decode_block_strided(bitstream_offset offset, uint shape, Scalar* p, ptrdiff_t sx, ptrdiff_t sy) const + { + if (thread_safety) { + // thread-safe implementation + zfp_stream zfp = clone_stream(); + size_t size = decode_block_strided(&zfp, offset, shape, p, sx, sy); + stream_close(zfp.stream); + return size; + } + else + return decode_block_strided(stream, offset, shape, p, sx, sy); + } + +protected: + using zfp_base<2, Scalar>::clone_stream; + using zfp_base<2, Scalar>::encode_block; + using zfp_base<2, Scalar>::decode_block; + using zfp_base<2, Scalar>::stream; + using zfp_base<2, Scalar>::thread_safety; + + // encode 2D block from strided storage + static size_t encode_block_strided(zfp_stream* zfp, bitstream_offset offset, uint shape, const Scalar* p, ptrdiff_t sx, ptrdiff_t sy) + { + size_t size; + stream_wseek(zfp->stream, offset); + if (shape) { + uint nx = 4 - (shape & 3u); shape >>= 2; + uint ny = 4 - (shape & 3u); shape >>= 2; + size = zfp::encode_partial_block_strided(zfp, p, nx, ny, sx, sy); + } + else + size = zfp::encode_block_strided(zfp, p, sx, sy); + stream_flush(zfp->stream); + return size; + } + + // decode 2D block to strided storage + static size_t decode_block_strided(zfp_stream* zfp, bitstream_offset offset, uint shape, Scalar* p, ptrdiff_t sx, ptrdiff_t sy) + { + size_t size; + stream_rseek(zfp->stream, offset); + if (shape) { + uint nx = 4 - (shape & 3u); shape >>= 
2; + uint ny = 4 - (shape & 3u); shape >>= 2; + size = zfp::decode_partial_block_strided(zfp, p, nx, ny, sx, sy); + } + else + size = zfp::decode_block_strided(zfp, p, sx, sy); + stream_align(zfp->stream); + return size; + } +}; + +// 3D codec +template +class zfp3 : public zfp_base<3, Scalar> { +public: + // encode contiguous 3D block + size_t encode_block(bitstream_offset offset, uint shape, const Scalar* block) const + { + return shape ? encode_block_strided(offset, shape, block, 1, 4, 16) + : encode_block(offset, block); + } + + // decode contiguous 3D block + size_t decode_block(bitstream_offset offset, uint shape, Scalar* block) const + { + return shape ? decode_block_strided(offset, shape, block, 1, 4, 16) + : decode_block(offset, block); + } + + // encode 3D block from strided storage + size_t encode_block_strided(bitstream_offset offset, uint shape, const Scalar* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz) const + { + if (thread_safety) { + // thread-safe implementation + zfp_stream zfp = clone_stream(); + size_t size = encode_block_strided(&zfp, offset, shape, p, sx, sy, sz); + stream_close(zfp.stream); + return size; + } + else + return encode_block_strided(stream, offset, shape, p, sx, sy, sz); + } + + // decode 3D block to strided storage + size_t decode_block_strided(bitstream_offset offset, uint shape, Scalar* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz) const + { + if (thread_safety) { + // thread-safe implementation + zfp_stream zfp = clone_stream(); + size_t size = decode_block_strided(&zfp, offset, shape, p, sx, sy, sz); + stream_close(zfp.stream); + return size; + } + else + return decode_block_strided(stream, offset, shape, p, sx, sy, sz); + } + +protected: + using zfp_base<3, Scalar>::clone_stream; + using zfp_base<3, Scalar>::encode_block; + using zfp_base<3, Scalar>::decode_block; + using zfp_base<3, Scalar>::stream; + using zfp_base<3, Scalar>::thread_safety; + + // encode 3D block from strided storage + static size_t 
encode_block_strided(zfp_stream* zfp, bitstream_offset offset, uint shape, const Scalar* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz) + { + size_t size; + stream_wseek(zfp->stream, offset); + if (shape) { + uint nx = 4 - (shape & 3u); shape >>= 2; + uint ny = 4 - (shape & 3u); shape >>= 2; + uint nz = 4 - (shape & 3u); shape >>= 2; + size = zfp::encode_partial_block_strided(zfp, p, nx, ny, nz, sx, sy, sz); + } + else + size = zfp::encode_block_strided(zfp, p, sx, sy, sz); + stream_flush(zfp->stream); + return size; + } + + // decode 3D block to strided storage + static size_t decode_block_strided(zfp_stream* zfp, bitstream_offset offset, uint shape, Scalar* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz) + { + size_t size; + stream_rseek(zfp->stream, offset); + if (shape) { + uint nx = 4 - (shape & 3u); shape >>= 2; + uint ny = 4 - (shape & 3u); shape >>= 2; + uint nz = 4 - (shape & 3u); shape >>= 2; + size = zfp::decode_partial_block_strided(zfp, p, nx, ny, nz, sx, sy, sz); + } + else + size = zfp::decode_block_strided(zfp, p, sx, sy, sz); + stream_align(zfp->stream); + return size; + } +}; + +// 4D codec +template +class zfp4 : public zfp_base<4, Scalar> { +public: + // encode contiguous 4D block + size_t encode_block(bitstream_offset offset, uint shape, const Scalar* block) const + { + return shape ? encode_block_strided(offset, shape, block, 1, 4, 16, 64) + : encode_block(offset, block); + } + + // decode contiguous 4D block + size_t decode_block(bitstream_offset offset, uint shape, Scalar* block) const + { + return shape ? 
decode_block_strided(offset, shape, block, 1, 4, 16, 64) + : decode_block(offset, block); + } + + // encode 4D block from strided storage + size_t encode_block_strided(bitstream_offset offset, uint shape, const Scalar* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw) const + { + if (thread_safety) { + // thread-safe implementation + zfp_stream zfp = clone_stream(); + size_t size = encode_block_strided(&zfp, offset, shape, p, sx, sy, sz, sw); + stream_close(zfp.stream); + return size; + } + else + return encode_block_strided(stream, offset, shape, p, sx, sy, sz, sw); + } + + // decode 4D block to strided storage + size_t decode_block_strided(bitstream_offset offset, uint shape, Scalar* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw) const + { + if (thread_safety) { + // thread-safe implementation + zfp_stream zfp = clone_stream(); + size_t size = decode_block_strided(&zfp, offset, shape, p, sx, sy, sz, sw); + stream_close(zfp.stream); + return size; + } + else + return decode_block_strided(stream, offset, shape, p, sx, sy, sz, sw); + } + +protected: + using zfp_base<4, Scalar>::clone_stream; + using zfp_base<4, Scalar>::encode_block; + using zfp_base<4, Scalar>::decode_block; + using zfp_base<4, Scalar>::stream; + using zfp_base<4, Scalar>::thread_safety; + + // encode 4D block from strided storage + static size_t encode_block_strided(zfp_stream* zfp, bitstream_offset offset, uint shape, const Scalar* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw) + { + size_t size; + stream_wseek(zfp->stream, offset); + if (shape) { + uint nx = 4 - (shape & 3u); shape >>= 2; + uint ny = 4 - (shape & 3u); shape >>= 2; + uint nz = 4 - (shape & 3u); shape >>= 2; + uint nw = 4 - (shape & 3u); shape >>= 2; + size = zfp::encode_partial_block_strided(zfp, p, nx, ny, nz, nw, sx, sy, sz, sw); + } + else + size = zfp::encode_block_strided(zfp, p, sx, sy, sz, sw); + stream_flush(zfp->stream); + return size; + } + + // decode 4D block to strided storage + 
static size_t decode_block_strided(zfp_stream* zfp, bitstream_offset offset, uint shape, Scalar* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw) + { + size_t size; + stream_rseek(zfp->stream, offset); + if (shape) { + uint nx = 4 - (shape & 3u); shape >>= 2; + uint ny = 4 - (shape & 3u); shape >>= 2; + uint nz = 4 - (shape & 3u); shape >>= 2; + uint nw = 4 - (shape & 3u); shape >>= 2; + size = zfp::decode_partial_block_strided(zfp, p, nx, ny, nz, nw, sx, sy, sz, sw); + } + else + size = zfp::decode_block_strided(zfp, p, sx, sy, sz, sw); + stream_align(zfp->stream); + return size; + } +}; + +} // codec +} // zfp + +#endif diff --git a/include/zfp/constarray1.hpp b/include/zfp/constarray1.hpp new file mode 100644 index 00000000..f2f501de --- /dev/null +++ b/include/zfp/constarray1.hpp @@ -0,0 +1,265 @@ +#ifndef ZFP_CONSTARRAY1_HPP +#define ZFP_CONSTARRAY1_HPP + +#include +#include +#include +#include "zfp/array.hpp" +#include "zfp/index.hpp" +#include "zfp/codec/zfpcodec.hpp" +#include "zfp/internal/array/cache1.hpp" +#include "zfp/internal/array/handle1.hpp" +#include "zfp/internal/array/iterator1.hpp" +#include "zfp/internal/array/pointer1.hpp" +#include "zfp/internal/array/reference1.hpp" +#include "zfp/internal/array/store1.hpp" +#include "zfp/internal/array/view1.hpp" + +namespace zfp { + +// compressed 1D array of scalars +template < + typename Scalar, + class Codec = zfp::codec::zfp1, + class Index = zfp::index::hybrid4 +> +class const_array1 : public array { +public: + // types utilized by nested classes + typedef const_array1 container_type; + typedef Scalar value_type; + typedef Codec codec_type; + typedef Index index_type; + typedef zfp::internal::BlockStore1 store_type; + typedef zfp::internal::BlockCache1 cache_type; + typedef typename Codec::header header; + + // accessor classes + typedef zfp::internal::dim1::const_reference const_reference; + typedef zfp::internal::dim1::const_pointer const_pointer; + typedef 
zfp::internal::dim1::const_iterator const_iterator; + typedef zfp::internal::dim1::const_view const_view; + typedef zfp::internal::dim1::private_const_view private_const_view; + + // default constructor + const_array1() : + array(1, Codec::type), + cache(store) + {} + + // constructor of nx-element array using given configuration, at least + // cache_size bytes of cache, and optionally initialized from flat array p + const_array1(size_t nx, const zfp_config& config, const value_type* p = 0, size_t cache_size = 0) : + array(1, Codec::type), + store(nx, config), + cache(store, cache_size) + { + this->nx = nx; + set(p); + } + + // copy constructor--performs a deep copy + const_array1(const const_array1& a) : + cache(store) + { + deep_copy(a); + } + + // virtual destructor + virtual ~const_array1() {} + + // assignment operator--performs a deep copy + const_array1& operator=(const const_array1& a) + { + if (this != &a) + deep_copy(a); + return *this; + } + + // total number of elements in array + size_t size() const { return nx; } + + // array dimensions + size_t size_x() const { return nx; } + + // resize the array (all previously stored data will be lost) + void resize(size_t nx, bool clear = true) + { + cache.clear(); + this->nx = nx; + store.resize(nx, clear); + } + + // compression mode + zfp_mode mode() const { return store.mode(); } + + // rate in compressed bits per value (fixed-rate mode only) + double rate() const { return store.rate(); } + + // precision in uncompressed bits per value (fixed-precision mode only) + uint precision() const { return store.precision(); } + + // accuracy as absolute error tolerance (fixed-accuracy mode only) + double accuracy() const { return store.accuracy(); } + + // compression parameters (all compression modes) + void params(uint* minbits, uint* maxbits, uint* maxprec, int* minexp) const { return store.params(minbits, maxbits, maxprec, minexp); } + + // set rate in compressed bits per value + double set_rate(double rate) + { + 
    cache.clear();
+    return store.set_rate(rate, false);
+  }
+
+  // set precision in uncompressed bits per value
+  uint set_precision(uint precision)
+  {
+    cache.clear();
+    return store.set_precision(precision);
+  }
+
+  // set accuracy as absolute error tolerance
+  double set_accuracy(double tolerance)
+  {
+    cache.clear();
+    return store.set_accuracy(tolerance);
+  }
+
+  // enable reversible (lossless) mode
+  void set_reversible()
+  {
+    cache.clear();
+    store.set_reversible();
+  }
+
+  // set expert mode compression parameters
+  bool set_params(uint minbits, uint maxbits, uint maxprec, int minexp)
+  {
+    cache.clear();
+    return store.set_params(minbits, maxbits, maxprec, minexp);
+  }
+
+  // set compression mode and parameters
+  void set_config(const zfp_config& config)
+  {
+    cache.clear();
+    store.set_config(config);
+  }
+
+  // byte size of array data structure components indicated by mask
+  size_t size_bytes(uint mask = ZFP_DATA_ALL) const
+  {
+    size_t size = 0;
+    size += store.size_bytes(mask);
+    size += cache.size_bytes(mask);
+    if (mask & ZFP_DATA_META)
+      size += sizeof(*this);
+    return size;
+  }
+
+  // number of bytes of compressed data
+  size_t compressed_size() const { return store.compressed_size(); }
+
+  // pointer to compressed data for read or write access
+  void* compressed_data() const
+  {
+    cache.flush();
+    return store.compressed_data();
+  }
+
+  // cache size in number of bytes
+  size_t cache_size() const { return cache.size(); }
+
+  // set minimum cache size in bytes (array dimensions must be known)
+  void set_cache_size(size_t bytes)
+  {
+    cache.flush();
+    cache.resize(bytes);
+  }
+
+  // empty cache without compressing modified cached blocks
+  void clear_cache() const { cache.clear(); }
+
+  // decompress array and store at p
+  void get(value_type* p) const
+  {
+    const size_t bx = store.block_size_x();
+    const ptrdiff_t sx = 1;
+    size_t block_index = 0;
+    for (size_t i = 0; i < bx; i++, p += 4)
+      cache.get_block(block_index++, p, sx);
+  }
+
+  // initialize array by copying and compressing data stored at p
+  void set(const value_type* p, bool compact = true)
+  {
+    cache.clear();
+    store.clear();
+    const size_t bx = store.block_size_x();
+    size_t block_index = 0;
+    if (p) {
+      // compress data stored at p
+      const ptrdiff_t sx = 1;
+      for (size_t i = 0; i < bx; i++, p += 4)
+        store.encode(block_index++, p, sx);
+    }
+    else {
+      // zero-initialize array
+      const value_type block[4] = {};
+      while (block_index < bx)
+        store.encode(block_index++, block);
+    }
+    store.flush();
+    if (compact)
+      store.compact();
+  }
+
+  // accessor
+  const_reference operator()(size_t i) const { return const_reference(const_cast<container_type*>(this), i); }
+
+  // flat index accessor
+  const_reference operator[](size_t index) const { return const_reference(const_cast<container_type*>(this), index); }
+
+  // random access iterators
+  const_iterator cbegin() const { return const_iterator(this, 0); }
+  const_iterator cend() const { return const_iterator(this, nx); }
+  const_iterator begin() const { return cbegin(); }
+  const_iterator end() const { return cend(); }
+
+protected:
+  friend class zfp::internal::dim1::const_handle<const_array1>;
+  friend class zfp::internal::dim1::const_reference<const_array1>;
+  friend class zfp::internal::dim1::const_pointer<const_array1>;
+  friend class zfp::internal::dim1::const_iterator<const_array1>;
+  friend class zfp::internal::dim1::const_view<const_array1>;
+  friend class zfp::internal::dim1::private_const_view<const_array1>;
+
+  // perform a deep copy
+  void deep_copy(const const_array1& a)
+  {
+    // copy base class members
+    array::deep_copy(a);
+    // copy persistent storage
+    store.deep_copy(a.store);
+    // copy cached data
+    cache.deep_copy(a.cache);
+  }
+
+  // global index bounds
+  size_t min_x() const { return 0; }
+  size_t max_x() const { return nx; }
+
+  // inspector
+  value_type get(size_t i) const { return cache.get(i); }
+
+  store_type store; // persistent storage of compressed blocks
+  cache_type cache; // cache of decompressed blocks
+};
+
+typedef const_array1<float> const_array1f;
+typedef const_array1<double> const_array1d;
+
+}
+
+#endif diff --git a/include/zfp/constarray2.hpp b/include/zfp/constarray2.hpp new file mode 100644 index 00000000..e8928629 --- /dev/null +++ b/include/zfp/constarray2.hpp @@ -0,0 +1,288 @@ +#ifndef ZFP_CONSTARRAY2_HPP +#define ZFP_CONSTARRAY2_HPP + +#include +#include +#include +#include "zfp/array.hpp" +#include "zfp/index.hpp" +#include "zfp/codec/zfpcodec.hpp" +#include "zfp/internal/array/cache2.hpp" +#include "zfp/internal/array/handle2.hpp" +#include "zfp/internal/array/iterator2.hpp" +#include "zfp/internal/array/pointer2.hpp" +#include "zfp/internal/array/reference2.hpp" +#include "zfp/internal/array/store2.hpp" +#include "zfp/internal/array/view2.hpp" + +namespace zfp { + +// compressed 2D array of scalars +template < + typename Scalar, + class Codec = zfp::codec::zfp2, + class Index = zfp::index::hybrid4 +> +class const_array2 : public array { +public: + // types utilized by nested classes + typedef const_array2 container_type; + typedef Scalar value_type; + typedef Codec codec_type; + typedef Index index_type; + typedef zfp::internal::BlockStore2 store_type; + typedef zfp::internal::BlockCache2 cache_type; + typedef typename Codec::header header; + + // accessor classes + typedef zfp::internal::dim2::const_reference const_reference; + typedef zfp::internal::dim2::const_pointer const_pointer; + typedef zfp::internal::dim2::const_iterator const_iterator; + typedef zfp::internal::dim2::const_view const_view; + typedef zfp::internal::dim2::private_const_view private_const_view; + + // default constructor + const_array2() : + array(2, Codec::type), + cache(store) + {} + + // constructor of nx * ny array using given configuration, at least + // cache_size bytes of cache, and optionally initialized from flat array p + const_array2(size_t nx, size_t ny, const zfp_config& config, const value_type* p = 0, size_t cache_size = 0) : + array(2, Codec::type), + store(nx, ny, config), + cache(store, cache_size) + { + this->nx = nx; + this->ny = ny; + set(p); + } + + 
// copy constructor--performs a deep copy + const_array2(const const_array2& a) : + cache(store) + { + deep_copy(a); + } + + // virtual destructor + virtual ~const_array2() {} + + // assignment operator--performs a deep copy + const_array2& operator=(const const_array2& a) + { + if (this != &a) + deep_copy(a); + return *this; + } + + // total number of elements in array + size_t size() const { return nx * ny; } + + // array dimensions + size_t size_x() const { return nx; } + size_t size_y() const { return ny; } + + // resize the array (all previously stored data will be lost) + void resize(size_t nx, size_t ny, bool clear = true) + { + cache.clear(); + this->nx = nx; + this->ny = ny; + store.resize(nx, ny, clear); + } + + // compression mode + zfp_mode mode() const { return store.mode(); } + + // rate in compressed bits per value (fixed-rate mode only) + double rate() const { return store.rate(); } + + // precision in uncompressed bits per value (fixed-precision mode only) + uint precision() const { return store.precision(); } + + // accuracy as absolute error tolerance (fixed-accuracy mode only) + double accuracy() const { return store.accuracy(); } + + // compression parameters (all compression modes) + void params(uint* minbits, uint* maxbits, uint* maxprec, int* minexp) const { return store.params(minbits, maxbits, maxprec, minexp); } + + // set rate in compressed bits per value + double set_rate(double rate) + { + cache.clear(); + return store.set_rate(rate, false); + } + + // set precision in uncompressed bits per value + uint set_precision(uint precision) + { + cache.clear(); + return store.set_precision(precision); + } + + // set accuracy as absolute error tolerance + double set_accuracy(double tolerance) + { + cache.clear(); + return store.set_accuracy(tolerance); + } + + // enable reversible (lossless) mode + void set_reversible() + { + cache.clear(); + store.set_reversible(); + } + + // set expert mode compression parameters + bool set_params(uint 
minbits, uint maxbits, uint maxprec, int minexp) + { + cache.clear(); + return store.set_params(minbits, maxbits, maxprec, minexp); + } + + // set compression mode and parameters + void set_config(const zfp_config& config) + { + cache.clear(); + store.set_config(config); + } + + // byte size of array data structure components indicated by mask + size_t size_bytes(uint mask = ZFP_DATA_ALL) const + { + size_t size = 0; + size += store.size_bytes(mask); + size += cache.size_bytes(mask); + if (mask & ZFP_DATA_META) + size += sizeof(*this); + return size; + } + + // number of bytes of compressed data + size_t compressed_size() const { return store.compressed_size(); } + + // pointer to compressed data for read or write access + void* compressed_data() const + { + cache.flush(); + return store.compressed_data(); + } + + // cache size in number of bytes + size_t cache_size() const { return cache.size(); } + + // set minimum cache size in bytes (array dimensions must be known) + void set_cache_size(size_t bytes) + { + cache.flush(); + cache.resize(bytes); + } + + // empty cache without compressing modified cached blocks + void clear_cache() const { cache.clear(); } + + // decompress array and store at p + void get(value_type* p) const + { + const size_t bx = store.block_size_x(); + const size_t by = store.block_size_y(); + const ptrdiff_t sx = 1; + const ptrdiff_t sy = static_cast(nx); + size_t block_index = 0; + for (size_t j = 0; j < by; j++, p += 4 * sx * (nx - bx)) + for (size_t i = 0; i < bx; i++, p += 4) + cache.get_block(block_index++, p, sx, sy); + } + + // initialize array by copying and compressing data stored at p + void set(const value_type* p, bool compact = true) + { + cache.clear(); + store.clear(); + const size_t bx = store.block_size_x(); + const size_t by = store.block_size_y(); + size_t block_index = 0; + if (p) { + // compress data stored at p + const ptrdiff_t sx = 1; + const ptrdiff_t sy = static_cast(nx); + for (size_t j = 0; j < by; j++, p += 4 * sx 
* (nx - bx)) + for (size_t i = 0; i < bx; i++, p += 4) + store.encode(block_index++, p, sx, sy); + } + else { + // zero-initialize array + const value_type block[4 * 4] = {}; + while (block_index < bx * by) + store.encode(block_index++, block); + } + store.flush(); + if (compact) + store.compact(); + } + + // (i, j) accessor + const_reference operator()(size_t i, size_t j) const { return const_reference(const_cast(this), i, j); } + + // flat index accessor + const_reference operator[](size_t index) const + { + size_t i, j; + ij(i, j, index); + return const_reference(const_cast(this), i, j); + } + + // random access iterators + const_iterator cbegin() const { return const_iterator(this, 0, 0); } + const_iterator cend() const { return const_iterator(this, 0, ny); } + const_iterator begin() const { return cbegin(); } + const_iterator end() const { return cend(); } + +protected: + friend class zfp::internal::dim2::const_handle; + friend class zfp::internal::dim2::const_reference; + friend class zfp::internal::dim2::const_pointer; + friend class zfp::internal::dim2::const_iterator; + friend class zfp::internal::dim2::const_view; + friend class zfp::internal::dim2::private_const_view; + + // perform a deep copy + void deep_copy(const const_array2& a) + { + // copy base class members + array::deep_copy(a); + // copy persistent storage + store.deep_copy(a.store); + // copy cached data + cache.deep_copy(a.cache); + } + + // global index bounds + size_t min_x() const { return 0; } + size_t max_x() const { return nx; } + size_t min_y() const { return 0; } + size_t max_y() const { return ny; } + + // inspector + value_type get(size_t i, size_t j) const { return cache.get(i, j); } + + // convert flat index to (i, j) + void ij(size_t& i, size_t& j, size_t index) const + { + i = index % nx; index /= nx; + j = index % ny; + } + + store_type store; // persistent storage of compressed blocks + cache_type cache; // cache of decompressed blocks +}; + +typedef const_array2 
const_array2f; +typedef const_array2 const_array2d; + +} + +#endif diff --git a/include/zfp/constarray3.hpp b/include/zfp/constarray3.hpp new file mode 100644 index 00000000..61d65d46 --- /dev/null +++ b/include/zfp/constarray3.hpp @@ -0,0 +1,300 @@ +#ifndef ZFP_CONSTARRAY3_HPP +#define ZFP_CONSTARRAY3_HPP + +#include +#include +#include +#include "zfp/array.hpp" +#include "zfp/index.hpp" +#include "zfp/codec/zfpcodec.hpp" +#include "zfp/internal/array/cache3.hpp" +#include "zfp/internal/array/handle3.hpp" +#include "zfp/internal/array/iterator3.hpp" +#include "zfp/internal/array/pointer3.hpp" +#include "zfp/internal/array/reference3.hpp" +#include "zfp/internal/array/store3.hpp" +#include "zfp/internal/array/view3.hpp" + +namespace zfp { + +// compressed 3D array of scalars +template < + typename Scalar, + class Codec = zfp::codec::zfp3, + class Index = zfp::index::hybrid4 +> +class const_array3 : public array { +public: + // types utilized by nested classes + typedef const_array3 container_type; + typedef Scalar value_type; + typedef Codec codec_type; + typedef Index index_type; + typedef zfp::internal::BlockStore3 store_type; + typedef zfp::internal::BlockCache3 cache_type; + typedef typename Codec::header header; + + // accessor classes + typedef zfp::internal::dim3::const_reference const_reference; + typedef zfp::internal::dim3::const_pointer const_pointer; + typedef zfp::internal::dim3::const_iterator const_iterator; + typedef zfp::internal::dim3::const_view const_view; + typedef zfp::internal::dim3::private_const_view private_const_view; + + // default constructor + const_array3() : + array(3, Codec::type), + cache(store) + {} + + // constructor of nx * ny * nz array using given configuration, at least + // cache_size bytes of cache, and optionally initialized from flat array p + const_array3(size_t nx, size_t ny, size_t nz, const zfp_config& config, const value_type* p = 0, size_t cache_size = 0) : + array(3, Codec::type), + store(nx, ny, nz, config), + 
cache(store, cache_size) + { + this->nx = nx; + this->ny = ny; + this->nz = nz; + set(p); + } + + // copy constructor--performs a deep copy + const_array3(const const_array3& a) : + cache(store) + { + deep_copy(a); + } + + // virtual destructor + virtual ~const_array3() {} + + // assignment operator--performs a deep copy + const_array3& operator=(const const_array3& a) + { + if (this != &a) + deep_copy(a); + return *this; + } + + // total number of elements in array + size_t size() const { return nx * ny * nz; } + + // array dimensions + size_t size_x() const { return nx; } + size_t size_y() const { return ny; } + size_t size_z() const { return nz; } + + // resize the array (all previously stored data will be lost) + void resize(size_t nx, size_t ny, size_t nz, bool clear = true) + { + cache.clear(); + this->nx = nx; + this->ny = ny; + this->nz = nz; + store.resize(nx, ny, nz, clear); + } + + // compression mode + zfp_mode mode() const { return store.mode(); } + + // rate in compressed bits per value (fixed-rate mode only) + double rate() const { return store.rate(); } + + // precision in uncompressed bits per value (fixed-precision mode only) + uint precision() const { return store.precision(); } + + // accuracy as absolute error tolerance (fixed-accuracy mode only) + double accuracy() const { return store.accuracy(); } + + // compression parameters (all compression modes) + void params(uint* minbits, uint* maxbits, uint* maxprec, int* minexp) const { return store.params(minbits, maxbits, maxprec, minexp); } + + // set rate in compressed bits per value + double set_rate(double rate) + { + cache.clear(); + return store.set_rate(rate, false); + } + + // set precision in uncompressed bits per value + uint set_precision(uint precision) + { + cache.clear(); + return store.set_precision(precision); + } + + // set accuracy as absolute error tolerance + double set_accuracy(double tolerance) + { + cache.clear(); + return store.set_accuracy(tolerance); + } + + // enable 
reversible (lossless) mode + void set_reversible() + { + cache.clear(); + store.set_reversible(); + } + + // set expert mode compression parameters + bool set_params(uint minbits, uint maxbits, uint maxprec, int minexp) + { + cache.clear(); + return store.set_params(minbits, maxbits, maxprec, minexp); + } + + // set compression mode and parameters + void set_config(const zfp_config& config) + { + cache.clear(); + store.set_config(config); + } + + // byte size of array data structure components indicated by mask + size_t size_bytes(uint mask = ZFP_DATA_ALL) const + { + size_t size = 0; + size += store.size_bytes(mask); + size += cache.size_bytes(mask); + if (mask & ZFP_DATA_META) + size += sizeof(*this); + return size; + } + + // number of bytes of compressed data + size_t compressed_size() const { return store.compressed_size(); } + + // pointer to compressed data for read or write access + void* compressed_data() const + { + cache.flush(); + return store.compressed_data(); + } + + // cache size in number of bytes + size_t cache_size() const { return cache.size(); } + + // set minimum cache size in bytes (array dimensions must be known) + void set_cache_size(size_t bytes) + { + cache.flush(); + cache.resize(bytes); + } + + // empty cache without compressing modified cached blocks + void clear_cache() const { cache.clear(); } + + // decompress array and store at p + void get(value_type* p) const + { + const size_t bx = store.block_size_x(); + const size_t by = store.block_size_y(); + const size_t bz = store.block_size_z(); + const ptrdiff_t sx = 1; + const ptrdiff_t sy = static_cast(nx); + const ptrdiff_t sz = static_cast(nx * ny); + size_t block_index = 0; + for (size_t k = 0; k < bz; k++, p += 4 * sy * (ny - by)) + for (size_t j = 0; j < by; j++, p += 4 * sx * (nx - bx)) + for (size_t i = 0; i < bx; i++, p += 4) + cache.get_block(block_index++, p, sx, sy, sz); + } + + // initialize array by copying and compressing data stored at p + void set(const value_type* p, 
bool compact = true) + { + cache.clear(); + store.clear(); + const size_t bx = store.block_size_x(); + const size_t by = store.block_size_y(); + const size_t bz = store.block_size_z(); + size_t block_index = 0; + if (p) { + // compress data stored at p + const ptrdiff_t sx = 1; + const ptrdiff_t sy = static_cast(nx); + const ptrdiff_t sz = static_cast(nx * ny); + for (size_t k = 0; k < bz; k++, p += 4 * sy * (ny - by)) + for (size_t j = 0; j < by; j++, p += 4 * sx * (nx - bx)) + for (size_t i = 0; i < bx; i++, p += 4) + store.encode(block_index++, p, sx, sy, sz); + } + else { + // zero-initialize array + const value_type block[4 * 4 * 4] = {}; + while (block_index < bx * by * bz) + store.encode(block_index++, block); + } + store.flush(); + if (compact) + store.compact(); + } + + // (i, j, k) accessor + const_reference operator()(size_t i, size_t j, size_t k) const { return const_reference(const_cast(this), i, j, k); } + + // flat index accessor + const_reference operator[](size_t index) const + { + size_t i, j, k; + ijk(i, j, k, index); + return const_reference(const_cast(this), i, j, k); + } + + // random access iterators + const_iterator cbegin() const { return const_iterator(this, 0, 0, 0); } + const_iterator cend() const { return const_iterator(this, 0, 0, nz); } + const_iterator begin() const { return cbegin(); } + const_iterator end() const { return cend(); } + +protected: + friend class zfp::internal::dim3::const_handle; + friend class zfp::internal::dim3::const_reference; + friend class zfp::internal::dim3::const_pointer; + friend class zfp::internal::dim3::const_iterator; + friend class zfp::internal::dim3::const_view; + friend class zfp::internal::dim3::private_const_view; + + // perform a deep copy + void deep_copy(const const_array3& a) + { + // copy base class members + array::deep_copy(a); + // copy persistent storage + store.deep_copy(a.store); + // copy cached data + cache.deep_copy(a.cache); + } + + // global index bounds + size_t min_x() const { 
return 0; } + size_t max_x() const { return nx; } + size_t min_y() const { return 0; } + size_t max_y() const { return ny; } + size_t min_z() const { return 0; } + size_t max_z() const { return nz; } + + // inspector + value_type get(size_t i, size_t j, size_t k) const { return cache.get(i, j, k); } + + // convert flat index to (i, j, k) + void ijk(size_t& i, size_t& j, size_t& k, size_t index) const + { + i = index % nx; index /= nx; + j = index % ny; index /= ny; + k = index; + } + + store_type store; // persistent storage of compressed blocks + cache_type cache; // cache of decompressed blocks +}; + +typedef const_array3 const_array3f; +typedef const_array3 const_array3d; + +} + +#endif diff --git a/include/zfp/constarray4.hpp b/include/zfp/constarray4.hpp new file mode 100644 index 00000000..63680f16 --- /dev/null +++ b/include/zfp/constarray4.hpp @@ -0,0 +1,312 @@ +#ifndef ZFP_CONSTARRAY4_HPP +#define ZFP_CONSTARRAY4_HPP + +#include +#include +#include +#include "zfp/array.hpp" +#include "zfp/index.hpp" +#include "zfp/codec/zfpcodec.hpp" +#include "zfp/internal/array/cache4.hpp" +#include "zfp/internal/array/handle4.hpp" +#include "zfp/internal/array/iterator4.hpp" +#include "zfp/internal/array/pointer4.hpp" +#include "zfp/internal/array/reference4.hpp" +#include "zfp/internal/array/store4.hpp" +#include "zfp/internal/array/view4.hpp" + +namespace zfp { + +// compressed 4D array of scalars +template < + typename Scalar, + class Codec = zfp::codec::zfp4, + class Index = zfp::index::hybrid4 +> +class const_array4 : public array { +public: + // types utilized by nested classes + typedef const_array4 container_type; + typedef Scalar value_type; + typedef Codec codec_type; + typedef Index index_type; + typedef zfp::internal::BlockStore4 store_type; + typedef zfp::internal::BlockCache4 cache_type; + typedef typename Codec::header header; + + // accessor classes + typedef zfp::internal::dim4::const_reference const_reference; + typedef 
zfp::internal::dim4::const_pointer const_pointer; + typedef zfp::internal::dim4::const_iterator const_iterator; + typedef zfp::internal::dim4::const_view const_view; + typedef zfp::internal::dim4::private_const_view private_const_view; + + // default constructor + const_array4() : + array(4, Codec::type), + cache(store) + {} + + // constructor of nx * ny * nz * nw array using given configuration, at least + // cache_size bytes of cache, and optionally initialized from flat array p + const_array4(size_t nx, size_t ny, size_t nz, size_t nw, const zfp_config& config, const value_type* p = 0, size_t cache_size = 0) : + array(4, Codec::type), + store(nx, ny, nz, nw, config), + cache(store, cache_size) + { + this->nx = nx; + this->ny = ny; + this->nz = nz; + this->nw = nw; + set(p); + } + + // copy constructor--performs a deep copy + const_array4(const const_array4& a) : + cache(store) + { + deep_copy(a); + } + + // virtual destructor + virtual ~const_array4() {} + + // assignment operator--performs a deep copy + const_array4& operator=(const const_array4& a) + { + if (this != &a) + deep_copy(a); + return *this; + } + + // total number of elements in array + size_t size() const { return nx * ny * nz * nw; } + + // array dimensions + size_t size_x() const { return nx; } + size_t size_y() const { return ny; } + size_t size_z() const { return nz; } + size_t size_w() const { return nw; } + + // resize the array (all previously stored data will be lost) + void resize(size_t nx, size_t ny, size_t nz, size_t nw, bool clear = true) + { + cache.clear(); + this->nx = nx; + this->ny = ny; + this->nz = nz; + this->nw = nw; + store.resize(nx, ny, nz, nw, clear); + } + + // compression mode + zfp_mode mode() const { return store.mode(); } + + // rate in compressed bits per value (fixed-rate mode only) + double rate() const { return store.rate(); } + + // precision in uncompressed bits per value (fixed-precision mode only) + uint precision() const { return store.precision(); } + + // 
accuracy as absolute error tolerance (fixed-accuracy mode only) + double accuracy() const { return store.accuracy(); } + + // compression parameters (all compression modes) + void params(uint* minbits, uint* maxbits, uint* maxprec, int* minexp) const { return store.params(minbits, maxbits, maxprec, minexp); } + + // set rate in compressed bits per value + double set_rate(double rate) + { + cache.clear(); + return store.set_rate(rate, false); + } + + // set precision in uncompressed bits per value + uint set_precision(uint precision) + { + cache.clear(); + return store.set_precision(precision); + } + + // set accuracy as absolute error tolerance + double set_accuracy(double tolerance) + { + cache.clear(); + return store.set_accuracy(tolerance); + } + + // enable reversible (lossless) mode + void set_reversible() + { + cache.clear(); + store.set_reversible(); + } + + // set expert mode compression parameters + bool set_params(uint minbits, uint maxbits, uint maxprec, int minexp) + { + cache.clear(); + return store.set_params(minbits, maxbits, maxprec, minexp); + } + + // set compression mode and parameters + void set_config(const zfp_config& config) + { + cache.clear(); + store.set_config(config); + } + + // byte size of array data structure components indicated by mask + size_t size_bytes(uint mask = ZFP_DATA_ALL) const + { + size_t size = 0; + size += store.size_bytes(mask); + size += cache.size_bytes(mask); + if (mask & ZFP_DATA_META) + size += sizeof(*this); + return size; + } + + // number of bytes of compressed data + size_t compressed_size() const { return store.compressed_size(); } + + // pointer to compressed data for read or write access + void* compressed_data() const + { + cache.flush(); + return store.compressed_data(); + } + + // cache size in number of bytes + size_t cache_size() const { return cache.size(); } + + // set minimum cache size in bytes (array dimensions must be known) + void set_cache_size(size_t bytes) + { + cache.flush(); + 
cache.resize(bytes); + } + + // empty cache without compressing modified cached blocks + void clear_cache() const { cache.clear(); } + + // decompress array and store at p + void get(value_type* p) const + { + const size_t bx = store.block_size_x(); + const size_t by = store.block_size_y(); + const size_t bz = store.block_size_z(); + const size_t bw = store.block_size_w(); + const ptrdiff_t sx = 1; + const ptrdiff_t sy = static_cast<ptrdiff_t>(nx); + const ptrdiff_t sz = static_cast<ptrdiff_t>(nx * ny); + const ptrdiff_t sw = static_cast<ptrdiff_t>(nx * ny * nz); + size_t block_index = 0; + for (size_t l = 0; l < bw; l++, p += 4 * sz * (nz - bz)) + for (size_t k = 0; k < bz; k++, p += 4 * sy * (ny - by)) + for (size_t j = 0; j < by; j++, p += 4 * sx * (nx - bx)) + for (size_t i = 0; i < bx; i++, p += 4) + cache.get_block(block_index++, p, sx, sy, sz, sw); + } + + // initialize array by copying and compressing data stored at p + void set(const value_type* p, bool compact = true) + { + cache.clear(); + store.clear(); + const size_t bx = store.block_size_x(); + const size_t by = store.block_size_y(); + const size_t bz = store.block_size_z(); + const size_t bw = store.block_size_w(); + size_t block_index = 0; + if (p) { + // compress data stored at p + const ptrdiff_t sx = 1; + const ptrdiff_t sy = static_cast<ptrdiff_t>(nx); + const ptrdiff_t sz = static_cast<ptrdiff_t>(nx * ny); + const ptrdiff_t sw = static_cast<ptrdiff_t>(nx * ny * nz); + for (size_t l = 0; l < bw; l++, p += 4 * sz * (nz - bz)) + for (size_t k = 0; k < bz; k++, p += 4 * sy * (ny - by)) + for (size_t j = 0; j < by; j++, p += 4 * sx * (nx - bx)) + for (size_t i = 0; i < bx; i++, p += 4) + store.encode(block_index++, p, sx, sy, sz, sw); + } + else { + // zero-initialize array + const value_type block[4 * 4 * 4 * 4] = {}; + while (block_index < bx * by * bz * bw) + store.encode(block_index++, block); + } + store.flush(); + if (compact) + store.compact(); + } + + // (i, j, k, l) accessor + const_reference operator()(size_t i, size_t j, size_t k, size_t l) const {
return const_reference(const_cast(this), i, j, k, l); } + + // flat index accessor + const_reference operator[](size_t index) const + { + size_t i, j, k, l; + ijkl(i, j, k, l, index); + return const_reference(const_cast(this), i, j, k, l); + } + + // random access iterators + const_iterator cbegin() const { return const_iterator(this, 0, 0, 0, 0); } + const_iterator cend() const { return const_iterator(this, 0, 0, 0, nw); } + const_iterator begin() const { return cbegin(); } + const_iterator end() const { return cend(); } + +protected: + friend class zfp::internal::dim4::const_handle; + friend class zfp::internal::dim4::const_reference; + friend class zfp::internal::dim4::const_pointer; + friend class zfp::internal::dim4::const_iterator; + friend class zfp::internal::dim4::const_view; + friend class zfp::internal::dim4::private_const_view; + + // perform a deep copy + void deep_copy(const const_array4& a) + { + // copy base class members + array::deep_copy(a); + // copy persistent storage + store.deep_copy(a.store); + // copy cached data + cache.deep_copy(a.cache); + } + + // global index bounds + size_t min_x() const { return 0; } + size_t max_x() const { return nx; } + size_t min_y() const { return 0; } + size_t max_y() const { return ny; } + size_t min_z() const { return 0; } + size_t max_z() const { return nz; } + size_t min_w() const { return 0; } + size_t max_w() const { return nw; } + + // inspector + value_type get(size_t i, size_t j, size_t k, size_t l) const { return cache.get(i, j, k, l); } + + // convert flat index to (i, j, k, l) + void ijkl(size_t& i, size_t& j, size_t& k, size_t& l, size_t index) const + { + i = index % nx; index /= nx; + j = index % ny; index /= ny; + k = index % nz; index /= nz; + l = index; + } + + store_type store; // persistent storage of compressed blocks + cache_type cache; // cache of decompressed blocks +}; + +typedef const_array4 const_array4f; +typedef const_array4 const_array4d; + +} + +#endif diff --git 
a/include/zfp/factory.hpp b/include/zfp/factory.hpp new file mode 100644 index 00000000..73091514 --- /dev/null +++ b/include/zfp/factory.hpp @@ -0,0 +1,119 @@ +#ifndef ZFP_FACTORY_HPP +#define ZFP_FACTORY_HPP + +// ensure zfp/array.hpp has already been included +#ifndef ZFP_ARRAY_HPP + #error "zfp/array.hpp must be included before zfp/factory.hpp" +#endif + +zfp::array* zfp::array::construct(const zfp::array::header& header, const void* buffer, size_t buffer_size_bytes) +{ + // extract metadata from header + const zfp_type type = header.scalar_type(); + const double rate = header.rate(); + const uint dims = header.dimensionality(); + const size_t nx = header.size_x(); + const size_t ny = header.size_y(); + const size_t nz = header.size_z(); + const size_t nw = header.size_w(); + + // construct once (passing zfp::array::header will read it again) + zfp::array* arr = 0; + std::string error; + switch (dims) { + case 4: +#ifdef ZFP_ARRAY4_HPP + switch (type) { + case zfp_type_float: + arr = new zfp::array4f(nx, ny, nz, nw, rate); + break; + case zfp_type_double: + arr = new zfp::array4d(nx, ny, nz, nw, rate); + break; + default: + /* NOTREACHED */ + error = "zfp scalar type not supported"; + break; + } +#else + error = "array4 not supported; include zfp/array4.hpp before zfp/factory.hpp"; +#endif + break; + + case 3: +#ifdef ZFP_ARRAY3_HPP + switch (type) { + case zfp_type_float: + arr = new zfp::array3f(nx, ny, nz, rate); + break; + case zfp_type_double: + arr = new zfp::array3d(nx, ny, nz, rate); + break; + default: + /* NOTREACHED */ + error = "zfp scalar type not supported"; + break; + } +#else + error = "array3 not supported; include zfp/array3.hpp before zfp/factory.hpp"; +#endif + break; + + case 2: +#ifdef ZFP_ARRAY2_HPP + switch (type) { + case zfp_type_float: + arr = new zfp::array2f(nx, ny, rate); + break; + case zfp_type_double: + arr = new zfp::array2d(nx, ny, rate); + break; + default: + /* NOTREACHED */ + error = "zfp scalar type not supported"; + 
break; + } +#else + error = "array2 not supported; include zfp/array2.hpp before zfp/factory.hpp"; +#endif + break; + + case 1: +#ifdef ZFP_ARRAY1_HPP + switch (type) { + case zfp_type_float: + arr = new zfp::array1f(nx, rate); + break; + case zfp_type_double: + arr = new zfp::array1d(nx, rate); + break; + default: + /* NOTREACHED */ + error = "zfp scalar type not supported"; + break; + } +#else + error = "array1 not supported; include zfp/array1.hpp before zfp/factory.hpp"; +#endif + break; + + default: + error = "zfp array dimensionality other than {1, 2, 3, 4} not supported"; + break; + } + + if (!error.empty()) + throw zfp::exception(error); + + if (buffer) { + if (buffer_size_bytes && buffer_size_bytes < arr->compressed_size()) { + delete arr; + throw zfp::exception("zfp buffer size is smaller than required"); + } + std::memcpy(arr->compressed_data(), buffer, arr->compressed_size()); + } + + return arr; +} + +#endif diff --git a/include/zfp/index.hpp b/include/zfp/index.hpp new file mode 100644 index 00000000..b84e9b75 --- /dev/null +++ b/include/zfp/index.hpp @@ -0,0 +1,537 @@ +#ifndef ZFP_INDEX_HPP +#define ZFP_INDEX_HPP + +#include +#include "zfp/internal/array/memory.hpp" + +namespace zfp { +namespace index { + +// implicit block index (fixed-size blocks; 0 bits/block; 64-bit offsets) ----- +class implicit { +public: + // constructor + implicit(size_t blocks) : + bits_per_block(0) + { + resize(blocks); + } + + // destructor + ~implicit() {} + + // byte size of index data structure components indicated by mask + size_t size_bytes(uint mask = ZFP_DATA_ALL) const + { + size_t size = 0; + if (mask & ZFP_DATA_META) + size += sizeof(*this); + return size; + } + + // range of offsets spanned by indexed data in bits + bitstream_size range() const { return block_offset(blocks); } + + // bit size of given block + size_t block_size(size_t /*block_index*/) const { return bits_per_block; } + + // bit offset of given block + bitstream_offset block_offset(size_t 
block_index) const { return block_index * bits_per_block; } + + // reset index + void clear() {} + + // resize index in number of blocks + void resize(size_t blocks) { this->blocks = blocks; } + + // flush any buffered data + void flush() {} + + // set bit size of all blocks + void set_block_size(size_t size) { bits_per_block = size; } + + // set bit size of given block (ignored for performance reasons) + void set_block_size(size_t /*block_index*/, size_t /*size*/) {} + + // does not support variable rate + static bool has_variable_rate() { return false; } + +protected: + size_t blocks; // number of blocks + size_t bits_per_block; // fixed number of bits per block +}; + +// verbatim block index (64 bits/block; 64-bit offsets) ----------------------- +class verbatim { +public: + // constructor for given number of blocks + verbatim(size_t blocks) : + data(0) + { + resize(blocks); + } + + // destructor + ~verbatim() { zfp::internal::deallocate(data); } + + // assignment operator--performs a deep copy + verbatim& operator=(const verbatim& index) + { + if (this != &index) + deep_copy(index); + return *this; + } + + // byte size of index data structure components indicated by mask + size_t size_bytes(uint mask = ZFP_DATA_ALL) const + { + size_t size = 0; + if (mask & ZFP_DATA_INDEX) + size += capacity() * sizeof(*data); + if (mask & ZFP_DATA_META) + size += sizeof(*this); + return size; + } + + // range of offsets spanned by indexed data in bits + bitstream_size range() const { return block_offset(blocks); } + + // bit size of given block + size_t block_size(size_t block_index) const { return static_cast<size_t>(block_offset(block_index + 1) - block_offset(block_index)); } + + // bit offset of given block + bitstream_offset block_offset(size_t block_index) const { return static_cast<bitstream_offset>(data[block_index]); } + + // reset index + void clear() { block = 0; } + + // resize index in number of blocks + void resize(size_t blocks) + { + this->blocks = blocks; +
zfp::internal::reallocate(data, capacity() * sizeof(*data)); + *data = 0; + clear(); + } + + // flush any buffered data + void flush() {} + + // set bit size of all blocks + void set_block_size(size_t size) + { + clear(); + while (block < blocks) + set_block_size(block, size); + clear(); + } + + // set bit size of given block (in sequential order) + void set_block_size(size_t block_index, size_t size) + { + if (block_index != block) + throw zfp::exception("zfp index supports only sequential build"); + if (block == blocks) + throw zfp::exception("zfp index overflow"); + data[block + 1] = data[block] + size; + block++; + } + + // supports variable rate + static bool has_variable_rate() { return true; } + +protected: + // capacity of data array + size_t capacity() const { return blocks + 1; } + + // make a deep copy of index + void deep_copy(const verbatim& index) + { + zfp::internal::clone(data, index.data, index.capacity()); + blocks = index.blocks; + block = index.block; + } + + uint64* data; // block offset array + size_t blocks; // number of blocks + size_t block; // current block index +}; + +// hybrid block index (4 blocks/chunk; 24 bits/block; 44-bit offsets) --------- +class hybrid4 { +public: + // constructor for given number of blocks + hybrid4(size_t blocks) : + data(0) + { + resize(blocks); + } + + // destructor + ~hybrid4() { zfp::internal::deallocate(data); } + + // assignment operator--performs a deep copy + hybrid4& operator=(const hybrid4& index) + { + if (this != &index) + deep_copy(index); + return *this; + } + + // byte size of index data structure components indicated by mask + size_t size_bytes(uint mask = ZFP_DATA_ALL) const + { + size_t size = 0; + if (mask & ZFP_DATA_INDEX) + size += capacity() * sizeof(*data); + if (mask & ZFP_DATA_META) + size += sizeof(*this); + return size; + } + + // range of offsets spanned by indexed data in bits + bitstream_size range() const { return end; } + + // bit size of given block + size_t block_size(size_t 
block_index) const + { + size_t chunk = block_index / 4; + size_t which = block_index % 4; + return which == 3u + ? static_cast(block_offset(block_index + 1) - block_offset(block_index)) + : static_cast(data[chunk].lo[which + 1] - data[chunk].lo[which]); + } + + // bit offset of given block + bitstream_offset block_offset(size_t block_index) const + { + // if index is being built, point offset to end + if (block_index == block) + return end; + // index has already been built; decode offset + size_t chunk = block_index / 4; + size_t which = block_index % 4; + return (bitstream_offset(data[chunk].hi) << shift) + data[chunk].lo[which]; + } + + // reset index + void clear() + { + block = 0; + ptr = 0; + end = 0; + } + + void resize(size_t blocks) + { + this->blocks = blocks; + zfp::internal::reallocate(data, capacity() * sizeof(*data)); + clear(); + } + + // flush any buffered data + void flush() + { + while (block & 0x3u) + set_block_size(block, 0); + } + + // set bit size of all blocks + void set_block_size(size_t size) + { + clear(); + while (block < blocks) + set_block_size(block, size); + flush(); + clear(); + } + + // set bit size of given block (in sequential order) + void set_block_size(size_t block_index, size_t size) + { + // ensure block_index is next in sequence + if (block_index != block) + throw zfp::exception("zfp index supports only sequential build"); + // ensure block index is within bounds, but allow 0-size blocks for padding + if (block >= blocks && size) + throw zfp::exception("zfp index overflow"); + // ensure block size is valid + if (size > ZFP_MAX_BITS) + throw zfp::exception("zfp block size is too large for hybrid4 index"); + // advance end pointer + end += size; + // buffer chunk of 4 block sizes at a time + size_t chunk = block / 4; + size_t which = block % 4; + buffer[which] = size; + if (which == 3u) { + // chunk is complete; encode it + if (ptr >> (32 + shift)) + throw zfp::exception("zfp block offset is too large for hybrid4 index"); + 
// store high bits + data[chunk].hi = static_cast<uint32>(ptr >> shift); + bitstream_offset base = bitstream_offset(data[chunk].hi) << shift; + // store low bits + for (uint k = 0; k < 4; k++) { + data[chunk].lo[k] = static_cast<uint16>(ptr - base); + ptr += buffer[k]; + } + } + block++; + } + + // supports variable rate + static bool has_variable_rate() { return true; } + +protected: + // chunk record encoding 4 block offsets + typedef struct { + uint32 hi; // 32 most significant bits of 44-bit base offset + uint16 lo[4]; // 16-bit offsets from base + } record; + + // capacity of data array + size_t capacity() const { return (blocks + 3) / 4; } + + // make a deep copy of index + void deep_copy(const hybrid4& index) + { + zfp::internal::clone(data, index.data, index.capacity()); + blocks = index.blocks; + block = index.block; + ptr = index.ptr; + end = index.end; + std::copy(index.buffer, index.buffer + 4, buffer); + } + + static const uint shift = 12; // number of bits to shift hi bits + + record* data; // block offset array + size_t blocks; // number of blocks + size_t block; // current block index + bitstream_offset end; // offset to last block + bitstream_offset ptr; // offset to current chunk of blocks + size_t buffer[4]; // bit sizes of 4 blocks to be stored together +}; + +// hybrid block index (8 blocks/chunk; 16 bits/block; 86-14dims bit offsets) -- +template <uint dims> +class hybrid8 { +public: + // constructor for given number of blocks + hybrid8(size_t blocks) : + data(0) + { + resize(blocks); + } + + // destructor + ~hybrid8() { zfp::internal::deallocate(data); } + + // assignment operator--performs a deep copy + hybrid8& operator=(const hybrid8& index) + { + if (this != &index) + deep_copy(index); + return *this; + } + + // byte size of index data structure components indicated by mask + size_t size_bytes(uint mask = ZFP_DATA_ALL) const + { + size_t size = 0; + if (mask & ZFP_DATA_INDEX) + size += capacity() * sizeof(*data); + if (mask & ZFP_DATA_META) + size += sizeof(*this); + 
return size; + } + + // range of offsets spanned by indexed data in bits + bitstream_size range() const { return end; } + + // bit size of given block + size_t block_size(size_t block_index) const + { + size_t chunk = block_index / 8; + size_t which = block_index % 8; + return which == 7u + ? static_cast(block_offset(block_index + 1) - block_offset(block_index)) + : size(data[2 * chunk + 0], data[2 * chunk + 1], static_cast(which)); + } + + // bit offset of given block + bitstream_offset block_offset(size_t block_index) const + { + // if index is being built, point offset to end + if (block_index == block) + return end; + // index has already been built; decode offset + size_t chunk = block_index / 8; + size_t which = block_index % 8; + return offset(data[2 * chunk + 0], data[2 * chunk + 1], static_cast(which)); + } + + // reset index + void clear() + { + block = 0; + ptr = 0; + end = 0; + } + + void resize(size_t blocks) + { + this->blocks = blocks; + zfp::internal::reallocate(data, capacity() * sizeof(*data)); + clear(); + } + + // flush any buffered data + void flush() + { + while (block & 0x7u) + set_block_size(block, 0); + } + + // set bit size of all blocks + void set_block_size(size_t size) + { + clear(); + while (block < blocks) + set_block_size(block, size); + flush(); + clear(); + } + + // set bit size of given block (in sequential order) + void set_block_size(size_t block_index, size_t size) + { + // ensure block_index is next in sequence + if (block_index != block) + throw zfp::exception("zfp index supports only sequential build"); + // ensure block index is within bounds, but allow 0-size blocks for padding + if (block >= blocks && size) + throw zfp::exception("zfp index overflow"); + // ensure block size is valid + if (size >> (hbits + lbits)) + throw zfp::exception("zfp block size is too large for hybrid8 index"); + // advance end pointer + end += size; + // buffer chunk of 8 block sizes at a time + size_t chunk = block / 8; + size_t which = block % 
8; + buffer[which] = size; + if (which == 7u) { + // partition chunk offset into low and high bits + uint64 h = ptr >> lbits; + uint64 l = ptr - (h << lbits); + uint64 hi = h << (7 * hbits); + uint64 lo = l << (7 * lbits); + // make sure base offset does not overflow + if ((hi >> (7 * hbits)) != h) + throw zfp::exception("zfp block offset is too large for hybrid8 index"); + // store sizes of blocks 0-6 + for (uint k = 0; k < 7; k++) { + size = buffer[k]; + ptr += size; + // partition block size into hbits high and lbits low bits + h = size >> lbits; + l = size - (h << lbits); + hi += h << ((6 - k) * hbits); + lo += l << ((6 - k) * lbits); + } + ptr += buffer[7]; + data[2 * chunk + 0] = hi; + data[2 * chunk + 1] = lo; + } + block++; + } + + // supports variable rate + static bool has_variable_rate() { return true; } + +protected: + // capacity of data array + size_t capacity() const { return 2 * ((blocks + 7) / 8); } + + // make a deep copy of index + void deep_copy(const hybrid8& index) + { + zfp::internal::clone(data, index.data, index.capacity()); + blocks = index.blocks; + block = index.block; + ptr = index.ptr; + end = index.end; + std::copy(index.buffer, index.buffer + 8, buffer); + } + + // kth size in chunk, 0 <= k <= 6 + static size_t size(uint64 h, uint64 l, uint k) + { + // extract high and low bits + h >>= (6 - k) * hbits; h &= (UINT64C(1) << hbits) - 1; + l >>= (6 - k) * lbits; l &= (UINT64C(1) << lbits) - 1; + // combine base offset with high and low bits + return static_cast((h << lbits) + l); + } + + // kth offset in chunk, 0 <= k <= 7 + static bitstream_offset offset(uint64 h, uint64 l, uint k) + { + // extract all but lowest (8 * hbits) bits + uint64 base = h >> (8 * hbits); + h -= base << (8 * hbits); + // add LSBs of base offset and k block sizes + h = hsum(h >> ((7 - k) * hbits)); + l = lsum(l >> ((7 - k) * lbits)); + // combine base offset with high and low bits + return static_cast((((base << hbits) + h) << lbits) + l); + } + + // sum of (up 
to) eight packed 8-bit numbers (efficient version of sum8) + static uint64 lsum(uint64 x) + { + // reduce in parallel + uint64 y = x & UINT64C(0xff00ff00ff00ff00); + x -= y; + x += y >> 8; + x += x >> 16; + x += x >> 32; + return x & UINT64C(0xffff); + } + + // sum of (up to) eight packed h-bit numbers + static uint64 hsum(uint64 x) { return sum8(x, hbits); } + + // compute sum of eight packed n-bit values (1 <= n <= 8) + static uint64 sum8(uint64 x, uint n) + { + // bit masks for extracting terms of sums + uint64 m3 = ~UINT64C(0) << (4 * n); + uint64 m2 = m3 ^ (m3 << (4 * n)); + uint64 m1 = m2 ^ (m2 >> (2 * n)); + uint64 m0 = m1 ^ (m1 >> (1 * n)); + uint64 y; + // perform summations in parallel + y = x & m0; x -= y; x += y >> n; n *= 2; // four summations + y = x & m1; x -= y; x += y >> n; n *= 2; // two summations + y = x & m2; x -= y; x += y >> n; n *= 2; // final summation + return x; + } + + static const uint lbits = 8; // 64 bits partitioned into 8 + static const uint hbits = 2 * (dims - 1); // log2(4^d * maxprec / 2^lbits) + + uint64* data; // block offset array + size_t blocks; // number of blocks + size_t block; // current block index + bitstream_offset end; // offset to last block + bitstream_offset ptr; // offset to current set of blocks + size_t buffer[8]; // sizes of 8 blocks to be stored together +}; + +} // index +} // zfp + +#endif diff --git a/array/zfp/cache.h b/include/zfp/internal/array/cache.hpp similarity index 82% rename from array/zfp/cache.h rename to include/zfp/internal/array/cache.hpp index 3630910a..533c37db 100644 --- a/array/zfp/cache.h +++ b/include/zfp/internal/array/cache.hpp @@ -1,7 +1,7 @@ -#ifndef ZFP_CACHE_H -#define ZFP_CACHE_H +#ifndef ZFP_CACHE_HPP +#define ZFP_CACHE_HPP -#include "memory.h" +#include "zfp/internal/array/memory.hpp" #ifdef ZFP_WITH_CACHE_PROFILE // maintain stats on hit and miss rates @@ -9,6 +9,7 @@ #endif namespace zfp { +namespace internal { // direct-mapped or two-way skew-associative write-back cache 
template @@ -87,11 +88,11 @@ class Cache { }; // allocate cache with at least minsize lines - Cache(uint minsize = 0) : tag(0), line(0) + Cache(uint minsize = 0) : mask(0), tag(0), line(0) { resize(minsize); #ifdef ZFP_WITH_CACHE_PROFILE - std::cerr << "cache lines=" << mask + 1 << std::endl; + std::cerr << "cache lines=" << size() << std::endl; hit[0][0] = hit[1][0] = miss[0] = back[0] = 0; hit[0][1] = hit[1][1] = miss[1] = back[1] = 0; #endif @@ -106,8 +107,8 @@ class Cache { // destructor ~Cache() { - zfp::deallocate_aligned(tag); - zfp::deallocate_aligned(line); + zfp::internal::deallocate_aligned(tag); + zfp::internal::deallocate_aligned(line); #ifdef ZFP_WITH_CACHE_PROFILE std::cerr << "cache R1=" << hit[0][0] << " R2=" << hit[1][0] << " RM=" << miss[0] << " RB=" << back[0] << " W1=" << hit[0][1] << " W2=" << hit[1][1] << " WM=" << miss[1] << " WB=" << back[1] << std::endl; @@ -122,29 +123,47 @@ class Cache { return *this; } + // byte size of cache data structure components indicated by mask + size_t size_bytes(uint mask = ZFP_DATA_ALL) const + { + size_t size = 0; + if (mask & ZFP_DATA_CACHE) + size += this->size() * (sizeof(*tag) + sizeof(*line)); + if (mask & ZFP_DATA_META) + size += sizeof(*this); + return size; + } + // cache size in number of lines uint size() const { return mask + 1; } // change cache size to at least minsize lines (all contents will be lost) void resize(uint minsize) { + // compute smallest value of mask such that mask + 1 = 2^k >= minsize for (mask = minsize ? 
minsize - 1 : 1; mask & (mask + 1); mask |= mask + 1); - zfp::reallocate_aligned(tag, ((size_t)mask + 1) * sizeof(Tag), 0x100); - zfp::reallocate_aligned(line, ((size_t)mask + 1) * sizeof(Line), 0x100); + zfp::internal::reallocate_aligned(tag, size() * sizeof(Tag), ZFP_MEMORY_ALIGNMENT); + zfp::internal::reallocate_aligned(line, size() * sizeof(Line), ZFP_MEMORY_ALIGNMENT); clear(); } // look up cache line #x and return pointer to it if in the cache; // otherwise return null - const Line* lookup(Index x) const + Line* lookup(Index x, bool write) { uint i = primary(x); - if (tag[i].index() == x) + if (tag[i].index() == x) { + if (write) + tag[i].mark(); return line + i; + } #ifdef ZFP_WITH_CACHE_TWOWAY uint j = secondary(x); - if (tag[j].index() == x) + if (tag[j].index() == x) { + if (write) + tag[j].mark(); return line + j; + } #endif return 0; } @@ -211,8 +230,8 @@ class Cache { void deep_copy(const Cache& c) { mask = c.mask; - zfp::clone_aligned(tag, c.tag, mask + 1, 0x100u); - zfp::clone_aligned(line, c.line, mask + 1, 0x100u); + zfp::internal::clone_aligned(tag, c.tag, size(), ZFP_MEMORY_ALIGNMENT); + zfp::internal::clone_aligned(line, c.line, size(), ZFP_MEMORY_ALIGNMENT); #ifdef ZFP_WITH_CACHE_PROFILE hit[0][0] = c.hit[0][0]; hit[0][1] = c.hit[0][1]; @@ -256,6 +275,7 @@ class Cache { #endif }; -} +} // internal +} // zfp #endif diff --git a/include/zfp/internal/array/cache1.hpp b/include/zfp/internal/array/cache1.hpp new file mode 100644 index 00000000..24f192e5 --- /dev/null +++ b/include/zfp/internal/array/cache1.hpp @@ -0,0 +1,201 @@ +#ifndef ZFP_CACHE1_HPP +#define ZFP_CACHE1_HPP + +#include "zfp/internal/array/cache.hpp" + +namespace zfp { +namespace internal { + +template <typename Scalar, class Store> +class BlockCache1 { +public: + // constructor of cache of given size + BlockCache1(Store& store, size_t bytes = 0) : + cache(lines(bytes, store.blocks())), + store(store) + {} + + // byte size of cache data structure components indicated by mask + size_t size_bytes(uint mask =
ZFP_DATA_ALL) const + { + size_t size = 0; + size += cache.size_bytes(mask); + if (mask & ZFP_DATA_META) + size += sizeof(*this); + return size; + } + + // cache size in number of bytes (cache line payload data only) + size_t size() const { return cache.size() * sizeof(CacheLine); } + + // set minimum cache size in bytes (inferred from blocks if zero) + void resize(size_t bytes) + { + flush(); + cache.resize(lines(bytes, store.blocks())); + } + + // empty cache without compressing modified cached blocks + void clear() const { cache.clear(); } + + // flush cache by compressing all modified cached blocks + void flush() const + { + for (typename zfp::internal::Cache::const_iterator p = cache.first(); p; p++) { + if (p->tag.dirty()) { + size_t block_index = p->tag.index() - 1; + store.encode(block_index, p->line->data()); + } + cache.flush(p->line); + } + } + + // perform a deep copy + void deep_copy(const BlockCache1& c) { cache = c.cache; } + + // inspector + Scalar get(size_t i) const + { + const CacheLine* p = line(i, false); + return (*p)(i); + } + + // mutator + void set(size_t i, Scalar val) + { + CacheLine* p = line(i, true); + (*p)(i) = val; + } + + // reference to cached element + Scalar& ref(size_t i) + { + CacheLine* p = line(i, true); + return (*p)(i); + } + + // read-no-allocate: copy block from cache on hit, else from store without caching + void get_block(size_t block_index, Scalar* p, ptrdiff_t sx) const + { + const CacheLine* line = cache.lookup((uint)block_index + 1, false); + if (line) + line->get(p, sx, store.block_shape(block_index)); + else + store.decode(block_index, p, sx); + } + + // write-no-allocate: copy block to cache on hit, else to store without caching + void put_block(size_t block_index, const Scalar* p, ptrdiff_t sx) + { + CacheLine* line = cache.lookup((uint)block_index + 1, true); + if (line) + line->put(p, sx, store.block_shape(block_index)); + else + store.encode(block_index, p, sx); + } + +protected: + // cache line representing 
one block of decompressed values + class CacheLine { + public: + // accessors + Scalar operator()(size_t i) const { return a[index(i)]; } + Scalar& operator()(size_t i) { return a[index(i)]; } + + // pointer to decompressed block data + const Scalar* data() const { return a; } + Scalar* data() { return a; } + + // copy whole block from cache line + void get(Scalar* p, ptrdiff_t sx) const + { + const Scalar* q = a; + for (uint x = 0; x < 4; x++, p += sx, q++) + *p = *q; + } + + // copy partial block from cache line + void get(Scalar* p, ptrdiff_t sx, uint shape) const + { + if (!shape) + get(p, sx); + else { + // determine block dimensions + uint nx = 4 - (shape & 3u); shape >>= 2; + const Scalar* q = a; + for (uint x = 0; x < nx; x++, p += sx, q++) + *p = *q; + } + } + + // copy whole block to cache line + void put(const Scalar* p, ptrdiff_t sx) + { + Scalar* q = a; + for (uint x = 0; x < 4; x++, p += sx, q++) + *q = *p; + } + + // copy partial block to cache line + void put(const Scalar* p, ptrdiff_t sx, uint shape) + { + if (!shape) + put(p, sx); + else { + // determine block dimensions + uint nx = 4 - (shape & 3u); shape >>= 2; + Scalar* q = a; + for (uint x = 0; x < nx; x++, p += sx, q++) + *q = *p; + } + } + + protected: + static size_t index(size_t i) { return (i & 3u); } + Scalar a[4]; + }; + + // return cache line for i; may require write-back and fetch + CacheLine* line(size_t i, bool write) const + { + CacheLine* p = 0; + size_t block_index = store.block_index(i); + typename zfp::internal::Cache::Tag tag = cache.access(p, (uint)block_index + 1, write); + size_t stored_block_index = tag.index() - 1; + if (stored_block_index != block_index) { + // write back occupied cache line if it is dirty + if (tag.dirty()) + store.encode(stored_block_index, p->data()); + // fetch cache line + store.decode(block_index, p->data()); + } + return p; + } + + // default number of cache lines for array with given number of blocks + static uint lines(size_t blocks) + { + // 
compute m = O(sqrt(n)) + size_t m; + for (m = 1; m * m < blocks; m *= 2); + return static_cast(m); + } + + // number of cache lines corresponding to size (or suggested size if zero) + static uint lines(size_t bytes, size_t blocks) + { + // ensure block index fits in tag + if (blocks >> ((sizeof(uint) * CHAR_BIT) - 1)) + throw zfp::exception("zfp array too large for cache"); + uint n = bytes ? static_cast((bytes + sizeof(CacheLine) - 1) / sizeof(CacheLine)) : lines(blocks); + return std::max(n, 1u); + } + + mutable Cache cache; // cache of decompressed blocks + Store& store; // store backed by cache +}; + +} // internal +} // zfp + +#endif diff --git a/include/zfp/internal/array/cache2.hpp b/include/zfp/internal/array/cache2.hpp new file mode 100644 index 00000000..e7aa07d9 --- /dev/null +++ b/include/zfp/internal/array/cache2.hpp @@ -0,0 +1,207 @@ +#ifndef ZFP_CACHE2_HPP +#define ZFP_CACHE2_HPP + +#include "zfp/internal/array/cache.hpp" + +namespace zfp { +namespace internal { + +template +class BlockCache2 { +public: + // constructor of cache of given size + BlockCache2(Store& store, size_t bytes = 0) : + cache(lines(bytes, store.blocks())), + store(store) + {} + + // byte size of cache data structure components indicated by mask + size_t size_bytes(uint mask = ZFP_DATA_ALL) const + { + size_t size = 0; + size += cache.size_bytes(mask); + if (mask & ZFP_DATA_META) + size += sizeof(*this); + return size; + } + + // cache size in number of bytes (cache line payload data only) + size_t size() const { return cache.size() * sizeof(CacheLine); } + + // set minimum cache size in bytes (inferred from blocks if zero) + void resize(size_t bytes) + { + flush(); + cache.resize(lines(bytes, store.blocks())); + } + + // empty cache without compressing modified cached blocks + void clear() const { cache.clear(); } + + // flush cache by compressing all modified cached blocks + void flush() const + { + for (typename zfp::internal::Cache::const_iterator p = cache.first(); p; p++) { 
+ if (p->tag.dirty()) { + size_t block_index = p->tag.index() - 1; + store.encode(block_index, p->line->data()); + } + cache.flush(p->line); + } + } + + // perform a deep copy + void deep_copy(const BlockCache2& c) { cache = c.cache; } + + // inspector + Scalar get(size_t i, size_t j) const + { + const CacheLine* p = line(i, j, false); + return (*p)(i, j); + } + + // mutator + void set(size_t i, size_t j, Scalar val) + { + CacheLine* p = line(i, j, true); + (*p)(i, j) = val; + } + + // reference to cached element + Scalar& ref(size_t i, size_t j) + { + CacheLine* p = line(i, j, true); + return (*p)(i, j); + } + + // read-no-allocate: copy block from cache on hit, else from store without caching + void get_block(size_t block_index, Scalar* p, ptrdiff_t sx, ptrdiff_t sy) const + { + const CacheLine* line = cache.lookup((uint)block_index + 1, false); + if (line) + line->get(p, sx, sy, store.block_shape(block_index)); + else + store.decode(block_index, p, sx, sy); + } + + // write-no-allocate: copy block to cache on hit, else to store without caching + void put_block(size_t block_index, const Scalar* p, ptrdiff_t sx, ptrdiff_t sy) + { + CacheLine* line = cache.lookup((uint)block_index + 1, true); + if (line) + line->put(p, sx, sy, store.block_shape(block_index)); + else + store.encode(block_index, p, sx, sy); + } + +protected: + // cache line representing one block of decompressed values + class CacheLine { + public: + // accessors + Scalar operator()(size_t i, size_t j) const { return a[index(i, j)]; } + Scalar& operator()(size_t i, size_t j) { return a[index(i, j)]; } + + // pointer to decompressed block data + const Scalar* data() const { return a; } + Scalar* data() { return a; } + + // copy whole block from cache line + void get(Scalar* p, ptrdiff_t sx, ptrdiff_t sy) const + { + const Scalar* q = a; + for (uint y = 0; y < 4; y++, p += sy - 4 * sx) + for (uint x = 0; x < 4; x++, p += sx, q++) + *p = *q; + } + + // copy partial block from cache line + void 
get(Scalar* p, ptrdiff_t sx, ptrdiff_t sy, uint shape) const + { + if (!shape) + get(p, sx, sy); + else { + // determine block dimensions + uint nx = 4 - (shape & 3u); shape >>= 2; + uint ny = 4 - (shape & 3u); shape >>= 2; + const Scalar* q = a; + for (uint y = 0; y < ny; y++, p += sy - (ptrdiff_t)nx * sx, q += 4 - nx) + for (uint x = 0; x < nx; x++, p += sx, q++) + *p = *q; + } + } + + // copy whole block to cache line + void put(const Scalar* p, ptrdiff_t sx, ptrdiff_t sy) + { + Scalar* q = a; + for (uint y = 0; y < 4; y++, p += sy - 4 * sx) + for (uint x = 0; x < 4; x++, p += sx, q++) + *q = *p; + } + + // copy partial block to cache line + void put(const Scalar* p, ptrdiff_t sx, ptrdiff_t sy, uint shape) + { + if (!shape) + put(p, sx, sy); + else { + // determine block dimensions + uint nx = 4 - (shape & 3u); shape >>= 2; + uint ny = 4 - (shape & 3u); shape >>= 2; + Scalar* q = a; + for (uint y = 0; y < ny; y++, p += sy - (ptrdiff_t)nx * sx, q += 4 - nx) + for (uint x = 0; x < nx; x++, p += sx, q++) + *q = *p; + } + } + + protected: + static size_t index(size_t i, size_t j) { return (i & 3u) + 4 * (j & 3u); } + Scalar a[4 * 4]; + }; + + // return cache line for (i, j); may require write-back and fetch + CacheLine* line(size_t i, size_t j, bool write) const + { + CacheLine* p = 0; + size_t block_index = store.block_index(i, j); + typename zfp::internal::Cache<CacheLine>::Tag tag = cache.access(p, (uint)block_index + 1, write); + size_t stored_block_index = tag.index() - 1; + if (stored_block_index != block_index) { + // write back occupied cache line if it is dirty + if (tag.dirty()) + store.encode(stored_block_index, p->data()); + // fetch cache line + store.decode(block_index, p->data()); + } + return p; + } + + // default number of cache lines for array with given number of blocks + static uint lines(size_t blocks) + { + // compute m = O(sqrt(n)) + size_t m; + for (m = 1; m * m < blocks; m *= 2); + return static_cast<uint>(m); + } + + // number of cache lines corresponding
to size (or suggested size if zero) + static uint lines(size_t bytes, size_t blocks) + { + // ensure block index fits in tag + if (blocks >> ((sizeof(uint) * CHAR_BIT) - 1)) + throw zfp::exception("zfp array too large for cache"); + uint n = bytes ? static_cast<uint>((bytes + sizeof(CacheLine) - 1) / sizeof(CacheLine)) : lines(blocks); + return std::max(n, 1u); + } + + mutable Cache<CacheLine> cache; // cache of decompressed blocks + Store& store; // store backed by cache +}; + +} // internal +} // zfp + +#endif diff --git a/include/zfp/internal/array/cache3.hpp b/include/zfp/internal/array/cache3.hpp new file mode 100644 index 00000000..1c4c9554 --- /dev/null +++ b/include/zfp/internal/array/cache3.hpp @@ -0,0 +1,213 @@ +#ifndef ZFP_CACHE3_HPP +#define ZFP_CACHE3_HPP + +#include "zfp/internal/array/cache.hpp" + +namespace zfp { +namespace internal { + +template <typename Scalar, class Store> +class BlockCache3 { +public: + // constructor of cache of given size + BlockCache3(Store& store, size_t bytes = 0) : + cache(lines(bytes, store.blocks())), + store(store) + {} + + // byte size of cache data structure components indicated by mask + size_t size_bytes(uint mask = ZFP_DATA_ALL) const + { + size_t size = 0; + size += cache.size_bytes(mask); + if (mask & ZFP_DATA_META) + size += sizeof(*this); + return size; + } + + // cache size in number of bytes (cache line payload data only) + size_t size() const { return cache.size() * sizeof(CacheLine); } + + // set minimum cache size in bytes (inferred from blocks if zero) + void resize(size_t bytes) + { + flush(); + cache.resize(lines(bytes, store.blocks())); + } + + // empty cache without compressing modified cached blocks + void clear() const { cache.clear(); } + + // flush cache by compressing all modified cached blocks + void flush() const + { + for (typename zfp::internal::Cache<CacheLine>::const_iterator p = cache.first(); p; p++) { + if (p->tag.dirty()) { + size_t block_index = p->tag.index() - 1; + store.encode(block_index, p->line->data()); + } + cache.flush(p->line); + }
+ } + + // perform a deep copy + void deep_copy(const BlockCache3& c) { cache = c.cache; } + + // inspector + Scalar get(size_t i, size_t j, size_t k) const + { + const CacheLine* p = line(i, j, k, false); + return (*p)(i, j, k); + } + + // mutator + void set(size_t i, size_t j, size_t k, Scalar val) + { + CacheLine* p = line(i, j, k, true); + (*p)(i, j, k) = val; + } + + // reference to cached element + Scalar& ref(size_t i, size_t j, size_t k) + { + CacheLine* p = line(i, j, k, true); + return (*p)(i, j, k); + } + + // read-no-allocate: copy block from cache on hit, else from store without caching + void get_block(size_t block_index, Scalar* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz) const + { + const CacheLine* line = cache.lookup((uint)block_index + 1, false); + if (line) + line->get(p, sx, sy, sz, store.block_shape(block_index)); + else + store.decode(block_index, p, sx, sy, sz); + } + + // write-no-allocate: copy block to cache on hit, else to store without caching + void put_block(size_t block_index, const Scalar* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz) const + { + CacheLine* line = cache.lookup((uint)block_index + 1, true); + if (line) + line->put(p, sx, sy, sz, store.block_shape(block_index)); + else + store.encode(block_index, p, sx, sy, sz); + } + +protected: + // cache line representing one block of decompressed values + class CacheLine { + public: + // accessors + Scalar operator()(size_t i, size_t j, size_t k) const { return a[index(i, j, k)]; } + Scalar& operator()(size_t i, size_t j, size_t k) { return a[index(i, j, k)]; } + + // pointer to decompressed block data + const Scalar* data() const { return a; } + Scalar* data() { return a; } + + // copy whole block from cache line + void get(Scalar* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz) const + { + const Scalar* q = a; + for (uint z = 0; z < 4; z++, p += sz - 4 * sy) + for (uint y = 0; y < 4; y++, p += sy - 4 * sx) + for (uint x = 0; x < 4; x++, p += sx, q++) + *p = *q; + } + + // copy 
partial block from cache line + void get(Scalar* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, uint shape) const + { + if (!shape) + get(p, sx, sy, sz); + else { + // determine block dimensions + uint nx = 4 - (shape & 3u); shape >>= 2; + uint ny = 4 - (shape & 3u); shape >>= 2; + uint nz = 4 - (shape & 3u); shape >>= 2; + const Scalar* q = a; + for (uint z = 0; z < nz; z++, p += sz - (ptrdiff_t)ny * sy, q += 16 - 4 * ny) + for (uint y = 0; y < ny; y++, p += sy - (ptrdiff_t)nx * sx, q += 4 - nx) + for (uint x = 0; x < nx; x++, p += sx, q++) + *p = *q; + } + } + + // copy whole block to cache line + void put(const Scalar* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz) + { + Scalar* q = a; + for (uint z = 0; z < 4; z++, p += sz - 4 * sy) + for (uint y = 0; y < 4; y++, p += sy - 4 * sx) + for (uint x = 0; x < 4; x++, p += sx, q++) + *q = *p; + } + + // copy partial block to cache line + void put(const Scalar* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, uint shape) + { + if (!shape) + put(p, sx, sy, sz); + else { + // determine block dimensions + uint nx = 4 - (shape & 3u); shape >>= 2; + uint ny = 4 - (shape & 3u); shape >>= 2; + uint nz = 4 - (shape & 3u); shape >>= 2; + Scalar* q = a; + for (uint z = 0; z < nz; z++, p += sz - (ptrdiff_t)ny * sy, q += 16 - 4 * ny) + for (uint y = 0; y < ny; y++, p += sy - (ptrdiff_t)nx * sx, q += 4 - nx) + for (uint x = 0; x < nx; x++, p += sx, q++) + *q = *p; + } + } + + protected: + static size_t index(size_t i, size_t j, size_t k) { return (i & 3u) + 4 * ((j & 3u) + 4 * (k & 3u)); } + Scalar a[4 * 4 * 4]; + }; + + // return cache line for (i, j, k); may require write-back and fetch + CacheLine* line(size_t i, size_t j, size_t k, bool write) const + { + CacheLine* p = 0; + size_t block_index = store.block_index(i, j, k); + typename zfp::internal::Cache<CacheLine>::Tag tag = cache.access(p, (uint)block_index + 1, write); + size_t stored_block_index = tag.index() - 1; + if (stored_block_index != block_index) { + // write back occupied cache line
if it is dirty + if (tag.dirty()) + store.encode(stored_block_index, p->data()); + // fetch cache line + store.decode(block_index, p->data()); + } + return p; + } + + // default number of cache lines for array with given number of blocks + static uint lines(size_t blocks) + { + // compute m = O(sqrt(n)) + size_t m; + for (m = 1; m * m < blocks; m *= 2); + return static_cast<uint>(m); + } + + // number of cache lines corresponding to size (or suggested size if zero) + static uint lines(size_t bytes, size_t blocks) + { + // ensure block index fits in tag + if (blocks >> ((sizeof(uint) * CHAR_BIT) - 1)) + throw zfp::exception("zfp array too large for cache"); + uint n = bytes ? static_cast<uint>((bytes + sizeof(CacheLine) - 1) / sizeof(CacheLine)) : lines(blocks); + return std::max(n, 1u); + } + + mutable Cache<CacheLine> cache; // cache of decompressed blocks + Store& store; // store backed by cache +}; + +} // internal +} // zfp + +#endif diff --git a/include/zfp/internal/array/cache4.hpp b/include/zfp/internal/array/cache4.hpp new file mode 100644 index 00000000..69182b7e --- /dev/null +++ b/include/zfp/internal/array/cache4.hpp @@ -0,0 +1,219 @@ +#ifndef ZFP_CACHE4_HPP +#define ZFP_CACHE4_HPP + +#include "zfp/internal/array/cache.hpp" + +namespace zfp { +namespace internal { + +template <typename Scalar, class Store> +class BlockCache4 { +public: + // constructor of cache of given size + BlockCache4(Store& store, size_t bytes = 0) : + cache(lines(bytes, store.blocks())), + store(store) + {} + + // byte size of cache data structure components indicated by mask + size_t size_bytes(uint mask = ZFP_DATA_ALL) const + { + size_t size = 0; + size += cache.size_bytes(mask); + if (mask & ZFP_DATA_META) + size += sizeof(*this); + return size; + } + + // cache size in number of bytes (cache line payload data only) + size_t size() const { return cache.size() * sizeof(CacheLine); } + + // set minimum cache size in bytes (inferred from blocks if zero) + void resize(size_t bytes) + { + flush(); + cache.resize(lines(bytes,
store.blocks())); + } + + // empty cache without compressing modified cached blocks + void clear() const { cache.clear(); } + + // flush cache by compressing all modified cached blocks + void flush() const + { + for (typename zfp::internal::Cache<CacheLine>::const_iterator p = cache.first(); p; p++) { + if (p->tag.dirty()) { + size_t block_index = p->tag.index() - 1; + store.encode(block_index, p->line->data()); + } + cache.flush(p->line); + } + } + + // perform a deep copy + void deep_copy(const BlockCache4& c) { cache = c.cache; } + + // inspector + Scalar get(size_t i, size_t j, size_t k, size_t l) const + { + const CacheLine* p = line(i, j, k, l, false); + return (*p)(i, j, k, l); + } + + // mutator + void set(size_t i, size_t j, size_t k, size_t l, Scalar val) + { + CacheLine* p = line(i, j, k, l, true); + (*p)(i, j, k, l) = val; + } + + // reference to cached element + Scalar& ref(size_t i, size_t j, size_t k, size_t l) + { + CacheLine* p = line(i, j, k, l, true); + return (*p)(i, j, k, l); + } + + // read-no-allocate: copy block from cache on hit, else from store without caching + void get_block(size_t block_index, Scalar* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw) const + { + const CacheLine* line = cache.lookup((uint)block_index + 1, false); + if (line) + line->get(p, sx, sy, sz, sw, store.block_shape(block_index)); + else + store.decode(block_index, p, sx, sy, sz, sw); + } + + // write-no-allocate: copy block to cache on hit, else to store without caching + void put_block(size_t block_index, const Scalar* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw) const + { + CacheLine* line = cache.lookup((uint)block_index + 1, true); + if (line) + line->put(p, sx, sy, sz, sw, store.block_shape(block_index)); + else + store.encode(block_index, p, sx, sy, sz, sw); + } + +protected: + // cache line representing one block of decompressed values + class CacheLine { + public: + // accessors + Scalar operator()(size_t i, size_t j, size_t k, size_t l) const
{ return a[index(i, j, k, l)]; } + Scalar& operator()(size_t i, size_t j, size_t k, size_t l) { return a[index(i, j, k, l)]; } + + // pointer to decompressed block data + const Scalar* data() const { return a; } + Scalar* data() { return a; } + + // copy whole block from cache line + void get(Scalar* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw) const + { + const Scalar* q = a; + for (uint w = 0; w < 4; w++, p += sw - 4 * sz) + for (uint z = 0; z < 4; z++, p += sz - 4 * sy) + for (uint y = 0; y < 4; y++, p += sy - 4 * sx) + for (uint x = 0; x < 4; x++, p += sx, q++) + *p = *q; + } + + // copy partial block from cache line + void get(Scalar* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw, uint shape) const + { + if (!shape) + get(p, sx, sy, sz, sw); + else { + // determine block dimensions + uint nx = 4 - (shape & 3u); shape >>= 2; + uint ny = 4 - (shape & 3u); shape >>= 2; + uint nz = 4 - (shape & 3u); shape >>= 2; + uint nw = 4 - (shape & 3u); shape >>= 2; + const Scalar* q = a; + for (uint w = 0; w < nw; w++, p += sw - (ptrdiff_t)nz * sz, q += 64 - 16 * nz) + for (uint z = 0; z < nz; z++, p += sz - (ptrdiff_t)ny * sy, q += 16 - 4 * ny) + for (uint y = 0; y < ny; y++, p += sy - (ptrdiff_t)nx * sx, q += 4 - nx) + for (uint x = 0; x < nx; x++, p += sx, q++) + *p = *q; + } + } + + // copy whole block to cache line + void put(const Scalar* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw) + { + Scalar* q = a; + for (uint w = 0; w < 4; w++, p += sw - 4 * sz) + for (uint z = 0; z < 4; z++, p += sz - 4 * sy) + for (uint y = 0; y < 4; y++, p += sy - 4 * sx) + for (uint x = 0; x < 4; x++, p += sx, q++) + *q = *p; + } + + // copy partial block to cache line + void put(const Scalar* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw, uint shape) + { + if (!shape) + put(p, sx, sy, sz, sw); + else { + // determine block dimensions + uint nx = 4 - (shape & 3u); shape >>= 2; + uint ny = 4 - (shape & 3u); shape >>= 2; + uint nz = 4 - (shape 
& 3u); shape >>= 2; + uint nw = 4 - (shape & 3u); shape >>= 2; + Scalar* q = a; + for (uint w = 0; w < nw; w++, p += sw - (ptrdiff_t)nz * sz, q += 64 - 16 * nz) + for (uint z = 0; z < nz; z++, p += sz - (ptrdiff_t)ny * sy, q += 16 - 4 * ny) + for (uint y = 0; y < ny; y++, p += sy - (ptrdiff_t)nx * sx, q += 4 - nx) + for (uint x = 0; x < nx; x++, p += sx, q++) + *q = *p; + } + } + + protected: + static size_t index(size_t i, size_t j, size_t k, size_t l) { return (i & 3u) + 4 * ((j & 3u) + 4 * ((k & 3u) + 4 * (l & 3u))); } + Scalar a[4 * 4 * 4 * 4]; + }; + + // return cache line for (i, j, k, l); may require write-back and fetch + CacheLine* line(size_t i, size_t j, size_t k, size_t l, bool write) const + { + CacheLine* p = 0; + size_t block_index = store.block_index(i, j, k, l); + typename zfp::internal::Cache<CacheLine>::Tag tag = cache.access(p, (uint)block_index + 1, write); + size_t stored_block_index = tag.index() - 1; + if (stored_block_index != block_index) { + // write back occupied cache line if it is dirty + if (tag.dirty()) + store.encode(stored_block_index, p->data()); + // fetch cache line + store.decode(block_index, p->data()); + } + return p; + } + + // default number of cache lines for array with given number of blocks + static uint lines(size_t blocks) + { + // compute m = O(sqrt(n)) + size_t m; + for (m = 1; m * m < blocks; m *= 2); + return static_cast<uint>(m); + } + + // number of cache lines corresponding to size (or suggested size if zero) + static uint lines(size_t bytes, size_t blocks) + { + // ensure block index fits in tag + if (blocks >> ((sizeof(uint) * CHAR_BIT) - 1)) + throw zfp::exception("zfp array too large for cache"); + uint n = bytes ? static_cast<uint>((bytes + sizeof(CacheLine) - 1) / sizeof(CacheLine)) : lines(blocks); + return std::max(n, 1u); + } + + mutable Cache<CacheLine> cache; // cache of decompressed blocks + Store& store; // store backed by cache +}; + +} // internal +} // zfp + +#endif diff --git a/include/zfp/internal/array/exception.hpp b/include/zfp/internal/array/exception.hpp new file mode 100644 index 00000000..747bf6bd --- /dev/null +++ b/include/zfp/internal/array/exception.hpp @@ -0,0 +1,18 @@ +#ifndef ZFP_EXCEPTION_HPP +#define ZFP_EXCEPTION_HPP + +#include <stdexcept> +#include <string> + +namespace zfp { + +// generic exception thrown by array constructors +class exception : public std::runtime_error { +public: + exception(const std::string& msg) : runtime_error(msg) {} + virtual ~exception() throw() {} +}; + +} + +#endif diff --git a/include/zfp/internal/array/handle1.hpp b/include/zfp/internal/array/handle1.hpp new file mode 100644 index 00000000..72f5e91b --- /dev/null +++ b/include/zfp/internal/array/handle1.hpp @@ -0,0 +1,38 @@ +#ifndef ZFP_HANDLE1_HPP +#define ZFP_HANDLE1_HPP + +namespace zfp { +namespace internal { +namespace dim1 { + +// forward declarations +template <class Container> class const_reference; +template <class Container> class const_pointer; +template <class Container> class const_iterator; +template <class Container> class reference; +template <class Container> class pointer; +template <class Container> class iterator; + +// const handle to a 1D array or view element +template <class Container> +class const_handle { +public: + typedef Container container_type; + typedef typename container_type::value_type value_type; + +protected: + // protected constructor + explicit const_handle(const container_type* container, size_t x) : container(const_cast<container_type*>(container)), x(x) {} + + // dereference handle + value_type get() const { return container->get(x); } + + container_type* container; // container + size_t x; // global element index +}; + +} // dim1 +} // internal +} // zfp + +#endif diff --git a/include/zfp/internal/array/handle2.hpp b/include/zfp/internal/array/handle2.hpp new file mode 100644 index
00000000..17b5043e --- /dev/null +++ b/include/zfp/internal/array/handle2.hpp @@ -0,0 +1,38 @@ +#ifndef ZFP_HANDLE2_HPP +#define ZFP_HANDLE2_HPP + +namespace zfp { +namespace internal { +namespace dim2 { + +// forward declarations +template <class Container> class const_reference; +template <class Container> class const_pointer; +template <class Container> class const_iterator; +template <class Container> class reference; +template <class Container> class pointer; +template <class Container> class iterator; + +// const handle to a 2D array or view element +template <class Container> +class const_handle { +public: + typedef Container container_type; + typedef typename container_type::value_type value_type; + +protected: + // protected constructor + explicit const_handle(const container_type* container, size_t x, size_t y) : container(const_cast<container_type*>(container)), x(x), y(y) {} + + // dereference handle + value_type get() const { return container->get(x, y); } + + container_type* container; // container + size_t x, y; // global element index +}; + +} // dim2 +} // internal +} // zfp + +#endif diff --git a/include/zfp/internal/array/handle3.hpp b/include/zfp/internal/array/handle3.hpp new file mode 100644 index 00000000..139b1d55 --- /dev/null +++ b/include/zfp/internal/array/handle3.hpp @@ -0,0 +1,38 @@ +#ifndef ZFP_HANDLE3_HPP +#define ZFP_HANDLE3_HPP + +namespace zfp { +namespace internal { +namespace dim3 { + +// forward declarations +template <class Container> class const_reference; +template <class Container> class const_pointer; +template <class Container> class const_iterator; +template <class Container> class reference; +template <class Container> class pointer; +template <class Container> class iterator; + +// const handle to a 3D array or view element +template <class Container> +class const_handle { +public: + typedef Container container_type; + typedef typename container_type::value_type value_type; + +protected: + // protected constructor + explicit const_handle(const container_type* container, size_t x, size_t y, size_t z) : container(const_cast<container_type*>(container)), x(x), y(y), z(z) {} + + // dereference handle + value_type get() const { return container->get(x, y, z); } + + container_type* container; // container
+ size_t x, y, z; // global element index +}; + +} // dim3 +} // internal +} // zfp + +#endif diff --git a/include/zfp/internal/array/handle4.hpp b/include/zfp/internal/array/handle4.hpp new file mode 100644 index 00000000..da9ca385 --- /dev/null +++ b/include/zfp/internal/array/handle4.hpp @@ -0,0 +1,38 @@ +#ifndef ZFP_HANDLE4_HPP +#define ZFP_HANDLE4_HPP + +namespace zfp { +namespace internal { +namespace dim4 { + +// forward declarations +template <class Container> class const_reference; +template <class Container> class const_pointer; +template <class Container> class const_iterator; +template <class Container> class reference; +template <class Container> class pointer; +template <class Container> class iterator; + +// const handle to a 4D array or view element +template <class Container> +class const_handle { +public: + typedef Container container_type; + typedef typename container_type::value_type value_type; + +protected: + // protected constructor + explicit const_handle(const container_type* container, size_t x, size_t y, size_t z, size_t w) : container(const_cast<container_type*>(container)), x(x), y(y), z(z), w(w) {} + + // dereference handle + value_type get() const { return container->get(x, y, z, w); } + + container_type* container; // container + size_t x, y, z, w; // global element index +}; + +} // dim4 +} // internal +} // zfp + +#endif diff --git a/include/zfp/internal/array/header.hpp b/include/zfp/internal/array/header.hpp new file mode 100644 index 00000000..7d9146de --- /dev/null +++ b/include/zfp/internal/array/header.hpp @@ -0,0 +1,41 @@ +// abstract base class for array header +class header { +public: + // default constructor + header() : + type(zfp_type_none), + nx(0), ny(0), nz(0), nw(0) + {} + + // constructor + header(const zfp::array& a) : + type(a.type), + nx(a.nx), ny(a.ny), nz(a.nz), nw(a.nw) + {} + + // destructor + virtual ~header() {} + + // array scalar type + zfp_type scalar_type() const { return type; } + + // array dimensionality + uint dimensionality() const { return nw ? 4 : nz ? 3 : ny ? 2 : nx ?
1 : 0; } + + // array dimensions + size_t size_x() const { return nx; } + size_t size_y() const { return ny; } + size_t size_z() const { return nz; } + size_t size_w() const { return nw; } + + // rate in bits per value + virtual double rate() const = 0; + + // header payload: data pointer and byte size + virtual const void* data() const = 0; + virtual size_t size_bytes(uint mask = ZFP_DATA_HEADER) const = 0; + +protected: + zfp_type type; // array scalar type + size_t nx, ny, nz, nw; // array dimensions +}; diff --git a/include/zfp/internal/array/iterator1.hpp b/include/zfp/internal/array/iterator1.hpp new file mode 100644 index 00000000..73d5197d --- /dev/null +++ b/include/zfp/internal/array/iterator1.hpp @@ -0,0 +1,137 @@ +#ifndef ZFP_ITERATOR1_HPP +#define ZFP_ITERATOR1_HPP + +namespace zfp { +namespace internal { +namespace dim1 { + +// random access const iterator that visits 1D array or view block by block +template <class Container> +class const_iterator : public const_handle<Container> { +public: + // typedefs for STL compatibility + typedef Container container_type; + typedef typename container_type::value_type value_type; + typedef ptrdiff_t difference_type; + typedef zfp::internal::dim1::reference<Container> reference; + typedef zfp::internal::dim1::pointer<Container> pointer; + typedef std::random_access_iterator_tag iterator_category; + + typedef zfp::internal::dim1::const_reference<Container> const_reference; + typedef zfp::internal::dim1::const_pointer<Container> const_pointer; + + // default constructor + const_iterator() : const_handle<Container>(0, 0) {} + + // constructor + explicit const_iterator(const container_type* container, size_t x) : const_handle<Container>(container, x) {} + + // dereference iterator + const_reference operator*() const { return const_reference(container, x); } + const_reference operator[](difference_type d) const { return *operator+(d); } + + // iterator arithmetic + const_iterator operator+(difference_type d) const { const_iterator it = *this; it.advance(d); return it; } + const_iterator
operator-(difference_type d) const { return operator+(-d); } + difference_type operator-(const const_iterator& it) const { return offset() - it.offset(); } + + // equality operators + bool operator==(const const_iterator& it) const { return container == it.container && x == it.x; } + bool operator!=(const const_iterator& it) const { return !operator==(it); } + + // relational operators + bool operator<=(const const_iterator& it) const { return container == it.container && offset() <= it.offset(); } + bool operator>=(const const_iterator& it) const { return container == it.container && offset() >= it.offset(); } + bool operator<(const const_iterator& it) const { return container == it.container && offset() < it.offset(); } + bool operator>(const const_iterator& it) const { return container == it.container && offset() > it.offset(); } + + // increment and decrement + const_iterator& operator++() { increment(); return *this; } + const_iterator& operator--() { decrement(); return *this; } + const_iterator operator++(int) { const_iterator it = *this; increment(); return it; } + const_iterator operator--(int) { const_iterator it = *this; decrement(); return it; } + const_iterator operator+=(difference_type d) { advance(+d); return *this; } + const_iterator operator-=(difference_type d) { advance(-d); return *this; } + + // local container index of value referenced by iterator + size_t i() const { return x - container->min_x(); } + +protected: + // sequential offset associated with index x plus delta d + difference_type offset(difference_type d = 0) const { return static_cast(x - container->min_x() + size_t(d)); } + + // index x associated with sequential offset p + void index(size_t& x, difference_type p) const { x = container->min_x() + size_t(p); } + + // advance iterator by d + void advance(difference_type d) { index(x, offset(d)); } + + // increment iterator to next element + void increment() { ++x; } + + // decrement iterator to previous element + void decrement() { 
--x; } + + using const_handle<Container>::container; + using const_handle<Container>::x; +}; + +// random access iterator that visits 1D array or view block by block +template <class Container> +class iterator : public const_iterator<Container> { +public: + // typedefs for STL compatibility + typedef Container container_type; + typedef typename container_type::value_type value_type; + typedef ptrdiff_t difference_type; + typedef zfp::internal::dim1::reference<Container> reference; + typedef zfp::internal::dim1::pointer<Container> pointer; + typedef std::random_access_iterator_tag iterator_category; + + // default constructor + iterator() : const_iterator<Container>(0, 0) {} + + // constructor + explicit iterator(container_type* container, size_t i) : const_iterator<Container>(container, i) {} + + // dereference iterator + reference operator*() const { return reference(container, x); } + reference operator[](difference_type d) const { return *operator+(d); } + + // iterator arithmetic + iterator operator+(difference_type d) const { iterator it = *this; it.advance(d); return it; } + iterator operator-(difference_type d) const { return operator+(-d); } + difference_type operator-(const iterator& it) const { return offset() - it.offset(); } + + // equality operators + bool operator==(const iterator& it) const { return container == it.container && x == it.x; } + bool operator!=(const iterator& it) const { return !operator==(it); } + + // relational operators + bool operator<=(const iterator& it) const { return container == it.container && offset() <= it.offset(); } + bool operator>=(const iterator& it) const { return container == it.container && offset() >= it.offset(); } + bool operator<(const iterator& it) const { return container == it.container && offset() < it.offset(); } + bool operator>(const iterator& it) const { return container == it.container && offset() > it.offset(); } + + // increment and decrement + iterator& operator++() { increment(); return *this; } + iterator& operator--() { decrement(); return *this; } + iterator operator++(int) { iterator it
= *this; increment(); return it; } + iterator operator--(int) { iterator it = *this; decrement(); return it; } + iterator operator+=(difference_type d) { advance(+d); return *this; } + iterator operator-=(difference_type d) { advance(-d); return *this; } + +protected: + using const_iterator<Container>::offset; + using const_iterator<Container>::advance; + using const_iterator<Container>::increment; + using const_iterator<Container>::decrement; + using const_iterator<Container>::container; + using const_iterator<Container>::x; +}; + +} // dim1 +} // internal +} // zfp + +#endif diff --git a/include/zfp/internal/array/iterator2.hpp b/include/zfp/internal/array/iterator2.hpp new file mode 100644 index 00000000..433d1825 --- /dev/null +++ b/include/zfp/internal/array/iterator2.hpp @@ -0,0 +1,230 @@ +#ifndef ZFP_ITERATOR2_HPP +#define ZFP_ITERATOR2_HPP + +namespace zfp { +namespace internal { +namespace dim2 { + +// random access const iterator that visits 2D array or view block by block +template <class Container> +class const_iterator : public const_handle<Container> { +public: + // typedefs for STL compatibility + typedef Container container_type; + typedef typename container_type::value_type value_type; + typedef ptrdiff_t difference_type; + typedef zfp::internal::dim2::reference<Container> reference; + typedef zfp::internal::dim2::pointer<Container> pointer; + typedef std::random_access_iterator_tag iterator_category; + + typedef zfp::internal::dim2::const_reference<Container> const_reference; + typedef zfp::internal::dim2::const_pointer<Container> const_pointer; + + // default constructor + const_iterator() : const_handle<Container>(0, 0, 0) {} + + // constructor + explicit const_iterator(const container_type* container, size_t x, size_t y) : const_handle<Container>(container, x, y) {} + + // dereference iterator + const_reference operator*() const { return const_reference(container, x, y); } + const_reference operator[](difference_type d) const { return *operator+(d); } + + // iterator arithmetic + const_iterator operator+(difference_type d) const { const_iterator it = *this; it.advance(d); return it; } + const_iterator
operator-(difference_type d) const { return operator+(-d); } + difference_type operator-(const const_iterator& it) const { return offset() - it.offset(); } + + // equality operators + bool operator==(const const_iterator& it) const { return container == it.container && x == it.x && y == it.y; } + bool operator!=(const const_iterator& it) const { return !operator==(it); } + + // relational operators + bool operator<=(const const_iterator& it) const { return container == it.container && offset() <= it.offset(); } + bool operator>=(const const_iterator& it) const { return container == it.container && offset() >= it.offset(); } + bool operator<(const const_iterator& it) const { return container == it.container && offset() < it.offset(); } + bool operator>(const const_iterator& it) const { return container == it.container && offset() > it.offset(); } + + // increment and decrement + const_iterator& operator++() { increment(); return *this; } + const_iterator& operator--() { decrement(); return *this; } + const_iterator operator++(int) { const_iterator it = *this; increment(); return it; } + const_iterator operator--(int) { const_iterator it = *this; decrement(); return it; } + const_iterator operator+=(difference_type d) { advance(+d); return *this; } + const_iterator operator-=(difference_type d) { advance(-d); return *this; } + + // local container index of value referenced by iterator + size_t i() const { return x - container->min_x(); } + size_t j() const { return y - container->min_y(); } + +protected: + // sequential offset associated with index (x, y) plus delta d + difference_type offset(difference_type d = 0) const + { + difference_type p = d; + size_t xmin = container->min_x(); + size_t xmax = container->max_x(); + size_t ymin = container->min_y(); + size_t ymax = container->max_y(); + size_t nx = xmax - xmin; + size_t ny = ymax - ymin; + if (y == ymax) + p += nx * ny; + else { + size_t m = ~size_t(3); + size_t by = std::max(y & m, ymin); size_t sy = 
std::min((by + 4) & m, ymax) - by; p += (by - ymin) * nx; + size_t bx = std::max(x & m, xmin); size_t sx = std::min((bx + 4) & m, xmax) - bx; p += (bx - xmin) * sy; + p += (y - by) * sx; + p += (x - bx); + } + return p; + } + + // index (x, y) associated with sequential offset p + void index(size_t& x, size_t& y, difference_type p) const + { + size_t xmin = container->min_x(); + size_t xmax = container->max_x(); + size_t ymin = container->min_y(); + size_t ymax = container->max_y(); + size_t nx = xmax - xmin; + size_t ny = ymax - ymin; + if (size_t(p) == nx * ny) { + x = xmin; + y = ymax; + } + else { + size_t m = ~size_t(3); + size_t by = std::max((ymin + size_t(p / ptrdiff_t(nx))) & m, ymin); size_t sy = std::min((by + 4) & m, ymax) - by; p -= (by - ymin) * nx; + size_t bx = std::max((xmin + size_t(p / ptrdiff_t(sy))) & m, xmin); size_t sx = std::min((bx + 4) & m, xmax) - bx; p -= (bx - xmin) * sy; + y = by + size_t(p / ptrdiff_t(sx)); p -= (y - by) * sx; + x = bx + size_t(p); p -= (x - bx); + } + } + + // advance iterator by d + void advance(difference_type d) { index(x, y, offset(d)); } + + // increment iterator to next element + void increment() + { + size_t xmin = container->min_x(); + size_t xmax = container->max_x(); + size_t ymin = container->min_y(); + size_t ymax = container->max_y(); + size_t m = ~size_t(3); + ++x; + if (!(x & 3u) || x == xmax) { + x = std::max((x - 1) & m, xmin); + ++y; + if (!(y & 3u) || y == ymax) { + y = std::max((y - 1) & m, ymin); + // done with block; advance to next + x = (x + 4) & m; + if (x >= xmax) { + x = xmin; + y = (y + 4) & m; + if (y >= ymax) + y = ymax; + } + } + } + } + + // decrement iterator to previous element + void decrement() + { + size_t xmin = container->min_x(); + size_t xmax = container->max_x(); + size_t ymin = container->min_y(); + size_t ymax = container->max_y(); + size_t m = ~size_t(3); + if (y == ymax) { + x = xmax - 1; + y = ymax - 1; + } + else { + if (!(x & 3u) || x == xmin) { + x = std::min((x + 4) 
& m, xmax); + if (!(y & 3u) || y == ymin) { + y = std::min((y + 4) & m, ymax); + // done with block; advance to next + x = (x - 1) & m; + if (x <= xmin) { + x = xmax; + y = (y - 1) & m; + if (y <= ymin) + y = ymin; + } + } + --y; + } + --x; + } + } + + using const_handle::container; + using const_handle::x; + using const_handle::y; +}; + +// random access iterator that visits 2D array or view block by block +template +class iterator : public const_iterator { +public: + // typedefs for STL compatibility + typedef Container container_type; + typedef typename container_type::value_type value_type; + typedef ptrdiff_t difference_type; + typedef zfp::internal::dim2::reference reference; + typedef zfp::internal::dim2::pointer pointer; + typedef std::random_access_iterator_tag iterator_category; + + // default constructor + iterator() : const_iterator(0, 0, 0) {} + + // constructor + explicit iterator(container_type* container, size_t x, size_t y) : const_iterator(container, x, y) {} + + // dereference iterator + reference operator*() const { return reference(container, x, y); } + reference operator[](difference_type d) const { return *operator+(d); } + + // iterator arithmetic + iterator operator+(difference_type d) const { iterator it = *this; it.advance(d); return it; } + iterator operator-(difference_type d) const { return operator+(-d); } + difference_type operator-(const iterator& it) const { return offset() - it.offset(); } + + // equality operators + bool operator==(const iterator& it) const { return container == it.container && x == it.x && y == it.y; } + bool operator!=(const iterator& it) const { return !operator==(it); } + + // relational operators + bool operator<=(const iterator& it) const { return container == it.container && offset() <= it.offset(); } + bool operator>=(const iterator& it) const { return container == it.container && offset() >= it.offset(); } + bool operator<(const iterator& it) const { return container == it.container && offset() < 
it.offset(); } + bool operator>(const iterator& it) const { return container == it.container && offset() > it.offset(); } + + // increment and decrement + iterator& operator++() { increment(); return *this; } + iterator& operator--() { decrement(); return *this; } + iterator operator++(int) { iterator it = *this; increment(); return it; } + iterator operator--(int) { iterator it = *this; decrement(); return it; } + iterator operator+=(difference_type d) { advance(+d); return *this; } + iterator operator-=(difference_type d) { advance(-d); return *this; } + +protected: + using const_iterator::offset; + using const_iterator::advance; + using const_iterator::increment; + using const_iterator::decrement; + using const_iterator::container; + using const_iterator::x; + using const_iterator::y; +}; + +} // dim2 +} // internal +} // zfp + +#endif diff --git a/include/zfp/internal/array/iterator3.hpp b/include/zfp/internal/array/iterator3.hpp new file mode 100644 index 00000000..aa46b5ff --- /dev/null +++ b/include/zfp/internal/array/iterator3.hpp @@ -0,0 +1,265 @@ +#ifndef ZFP_ITERATOR3_HPP +#define ZFP_ITERATOR3_HPP + +namespace zfp { +namespace internal { +namespace dim3 { + +// random access const iterator that visits 3D array or view block by block +template +class const_iterator : public const_handle { +public: + // typedefs for STL compatibility + typedef Container container_type; + typedef typename container_type::value_type value_type; + typedef ptrdiff_t difference_type; + typedef zfp::internal::dim3::reference reference; + typedef zfp::internal::dim3::pointer pointer; + typedef std::random_access_iterator_tag iterator_category; + + typedef zfp::internal::dim3::const_reference const_reference; + typedef zfp::internal::dim3::const_pointer const_pointer; + + // default constructor + const_iterator() : const_handle(0, 0, 0, 0) {} + + // constructor + explicit const_iterator(const container_type* container, size_t x, size_t y, size_t z) : const_handle(container, x, y, 
z) {} + + // dereference iterator + const_reference operator*() const { return const_reference(container, x, y, z); } + const_reference operator[](difference_type d) const { return *operator+(d); } + + // iterator arithmetic + const_iterator operator+(difference_type d) const { const_iterator it = *this; it.advance(d); return it; } + const_iterator operator-(difference_type d) const { return operator+(-d); } + difference_type operator-(const const_iterator& it) const { return offset() - it.offset(); } + + // equality operators + bool operator==(const const_iterator& it) const { return container == it.container && x == it.x && y == it.y && z == it.z; } + bool operator!=(const const_iterator& it) const { return !operator==(it); } + + // relational operators + bool operator<=(const const_iterator& it) const { return container == it.container && offset() <= it.offset(); } + bool operator>=(const const_iterator& it) const { return container == it.container && offset() >= it.offset(); } + bool operator<(const const_iterator& it) const { return container == it.container && offset() < it.offset(); } + bool operator>(const const_iterator& it) const { return container == it.container && offset() > it.offset(); } + + // increment and decrement + const_iterator& operator++() { increment(); return *this; } + const_iterator& operator--() { decrement(); return *this; } + const_iterator operator++(int) { const_iterator it = *this; increment(); return it; } + const_iterator operator--(int) { const_iterator it = *this; decrement(); return it; } + const_iterator operator+=(difference_type d) { advance(+d); return *this; } + const_iterator operator-=(difference_type d) { advance(-d); return *this; } + + // local container index of value referenced by iterator + size_t i() const { return x - container->min_x(); } + size_t j() const { return y - container->min_y(); } + size_t k() const { return z - container->min_z(); } + +protected: + // sequential offset associated with index (x, y, 
z) plus delta d + difference_type offset(difference_type d = 0) const + { + difference_type p = d; + size_t xmin = container->min_x(); + size_t xmax = container->max_x(); + size_t ymin = container->min_y(); + size_t ymax = container->max_y(); + size_t zmin = container->min_z(); + size_t zmax = container->max_z(); + size_t nx = xmax - xmin; + size_t ny = ymax - ymin; + size_t nz = zmax - zmin; + if (z == zmax) + p += nx * ny * nz; + else { + size_t m = ~size_t(3); + size_t bz = std::max(z & m, zmin); size_t sz = std::min((bz + 4) & m, zmax) - bz; p += (bz - zmin) * nx * ny; + size_t by = std::max(y & m, ymin); size_t sy = std::min((by + 4) & m, ymax) - by; p += (by - ymin) * nx * sz; + size_t bx = std::max(x & m, xmin); size_t sx = std::min((bx + 4) & m, xmax) - bx; p += (bx - xmin) * sy * sz; + p += (z - bz) * sx * sy; + p += (y - by) * sx; + p += (x - bx); + } + return p; + } + + // index (x, y, z) associated with sequential offset p + void index(size_t& x, size_t& y, size_t& z, difference_type p) const + { + size_t xmin = container->min_x(); + size_t xmax = container->max_x(); + size_t ymin = container->min_y(); + size_t ymax = container->max_y(); + size_t zmin = container->min_z(); + size_t zmax = container->max_z(); + size_t nx = xmax - xmin; + size_t ny = ymax - ymin; + size_t nz = zmax - zmin; + if (size_t(p) == nx * ny * nz) { + x = xmin; + y = ymin; + z = zmax; + } + else { + size_t m = ~size_t(3); + size_t bz = std::max((zmin + size_t(p / ptrdiff_t(nx * ny))) & m, zmin); size_t sz = std::min((bz + 4) & m, zmax) - bz; p -= (bz - zmin) * nx * ny; + size_t by = std::max((ymin + size_t(p / ptrdiff_t(nx * sz))) & m, ymin); size_t sy = std::min((by + 4) & m, ymax) - by; p -= (by - ymin) * nx * sz; + size_t bx = std::max((xmin + size_t(p / ptrdiff_t(sy * sz))) & m, xmin); size_t sx = std::min((bx + 4) & m, xmax) - bx; p -= (bx - xmin) * sy * sz; + z = bz + size_t(p / ptrdiff_t(sx * sy)); p -= (z - bz) * sx * sy; + y = by + size_t(p / ptrdiff_t(sx)); p -= (y - by) 
* sx; + x = bx + size_t(p); p -= (x - bx); + } + } + + // advance iterator by d + void advance(difference_type d) { index(x, y, z, offset(d)); } + + // increment iterator to next element + void increment() + { + size_t xmin = container->min_x(); + size_t xmax = container->max_x(); + size_t ymin = container->min_y(); + size_t ymax = container->max_y(); + size_t zmin = container->min_z(); + size_t zmax = container->max_z(); + size_t m = ~size_t(3); + ++x; + if (!(x & 3u) || x == xmax) { + x = std::max((x - 1) & m, xmin); + ++y; + if (!(y & 3u) || y == ymax) { + y = std::max((y - 1) & m, ymin); + ++z; + if (!(z & 3u) || z == zmax) { + z = std::max((z - 1) & m, zmin); + // done with block; advance to next + x = (x + 4) & m; + if (x >= xmax) { + x = xmin; + y = (y + 4) & m; + if (y >= ymax) { + y = ymin; + z = (z + 4) & m; + if (z >= zmax) + z = zmax; + } + } + } + } + } + } + + // decrement iterator to previous element + void decrement() + { + size_t xmin = container->min_x(); + size_t xmax = container->max_x(); + size_t ymin = container->min_y(); + size_t ymax = container->max_y(); + size_t zmin = container->min_z(); + size_t zmax = container->max_z(); + size_t m = ~size_t(3); + if (z == zmax) { + x = xmax - 1; + y = ymax - 1; + z = zmax - 1; + } + else { + if (!(x & 3u) || x == xmin) { + x = std::min((x + 4) & m, xmax); + if (!(y & 3u) || y == ymin) { + y = std::min((y + 4) & m, ymax); + if (!(z & 3u) || z == zmin) { + z = std::min((z + 4) & m, zmax); + // done with block; advance to next + x = (x - 1) & m; + if (x <= xmin) { + x = xmax; + y = (y - 1) & m; + if (y <= ymin) { + y = ymax; + z = (z - 1) & m; + if (z <= zmin) + z = zmin; + } + } + } + --z; + } + --y; + } + --x; + } + } + + using const_handle::container; + using const_handle::x; + using const_handle::y; + using const_handle::z; +}; + +// random access iterator that visits 3D array or view block by block +template +class iterator : public const_iterator { +public: + // typedefs for STL compatibility + 
typedef Container container_type; + typedef typename container_type::value_type value_type; + typedef ptrdiff_t difference_type; + typedef zfp::internal::dim3::reference reference; + typedef zfp::internal::dim3::pointer pointer; + typedef std::random_access_iterator_tag iterator_category; + + // default constructor + iterator() : const_iterator(0, 0, 0, 0) {} + + // constructor + explicit iterator(container_type* container, size_t x, size_t y, size_t z) : const_iterator(container, x, y, z) {} + + // dereference iterator + reference operator*() const { return reference(container, x, y, z); } + reference operator[](difference_type d) const { return *operator+(d); } + + // iterator arithmetic + iterator operator+(difference_type d) const { iterator it = *this; it.advance(d); return it; } + iterator operator-(difference_type d) const { return operator+(-d); } + difference_type operator-(const iterator& it) const { return offset() - it.offset(); } + + // equality operators + bool operator==(const iterator& it) const { return container == it.container && x == it.x && y == it.y && z == it.z; } + bool operator!=(const iterator& it) const { return !operator==(it); } + + // relational operators + bool operator<=(const iterator& it) const { return container == it.container && offset() <= it.offset(); } + bool operator>=(const iterator& it) const { return container == it.container && offset() >= it.offset(); } + bool operator<(const iterator& it) const { return container == it.container && offset() < it.offset(); } + bool operator>(const iterator& it) const { return container == it.container && offset() > it.offset(); } + + // increment and decrement + iterator& operator++() { increment(); return *this; } + iterator& operator--() { decrement(); return *this; } + iterator operator++(int) { iterator it = *this; increment(); return it; } + iterator operator--(int) { iterator it = *this; decrement(); return it; } + iterator operator+=(difference_type d) { advance(+d); return 
*this; } + iterator operator-=(difference_type d) { advance(-d); return *this; } + +protected: + using const_iterator::offset; + using const_iterator::advance; + using const_iterator::increment; + using const_iterator::decrement; + using const_iterator::container; + using const_iterator::x; + using const_iterator::y; + using const_iterator::z; +}; + +} // dim3 +} // internal +} // zfp + +#endif diff --git a/include/zfp/internal/array/iterator4.hpp b/include/zfp/internal/array/iterator4.hpp new file mode 100644 index 00000000..00b941a5 --- /dev/null +++ b/include/zfp/internal/array/iterator4.hpp @@ -0,0 +1,300 @@ +#ifndef ZFP_ITERATOR4_HPP +#define ZFP_ITERATOR4_HPP + +namespace zfp { +namespace internal { +namespace dim4 { + +// random access const iterator that visits 4D array or view block by block +template +class const_iterator : public const_handle { +public: + // typedefs for STL compatibility + typedef Container container_type; + typedef typename container_type::value_type value_type; + typedef ptrdiff_t difference_type; + typedef zfp::internal::dim4::reference reference; + typedef zfp::internal::dim4::pointer pointer; + typedef std::random_access_iterator_tag iterator_category; + + typedef zfp::internal::dim4::const_reference const_reference; + typedef zfp::internal::dim4::const_pointer const_pointer; + + // default constructor + const_iterator() : const_handle(0, 0, 0, 0, 0) {} + + // constructor + explicit const_iterator(const container_type* container, size_t x, size_t y, size_t z, size_t w) : const_handle(container, x, y, z, w) {} + + // dereference iterator + const_reference operator*() const { return const_reference(container, x, y, z, w); } + const_reference operator[](difference_type d) const { return *operator+(d); } + + // iterator arithmetic + const_iterator operator+(difference_type d) const { const_iterator it = *this; it.advance(d); return it; } + const_iterator operator-(difference_type d) const { return operator+(-d); } + difference_type 
operator-(const const_iterator& it) const { return offset() - it.offset(); } + + // equality operators + bool operator==(const const_iterator& it) const { return container == it.container && x == it.x && y == it.y && z == it.z && w == it.w; } + bool operator!=(const const_iterator& it) const { return !operator==(it); } + + // relational operators + bool operator<=(const const_iterator& it) const { return container == it.container && offset() <= it.offset(); } + bool operator>=(const const_iterator& it) const { return container == it.container && offset() >= it.offset(); } + bool operator<(const const_iterator& it) const { return container == it.container && offset() < it.offset(); } + bool operator>(const const_iterator& it) const { return container == it.container && offset() > it.offset(); } + + // increment and decrement + const_iterator& operator++() { increment(); return *this; } + const_iterator& operator--() { decrement(); return *this; } + const_iterator operator++(int) { const_iterator it = *this; increment(); return it; } + const_iterator operator--(int) { const_iterator it = *this; decrement(); return it; } + const_iterator operator+=(difference_type d) { advance(+d); return *this; } + const_iterator operator-=(difference_type d) { advance(-d); return *this; } + + // local container index of value referenced by iterator + size_t i() const { return x - container->min_x(); } + size_t j() const { return y - container->min_y(); } + size_t k() const { return z - container->min_z(); } + size_t l() const { return w - container->min_w(); } + +protected: + // sequential offset associated with index (x, y, z, w) plus delta d + difference_type offset(difference_type d = 0) const + { + difference_type p = d; + size_t xmin = container->min_x(); + size_t xmax = container->max_x(); + size_t ymin = container->min_y(); + size_t ymax = container->max_y(); + size_t zmin = container->min_z(); + size_t zmax = container->max_z(); + size_t wmin = container->min_w(); + size_t 
wmax = container->max_w(); + size_t nx = xmax - xmin; + size_t ny = ymax - ymin; + size_t nz = zmax - zmin; + size_t nw = wmax - wmin; + if (w == wmax) + p += nx * ny * nz * nw; + else { + size_t m = ~size_t(3); + size_t bw = std::max(w & m, wmin); size_t sw = std::min((bw + 4) & m, wmax) - bw; p += (bw - wmin) * nx * ny * nz; + size_t bz = std::max(z & m, zmin); size_t sz = std::min((bz + 4) & m, zmax) - bz; p += (bz - zmin) * nx * ny * sw; + size_t by = std::max(y & m, ymin); size_t sy = std::min((by + 4) & m, ymax) - by; p += (by - ymin) * nx * sz * sw; + size_t bx = std::max(x & m, xmin); size_t sx = std::min((bx + 4) & m, xmax) - bx; p += (bx - xmin) * sy * sz * sw; + p += (w - bw) * sx * sy * sz; + p += (z - bz) * sx * sy; + p += (y - by) * sx; + p += (x - bx); + } + return p; + } + + // index (x, y, z, w) associated with sequential offset p + void index(size_t& x, size_t& y, size_t& z, size_t& w, difference_type p) const + { + size_t xmin = container->min_x(); + size_t xmax = container->max_x(); + size_t ymin = container->min_y(); + size_t ymax = container->max_y(); + size_t zmin = container->min_z(); + size_t zmax = container->max_z(); + size_t wmin = container->min_w(); + size_t wmax = container->max_w(); + size_t nx = xmax - xmin; + size_t ny = ymax - ymin; + size_t nz = zmax - zmin; + size_t nw = wmax - wmin; + if (size_t(p) == nx * ny * nz * nw) { + x = xmin; + y = ymin; + z = zmin; + w = wmax; + } + else { + size_t m = ~size_t(3); + size_t bw = std::max((wmin + size_t(p / ptrdiff_t(nx * ny * nz))) & m, wmin); size_t sw = std::min((bw + 4) & m, wmax) - bw; p -= (bw - wmin) * nx * ny * nz; + size_t bz = std::max((zmin + size_t(p / ptrdiff_t(nx * ny * sw))) & m, zmin); size_t sz = std::min((bz + 4) & m, zmax) - bz; p -= (bz - zmin) * nx * ny * sw; + size_t by = std::max((ymin + size_t(p / ptrdiff_t(nx * sz * sw))) & m, ymin); size_t sy = std::min((by + 4) & m, ymax) - by; p -= (by - ymin) * nx * sz * sw; + size_t bx = std::max((xmin + size_t(p / 
ptrdiff_t(sy * sz * sw))) & m, xmin); size_t sx = std::min((bx + 4) & m, xmax) - bx; p -= (bx - xmin) * sy * sz * sw; + w = bw + size_t(p / ptrdiff_t(sx * sy * sz)); p -= (w - bw) * sx * sy * sz; + z = bz + size_t(p / ptrdiff_t(sx * sy)); p -= (z - bz) * sx * sy; + y = by + size_t(p / ptrdiff_t(sx)); p -= (y - by) * sx; + x = bx + size_t(p); p -= (x - bx); + } + } + + // advance iterator by d + void advance(difference_type d) { index(x, y, z, w, offset(d)); } + + // increment iterator to next element + void increment() + { + size_t xmin = container->min_x(); + size_t xmax = container->max_x(); + size_t ymin = container->min_y(); + size_t ymax = container->max_y(); + size_t zmin = container->min_z(); + size_t zmax = container->max_z(); + size_t wmin = container->min_w(); + size_t wmax = container->max_w(); + size_t m = ~size_t(3); + ++x; + if (!(x & 3u) || x == xmax) { + x = std::max((x - 1) & m, xmin); + ++y; + if (!(y & 3u) || y == ymax) { + y = std::max((y - 1) & m, ymin); + ++z; + if (!(z & 3u) || z == zmax) { + z = std::max((z - 1) & m, zmin); + ++w; + if (!(w & 3u) || w == wmax) { + w = std::max((w - 1) & m, wmin); + // done with block; advance to next + x = (x + 4) & m; + if (x >= xmax) { + x = xmin; + y = (y + 4) & m; + if (y >= ymax) { + y = ymin; + z = (z + 4) & m; + if (z >= zmax) { + z = zmin; + w = (w + 4) & m; + if (w >= wmax) + w = wmax; + } + } + } + } + } + } + } + } + + // decrement iterator to previous element + void decrement() + { + size_t xmin = container->min_x(); + size_t xmax = container->max_x(); + size_t ymin = container->min_y(); + size_t ymax = container->max_y(); + size_t zmin = container->min_z(); + size_t zmax = container->max_z(); + size_t wmin = container->min_w(); + size_t wmax = container->max_w(); + size_t m = ~size_t(3); + if (w == wmax) { + x = xmax - 1; + y = ymax - 1; + z = zmax - 1; + w = wmax - 1; + } + else { + if (!(x & 3u) || x == xmin) { + x = std::min((x + 4) & m, xmax); + if (!(y & 3u) || y == ymin) { + y = 
std::min((y + 4) & m, ymax); + if (!(z & 3u) || z == zmin) { + z = std::min((z + 4) & m, zmax); + if (!(w & 3u) || w == wmin) { + w = std::min((w + 4) & m, wmax); + // done with block; advance to next + x = (x - 1) & m; + if (x <= xmin) { + x = xmax; + y = (y - 1) & m; + if (y <= ymin) { + y = ymax; + z = (z - 1) & m; + if (z <= zmin) { + z = zmax; + w = (w - 1) & m; + if (w <= wmin) + w = wmin; + } + } + } + } + --w; + } + --z; + } + --y; + } + --x; + } + } + + using const_handle::container; + using const_handle::x; + using const_handle::y; + using const_handle::z; + using const_handle::w; +}; + +// random access iterator that visits 4D array or view block by block +template +class iterator : public const_iterator { +public: + // typedefs for STL compatibility + typedef Container container_type; + typedef typename container_type::value_type value_type; + typedef ptrdiff_t difference_type; + typedef zfp::internal::dim4::reference reference; + typedef zfp::internal::dim4::pointer pointer; + typedef std::random_access_iterator_tag iterator_category; + + // default constructor + iterator() : const_iterator(0, 0, 0, 0, 0) {} + + // constructor + explicit iterator(container_type* container, size_t x, size_t y, size_t z, size_t w) : const_iterator(container, x, y, z, w) {} + + // dereference iterator + reference operator*() const { return reference(container, x, y, z, w); } + reference operator[](difference_type d) const { return *operator+(d); } + + // iterator arithmetic + iterator operator+(difference_type d) const { iterator it = *this; it.advance(d); return it; } + iterator operator-(difference_type d) const { return operator+(-d); } + difference_type operator-(const iterator& it) const { return offset() - it.offset(); } + + // equality operators + bool operator==(const iterator& it) const { return container == it.container && x == it.x && y == it.y && z == it.z && w == it.w; } + bool operator!=(const iterator& it) const { return !operator==(it); } + + // relational 
operators + bool operator<=(const iterator& it) const { return container == it.container && offset() <= it.offset(); } + bool operator>=(const iterator& it) const { return container == it.container && offset() >= it.offset(); } + bool operator<(const iterator& it) const { return container == it.container && offset() < it.offset(); } + bool operator>(const iterator& it) const { return container == it.container && offset() > it.offset(); } + + // increment and decrement + iterator& operator++() { increment(); return *this; } + iterator& operator--() { decrement(); return *this; } + iterator operator++(int) { iterator it = *this; increment(); return it; } + iterator operator--(int) { iterator it = *this; decrement(); return it; } + iterator operator+=(difference_type d) { advance(+d); return *this; } + iterator operator-=(difference_type d) { advance(-d); return *this; } + +protected: + using const_iterator::offset; + using const_iterator::advance; + using const_iterator::increment; + using const_iterator::decrement; + using const_iterator::container; + using const_iterator::x; + using const_iterator::y; + using const_iterator::z; + using const_iterator::w; +}; + +} // dim4 +} // internal +} // zfp + +#endif diff --git a/include/zfp/internal/array/memory.hpp b/include/zfp/internal/array/memory.hpp new file mode 100644 index 00000000..b6e7b9f6 --- /dev/null +++ b/include/zfp/internal/array/memory.hpp @@ -0,0 +1,200 @@ +#ifndef ZFP_MEMORY_HPP +#define ZFP_MEMORY_HPP + +// Memory management for POD types only. Templated functions are provided only +// to avoid the need for casts to/from void* in pass-by-reference calls. 
+
+#ifdef _WIN32
+extern "C" {
+  #ifdef __MINGW32__
+    #include <x86intrin.h>
+  #endif
+
+  #include <malloc.h>
+}
+#endif
+
+#include <algorithm>
+#include <cstdlib>
+#include <cstring>
+
+// byte alignment of compressed data
+#ifndef ZFP_MEMORY_ALIGNMENT
+  #define ZFP_MEMORY_ALIGNMENT 0x100u
+#endif
+
+#define unused_(x) ((void)(x))
+
+namespace zfp {
+namespace internal {
+
+// allocate size bytes
+inline void*
+allocate(size_t size)
+{
+  void* ptr = std::malloc(size);
+  if (!ptr)
+    throw std::bad_alloc();
+  return ptr;
+}
+
+// allocate size bytes with suggested alignment
+inline void*
+allocate_aligned(size_t size, size_t alignment)
+{
+  void* ptr = 0;
+
+#ifdef ZFP_WITH_ALIGNED_ALLOC
+  #if defined(__INTEL_COMPILER)
+    ptr = _mm_malloc(size, alignment);
+  #elif defined(__MINGW32__)
+    // require: alignment is an integer power of two
+    ptr = __mingw_aligned_malloc(size, alignment);
+  #elif defined(_WIN32)
+    // require: alignment is an integer power of two
+    ptr = _aligned_malloc(size, alignment);
+  #elif defined(__MACH__) || (_POSIX_C_SOURCE >= 200112L) || (_XOPEN_SOURCE >= 600)
+    // require: alignment is an integer power of two >= sizeof(void*)
+    posix_memalign(&ptr, alignment, size);
+  #else
+    // aligned allocation not supported; fall back on unaligned allocation
+    unused_(alignment);
+    ptr = allocate(size);
+  #endif
+#else
+  // aligned allocation not enabled; use unaligned allocation
+  unused_(alignment);
+  ptr = allocate(size);
+#endif
+
+  if (!ptr)
+    throw std::bad_alloc();
+
+  return ptr;
+}
+
+// deallocate memory pointed to by ptr
+inline void
+deallocate(void* ptr)
+{
+  std::free(ptr);
+}
+
+// deallocate aligned memory pointed to by ptr
+inline void
+deallocate_aligned(void* ptr)
+{
+  if (!ptr)
+    return;
+#ifdef ZFP_WITH_ALIGNED_ALLOC
+  #ifdef __INTEL_COMPILER
+    _mm_free(ptr);
+  #elif defined(__MINGW32__)
+    __mingw_aligned_free(ptr);
+  #elif defined(_WIN32)
+    _aligned_free(ptr);
+  #else
+    std::free(ptr);
+  #endif
+#else
+  std::free(ptr);
+#endif
+}
+
+// reallocate buffer to size bytes
+template <typename T>
+inline void
+reallocate(T*& ptr, size_t size, bool preserve = false)
+{
+  if (preserve)
+    ptr = static_cast<T*>(std::realloc(ptr, size));
+  else {
+    zfp::internal::deallocate(ptr);
+    ptr = static_cast<T*>(zfp::internal::allocate(size));
+  }
+}
+
+// reallocate buffer to new_size bytes with suggested alignment
+template <typename T>
+inline void
+reallocate_aligned(T*& ptr, size_t new_size, size_t alignment, size_t old_size = 0)
+{
+  void* p = ptr;
+  reallocate_aligned(p, new_size, alignment, old_size);
+  ptr = static_cast<T*>(p);
+}
+
+// untyped reallocate buffer to new_size bytes with suggested alignment
+template <>
+inline void
+reallocate_aligned(void*& ptr, size_t new_size, size_t alignment, size_t old_size)
+{
+  if (old_size) {
+    // reallocate while preserving contents
+    void* dst = zfp::internal::allocate_aligned(new_size, alignment);
+    std::memcpy(dst, ptr, std::min(old_size, new_size));
+    zfp::internal::deallocate_aligned(ptr);
+    ptr = dst;
+  }
+  else {
+    // reallocate without preserving contents
+    zfp::internal::deallocate_aligned(ptr);
+    ptr = zfp::internal::allocate_aligned(new_size, alignment);
+  }
+}
+
+// clone array 'T src[count]' to dst
+template <typename T>
+inline void
+clone(T*& dst, const T* src, size_t count)
+{
+  zfp::internal::deallocate(dst);
+  if (src) {
+    dst = static_cast<T*>(zfp::internal::allocate(count * sizeof(T)));
+    std::copy(src, src + count, dst);
+  }
+  else
+    dst = 0;
+}
+
+// clone array 'T src[count]' to dst with suggested alignment
+template <typename T>
+inline void
+clone_aligned(T*& dst, const T* src, size_t count, size_t alignment)
+{
+  void* d = dst;
+  const void* s = src;
+  clone_aligned(d, s, count * sizeof(T), alignment);
+  dst = static_cast<T*>(d);
+  src = static_cast<const T*>(s);
+}
+
+// untyped, aligned clone of size bytes
+template <>
+inline void
+clone_aligned(void*& dst, const void* src, size_t size, size_t alignment)
+{
+  zfp::internal::deallocate_aligned(dst);
+  if (src) {
+    dst = zfp::internal::allocate_aligned(size, alignment);
+    std::memcpy(dst, src, size);
+  }
+  else
+    dst = 0;
+}
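[Editor's aside, not part of the patch: the untyped `reallocate_aligned` above preserves contents by allocating a new block, copying `std::min(old_size, new_size)` bytes, and freeing the old block, since aligned allocations cannot portably be resized in place. A minimal standalone sketch of that allocate-copy-free pattern, with plain `malloc`/`free` standing in for the aligned allocator:]

```cpp
#include <algorithm>
#include <cassert>
#include <cstdlib>
#include <cstring>

// Allocate-copy-free sketch of preserve-on-reallocate: copy only as many
// bytes as both the old and new blocks can hold, then release the old block.
inline void* realloc_preserving(void* ptr, std::size_t old_size, std::size_t new_size)
{
  void* dst = std::malloc(new_size);
  std::memcpy(dst, ptr, std::min(old_size, new_size));
  std::free(ptr);
  return dst;
}
```

Growing a block this way leaves the first `old_size` bytes intact, matching the `old_size != 0` branch above.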
+
+// return smallest multiple of unit greater than or equal to size
+inline size_t
+round_up(size_t size, size_t unit)
+{
+  size += unit - 1;
+  size -= size % unit;
+  return size;
+}
+
+}
+}
+
+#undef unused_
+
+#endif
diff --git a/include/zfp/internal/array/pointer1.hpp b/include/zfp/internal/array/pointer1.hpp
new file mode 100644
index 00000000..37876c69
--- /dev/null
+++ b/include/zfp/internal/array/pointer1.hpp
@@ -0,0 +1,118 @@
+#ifndef ZFP_POINTER1_HPP
+#define ZFP_POINTER1_HPP
+
+namespace zfp {
+namespace internal {
+namespace dim1 {
+
+// const pointer to a 1D array or view element
+template <class Container>
+class const_pointer : public const_handle<Container> {
+public:
+  typedef Container container_type;
+  typedef typename container_type::value_type value_type;
+
+  // default constructor
+  const_pointer() : const_handle<Container>(0, 0) {}
+#if defined(__cplusplus) && __cplusplus >= 201103L
+  const_pointer(std::nullptr_t) : const_handle<Container>(0, 0) {}
+#endif
+
+  // constructor
+  explicit const_pointer(const container_type* container, size_t x) : const_handle<Container>(container, x) {}
+
+  // dereference pointer
+  const_reference<Container> operator*() const { return const_reference<Container>(container, x); }
+  const_reference<Container> operator[](ptrdiff_t d) const { return *operator+(d); }
+
+  // pointer arithmetic
+  const_pointer operator+(ptrdiff_t d) const { const_pointer p = *this; p.advance(d); return p; }
+  const_pointer operator-(ptrdiff_t d) const { return operator+(-d); }
+  ptrdiff_t operator-(const const_pointer& p) const { return offset() - p.offset(); }
+
+  // equality operators
+  bool operator==(const const_pointer& p) const { return container == p.container && x == p.x; }
+  bool operator!=(const const_pointer& p) const { return !operator==(p); }
+
+  // relational operators
+  bool operator<=(const const_pointer& p) const { return container == p.container && offset() <= p.offset(); }
+  bool operator>=(const const_pointer& p) const { return container == p.container && offset() >= p.offset(); }
+  bool operator<(const const_pointer& p) const { return container == p.container && offset() < p.offset(); }
+  bool operator>(const const_pointer& p) const { return container == p.container && offset() > p.offset(); }
+
+  // increment and decrement
+  const_pointer& operator++() { increment(); return *this; }
+  const_pointer& operator--() { decrement(); return *this; }
+  const_pointer operator++(int) { const_pointer p = *this; increment(); return p; }
+  const_pointer operator--(int) { const_pointer p = *this; decrement(); return p; }
+  const_pointer operator+=(ptrdiff_t d) { advance(+d); return *this; }
+  const_pointer operator-=(ptrdiff_t d) { advance(-d); return *this; }
+
+protected:
+  ptrdiff_t offset(ptrdiff_t d = 0) const { return static_cast<ptrdiff_t>(x - container->min_x()) + d; }
+  void index(size_t& x, ptrdiff_t p) const { x = container->min_x() + size_t(p); }
+  void advance(ptrdiff_t d) { index(x, offset(d)); }
+  void increment() { ++x; }
+  void decrement() { --x; }
+
+  using const_handle<Container>::container;
+  using const_handle<Container>::x;
+};
+
+// pointer to a 1D array or view element
+template <class Container>
+class pointer : public const_pointer<Container> {
+public:
+  typedef Container container_type;
+  typedef typename container_type::value_type value_type;
+
+  // default constructor
+  pointer() : const_pointer<Container>(0, 0) {}
+#if defined(__cplusplus) && __cplusplus >= 201103L
+  pointer(std::nullptr_t) : const_pointer<Container>(0, 0) {}
+#endif
+
+  // constructor
+  explicit pointer(container_type* container, size_t x) : const_pointer<Container>(container, x) {}
+
+  // dereference pointer
+  reference<Container> operator*() const { return reference<Container>(container, x); }
+  reference<Container> operator[](ptrdiff_t d) const { return *operator+(d); }
+
+  // pointer arithmetic
+  pointer operator+(ptrdiff_t d) const { pointer p = *this; p.advance(d); return p; }
+  pointer operator-(ptrdiff_t d) const { return operator+(-d); }
+  ptrdiff_t operator-(const pointer& p) const { return offset() - p.offset(); }
+
+  // equality operators
+  bool operator==(const pointer& p) const { return container == p.container && x == p.x; }
+  bool operator!=(const pointer& p) const { return !operator==(p); }
+
+  // relational operators
+  bool operator<=(const pointer& p) const { return container == p.container && offset() <= p.offset(); }
+  bool operator>=(const pointer& p) const { return container == p.container && offset() >= p.offset(); }
+  bool operator<(const pointer& p) const { return container == p.container && offset() < p.offset(); }
+  bool operator>(const pointer& p) const { return container == p.container && offset() > p.offset(); }
+
+  // increment and decrement
+  pointer& operator++() { increment(); return *this; }
+  pointer& operator--() { decrement(); return *this; }
+  pointer operator++(int) { pointer p = *this; increment(); return p; }
+  pointer operator--(int) { pointer p = *this; decrement(); return p; }
+  pointer operator+=(ptrdiff_t d) { advance(+d); return *this; }
+  pointer operator-=(ptrdiff_t d) { advance(-d); return *this; }
+
+protected:
+  using const_pointer<Container>::offset;
+  using const_pointer<Container>::advance;
+  using const_pointer<Container>::increment;
+  using const_pointer<Container>::decrement;
+  using const_pointer<Container>::container;
+  using const_pointer<Container>::x;
+};
+
+} // dim1
+} // internal
+} // zfp
+
+#endif
diff --git a/include/zfp/internal/array/pointer2.hpp b/include/zfp/internal/array/pointer2.hpp
new file mode 100644
index 00000000..a074be98
--- /dev/null
+++ b/include/zfp/internal/array/pointer2.hpp
@@ -0,0 +1,136 @@
+#ifndef ZFP_POINTER2_HPP
+#define ZFP_POINTER2_HPP
+
+namespace zfp {
+namespace internal {
+namespace dim2 {
+
+// const pointer to a 2D array or view element
+template <class Container>
+class const_pointer : public const_handle<Container> {
+public:
+  typedef Container container_type;
+  typedef typename container_type::value_type value_type;
+
+  // default constructor
+  const_pointer() : const_handle<Container>(0, 0, 0) {}
+#if defined(__cplusplus) && __cplusplus >= 201103L
+  const_pointer(std::nullptr_t) : const_handle<Container>(0, 0, 0) {}
+#endif
+
+  // constructor
+  explicit const_pointer(const container_type* container, size_t x, size_t y) : const_handle<Container>(container, x, y) {}
+
+  // dereference pointer
+  const_reference<Container> operator*() const { return const_reference<Container>(container, x, y); }
+  const_reference<Container> operator[](ptrdiff_t d) const { return *operator+(d); }
+
+  // pointer arithmetic
+  const_pointer operator+(ptrdiff_t d) const { const_pointer p = *this; p.advance(d); return p; }
+  const_pointer operator-(ptrdiff_t d) const { return operator+(-d); }
+  ptrdiff_t operator-(const const_pointer& p) const { return offset() - p.offset(); }
+
+  // equality operators
+  bool operator==(const const_pointer& p) const { return container == p.container && x == p.x && y == p.y; }
+  bool operator!=(const const_pointer& p) const { return !operator==(p); }
+
+  // relational operators
+  bool operator<=(const const_pointer& p) const { return container == p.container && offset() <= p.offset(); }
+  bool operator>=(const const_pointer& p) const { return container == p.container && offset() >= p.offset(); }
+  bool operator<(const const_pointer& p) const { return container == p.container && offset() < p.offset(); }
+  bool operator>(const const_pointer& p) const { return container == p.container && offset() > p.offset(); }
+
+  // increment and decrement
+  const_pointer& operator++() { increment(); return *this; }
+  const_pointer& operator--() { decrement(); return *this; }
+  const_pointer operator++(int) { const_pointer p = *this; increment(); return p; }
+  const_pointer operator--(int) { const_pointer p = *this; decrement(); return p; }
+  const_pointer operator+=(ptrdiff_t d) { advance(+d); return *this; }
+  const_pointer operator-=(ptrdiff_t d) { advance(-d); return *this; }
+
+protected:
+  ptrdiff_t offset(ptrdiff_t d = 0) const { return static_cast<ptrdiff_t>(x - container->min_x() + container->size_x() * (y - container->min_y())) + d; }
+  void index(size_t& x, size_t& y, ptrdiff_t p) const
+  {
+    x = container->min_x() + size_t(p % ptrdiff_t(container->size_x())); p /=
container->size_x(); + y = container->min_y() + size_t(p); + } + void advance(ptrdiff_t d) { index(x, y, offset(d)); } + void increment() + { + if (++x == container->max_x()) { + x = container->min_x(); + ++y; + } + } + void decrement() + { + if (x-- == container->min_x()) { + x += container->size_x(); + --y; + } + } + + using const_handle::container; + using const_handle::x; + using const_handle::y; +}; + +// pointer to a 2D array or view element +template +class pointer : public const_pointer { +public: + typedef Container container_type; + typedef typename container_type::value_type value_type; + + // default constructor + pointer() : const_pointer(0, 0, 0) {} +#if defined(__cplusplus) && __cplusplus >= 201103L + pointer(std::nullptr_t) : const_pointer(0, 0, 0) {} +#endif + + // constructor + explicit pointer(container_type* container, size_t x, size_t y) : const_pointer(container, x, y) {} + + // dereference pointer + reference operator*() const { return reference(container, x, y); } + reference operator[](ptrdiff_t d) const { return *operator+(d); } + + // pointer arithmetic + pointer operator+(ptrdiff_t d) const { pointer p = *this; p.advance(d); return p; } + pointer operator-(ptrdiff_t d) const { return operator+(-d); } + ptrdiff_t operator-(const pointer& p) const { return offset() - p.offset(); } + + // equality operators + bool operator==(const pointer& p) const { return container == p.container && x == p.x && y == p.y; } + bool operator!=(const pointer& p) const { return !operator==(p); } + + // relational operators + bool operator<=(const pointer& p) const { return container == p.container && offset() <= p.offset(); } + bool operator>=(const pointer& p) const { return container == p.container && offset() >= p.offset(); } + bool operator<(const pointer& p) const { return container == p.container && offset() < p.offset(); } + bool operator>(const pointer& p) const { return container == p.container && offset() > p.offset(); } + + // increment and 
decrement + pointer& operator++() { increment(); return *this; } + pointer& operator--() { decrement(); return *this; } + pointer operator++(int) { pointer p = *this; increment(); return p; } + pointer operator--(int) { pointer p = *this; decrement(); return p; } + pointer operator+=(ptrdiff_t d) { advance(+d); return *this; } + pointer operator-=(ptrdiff_t d) { advance(-d); return *this; } + +protected: + using const_pointer::offset; + using const_pointer::advance; + using const_pointer::increment; + using const_pointer::decrement; + using const_pointer::container; + using const_pointer::x; + using const_pointer::y; +}; + +} // dim2 +} // internal +} // zfp + +#endif diff --git a/include/zfp/internal/array/pointer3.hpp b/include/zfp/internal/array/pointer3.hpp new file mode 100644 index 00000000..8f8dee61 --- /dev/null +++ b/include/zfp/internal/array/pointer3.hpp @@ -0,0 +1,145 @@ +#ifndef ZFP_POINTER3_HPP +#define ZFP_POINTER3_HPP + +namespace zfp { +namespace internal { +namespace dim3 { + +// const pointer to a 3D array or view element +template +class const_pointer : public const_handle { +public: + typedef Container container_type; + typedef typename container_type::value_type value_type; + + // default constructor + const_pointer() : const_handle(0, 0, 0, 0) {} +#if defined(__cplusplus) && __cplusplus >= 201103L + const_pointer(std::nullptr_t) : const_handle(0, 0, 0, 0) {} +#endif + + // constructor + explicit const_pointer(const container_type* container, size_t x, size_t y, size_t z) : const_handle(container, x, y, z) {} + + // dereference pointer + const_reference operator*() const { return const_reference(container, x, y, z); } + const_reference operator[](ptrdiff_t d) const { return *operator+(d); } + + // pointer arithmetic + const_pointer operator+(ptrdiff_t d) const { const_pointer p = *this; p.advance(d); return p; } + const_pointer operator-(ptrdiff_t d) const { return operator+(-d); } + ptrdiff_t operator-(const const_pointer& p) const { return 
offset() - p.offset(); } + + // equality operators + bool operator==(const const_pointer& p) const { return container == p.container && x == p.x && y == p.y && z == p.z; } + bool operator!=(const const_pointer& p) const { return !operator==(p); } + + // relational operators + bool operator<=(const const_pointer& p) const { return container == p.container && offset() <= p.offset(); } + bool operator>=(const const_pointer& p) const { return container == p.container && offset() >= p.offset(); } + bool operator<(const const_pointer& p) const { return container == p.container && offset() < p.offset(); } + bool operator>(const const_pointer& p) const { return container == p.container && offset() > p.offset(); } + + // increment and decrement + const_pointer& operator++() { increment(); return *this; } + const_pointer& operator--() { decrement(); return *this; } + const_pointer operator++(int) { const_pointer p = *this; increment(); return p; } + const_pointer operator--(int) { const_pointer p = *this; decrement(); return p; } + const_pointer operator+=(ptrdiff_t d) { advance(+d); return *this; } + const_pointer operator-=(ptrdiff_t d) { advance(-d); return *this; } + +protected: + ptrdiff_t offset(ptrdiff_t d = 0) const { return static_cast(x - container->min_x() + container->size_x() * (y - container->min_y() + container->size_y() * (z - container->min_z()))) + d; } + void index(size_t& x, size_t& y, size_t& z, ptrdiff_t p) const + { + x = container->min_x() + size_t(p % ptrdiff_t(container->size_x())); p /= container->size_x(); + y = container->min_y() + size_t(p % ptrdiff_t(container->size_y())); p /= container->size_y(); + z = container->min_z() + size_t(p); + } + void advance(ptrdiff_t d) { index(x, y, z, offset(d)); } + void increment() + { + if (++x == container->max_x()) { + x = container->min_x(); + if (++y == container->max_y()) { + y = container->min_y(); + ++z; + } + } + } + void decrement() + { + if (x-- == container->min_x()) { + x += container->size_x(); + 
if (y-- == container->min_y()) { + y += container->size_y(); + --z; + } + } + } + + using const_handle::container; + using const_handle::x; + using const_handle::y; + using const_handle::z; +}; + +// pointer to a 3D array or view element +template +class pointer : public const_pointer { +public: + typedef Container container_type; + typedef typename container_type::value_type value_type; + + // default constructor + pointer() : const_pointer(0, 0, 0, 0) {} +#if defined(__cplusplus) && __cplusplus >= 201103L + pointer(std::nullptr_t) : const_pointer(0, 0, 0, 0) {} +#endif + + // constructor + explicit pointer(container_type* container, size_t x, size_t y, size_t z) : const_pointer(container, x, y, z) {} + + // dereference pointer + reference operator*() const { return reference(container, x, y, z); } + reference operator[](ptrdiff_t d) const { return *operator+(d); } + + // pointer arithmetic + pointer operator+(ptrdiff_t d) const { pointer p = *this; p.advance(d); return p; } + pointer operator-(ptrdiff_t d) const { return operator+(-d); } + ptrdiff_t operator-(const pointer& p) const { return offset() - p.offset(); } + + // equality operators + bool operator==(const pointer& p) const { return container == p.container && x == p.x && y == p.y && z == p.z; } + bool operator!=(const pointer& p) const { return !operator==(p); } + + // relational operators + bool operator<=(const pointer& p) const { return container == p.container && offset() <= p.offset(); } + bool operator>=(const pointer& p) const { return container == p.container && offset() >= p.offset(); } + bool operator<(const pointer& p) const { return container == p.container && offset() < p.offset(); } + bool operator>(const pointer& p) const { return container == p.container && offset() > p.offset(); } + + // increment and decrement + pointer& operator++() { increment(); return *this; } + pointer& operator--() { decrement(); return *this; } + pointer operator++(int) { pointer p = *this; increment(); return 
p; } + pointer operator--(int) { pointer p = *this; decrement(); return p; } + pointer operator+=(ptrdiff_t d) { advance(+d); return *this; } + pointer operator-=(ptrdiff_t d) { advance(-d); return *this; } + +protected: + using const_pointer::offset; + using const_pointer::advance; + using const_pointer::increment; + using const_pointer::decrement; + using const_pointer::container; + using const_pointer::x; + using const_pointer::y; + using const_pointer::z; +}; + +} // dim3 +} // internal +} // zfp + +#endif diff --git a/include/zfp/internal/array/pointer4.hpp b/include/zfp/internal/array/pointer4.hpp new file mode 100644 index 00000000..8adb97f3 --- /dev/null +++ b/include/zfp/internal/array/pointer4.hpp @@ -0,0 +1,154 @@ +#ifndef ZFP_POINTER4_HPP +#define ZFP_POINTER4_HPP + +namespace zfp { +namespace internal { +namespace dim4 { + +// const pointer to a 4D array or view element +template +class const_pointer : public const_handle { +public: + typedef Container container_type; + typedef typename container_type::value_type value_type; + + // default constructor + const_pointer() : const_handle(0, 0, 0, 0, 0) {} +#if defined(__cplusplus) && __cplusplus >= 201103L + const_pointer(std::nullptr_t) : const_handle(0, 0, 0, 0, 0) {} +#endif + + // constructor + explicit const_pointer(const container_type* container, size_t x, size_t y, size_t z, size_t w) : const_handle(container, x, y, z, w) {} + + // dereference pointer + const_reference operator*() const { return const_reference(container, x, y, z, w); } + const_reference operator[](ptrdiff_t d) const { return *operator+(d); } + + // pointer arithmetic + const_pointer operator+(ptrdiff_t d) const { const_pointer p = *this; p.advance(d); return p; } + const_pointer operator-(ptrdiff_t d) const { return operator+(-d); } + ptrdiff_t operator-(const const_pointer& p) const { return offset() - p.offset(); } + + // equality operators + bool operator==(const const_pointer& p) const { return container == p.container && x == 
p.x && y == p.y && z == p.z && w == p.w; } + bool operator!=(const const_pointer& p) const { return !operator==(p); } + + // relational operators + bool operator<=(const const_pointer& p) const { return container == p.container && offset() <= p.offset(); } + bool operator>=(const const_pointer& p) const { return container == p.container && offset() >= p.offset(); } + bool operator<(const const_pointer& p) const { return container == p.container && offset() < p.offset(); } + bool operator>(const const_pointer& p) const { return container == p.container && offset() > p.offset(); } + + // increment and decrement + const_pointer& operator++() { increment(); return *this; } + const_pointer& operator--() { decrement(); return *this; } + const_pointer operator++(int) { const_pointer p = *this; increment(); return p; } + const_pointer operator--(int) { const_pointer p = *this; decrement(); return p; } + const_pointer operator+=(ptrdiff_t d) { advance(+d); return *this; } + const_pointer operator-=(ptrdiff_t d) { advance(-d); return *this; } + +protected: + ptrdiff_t offset(ptrdiff_t d = 0) const { return static_cast(x - container->min_x() + container->size_x() * (y - container->min_y() + container->size_y() * (z - container->min_z() + container->size_z() * (w - container->min_w())))) + d; } + void index(size_t& x, size_t& y, size_t& z, size_t & w, ptrdiff_t p) const + { + x = container->min_x() + size_t(p % ptrdiff_t(container->size_x())); p /= container->size_x(); + y = container->min_y() + size_t(p % ptrdiff_t(container->size_y())); p /= container->size_y(); + z = container->min_z() + size_t(p % ptrdiff_t(container->size_z())); p /= container->size_z(); + w = container->min_w() + size_t(p); + } + void advance(ptrdiff_t d) { index(x, y, z, w, offset(d)); } + void increment() + { + if (++x == container->max_x()) { + x = container->min_x(); + if (++y == container->max_y()) { + y = container->min_y(); + if (++z == container->max_z()) { + z = container->min_z(); + ++w; + } + 
} + } + } + void decrement() + { + if (x-- == container->min_x()) { + x += container->size_x(); + if (y-- == container->min_y()) { + y += container->size_y(); + if (z-- == container->min_z()) { + z += container->size_z(); + --w; + } + } + } + } + + using const_handle::container; + using const_handle::x; + using const_handle::y; + using const_handle::z; + using const_handle::w; +}; + +// pointer to a 4D array or view element +template +class pointer : public const_pointer { +public: + typedef Container container_type; + typedef typename container_type::value_type value_type; + + // default constructor + pointer() : const_pointer(0, 0, 0, 0, 0) {} +#if defined(__cplusplus) && __cplusplus >= 201103L + pointer(std::nullptr_t) : const_pointer(0, 0, 0, 0, 0) {} +#endif + + // constructor + explicit pointer(container_type* container, size_t x, size_t y, size_t z, size_t w) : const_pointer(container, x, y, z, w) {} + + // dereference pointer + reference operator*() const { return reference(container, x, y, z, w); } + reference operator[](ptrdiff_t d) const { return *operator+(d); } + + // pointer arithmetic + pointer operator+(ptrdiff_t d) const { pointer p = *this; p.advance(d); return p; } + pointer operator-(ptrdiff_t d) const { return operator+(-d); } + ptrdiff_t operator-(const pointer& p) const { return offset() - p.offset(); } + + // equality operators + bool operator==(const pointer& p) const { return container == p.container && x == p.x && y == p.y && z == p.z && w == p.w; } + bool operator!=(const pointer& p) const { return !operator==(p); } + + // relational operators + bool operator<=(const pointer& p) const { return container == p.container && offset() <= p.offset(); } + bool operator>=(const pointer& p) const { return container == p.container && offset() >= p.offset(); } + bool operator<(const pointer& p) const { return container == p.container && offset() < p.offset(); } + bool operator>(const pointer& p) const { return container == p.container && offset() > 
p.offset(); } + + // increment and decrement + pointer& operator++() { increment(); return *this; } + pointer& operator--() { decrement(); return *this; } + pointer operator++(int) { pointer p = *this; increment(); return p; } + pointer operator--(int) { pointer p = *this; decrement(); return p; } + pointer operator+=(ptrdiff_t d) { advance(+d); return *this; } + pointer operator-=(ptrdiff_t d) { advance(-d); return *this; } + +protected: + using const_pointer::offset; + using const_pointer::advance; + using const_pointer::increment; + using const_pointer::decrement; + using const_pointer::container; + using const_pointer::x; + using const_pointer::y; + using const_pointer::z; + using const_pointer::w; +}; + +} // dim4 +} // internal +} // zfp + +#endif diff --git a/include/zfp/internal/array/reference1.hpp b/include/zfp/internal/array/reference1.hpp new file mode 100644 index 00000000..e41cc8b5 --- /dev/null +++ b/include/zfp/internal/array/reference1.hpp @@ -0,0 +1,78 @@ +#ifndef ZFP_REFERENCE1_HPP +#define ZFP_REFERENCE1_HPP + +namespace zfp { +namespace internal { +namespace dim1 { + +// const reference to a 1D array or view element +template +class const_reference : const_handle { +public: + typedef Container container_type; + typedef typename container_type::value_type value_type; + + // constructor + explicit const_reference(const container_type* container, size_t x) : const_handle(container, x) {} + + // inspector + operator value_type() const { return get(); } + + // pointer to referenced element + const_pointer operator&() const { return const_pointer(container, x); } + +protected: + using const_handle::get; + using const_handle::container; + using const_handle::x; +}; + +// reference to a 1D array or view element +template +class reference : public const_reference { +public: + typedef Container container_type; + typedef typename container_type::value_type value_type; + + // constructor + explicit reference(container_type* container, size_t x) : 
const_reference<Container>(container, x) {} + + // copy constructor (to satisfy rule of three) + reference(const reference& r) : const_reference<Container>(r.container, r.x) {} + + // assignment + reference operator=(const reference& r) { set(r.get()); return *this; } + reference operator=(value_type val) { set(val); return *this; } + + // compound assignment + reference operator+=(value_type val) { container->add(x, val); return *this; } + reference operator-=(value_type val) { container->sub(x, val); return *this; } + reference operator*=(value_type val) { container->mul(x, val); return *this; } + reference operator/=(value_type val) { container->div(x, val); return *this; } + + // pointer to referenced element + pointer<Container> operator&() const { return pointer<Container>(container, x); } + + // swap two array elements via proxy references + friend void swap(reference a, reference b) + { + value_type x = a.get(); + value_type y = b.get(); + b.set(x); + a.set(y); + } + +protected: + // assign value through reference + void set(value_type val) { container->set(x, val); } + + using const_reference<Container>::get; + using const_reference<Container>::container; + using const_reference<Container>::x; +}; + +} // dim1 +} // internal +} // zfp + +#endif diff --git a/include/zfp/internal/array/reference2.hpp b/include/zfp/internal/array/reference2.hpp new file mode 100644 index 00000000..b16484fb --- /dev/null +++ b/include/zfp/internal/array/reference2.hpp @@ -0,0 +1,80 @@ +#ifndef ZFP_REFERENCE2_HPP +#define ZFP_REFERENCE2_HPP + +namespace zfp { +namespace internal { +namespace dim2 { + +// const reference to a 2D array or view element +template <class Container> +class const_reference : const_handle<Container> { +public: + typedef Container container_type; + typedef typename container_type::value_type value_type; + + // constructor + explicit const_reference(const container_type* container, size_t x, size_t y) : const_handle<Container>(container, x, y) {} + + // inspector + operator value_type() const { return get(); } + + // pointer to referenced element + const_pointer<Container> 
operator&() const { return const_pointer(container, x, y); } + +protected: + using const_handle::get; + using const_handle::container; + using const_handle::x; + using const_handle::y; +}; + +// reference to a 2D array or view element +template +class reference : public const_reference { +public: + typedef Container container_type; + typedef typename container_type::value_type value_type; + + // constructor + explicit reference(container_type* container, size_t x, size_t y) : const_reference(container, x, y) {} + + // copy constructor (to satisfy rule of three) + reference(const reference& r) : const_reference(r.container, r.x, r.y) {} + + // assignment + reference operator=(const reference& r) { set(r.get()); return *this; } + reference operator=(value_type val) { set(val); return *this; } + + // compound assignment + reference operator+=(value_type val) { container->add(x, y, val); return *this; } + reference operator-=(value_type val) { container->sub(x, y, val); return *this; } + reference operator*=(value_type val) { container->mul(x, y, val); return *this; } + reference operator/=(value_type val) { container->div(x, y, val); return *this; } + + // pointer to referenced element + pointer operator&() const { return pointer(container, x, y); } + + // swap two array elements via proxy references + friend void swap(reference a, reference b) + { + value_type x = a.get(); + value_type y = b.get(); + b.set(x); + a.set(y); + } + +protected: + // assign value through reference + void set(value_type val) { container->set(x, y, val); } + + using const_reference::get; + using const_reference::container; + using const_reference::x; + using const_reference::y; +}; + +} // dim2 +} // internal +} // zfp + +#endif diff --git a/include/zfp/internal/array/reference3.hpp b/include/zfp/internal/array/reference3.hpp new file mode 100644 index 00000000..ecb52d30 --- /dev/null +++ b/include/zfp/internal/array/reference3.hpp @@ -0,0 +1,82 @@ +#ifndef ZFP_REFERENCE3_HPP +#define 
ZFP_REFERENCE3_HPP + +namespace zfp { +namespace internal { +namespace dim3 { + +// const reference to a 3D array or view element +template +class const_reference : const_handle { +public: + typedef Container container_type; + typedef typename container_type::value_type value_type; + + // constructor + explicit const_reference(const container_type* container, size_t x, size_t y, size_t z) : const_handle(container, x, y, z) {} + + // inspector + operator value_type() const { return get(); } + + // pointer to referenced element + const_pointer operator&() const { return const_pointer(container, x, y, z); } + +protected: + using const_handle::get; + using const_handle::container; + using const_handle::x; + using const_handle::y; + using const_handle::z; +}; + +// reference to a 3D array or view element +template +class reference : public const_reference { +public: + typedef Container container_type; + typedef typename container_type::value_type value_type; + + // constructor + explicit reference(container_type* container, size_t x, size_t y, size_t z) : const_reference(container, x, y, z) {} + + // copy constructor (to satisfy rule of three) + reference(const reference& r) : const_reference(r.container, r.x, r.y, r.z) {} + + // assignment + reference operator=(const reference& r) { set(r.get()); return *this; } + reference operator=(value_type val) { set(val); return *this; } + + // compound assignment + reference operator+=(value_type val) { container->add(x, y, z, val); return *this; } + reference operator-=(value_type val) { container->sub(x, y, z, val); return *this; } + reference operator*=(value_type val) { container->mul(x, y, z, val); return *this; } + reference operator/=(value_type val) { container->div(x, y, z, val); return *this; } + + // pointer to referenced element + pointer operator&() const { return pointer(container, x, y, z); } + + // swap two array elements via proxy references + friend void swap(reference a, reference b) + { + value_type x = 
a.get(); + value_type y = b.get(); + b.set(x); + a.set(y); + } + +protected: + // assign value through reference + void set(value_type val) { container->set(x, y, z, val); } + + using const_reference::get; + using const_reference::container; + using const_reference::x; + using const_reference::y; + using const_reference::z; +}; + +} // dim3 +} // internal +} // zfp + +#endif diff --git a/include/zfp/internal/array/reference4.hpp b/include/zfp/internal/array/reference4.hpp new file mode 100644 index 00000000..1d0c3ca3 --- /dev/null +++ b/include/zfp/internal/array/reference4.hpp @@ -0,0 +1,84 @@ +#ifndef ZFP_REFERENCE4_HPP +#define ZFP_REFERENCE4_HPP + +namespace zfp { +namespace internal { +namespace dim4 { + +// const reference to a 4D array or view element +template +class const_reference : const_handle { +public: + typedef Container container_type; + typedef typename container_type::value_type value_type; + + // constructor + explicit const_reference(const container_type* container, size_t x, size_t y, size_t z, size_t w) : const_handle(container, x, y, z, w) {} + + // inspector + operator value_type() const { return get(); } + + // pointer to referenced element + const_pointer operator&() const { return const_pointer(container, x, y, z, w); } + +protected: + using const_handle::get; + using const_handle::container; + using const_handle::x; + using const_handle::y; + using const_handle::z; + using const_handle::w; +}; + +// reference to a 4D array or view element +template +class reference : public const_reference { +public: + typedef Container container_type; + typedef typename container_type::value_type value_type; + + // constructor + explicit reference(container_type* container, size_t x, size_t y, size_t z, size_t w) : const_reference(container, x, y, z, w) {} + + // copy constructor (to satisfy rule of three) + reference(const reference& r) : const_reference(r.container, r.x, r.y, r.z, r.w) {} + + // assignment + reference operator=(const reference& r) { 
set(r.get()); return *this; } + reference operator=(value_type val) { set(val); return *this; } + + // compound assignment + reference operator+=(value_type val) { container->add(x, y, z, w, val); return *this; } + reference operator-=(value_type val) { container->sub(x, y, z, w, val); return *this; } + reference operator*=(value_type val) { container->mul(x, y, z, w, val); return *this; } + reference operator/=(value_type val) { container->div(x, y, z, w, val); return *this; } + + // pointer to referenced element + pointer<Container> operator&() const { return pointer<Container>(container, x, y, z, w); } + + // swap two array elements via proxy references + friend void swap(reference a, reference b) + { + value_type x = a.get(); + value_type y = b.get(); + b.set(x); + a.set(y); + } + +protected: + // assign value through reference + void set(value_type val) { container->set(x, y, z, w, val); } + + using const_reference<Container>::get; + using const_reference<Container>::container; + using const_reference<Container>::x; + using const_reference<Container>::y; + using const_reference<Container>::z; + using const_reference<Container>::w; +}; + +} // dim4 +} // internal +} // zfp + +#endif diff --git a/include/zfp/internal/array/store.hpp b/include/zfp/internal/array/store.hpp new file mode 100644 index 00000000..f649972f --- /dev/null +++ b/include/zfp/internal/array/store.hpp @@ -0,0 +1,255 @@ +#ifndef ZFP_STORE_HPP +#define ZFP_STORE_HPP + +#include <algorithm> +#include <climits> +#include "zfp/internal/array/memory.hpp" + +namespace zfp { +namespace internal { + +// base class for block store +template <class Codec, class Index> +class BlockStore { +public: + // compression mode + zfp_mode mode() const { return codec.mode(); } + + // rate in bits per value (fixed-rate mode only) + double rate() const { return codec.rate(); } + + // precision in uncompressed bits per value (fixed-precision mode only) + uint precision() const { return codec.precision(); } + + // accuracy as absolute error tolerance (fixed-accuracy mode only) + double accuracy() const { return codec.accuracy(); } + + // compression 
parameters (all compression modes) + void params(uint* minbits, uint* maxbits, uint* maxprec, int* minexp) const { codec.params(minbits, maxbits, maxprec, minexp); } + + // enable reversible (lossless) mode + void set_reversible() + { + set_variable_rate(); + codec.set_reversible(); + clear(); + } + + // set fixed rate in compressed bits per value with optional word alignment + double set_rate(double rate, bool align) + { + rate = codec.set_rate(rate, align); + uint maxbits; + codec.params(0, &maxbits, 0, 0); + index.set_block_size(maxbits); + alloc(true); + return rate; + } + + // set precision in uncompressed bits per value + uint set_precision(uint precision) + { + set_variable_rate(); + precision = codec.set_precision(precision); + clear(); + return precision; + } + + // set accuracy as absolute error tolerance + double set_accuracy(double tolerance) + { + set_variable_rate(); + tolerance = codec.set_accuracy(tolerance); + clear(); + return tolerance; + } + + // set expert mode compression parameters + bool set_params(uint minbits, uint maxbits, uint maxprec, int minexp) + { + if (minbits != maxbits) + set_variable_rate(); + bool status = codec.set_params(minbits, maxbits, maxprec, minexp); + clear(); + return status; + } + + // set compression mode and parameters + void set_config(const zfp_config& config) + { + switch (config.mode) { + case zfp_mode_reversible: + set_reversible(); + break; + case zfp_mode_fixed_rate: + if (config.arg.rate < 0) + set_rate(-config.arg.rate, true); + else + set_rate(+config.arg.rate, false); + break; + case zfp_mode_fixed_precision: + set_precision(config.arg.precision); + break; + case zfp_mode_fixed_accuracy: + set_accuracy(config.arg.tolerance); + break; + case zfp_mode_expert: + set_params(config.arg.expert.minbits, config.arg.expert.maxbits, config.arg.expert.maxprec, config.arg.expert.minexp); + break; + default: + throw zfp::exception("zfp compression mode not supported by array"); + } + } + + // clear store and 
reallocate memory for buffer + void clear() + { + index.clear(); + alloc(true); + } + + // flush any buffered block index data + void flush() { index.flush(); } + + // shrink buffer to match size of compressed data + void compact() + { + size_t size = zfp::internal::round_up(index.range(), codec.alignment() * CHAR_BIT) / CHAR_BIT; + if (bytes > size) { + codec.close(); + zfp::internal::reallocate_aligned(data, size, ZFP_MEMORY_ALIGNMENT, bytes); + bytes = size; + codec.open(data, bytes); + } + } + + // increment private view reference count (for thread safety) + void reference() + { +#ifdef _OPENMP + #pragma omp critical(references) + { + references++; + codec.set_thread_safety(references > 1); + } +#endif + } + + // decrement private view reference count (for thread safety) + void unreference() + { +#ifdef _OPENMP + #pragma omp critical(references) + { + references--; + codec.set_thread_safety(references > 1); + } +#endif + } + + // byte size of store data structure components indicated by mask + virtual size_t size_bytes(uint mask = ZFP_DATA_ALL) const + { + size_t size = 0; + size += index.size_bytes(mask); + size += codec.size_bytes(mask); + if (mask & ZFP_DATA_PAYLOAD) + size += bytes; + if (mask & ZFP_DATA_META) + size += sizeof(*this); + return size; + } + + // number of bytes of compressed data + size_t compressed_size() const { return bytes; } + + // pointer to compressed data for read or write access + void* compressed_data() const { return data; } + +protected: + // protected default constructor + BlockStore() : + data(0), + bytes(0), + references(0), + index(0) + {} + + // destructor + virtual ~BlockStore() { free(); } + + // buffer size in bytes needed for current codec settings + virtual size_t buffer_size() const = 0; + + // number of elements per block + virtual size_t block_size() const = 0; + + // total number of blocks + virtual size_t blocks() const = 0; + + // ensure variable rate is supported + void set_variable_rate() + { + if 
(!index.has_variable_rate()) + throw zfp::exception("zfp index does not support variable rate"); + } + + // perform a deep copy + void deep_copy(const BlockStore& s) + { + free(); + zfp::internal::clone_aligned(data, s.data, s.bytes, ZFP_MEMORY_ALIGNMENT); + bytes = s.bytes; + references = s.references; + index = s.index; + codec = s.codec; + codec.open(data, bytes); + } + + // allocate memory for block store + void alloc(bool clear) + { + free(); + bytes = buffer_size(); + zfp::internal::reallocate_aligned(data, bytes, ZFP_MEMORY_ALIGNMENT); + if (clear) + std::fill(static_cast<uchar*>(data), static_cast<uchar*>(data) + bytes, uchar(0)); + codec.open(data, bytes); + } + + // free block store + void free() + { + if (data) { + zfp::internal::deallocate_aligned(data); + data = 0; + bytes = 0; + codec.close(); + } + } + + // bit offset to block store + bitstream_offset offset(size_t block_index) const { return index.block_offset(block_index); } + + // shape 0 <= m <= 3 of block containing index i, 0 <= i <= n - 1 + static uint shape_code(size_t i, size_t n) + { + // handle partial blocks efficiently using no conditionals + size_t m = i ^ n; // m < 4 iff partial block + m -= 4; // m < 0 iff partial block + m >>= CHAR_BIT * sizeof(m) - 2; // m = 3 iff partial block; otherwise m = 0 + m &= -n; // m = 4 - w + return static_cast<uint>(m); + } + + void* data; // pointer to compressed blocks + size_t bytes; // compressed data size + size_t references; // private view references to array (for thread safety) + Index index; // block index (size and offset) + Codec codec; // compression codec +}; + +} // internal +} // zfp + +#endif diff --git a/include/zfp/internal/array/store1.hpp b/include/zfp/internal/array/store1.hpp new file mode 100644 index 00000000..aeb05fa8 --- /dev/null +++ b/include/zfp/internal/array/store1.hpp @@ -0,0 +1,140 @@ +#ifndef ZFP_STORE1_HPP +#define ZFP_STORE1_HPP + +#include "zfp/internal/array/store.hpp" + +namespace zfp { +namespace internal { + +// compressed block store
for 1D array +template <typename Scalar, class Codec, class Index> +class BlockStore1 : public BlockStore<Codec, Index> { +public: + // default constructor + BlockStore1() : + nx(0), + bx(0) + {} + + // block store for array of size nx and given configuration + BlockStore1(size_t nx, const zfp_config& config) + { + set_size(nx); + this->set_config(config); + } + + // perform a deep copy + void deep_copy(const BlockStore1& s) + { + free(); + BlockStore<Codec, Index>::deep_copy(s); + nx = s.nx; + bx = s.bx; + } + + // resize array + void resize(size_t nx, bool clear = true) + { + free(); + set_size(nx); + if (blocks()) + alloc(clear); + } + + // byte size of store data structure components indicated by mask + virtual size_t size_bytes(uint mask = ZFP_DATA_ALL) const + { + size_t size = 0; + size += BlockStore<Codec, Index>::size_bytes(mask); + if (mask & ZFP_DATA_META) + size += sizeof(*this) - sizeof(BlockStore<Codec, Index>); + return size; + } + + // conservative buffer size + virtual size_t buffer_size() const + { + zfp_field* field = zfp_field_1d(0, codec.type, nx); + size_t size = codec.buffer_size(field); + zfp_field_free(field); + return size; + } + + // number of elements per block + virtual size_t block_size() const { return 4; } + + // total number of blocks + virtual size_t blocks() const { return bx; } + + // array size in blocks + size_t block_size_x() const { return bx; } + + // flat block index for element i + size_t block_index(size_t i) const { return i / 4; } + + // encoding of block dimensions + uint block_shape(size_t block_index) const + { + size_t i = 4 * block_index; + uint mx = shape_code(i, nx); + return mx; + } + + // encode contiguous block with given index + size_t encode(size_t block_index, const Scalar* block) + { + size_t size = codec.encode_block(offset(block_index), block_shape(block_index), block); + index.set_block_size(block_index, size); + return size; + } + + // encode block with given index from strided array + size_t encode(size_t block_index, const Scalar* p, ptrdiff_t sx) + { + size_t size =
codec.encode_block_strided(offset(block_index), block_shape(block_index), p, sx); + index.set_block_size(block_index, size); + return size; + } + + // decode contiguous block with given index + size_t decode(size_t block_index, Scalar* block) const + { + return codec.decode_block(offset(block_index), block_shape(block_index), block); + } + + // decode block with given index to strided array + size_t decode(size_t block_index, Scalar* p, ptrdiff_t sx) const + { + return codec.decode_block_strided(offset(block_index), block_shape(block_index), p, sx); + } + +protected: + using BlockStore::alloc; + using BlockStore::free; + using BlockStore::offset; + using BlockStore::shape_code; + using BlockStore::index; + using BlockStore::codec; + + // set array dimensions + void set_size(size_t nx) + { + if (nx == 0) { + this->nx = 0; + bx = 0; + } + else { + this->nx = nx; + bx = (nx + 3) / 4; + } + index.resize(blocks()); + } + + size_t nx; // array dimensions + size_t bx; // array dimensions in number of blocks +}; + +} // internal +} // zfp + +#endif diff --git a/include/zfp/internal/array/store2.hpp b/include/zfp/internal/array/store2.hpp new file mode 100644 index 00000000..466067ac --- /dev/null +++ b/include/zfp/internal/array/store2.hpp @@ -0,0 +1,147 @@ +#ifndef ZFP_STORE2_HPP +#define ZFP_STORE2_HPP + +#include "zfp/internal/array/store.hpp" + +namespace zfp { +namespace internal { + +// compressed block store for 2D array +template +class BlockStore2 : public BlockStore { +public: + // default constructor + BlockStore2() : + nx(0), ny(0), + bx(0), by(0) + {} + + // block store for array of size nx * ny and given configuration + BlockStore2(size_t nx, size_t ny, const zfp_config& config) + { + set_size(nx, ny); + this->set_config(config); + } + + // perform a deep copy + void deep_copy(const BlockStore2& s) + { + free(); + BlockStore::deep_copy(s); + nx = s.nx; + ny = s.ny; + bx = s.bx; + by = s.by; + } + + // resize array + void resize(size_t nx, size_t ny, bool 
clear = true) + { + free(); + set_size(nx, ny); + if (blocks()) + alloc(clear); + } + + // byte size of store data structure components indicated by mask + virtual size_t size_bytes(uint mask = ZFP_DATA_ALL) const + { + size_t size = 0; + size += BlockStore::size_bytes(mask); + if (mask & ZFP_DATA_META) + size += sizeof(*this) - sizeof(BlockStore); + return size; + } + + // conservative buffer size + virtual size_t buffer_size() const + { + zfp_field* field = zfp_field_2d(0, codec.type, nx, ny); + size_t size = codec.buffer_size(field); + zfp_field_free(field); + return size; + } + + // number of elements per block + virtual size_t block_size() const { return 4 * 4; } + + // total number of blocks + virtual size_t blocks() const { return bx * by; } + + // array size in blocks + size_t block_size_x() const { return bx; } + size_t block_size_y() const { return by; } + + // flat block index for element (i, j) + size_t block_index(size_t i, size_t j) const { return (i / 4) + bx * (j / 4); } + + // encoding of block dimensions + uint block_shape(size_t block_index) const + { + size_t i = 4 * (block_index % bx); block_index /= bx; + size_t j = 4 * block_index; + uint mx = shape_code(i, nx); + uint my = shape_code(j, ny); + return mx + 4 * my; + } + + // encode contiguous block with given index + size_t encode(size_t block_index, const Scalar* block) + { + size_t size = codec.encode_block(offset(block_index), block_shape(block_index), block); + index.set_block_size(block_index, size); + return size; + } + + // encode block with given index from strided array + size_t encode(size_t block_index, const Scalar* p, ptrdiff_t sx, ptrdiff_t sy) + { + size_t size = codec.encode_block_strided(offset(block_index), block_shape(block_index), p, sx, sy); + index.set_block_size(block_index, size); + return size; + } + + // decode contiguous block with given index + size_t decode(size_t block_index, Scalar* block) const + { + return codec.decode_block(offset(block_index), 
block_shape(block_index), block); + } + + // decode block with given index to strided array + size_t decode(size_t block_index, Scalar* p, ptrdiff_t sx, ptrdiff_t sy) const + { + return codec.decode_block_strided(offset(block_index), block_shape(block_index), p, sx, sy); + } + +protected: + using BlockStore::alloc; + using BlockStore::free; + using BlockStore::offset; + using BlockStore::shape_code; + using BlockStore::index; + using BlockStore::codec; + + // set array dimensions + void set_size(size_t nx, size_t ny) + { + if (nx == 0 || ny == 0) { + this->nx = this->ny = 0; + bx = by = 0; + } + else { + this->nx = nx; + this->ny = ny; + bx = (nx + 3) / 4; + by = (ny + 3) / 4; + } + index.resize(blocks()); + } + + size_t nx, ny; // array dimensions + size_t bx, by; // array dimensions in number of blocks +}; + +} // internal +} // zfp + +#endif diff --git a/include/zfp/internal/array/store3.hpp b/include/zfp/internal/array/store3.hpp new file mode 100644 index 00000000..cb2afb73 --- /dev/null +++ b/include/zfp/internal/array/store3.hpp @@ -0,0 +1,154 @@ +#ifndef ZFP_STORE3_HPP +#define ZFP_STORE3_HPP + +#include "zfp/internal/array/store.hpp" + +namespace zfp { +namespace internal { + +// compressed block store for 3D array +template +class BlockStore3 : public BlockStore { +public: + // default constructor + BlockStore3() : + nx(0), ny(0), nz(0), + bx(0), by(0), bz(0) + {} + + // block store for array of size nx * ny * nz and given configuration + BlockStore3(size_t nx, size_t ny, size_t nz, const zfp_config& config) + { + set_size(nx, ny, nz); + this->set_config(config); + } + + // perform a deep copy + void deep_copy(const BlockStore3& s) + { + free(); + BlockStore::deep_copy(s); + nx = s.nx; + ny = s.ny; + nz = s.nz; + bx = s.bx; + by = s.by; + bz = s.bz; + } + + // resize array + void resize(size_t nx, size_t ny, size_t nz, bool clear = true) + { + free(); + set_size(nx, ny, nz); + if (blocks()) + alloc(clear); + } + + // byte size of store data structure 
components indicated by mask + virtual size_t size_bytes(uint mask = ZFP_DATA_ALL) const + { + size_t size = 0; + size += BlockStore::size_bytes(mask); + if (mask & ZFP_DATA_META) + size += sizeof(*this) - sizeof(BlockStore); + return size; + } + + // conservative buffer size + virtual size_t buffer_size() const + { + zfp_field* field = zfp_field_3d(0, codec.type, nx, ny, nz); + size_t size = codec.buffer_size(field); + zfp_field_free(field); + return size; + } + + // number of elements per block + virtual size_t block_size() const { return 4 * 4 * 4; } + + // total number of blocks + virtual size_t blocks() const { return bx * by * bz; } + + // array size in blocks + size_t block_size_x() const { return bx; } + size_t block_size_y() const { return by; } + size_t block_size_z() const { return bz; } + + // flat block index for block containing element (i, j, k) + size_t block_index(size_t i, size_t j, size_t k) const { return (i / 4) + bx * ((j / 4) + by * (k / 4)); } + + // encoding of block dimensions + uint block_shape(size_t block_index) const + { + size_t i = 4 * (block_index % bx); block_index /= bx; + size_t j = 4 * (block_index % by); block_index /= by; + size_t k = 4 * block_index; + uint mx = shape_code(i, nx); + uint my = shape_code(j, ny); + uint mz = shape_code(k, nz); + return mx + 4 * (my + 4 * mz); + } + + // encode contiguous block with given index + size_t encode(size_t block_index, const Scalar* block) + { + size_t size = codec.encode_block(offset(block_index), block_shape(block_index), block); + index.set_block_size(block_index, size); + return size; + } + + // encode block with given index from strided array + size_t encode(size_t block_index, const Scalar* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz) + { + size_t size = codec.encode_block_strided(offset(block_index), block_shape(block_index), p, sx, sy, sz); + index.set_block_size(block_index, size); + return size; + } + + // decode contiguous block with given index + size_t decode(size_t 
block_index, Scalar* block) const + { + return codec.decode_block(offset(block_index), block_shape(block_index), block); + } + + // decode block with given index to strided array + size_t decode(size_t block_index, Scalar* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz) const + { + return codec.decode_block_strided(offset(block_index), block_shape(block_index), p, sx, sy, sz); + } + +protected: + using BlockStore::alloc; + using BlockStore::free; + using BlockStore::offset; + using BlockStore::shape_code; + using BlockStore::index; + using BlockStore::codec; + + // set array dimensions + void set_size(size_t nx, size_t ny, size_t nz) + { + if (nx == 0 || ny == 0 || nz == 0) { + this->nx = this->ny = this->nz = 0; + bx = by = bz = 0; + } + else { + this->nx = nx; + this->ny = ny; + this->nz = nz; + bx = (nx + 3) / 4; + by = (ny + 3) / 4; + bz = (nz + 3) / 4; + } + index.resize(blocks()); + } + + size_t nx, ny, nz; // array dimensions + size_t bx, by, bz; // array dimensions in number of blocks +}; + +} // internal +} // zfp + +#endif diff --git a/include/zfp/internal/array/store4.hpp b/include/zfp/internal/array/store4.hpp new file mode 100644 index 00000000..dbea0c98 --- /dev/null +++ b/include/zfp/internal/array/store4.hpp @@ -0,0 +1,161 @@ +#ifndef ZFP_STORE4_HPP +#define ZFP_STORE4_HPP + +#include "zfp/internal/array/store.hpp" + +namespace zfp { +namespace internal { + +// compressed block store for 4D array +template +class BlockStore4 : public BlockStore { +public: + // default constructor + BlockStore4() : + nx(0), ny(0), nz(0), nw(0), + bx(0), by(0), bz(0), bw(0) + {} + + // block store for array of size nx * ny * nz * nw and given configuration + BlockStore4(size_t nx, size_t ny, size_t nz, size_t nw, const zfp_config& config) + { + set_size(nx, ny, nz, nw); + this->set_config(config); + } + + // perform a deep copy + void deep_copy(const BlockStore4& s) + { + free(); + BlockStore::deep_copy(s); + nx = s.nx; + ny = s.ny; + nz = s.nz; + nw = s.nw; + bx = s.bx; 
+ by = s.by; + bz = s.bz; + bw = s.bw; + } + + // resize array + void resize(size_t nx, size_t ny, size_t nz, size_t nw, bool clear = true) + { + free(); + set_size(nx, ny, nz, nw); + if (blocks()) + alloc(clear); + } + + // byte size of store data structure components indicated by mask + virtual size_t size_bytes(uint mask = ZFP_DATA_ALL) const + { + size_t size = 0; + size += BlockStore::size_bytes(mask); + if (mask & ZFP_DATA_META) + size += sizeof(*this) - sizeof(BlockStore); + return size; + } + + // conservative buffer size + virtual size_t buffer_size() const + { + zfp_field* field = zfp_field_4d(0, codec.type, nx, ny, nz, nw); + size_t size = codec.buffer_size(field); + zfp_field_free(field); + return size; + } + + // number of elements per block + virtual size_t block_size() const { return 4 * 4 * 4 * 4; } + + // total number of blocks + virtual size_t blocks() const { return bx * by * bz * bw; } + + // array size in blocks + size_t block_size_x() const { return bx; } + size_t block_size_y() const { return by; } + size_t block_size_z() const { return bz; } + size_t block_size_w() const { return bw; } + + // flat block index for element (i, j, k, l) + size_t block_index(size_t i, size_t j, size_t k, size_t l) const { return (i / 4) + bx * ((j / 4) + by * ((k / 4) + bz * (l / 4))); } + + // encoding of block dimensions + uint block_shape(size_t block_index) const + { + size_t i = 4 * (block_index % bx); block_index /= bx; + size_t j = 4 * (block_index % by); block_index /= by; + size_t k = 4 * (block_index % bz); block_index /= bz; + size_t l = 4 * block_index; + uint mx = shape_code(i, nx); + uint my = shape_code(j, ny); + uint mz = shape_code(k, nz); + uint mw = shape_code(l, nw); + return mx + 4 * (my + 4 * (mz + 4 * mw)); + } + + // encode contiguous block with given index + size_t encode(size_t block_index, const Scalar* block) + { + size_t size = codec.encode_block(offset(block_index), block_shape(block_index), block); + 
index.set_block_size(block_index, size); + return size; + } + + // encode block with given index from strided array + size_t encode(size_t block_index, const Scalar* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw) + { + size_t size = codec.encode_block_strided(offset(block_index), block_shape(block_index), p, sx, sy, sz, sw); + index.set_block_size(block_index, size); + return size; + } + + // decode contiguous block with given index + size_t decode(size_t block_index, Scalar* block) const + { + return codec.decode_block(offset(block_index), block_shape(block_index), block); + } + + // decode block with given index to strided array + size_t decode(size_t block_index, Scalar* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw) const + { + return codec.decode_block_strided(offset(block_index), block_shape(block_index), p, sx, sy, sz, sw); + } + +protected: + using BlockStore::alloc; + using BlockStore::free; + using BlockStore::offset; + using BlockStore::shape_code; + using BlockStore::index; + using BlockStore::codec; + + // set array dimensions + void set_size(size_t nx, size_t ny, size_t nz, size_t nw) + { + if (nx == 0 || ny == 0 || nz == 0 || nw == 0) { + this->nx = this->ny = this->nz = this->nw = 0; + bx = by = bz = bw = 0; + } + else { + this->nx = nx; + this->ny = ny; + this->nz = nz; + this->nw = nw; + bx = (nx + 3) / 4; + by = (ny + 3) / 4; + bz = (nz + 3) / 4; + bw = (nw + 3) / 4; + } + index.resize(blocks()); + } + + size_t nx, ny, nz, nw; // array dimensions + size_t bx, by, bz, bw; // array dimensions in number of blocks +}; + +} // internal +} // zfp + +#endif diff --git a/include/zfp/internal/array/traits.hpp b/include/zfp/internal/array/traits.hpp new file mode 100644 index 00000000..7ec4a02b --- /dev/null +++ b/include/zfp/internal/array/traits.hpp @@ -0,0 +1,30 @@ +#ifndef ZFP_TRAITS_HPP +#define ZFP_TRAITS_HPP + +namespace zfp { +namespace internal { + +// useful type traits +template +struct trait; +/* + static const zfp_type 
type; // corresponding zfp type + static const size_t precision; // precision in number of bits +*/ + +template <> +struct trait<float> { + static const zfp_type type = zfp_type_float; + static const size_t precision = CHAR_BIT * sizeof(float); +}; + +template <> +struct trait<double> { + static const zfp_type type = zfp_type_double; + static const size_t precision = CHAR_BIT * sizeof(double); +}; + +} +} + +#endif diff --git a/include/zfp/internal/array/view1.hpp b/include/zfp/internal/array/view1.hpp new file mode 100644 index 00000000..adfe868b --- /dev/null +++ b/include/zfp/internal/array/view1.hpp @@ -0,0 +1,303 @@ +#ifndef ZFP_VIEW1_HPP +#define ZFP_VIEW1_HPP + +// 1D array views + +namespace zfp { +namespace internal { +namespace dim1 { + +// abstract view of 1D array (base class) +template <class Container> +class preview { +public: + typedef Container container_type; + typedef typename container_type::value_type value_type; + + // rate in bits per value + double rate() const { return array->rate(); } + + // dimensions of (sub)array + size_t size() const { return nx; } + + // local to global array index + size_t global_x(size_t i) const { return x + i; } + +protected: + // construction and assignment--perform shallow copy of (sub)array + explicit preview(container_type* array) : array(array), x(0), nx(array->size_x()) {} + explicit preview(container_type* array, size_t x, size_t nx) : array(array), x(x), nx(nx) {} + preview& operator=(container_type* a) + { + array = a; + x = 0; + nx = a->nx; + return *this; + } + + // global index bounds for iterators + size_t min_x() const { return x; } + size_t max_x() const { return x + nx; } + + container_type* array; // underlying container + size_t x; // offset into array + size_t nx; // dimensions of subarray +}; + +// generic read-only view into a rectangular subset of a 1D array +template <class Container> +class const_view : public preview<Container> { +public: + typedef Container container_type; + typedef typename container_type::value_type value_type; + typedef typename
zfp::internal::dim1::const_reference const_reference; + typedef typename zfp::internal::dim1::const_pointer const_pointer; + typedef typename zfp::internal::dim1::const_iterator const_iterator; + + // construction--perform shallow copy of (sub)array + const_view(container_type* array) : preview(array) {} + const_view(container_type* array, size_t x, size_t nx) : preview(array, x, nx) {} + + // dimensions of (sub)array + size_t size_x() const { return nx; } + + // [i] inspector + const_reference operator[](size_t index) const { return const_reference(this, x + index); } + + // (i) inspector + const_reference operator()(size_t i) const { return const_reference(this, x + i); } + + // random access iterators + const_iterator cbegin() const { return const_iterator(this, x); } + const_iterator cend() const { return const_iterator(this, x + nx); } + const_iterator begin() const { return cbegin(); } + const_iterator end() const { return cend(); } + +protected: + friend class zfp::internal::dim1::const_handle; + friend class zfp::internal::dim1::const_pointer; + friend class zfp::internal::dim1::const_iterator; + + using preview::min_x; + using preview::max_x; + using preview::array; + using preview::x; + using preview::nx; + + // inspector + value_type get(size_t x) const { return array->get(x); } +}; + +// generic read-write view into a rectangular subset of a 1D array +template +class view : public const_view { +public: + typedef Container container_type; + typedef typename container_type::value_type value_type; + typedef typename zfp::internal::dim1::const_reference const_reference; + typedef typename zfp::internal::dim1::const_pointer const_pointer; + typedef typename zfp::internal::dim1::const_iterator const_iterator; + typedef typename zfp::internal::dim1::reference reference; + typedef typename zfp::internal::dim1::pointer pointer; + typedef typename zfp::internal::dim1::iterator iterator; + + // construction--perform shallow copy of (sub)array + 
view(container_type* array) : const_view(array) {} + view(container_type* array, size_t x, size_t nx) : const_view(array, x, nx) {} + + // [i] inspector + const_reference operator[](size_t index) const { return const_reference(this, x + index); } + + // (i) inspector + const_reference operator()(size_t i) const { return const_reference(this, x + i); } + + // [i] mutator + reference operator[](size_t index) { return reference(this, x + index); } + + // (i) mutator + reference operator()(size_t i) { return reference(this, x + i); } + + // random access iterators + const_iterator cbegin() const { return const_iterator(this, x); } + const_iterator cend() const { return const_iterator(this, x + nx); } + const_iterator begin() const { return cbegin(); } + const_iterator end() const { return cend(); } + iterator begin() { return iterator(this, x); } + iterator end() { return iterator(this, x + nx); } + +protected: + friend class zfp::internal::dim1::const_handle; + friend class zfp::internal::dim1::const_pointer; + friend class zfp::internal::dim1::const_iterator; + friend class zfp::internal::dim1::reference; + friend class zfp::internal::dim1::pointer; + friend class zfp::internal::dim1::iterator; + + using const_view::min_x; + using const_view::max_x; + using const_view::get; + using const_view::array; + using const_view::x; + using const_view::nx; + + // mutator + void set(size_t x, value_type val) { array->set(x, val); } + + // in-place updates + void add(size_t x, value_type val) { array->add(x, val); } + void sub(size_t x, value_type val) { array->sub(x, val); } + void mul(size_t x, value_type val) { array->mul(x, val); } + void div(size_t x, value_type val) { array->div(x, val); } +}; + +// thread-safe read-only view of 1D (sub)array with private cache +template +class private_const_view : public preview { +public: + typedef Container container_type; + typedef typename container_type::value_type value_type; + typedef typename container_type::store_type store_type; 
+ typedef typename zfp::internal::dim1::const_reference const_reference; + typedef typename zfp::internal::dim1::const_pointer const_pointer; + typedef typename zfp::internal::dim1::const_iterator const_iterator; + + // construction--perform shallow copy of (sub)array + private_const_view(container_type* array, size_t cache_size = 0) : + preview(array), + cache(array->store, cache_size ? cache_size : array->cache.size()) + { + array->store.reference(); + } + private_const_view(container_type* array, size_t x, size_t nx, size_t cache_size = 0) : + preview(array, x, nx), + cache(array->store, cache_size ? cache_size : array->cache.size()) + { + array->store.reference(); + } + + // destructor + ~private_const_view() + { + array->store.unreference(); + } + + // dimensions of (sub)array + size_t size_x() const { return nx; } + + // cache size in number of bytes + size_t cache_size() const { return cache.size(); } + + // set minimum cache size in bytes (array dimensions must be known) + void set_cache_size(size_t bytes) { cache.resize(bytes); } + + // empty cache without compressing modified cached blocks + void clear_cache() const { cache.clear(); } + + // (i) inspector + const_reference operator()(size_t i) const { return const_reference(this, x + i); } + + // random access iterators + const_iterator cbegin() const { return const_iterator(this, x); } + const_iterator cend() const { return const_iterator(this, x + nx); } + const_iterator begin() const { return cbegin(); } + const_iterator end() const { return cend(); } + +protected: + friend class zfp::internal::dim1::const_handle; + friend class zfp::internal::dim1::const_pointer; + friend class zfp::internal::dim1::const_iterator; + + using preview::min_x; + using preview::max_x; + using preview::array; + using preview::x; + using preview::nx; + + // inspector + value_type get(size_t x) const { return cache.get(x); } + + BlockCache1 cache; // cache of decompressed blocks +}; + +// thread-safe read-write view of 
private 1D (sub)array +template +class private_view : public private_const_view { +public: + typedef Container container_type; + typedef typename container_type::value_type value_type; + typedef typename zfp::internal::dim1::const_reference const_reference; + typedef typename zfp::internal::dim1::const_pointer const_pointer; + typedef typename zfp::internal::dim1::const_iterator const_iterator; + typedef typename zfp::internal::dim1::reference reference; + typedef typename zfp::internal::dim1::pointer pointer; + typedef typename zfp::internal::dim1::iterator iterator; + + // construction--perform shallow copy of (sub)array + private_view(container_type* array, size_t cache_size = 0) : private_const_view(array, cache_size) {} + private_view(container_type* array, size_t x, size_t nx, size_t cache_size = 0) : private_const_view(array, x, nx, cache_size) {} + + // partition view into count block-aligned pieces, with 0 <= index < count + void partition(size_t index, size_t count) + { + partition(x, nx, index, count); + } + + // flush cache by compressing all modified cached blocks + void flush_cache() const { cache.flush(); } + + // (i) inspector + const_reference operator()(size_t i) const { return const_reference(this, x + i); } + + // (i) mutator + reference operator()(size_t i) { return reference(this, x + i); } + + // random access iterators + const_iterator cbegin() const { return const_iterator(this, x); } + const_iterator cend() const { return const_iterator(this, x + nx); } + const_iterator begin() const { return cbegin(); } + const_iterator end() const { return cend(); } + iterator begin() { return iterator(this, x); } + iterator end() { return iterator(this, x + nx); } + +protected: + friend class zfp::internal::dim1::const_handle; + friend class zfp::internal::dim1::const_pointer; + friend class zfp::internal::dim1::const_iterator; + friend class zfp::internal::dim1::reference; + friend class zfp::internal::dim1::pointer; + friend class 
zfp::internal::dim1::iterator; + + using private_const_view::min_x; + using private_const_view::max_x; + using private_const_view::get; + using private_const_view::array; + using private_const_view::x; + using private_const_view::nx; + using private_const_view::cache; + + // block-aligned partition of [offset, offset + size): index out of count + static void partition(size_t& offset, size_t& size, size_t index, size_t count) + { + size_t bmin = offset / 4; + size_t bmax = (offset + size + 3) / 4; + size_t xmin = std::max(offset + 0, 4 * (bmin + (bmax - bmin) * (index + 0) / count)); + size_t xmax = std::min(offset + size, 4 * (bmin + (bmax - bmin) * (index + 1) / count)); + offset = xmin; + size = xmax - xmin; + } + + // mutator + void set(size_t x, value_type val) { cache.set(x, val); } + + // in-place updates + void add(size_t x, value_type val) { cache.ref(x) += val; } + void sub(size_t x, value_type val) { cache.ref(x) -= val; } + void mul(size_t x, value_type val) { cache.ref(x) *= val; } + void div(size_t x, value_type val) { cache.ref(x) /= val; } +}; + +} // dim1 +} // internal +} // zfp + +#endif diff --git a/include/zfp/internal/array/view2.hpp b/include/zfp/internal/array/view2.hpp new file mode 100644 index 00000000..8e12336f --- /dev/null +++ b/include/zfp/internal/array/view2.hpp @@ -0,0 +1,498 @@ +#ifndef ZFP_VIEW2_HPP +#define ZFP_VIEW2_HPP + +// 2D array views + +namespace zfp { +namespace internal { +namespace dim2 { + +// abstract view of 2D array (base class) +template +class preview { +public: + typedef Container container_type; + typedef typename container_type::value_type value_type; + + // rate in bits per value + double rate() const { return array->rate(); } + + // dimensions of (sub)array + size_t size() const { return nx * ny; } + + // local to global array indices + size_t global_x(size_t i) const { return x + i; } + size_t global_y(size_t j) const { return y + j; } + +protected: + // construction and assignment--perform shallow copy of 
(sub)array + explicit preview(container_type* array) : array(array), x(0), y(0), nx(array->size_x()), ny(array->size_y()) {} + explicit preview(container_type* array, size_t x, size_t y, size_t nx, size_t ny) : array(array), x(x), y(y), nx(nx), ny(ny) {} + preview& operator=(container_type* a) + { + array = a; + x = y = 0; + nx = a->nx; + ny = a->ny; + return *this; + } + + // global index bounds for iterators + size_t min_x() const { return x; } + size_t max_x() const { return x + nx; } + size_t min_y() const { return y; } + size_t max_y() const { return y + ny; } + + container_type* array; // underlying container + size_t x, y; // offset into array + size_t nx, ny; // dimensions of subarray +}; + +// generic read-only view into a rectangular subset of a 2D array +template +class const_view : public preview { +public: + typedef Container container_type; + typedef typename container_type::value_type value_type; + typedef typename zfp::internal::dim2::const_reference const_reference; + typedef typename zfp::internal::dim2::const_pointer const_pointer; + typedef typename zfp::internal::dim2::const_iterator const_iterator; + + // construction--perform shallow copy of (sub)array + const_view(container_type* array) : preview(array) {} + const_view(container_type* array, size_t x, size_t y, size_t nx, size_t ny) : preview(array, x, y, nx, ny) {} + + // dimensions of (sub)array + size_t size_x() const { return nx; } + size_t size_y() const { return ny; } + + // (i, j) inspector + const_reference operator()(size_t i, size_t j) const { return const_reference(this, x + i, y + j); } + + // random access iterators + const_iterator cbegin() const { return const_iterator(this, x, y); } + const_iterator cend() const { return const_iterator(this, x, y + ny); } + const_iterator begin() const { return cbegin(); } + const_iterator end() const { return cend(); } + +protected: + friend class zfp::internal::dim2::const_handle; + friend class zfp::internal::dim2::const_pointer; + friend 
class zfp::internal::dim2::const_iterator; + + using preview::min_x; + using preview::max_x; + using preview::min_y; + using preview::max_y; + using preview::array; + using preview::x; + using preview::y; + using preview::nx; + using preview::ny; + + // inspector + value_type get(size_t x, size_t y) const { return array->get(x, y); } +}; + +// generic read-write view into a rectangular subset of a 2D array +template +class view : public const_view { +public: + typedef Container container_type; + typedef typename container_type::value_type value_type; + typedef typename zfp::internal::dim2::const_reference const_reference; + typedef typename zfp::internal::dim2::const_pointer const_pointer; + typedef typename zfp::internal::dim2::const_iterator const_iterator; + typedef typename zfp::internal::dim2::reference reference; + typedef typename zfp::internal::dim2::pointer pointer; + typedef typename zfp::internal::dim2::iterator iterator; + + // construction--perform shallow copy of (sub)array + view(container_type* array) : const_view(array) {} + view(container_type* array, size_t x, size_t y, size_t nx, size_t ny) : const_view(array, x, y, nx, ny) {} + + // (i, j) inspector + const_reference operator()(size_t i, size_t j) const { return const_reference(this, x + i, y + j); } + + // (i, j) mutator + reference operator()(size_t i, size_t j) { return reference(this, x + i, y + j); } + + // random access iterators + const_iterator cbegin() const { return const_iterator(this, x, y); } + const_iterator cend() const { return const_iterator(this, x, y + ny); } + const_iterator begin() const { return cbegin(); } + const_iterator end() const { return cend(); } + iterator begin() { return iterator(this, x, y); } + iterator end() { return iterator(this, x, y + ny); } + +protected: + friend class zfp::internal::dim2::const_handle; + friend class zfp::internal::dim2::const_pointer; + friend class zfp::internal::dim2::const_iterator; + friend class zfp::internal::dim2::reference; + 
friend class zfp::internal::dim2::pointer; + friend class zfp::internal::dim2::iterator; + + using const_view::min_x; + using const_view::max_x; + using const_view::min_y; + using const_view::max_y; + using const_view::get; + using const_view::array; + using const_view::x; + using const_view::y; + using const_view::nx; + using const_view::ny; + + // mutator + void set(size_t x, size_t y, value_type val) { array->set(x, y, val); } + + // in-place updates + void add(size_t x, size_t y, value_type val) { array->add(x, y, val); } + void sub(size_t x, size_t y, value_type val) { array->sub(x, y, val); } + void mul(size_t x, size_t y, value_type val) { array->mul(x, y, val); } + void div(size_t x, size_t y, value_type val) { array->div(x, y, val); } +}; + +// flat view of 2D array (operator[] returns scalar) +template +class flat_view : public view { +public: + typedef Container container_type; + typedef typename container_type::value_type value_type; + typedef typename zfp::internal::dim2::const_reference const_reference; + typedef typename zfp::internal::dim2::const_pointer const_pointer; + typedef typename zfp::internal::dim2::reference reference; + typedef typename zfp::internal::dim2::pointer pointer; + + // construction--perform shallow copy of (sub)array + flat_view(container_type* array) : view(array) {} + flat_view(container_type* array, size_t x, size_t y, size_t nx, size_t ny) : view(array, x, y, nx, ny) {} + + // convert (i, j) index to flat index + size_t index(size_t i, size_t j) const { return i + nx * j; } + + // convert flat index to (i, j) index + void ij(size_t& i, size_t& j, size_t index) const + { + i = index % nx; index /= nx; + j = index; + } + + // flat index [] inspector + const_reference operator[](size_t index) const + { + size_t i, j; + ij(i, j, index); + return const_reference(this, x + i, y + j); + } + + // flat index [] mutator + reference operator[](size_t index) + { + size_t i, j; + ij(i, j, index); + return reference(this, x + i, y + j); 
+ } + + // (i, j) inspector + const_reference operator()(size_t i, size_t j) const { return const_reference(this, x + i, y + j); } + + // (i, j) mutator + reference operator()(size_t i, size_t j) { return reference(this, x + i, y + j); } + +protected: + friend class zfp::internal::dim2::const_handle; + friend class zfp::internal::dim2::const_pointer; + friend class zfp::internal::dim2::reference; + friend class zfp::internal::dim2::pointer; + + using view::array; + using view::x; + using view::y; + using view::nx; + using view::ny; + + // inspector + value_type get(size_t x, size_t y) const { return array->get(x, y); } + + // mutator + void set(size_t x, size_t y, value_type val) { array->set(x, y, val); } + + // in-place updates + void add(size_t x, size_t y, value_type val) { array->add(x, y, val); } + void sub(size_t x, size_t y, value_type val) { array->sub(x, y, val); } + void mul(size_t x, size_t y, value_type val) { array->mul(x, y, val); } + void div(size_t x, size_t y, value_type val) { array->div(x, y, val); } +}; + +// forward declaration of friends +template class nested_view1; +template class nested_view2; + +// nested view into a 1D rectangular subset of a 2D array +template +class nested_view1 : public preview { +public: + typedef Container container_type; + typedef typename container_type::value_type value_type; + typedef typename zfp::internal::dim2::const_reference const_reference; + typedef typename zfp::internal::dim2::const_pointer const_pointer; + typedef typename zfp::internal::dim2::reference reference; + typedef typename zfp::internal::dim2::pointer pointer; + + // dimensions of (sub)array + size_t size_x() const { return nx; } + + // [i] inspector and mutator + const_reference operator[](size_t index) const { return const_reference(this, x + index, y); } + reference operator[](size_t index) { return reference(this, x + index, y); } + + // (i) inspector and mutator + const_reference operator()(size_t i) const { return const_reference(this, 
x + i, y); } + reference operator()(size_t i) { return reference(this, x + i, y); } + +protected: + friend class zfp::internal::dim2::const_handle; + friend class zfp::internal::dim2::const_pointer; + friend class zfp::internal::dim2::reference; + friend class zfp::internal::dim2::pointer; + + using preview::array; + using preview::x; + using preview::y; + using preview::nx; + using preview::ny; + + // construction--perform shallow copy of (sub)array + friend class nested_view2; + explicit nested_view1(container_type* array) : preview(array) {} + explicit nested_view1(container_type* array, size_t x, size_t y, size_t nx, size_t ny) : preview(array, x, y, nx, ny) {} + + // inspector + value_type get(size_t x, size_t y) const { return array->get(x, y); } + + // mutator + void set(size_t x, size_t y, value_type val) { array->set(x, y, val); } + + // in-place updates + void add(size_t x, size_t y, value_type val) { array->add(x, y, val); } + void sub(size_t x, size_t y, value_type val) { array->sub(x, y, val); } + void mul(size_t x, size_t y, value_type val) { array->mul(x, y, val); } + void div(size_t x, size_t y, value_type val) { array->div(x, y, val); } +}; + +// nested view into a 2D rectangular subset of a 2D array +template +class nested_view2 : public preview { +public: + typedef Container container_type; + typedef typename container_type::value_type value_type; + typedef typename zfp::internal::dim2::const_reference const_reference; + typedef typename zfp::internal::dim2::const_pointer const_pointer; + typedef typename zfp::internal::dim2::reference reference; + typedef typename zfp::internal::dim2::pointer pointer; + + // construction--perform shallow copy of (sub)array + nested_view2(container_type* array) : preview(array) {} + nested_view2(container_type* array, size_t x, size_t y, size_t nx, size_t ny) : preview(array, x, y, nx, ny) {} + + // dimensions of (sub)array + size_t size_x() const { return nx; } + size_t size_y() const { return ny; } + + // 1D 
view + nested_view1 operator[](size_t index) const { return nested_view1(array, x, y + index, nx, 1); } + + // (i, j) inspector and mutator + const_reference operator()(size_t i, size_t j) const { return const_reference(this, x + i, y + j); } + reference operator()(size_t i, size_t j) { return reference(this, x + i, y + j); } + +protected: + friend class zfp::internal::dim2::const_handle; + friend class zfp::internal::dim2::const_pointer; + friend class zfp::internal::dim2::reference; + friend class zfp::internal::dim2::pointer; + + using preview::array; + using preview::x; + using preview::y; + using preview::nx; + using preview::ny; + + // inspector + value_type get(size_t x, size_t y) const { return array->get(x, y); } + + // mutator + void set(size_t x, size_t y, value_type val) { array->set(x, y, val); } + + // in-place updates + void add(size_t x, size_t y, value_type val) { array->add(x, y, val); } + void sub(size_t x, size_t y, value_type val) { array->sub(x, y, val); } + void mul(size_t x, size_t y, value_type val) { array->mul(x, y, val); } + void div(size_t x, size_t y, value_type val) { array->div(x, y, val); } +}; + +// thread-safe read-only view of 2D (sub)array with private cache +template +class private_const_view : public preview { +public: + typedef Container container_type; + typedef typename container_type::value_type value_type; + typedef typename container_type::store_type store_type; + typedef typename zfp::internal::dim2::const_reference const_reference; + typedef typename zfp::internal::dim2::const_pointer const_pointer; + typedef typename zfp::internal::dim2::const_iterator const_iterator; + + // construction--perform shallow copy of (sub)array + private_const_view(container_type* array, size_t cache_size = 0) : + preview(array), + cache(array->store, cache_size ? 
cache_size : array->cache.size()) + { + array->store.reference(); + } + private_const_view(container_type* array, size_t x, size_t y, size_t nx, size_t ny, size_t cache_size = 0) : + preview<Container>(array, x, y, nx, ny), + cache(array->store, cache_size ? cache_size : array->cache.size()) + { + array->store.reference(); + } + + // destructor + ~private_const_view() + { + array->store.unreference(); + } + + // dimensions of (sub)array + size_t size_x() const { return nx; } + size_t size_y() const { return ny; } + + // cache size in number of bytes + size_t cache_size() const { return cache.size(); } + + // set minimum cache size in bytes (array dimensions must be known) + void set_cache_size(size_t bytes) { cache.resize(bytes); } + + // empty cache without compressing modified cached blocks + void clear_cache() const { cache.clear(); } + + // (i, j) inspector + const_reference operator()(size_t i, size_t j) const { return const_reference(this, x + i, y + j); } + + // random access iterators + const_iterator cbegin() const { return const_iterator(this, x, y); } + const_iterator cend() const { return const_iterator(this, x, y + ny); } + const_iterator begin() const { return cbegin(); } + const_iterator end() const { return cend(); } + +protected: + friend class zfp::internal::dim2::const_handle<private_const_view>; + friend class zfp::internal::dim2::const_pointer<private_const_view>; + friend class zfp::internal::dim2::const_iterator<private_const_view>; + + using preview<Container>::min_x; + using preview<Container>::max_x; + using preview<Container>::min_y; + using preview<Container>::max_y; + using preview<Container>::array; + using preview<Container>::x; + using preview<Container>::y; + using preview<Container>::nx; + using preview<Container>::ny; + + // inspector + value_type get(size_t x, size_t y) const { return cache.get(x, y); } + + BlockCache2<value_type, store_type> cache; // cache of decompressed blocks +}; + +// thread-safe read-write view of private 2D (sub)array +template <class Container> +class private_view : public private_const_view<Container> {public: + typedef Container container_type; + typedef typename container_type::value_type value_type; + typedef typename zfp::internal::dim2::const_reference<private_view> const_reference; + typedef typename zfp::internal::dim2::const_pointer<private_view> const_pointer; + typedef typename zfp::internal::dim2::const_iterator<private_view> const_iterator; + typedef typename zfp::internal::dim2::reference<private_view> reference; + typedef typename zfp::internal::dim2::pointer<private_view> pointer; + typedef typename zfp::internal::dim2::iterator<private_view> iterator; + + // construction--perform shallow copy of (sub)array + private_view(container_type* array, size_t cache_size = 0) : private_const_view<Container>(array, cache_size) {} + private_view(container_type* array, size_t x, size_t y, size_t nx, size_t ny, size_t cache_size = 0) : private_const_view<Container>(array, x, y, nx, ny, cache_size) {} + + // partition view into count block-aligned pieces, with 0 <= index < count + void partition(size_t index, size_t count) + { + if (nx > ny) + partition(x, nx, index, count); + else + partition(y, ny, index, count); + } + + // flush cache by compressing all modified cached blocks + void flush_cache() const { cache.flush(); } + + // (i, j) inspector + const_reference operator()(size_t i, size_t j) const { return const_reference(this, x + i, y + j); } + + // (i, j) mutator + reference operator()(size_t i, size_t j) { return reference(this, x + i, y + j); } + + // random access iterators + const_iterator cbegin() const { return const_iterator(this, x, y); } + const_iterator cend() const { return const_iterator(this, x, y + ny); } + const_iterator begin() const { return cbegin(); } + const_iterator end() const { return cend(); } + iterator begin() { return iterator(this, x, y); } + iterator end() { return iterator(this, x, y + ny); } + +protected: + friend class zfp::internal::dim2::const_handle<private_view>; + friend class zfp::internal::dim2::const_pointer<private_view>; + friend class zfp::internal::dim2::const_iterator<private_view>; + friend class zfp::internal::dim2::reference<private_view>; + friend class zfp::internal::dim2::pointer<private_view>; + friend class zfp::internal::dim2::iterator<private_view>; + + using private_const_view<Container>::min_x; + using private_const_view<Container>::max_x; + using private_const_view<Container>::min_y; + using private_const_view<Container>::max_y; + using private_const_view<Container>::get; + using private_const_view<Container>::array; + using private_const_view<Container>::x; + using private_const_view<Container>::y; + using private_const_view<Container>::nx; + using private_const_view<Container>::ny; + using private_const_view<Container>::cache; + + // block-aligned partition of [offset, offset + size): index out of count + static void partition(size_t& offset, size_t& size, size_t index, size_t count) + { + size_t bmin = offset / 4; + size_t bmax = (offset + size + 3) / 4; + size_t xmin = std::max(offset + 0, 4 * (bmin + (bmax - bmin) * (index + 0) / count)); + size_t xmax = std::min(offset + size, 4 * (bmin + (bmax - bmin) * (index + 1) / count)); + offset = xmin; + size = xmax - xmin; + } + + // mutator + void set(size_t x, size_t y, value_type val) { cache.set(x, y, val); } + + // in-place updates + void add(size_t x, size_t y, value_type val) { cache.ref(x, y) += val; } + void sub(size_t x, size_t y, value_type val) { cache.ref(x, y) -= val; } + void mul(size_t x, size_t y, value_type val) { cache.ref(x, y) *= val; } + void div(size_t x, size_t y, value_type val) { cache.ref(x, y) /= val; } +}; + +} // dim2 +} // internal +} // zfp + +#endif diff --git a/include/zfp/internal/array/view3.hpp b/include/zfp/internal/array/view3.hpp new file mode 100644 index 00000000..24ceb8f4 --- /dev/null +++ b/include/zfp/internal/array/view3.hpp @@ -0,0 +1,584 @@ +#ifndef ZFP_VIEW3_HPP +#define ZFP_VIEW3_HPP + +// 3D array views + +namespace zfp { +namespace internal { +namespace dim3 { + +// abstract view of 3D array (base class) +template <class Container> +class preview {public: + typedef Container container_type; + typedef typename container_type::value_type value_type; + + // rate in bits per value + double rate() const { return array->rate(); } + + // dimensions of (sub)array + size_t size() const { return nx * ny * nz; } + + // local to global array indices + size_t global_x(size_t i) const { return x + i;
} + size_t global_y(size_t j) const { return y + j; } + size_t global_z(size_t k) const { return z + k; } + +protected: + // construction and assignment--perform shallow copy of (sub)array + explicit preview(container_type* array) : array(array), x(0), y(0), z(0), nx(array->size_x()), ny(array->size_y()), nz(array->size_z()) {} + explicit preview(container_type* array, size_t x, size_t y, size_t z, size_t nx, size_t ny, size_t nz) : array(array), x(x), y(y), z(z), nx(nx), ny(ny), nz(nz) {} + preview& operator=(container_type* a) + { + array = a; + x = y = z = 0; + nx = a->nx; + ny = a->ny; + nz = a->nz; + return *this; + } + + // global index bounds for iterators + size_t min_x() const { return x; } + size_t max_x() const { return x + nx; } + size_t min_y() const { return y; } + size_t max_y() const { return y + ny; } + size_t min_z() const { return z; } + size_t max_z() const { return z + nz; } + + container_type* array; // underlying container + size_t x, y, z; // offset into array + size_t nx, ny, nz; // dimensions of subarray +}; + +// generic read-only view into a rectangular subset of a 3D array +template +class const_view : public preview { +public: + typedef Container container_type; + typedef typename container_type::value_type value_type; + typedef typename zfp::internal::dim3::const_reference const_reference; + typedef typename zfp::internal::dim3::const_pointer const_pointer; + typedef typename zfp::internal::dim3::const_iterator const_iterator; + + // construction--perform shallow copy of (sub)array + const_view(container_type* array) : preview(array) {} + const_view(container_type* array, size_t x, size_t y, size_t z, size_t nx, size_t ny, size_t nz) : preview(array, x, y, z, nx, ny, nz) {} + + // dimensions of (sub)array + size_t size_x() const { return nx; } + size_t size_y() const { return ny; } + size_t size_z() const { return nz; } + + // (i, j, k) inspector + const_reference operator()(size_t i, size_t j, size_t k) const { return 
const_reference(this, x + i, y + j, z + k); } + + // random access iterators + const_iterator cbegin() const { return const_iterator(this, x, y, z); } + const_iterator cend() const { return const_iterator(this, x, y, z + nz); } + const_iterator begin() const { return cbegin(); } + const_iterator end() const { return cend(); } + +protected: + friend class zfp::internal::dim3::const_handle; + friend class zfp::internal::dim3::const_pointer; + friend class zfp::internal::dim3::const_iterator; + + using preview::min_x; + using preview::max_x; + using preview::min_y; + using preview::max_y; + using preview::min_z; + using preview::max_z; + using preview::array; + using preview::x; + using preview::y; + using preview::z; + using preview::nx; + using preview::ny; + using preview::nz; + + // inspector + value_type get(size_t x, size_t y, size_t z) const { return array->get(x, y, z); } +}; + +// generic read-write view into a rectangular subset of a 3D array +template +class view : public const_view { +public: + typedef Container container_type; + typedef typename container_type::value_type value_type; + typedef typename zfp::internal::dim3::const_reference const_reference; + typedef typename zfp::internal::dim3::const_pointer const_pointer; + typedef typename zfp::internal::dim3::const_iterator const_iterator; + typedef typename zfp::internal::dim3::reference reference; + typedef typename zfp::internal::dim3::pointer pointer; + typedef typename zfp::internal::dim3::iterator iterator; + + // construction--perform shallow copy of (sub)array + view(container_type* array) : const_view(array) {} + view(container_type* array, size_t x, size_t y, size_t z, size_t nx, size_t ny, size_t nz) : const_view(array, x, y, z, nx, ny, nz) {} + + // (i, j, k) inspector + const_reference operator()(size_t i, size_t j, size_t k) const { return const_reference(this, x + i, y + j, z + k); } + + // (i, j, k) mutator + reference operator()(size_t i, size_t j, size_t k) { return reference(this, x 
+ i, y + j, z + k); } + + // random access iterators + const_iterator cbegin() const { return const_iterator(this, x, y, z); } + const_iterator cend() const { return const_iterator(this, x, y, z + nz); } + const_iterator begin() const { return cbegin(); } + const_iterator end() const { return cend(); } + iterator begin() { return iterator(this, x, y, z); } + iterator end() { return iterator(this, x, y, z + nz); } + +protected: + friend class zfp::internal::dim3::const_handle; + friend class zfp::internal::dim3::const_pointer; + friend class zfp::internal::dim3::const_iterator; + friend class zfp::internal::dim3::reference; + friend class zfp::internal::dim3::pointer; + friend class zfp::internal::dim3::iterator; + + using const_view::min_x; + using const_view::max_x; + using const_view::min_y; + using const_view::max_y; + using const_view::min_z; + using const_view::max_z; + using const_view::get; + using const_view::array; + using const_view::x; + using const_view::y; + using const_view::z; + using const_view::nx; + using const_view::ny; + using const_view::nz; + + // mutator + void set(size_t x, size_t y, size_t z, value_type val) { array->set(x, y, z, val); } + + // in-place updates + void add(size_t x, size_t y, size_t z, value_type val) { array->add(x, y, z, val); } + void sub(size_t x, size_t y, size_t z, value_type val) { array->sub(x, y, z, val); } + void mul(size_t x, size_t y, size_t z, value_type val) { array->mul(x, y, z, val); } + void div(size_t x, size_t y, size_t z, value_type val) { array->div(x, y, z, val); } +}; + +// flat view of 3D array (operator[] returns scalar) +template +class flat_view : public view { +public: + typedef Container container_type; + typedef typename container_type::value_type value_type; + typedef typename zfp::internal::dim3::const_reference const_reference; + typedef typename zfp::internal::dim3::const_pointer const_pointer; + typedef typename zfp::internal::dim3::reference reference; + typedef typename 
zfp::internal::dim3::pointer pointer; + + // construction--perform shallow copy of (sub)array + flat_view(container_type* array) : view(array) {} + flat_view(container_type* array, size_t x, size_t y, size_t z, size_t nx, size_t ny, size_t nz) : view(array, x, y, z, nx, ny, nz) {} + + // convert (i, j, k) index to flat index + size_t index(size_t i, size_t j, size_t k) const { return i + nx * (j + ny * k); } + + // convert flat index to (i, j, k) index + void ijk(size_t& i, size_t& j, size_t& k, size_t index) const + { + i = index % nx; index /= nx; + j = index % ny; index /= ny; + k = index; + } + + // flat index [] inspector + const_reference operator[](size_t index) const + { + size_t i, j, k; + ijk(i, j, k, index); + return const_reference(this, x + i, y + j, z + k); + } + + // flat index [] mutator + reference operator[](size_t index) + { + size_t i, j, k; + ijk(i, j, k, index); + return reference(this, x + i, y + j, z + k); + } + + // (i, j, k) inspector + const_reference operator()(size_t i, size_t j, size_t k) const { return const_reference(this, x + i, y + j, z + k); } + + // (i, j, k) mutator + reference operator()(size_t i, size_t j, size_t k) { return reference(this, x + i, y + j, z + k); } + +protected: + friend class zfp::internal::dim3::const_handle; + friend class zfp::internal::dim3::const_pointer; + friend class zfp::internal::dim3::reference; + friend class zfp::internal::dim3::pointer; + + using view::array; + using view::x; + using view::y; + using view::z; + using view::nx; + using view::ny; + using view::nz; + + // inspector + value_type get(size_t x, size_t y, size_t z) const { return array->get(x, y, z); } + + // mutator + void set(size_t x, size_t y, size_t z, value_type val) { array->set(x, y, z, val); } + + // in-place updates + void add(size_t x, size_t y, size_t z, value_type val) { array->add(x, y, z, val); } + void sub(size_t x, size_t y, size_t z, value_type val) { array->sub(x, y, z, val); } + void mul(size_t x, size_t y, size_t z, 
value_type val) { array->mul(x, y, z, val); } + void div(size_t x, size_t y, size_t z, value_type val) { array->div(x, y, z, val); } +}; + +// forward declaration of friends +template class nested_view1; +template class nested_view2; +template class nested_view3; + +// nested view into a 1D rectangular subset of a 3D array +template +class nested_view1 : public preview { +public: + typedef Container container_type; + typedef typename container_type::value_type value_type; + typedef typename zfp::internal::dim3::const_reference const_reference; + typedef typename zfp::internal::dim3::const_pointer const_pointer; + typedef typename zfp::internal::dim3::reference reference; + typedef typename zfp::internal::dim3::pointer pointer; + + // dimensions of (sub)array + size_t size_x() const { return nx; } + + // [i] inspector and mutator + const_reference operator[](size_t index) const { return const_reference(this, x + index, y, z); } + reference operator[](size_t index) { return reference(this, x + index, y, z); } + + // (i) inspector and mutator + const_reference operator()(size_t i) const { return const_reference(this, x + i, y, z); } + reference operator()(size_t i) { return reference(this, x + i, y, z); } + +protected: + friend class zfp::internal::dim3::const_handle; + friend class zfp::internal::dim3::const_pointer; + friend class zfp::internal::dim3::reference; + friend class zfp::internal::dim3::pointer; + + using preview::array; + using preview::x; + using preview::y; + using preview::z; + using preview::nx; + using preview::ny; + using preview::nz; + + // construction--perform shallow copy of (sub)array + friend class nested_view2; + explicit nested_view1(container_type* array) : preview(array) {} + explicit nested_view1(container_type* array, size_t x, size_t y, size_t z, size_t nx, size_t ny, size_t nz) : preview(array, x, y, z, nx, ny, nz) {} + + // inspector + value_type get(size_t x, size_t y, size_t z) const { return array->get(x, y, z); } + + // mutator + 
void set(size_t x, size_t y, size_t z, value_type val) { array->set(x, y, z, val); } + + // in-place updates + void add(size_t x, size_t y, size_t z, value_type val) { array->add(x, y, z, val); } + void sub(size_t x, size_t y, size_t z, value_type val) { array->sub(x, y, z, val); } + void mul(size_t x, size_t y, size_t z, value_type val) { array->mul(x, y, z, val); } + void div(size_t x, size_t y, size_t z, value_type val) { array->div(x, y, z, val); } +}; + +// nested view into a 2D rectangular subset of a 3D array +template +class nested_view2 : public preview { +public: + typedef Container container_type; + typedef typename container_type::value_type value_type; + typedef typename zfp::internal::dim3::const_reference const_reference; + typedef typename zfp::internal::dim3::const_pointer const_pointer; + typedef typename zfp::internal::dim3::reference reference; + typedef typename zfp::internal::dim3::pointer pointer; + + // construction--perform shallow copy of (sub)array + nested_view2(container_type* array) : preview(array) {} + nested_view2(container_type* array, size_t x, size_t y, size_t z, size_t nx, size_t ny, size_t nz) : preview(array, x, y, z, nx, ny, nz) {} + + // dimensions of (sub)array + size_t size_x() const { return nx; } + size_t size_y() const { return ny; } + + // 1D view + nested_view1 operator[](size_t index) const { return nested_view1(array, x, y + index, z, nx, 1, 1); } + + // (i, j) inspector and mutator + const_reference operator()(size_t i, size_t j) const { return const_reference(this, x + i, y + j, z); } + reference operator()(size_t i, size_t j) { return reference(this, x + i, y + j, z); } + +protected: + friend class zfp::internal::dim3::const_handle; + friend class zfp::internal::dim3::const_pointer; + friend class zfp::internal::dim3::reference; + friend class zfp::internal::dim3::pointer; + + using preview::array; + using preview::x; + using preview::y; + using preview::z; + using preview::nx; + using preview::ny; + using 
preview::nz; + + // inspector + value_type get(size_t x, size_t y, size_t z) const { return array->get(x, y, z); } + + // mutator + void set(size_t x, size_t y, size_t z, value_type val) { array->set(x, y, z, val); } + + // in-place updates + void add(size_t x, size_t y, size_t z, value_type val) { array->add(x, y, z, val); } + void sub(size_t x, size_t y, size_t z, value_type val) { array->sub(x, y, z, val); } + void mul(size_t x, size_t y, size_t z, value_type val) { array->mul(x, y, z, val); } + void div(size_t x, size_t y, size_t z, value_type val) { array->div(x, y, z, val); } +}; + +// nested view into a 3D rectangular subset of a 3D array +template +class nested_view3 : public preview { +public: + typedef Container container_type; + typedef typename container_type::value_type value_type; + typedef typename zfp::internal::dim3::const_reference const_reference; + typedef typename zfp::internal::dim3::const_pointer const_pointer; + typedef typename zfp::internal::dim3::reference reference; + typedef typename zfp::internal::dim3::pointer pointer; + + // construction--perform shallow copy of (sub)array + nested_view3(container_type* array) : preview(array) {} + nested_view3(container_type* array, size_t x, size_t y, size_t z, size_t nx, size_t ny, size_t nz) : preview(array, x, y, z, nx, ny, nz) {} + + // dimensions of (sub)array + size_t size_x() const { return nx; } + size_t size_y() const { return ny; } + size_t size_z() const { return nz; } + + // 2D view + nested_view2 operator[](size_t index) const { return nested_view2(array, x, y, z + index, nx, ny, 1); } + + // (i, j, k) inspector and mutator + const_reference operator()(size_t i, size_t j, size_t k) const { return const_reference(this, x + i, y + j, z + k); } + reference operator()(size_t i, size_t j, size_t k) { return reference(this, x + i, y + j, z + k); } + +protected: + friend class zfp::internal::dim3::const_handle; + friend class zfp::internal::dim3::const_pointer; + friend class 
zfp::internal::dim3::reference; + friend class zfp::internal::dim3::pointer; + + using preview::array; + using preview::x; + using preview::y; + using preview::z; + using preview::nx; + using preview::ny; + using preview::nz; + + // inspector + value_type get(size_t x, size_t y, size_t z) const { return array->get(x, y, z); } + + // mutator + void set(size_t x, size_t y, size_t z, value_type val) { array->set(x, y, z, val); } + + // in-place updates + void add(size_t x, size_t y, size_t z, value_type val) { array->add(x, y, z, val); } + void sub(size_t x, size_t y, size_t z, value_type val) { array->sub(x, y, z, val); } + void mul(size_t x, size_t y, size_t z, value_type val) { array->mul(x, y, z, val); } + void div(size_t x, size_t y, size_t z, value_type val) { array->div(x, y, z, val); } +}; + +// thread-safe read-only view of 3D (sub)array with private cache +template +class private_const_view : public preview { +public: + typedef Container container_type; + typedef typename container_type::value_type value_type; + typedef typename container_type::store_type store_type; + typedef typename zfp::internal::dim3::const_reference const_reference; + typedef typename zfp::internal::dim3::const_pointer const_pointer; + typedef typename zfp::internal::dim3::const_iterator const_iterator; + + // construction--perform shallow copy of (sub)array + private_const_view(container_type* array, size_t cache_size = 0) : + preview(array), + cache(array->store, cache_size ? cache_size : array->cache.size()) + { + array->store.reference(); + } + private_const_view(container_type* array, size_t x, size_t y, size_t z, size_t nx, size_t ny, size_t nz, size_t cache_size = 0) : + preview(array, x, y, z, nx, ny, nz), + cache(array->store, cache_size ? 
cache_size : array->cache.size()) + { + array->store.reference(); + } + + // destructor + ~private_const_view() + { + array->store.unreference(); + } + + // dimensions of (sub)array + size_t size_x() const { return nx; } + size_t size_y() const { return ny; } + size_t size_z() const { return nz; } + + // cache size in number of bytes + size_t cache_size() const { return cache.size(); } + + // set minimum cache size in bytes (array dimensions must be known) + void set_cache_size(size_t bytes) { cache.resize(bytes); } + + // empty cache without compressing modified cached blocks + void clear_cache() const { cache.clear(); } + + // (i, j, k) inspector + const_reference operator()(size_t i, size_t j, size_t k) const { return const_reference(this, x + i, y + j, z + k); } + + // random access iterators + const_iterator cbegin() const { return const_iterator(this, x, y, z); } + const_iterator cend() const { return const_iterator(this, x, y, z + nz); } + const_iterator begin() const { return cbegin(); } + const_iterator end() const { return cend(); } + +protected: + friend class zfp::internal::dim3::const_handle; + friend class zfp::internal::dim3::const_pointer; + friend class zfp::internal::dim3::const_iterator; + + using preview::min_x; + using preview::max_x; + using preview::min_y; + using preview::max_y; + using preview::min_z; + using preview::max_z; + using preview::array; + using preview::x; + using preview::y; + using preview::z; + using preview::nx; + using preview::ny; + using preview::nz; + + // inspector + value_type get(size_t x, size_t y, size_t z) const { return cache.get(x, y, z); } + + BlockCache3 cache; // cache of decompressed blocks +}; + +// thread-safe read-write view of private 3D (sub)array +template +class private_view : public private_const_view { +public: + typedef Container container_type; + typedef typename container_type::value_type value_type; + typedef typename zfp::internal::dim3::const_reference const_reference; + typedef typename 
zfp::internal::dim3::const_pointer<private_view> const_pointer; + typedef typename zfp::internal::dim3::const_iterator<private_view> const_iterator; + typedef typename zfp::internal::dim3::reference<private_view> reference; + typedef typename zfp::internal::dim3::pointer<private_view> pointer; + typedef typename zfp::internal::dim3::iterator<private_view> iterator; + + // construction--perform shallow copy of (sub)array + private_view(container_type* array, size_t cache_size = 0) : private_const_view<Container>(array, cache_size) {} + private_view(container_type* array, size_t x, size_t y, size_t z, size_t nx, size_t ny, size_t nz, size_t cache_size = 0) : private_const_view<Container>(array, x, y, z, nx, ny, nz, cache_size) {} + + // partition view into count block-aligned pieces, with 0 <= index < count + void partition(size_t index, size_t count) + { + if (nx > std::max(ny, nz)) + partition(x, nx, index, count); + else if (ny > std::max(nx, nz)) + partition(y, ny, index, count); + else + partition(z, nz, index, count); + } + + // flush cache by compressing all modified cached blocks + void flush_cache() const { cache.flush(); } + + // (i, j, k) inspector + const_reference operator()(size_t i, size_t j, size_t k) const { return const_reference(this, x + i, y + j, z + k); } + + // (i, j, k) mutator + reference operator()(size_t i, size_t j, size_t k) { return reference(this, x + i, y + j, z + k); } + + // random access iterators + const_iterator cbegin() const { return const_iterator(this, x, y, z); } + const_iterator cend() const { return const_iterator(this, x, y, z + nz); } + const_iterator begin() const { return cbegin(); } + const_iterator end() const { return cend(); } + iterator begin() { return iterator(this, x, y, z); } + iterator end() { return iterator(this, x, y, z + nz); } + +protected: + friend class zfp::internal::dim3::const_handle<private_view>; + friend class zfp::internal::dim3::const_pointer<private_view>; + friend class zfp::internal::dim3::const_iterator<private_view>; + friend class zfp::internal::dim3::reference<private_view>; + friend class zfp::internal::dim3::pointer<private_view>; + friend class zfp::internal::dim3::iterator<private_view>; + + using private_const_view<Container>::min_x; + using private_const_view<Container>::max_x; + using private_const_view<Container>::min_y; + using private_const_view<Container>::max_y; + using private_const_view<Container>::min_z; + using private_const_view<Container>::max_z; + using private_const_view<Container>::get; + using private_const_view<Container>::array; + using private_const_view<Container>::x; + using private_const_view<Container>::y; + using private_const_view<Container>::z; + using private_const_view<Container>::nx; + using private_const_view<Container>::ny; + using private_const_view<Container>::nz; + using private_const_view<Container>::cache; + + // block-aligned partition of [offset, offset + size): index out of count + static void partition(size_t& offset, size_t& size, size_t index, size_t count) + { + size_t bmin = offset / 4; + size_t bmax = (offset + size + 3) / 4; + size_t xmin = std::max(offset + 0, 4 * (bmin + (bmax - bmin) * (index + 0) / count)); + size_t xmax = std::min(offset + size, 4 * (bmin + (bmax - bmin) * (index + 1) / count)); + offset = xmin; + size = xmax - xmin; + } + + // mutator + void set(size_t x, size_t y, size_t z, value_type val) { cache.set(x, y, z, val); } + + // in-place updates + void add(size_t x, size_t y, size_t z, value_type val) { cache.ref(x, y, z) += val; } + void sub(size_t x, size_t y, size_t z, value_type val) { cache.ref(x, y, z) -= val; } + void mul(size_t x, size_t y, size_t z, value_type val) { cache.ref(x, y, z) *= val; } + void div(size_t x, size_t y, size_t z, value_type val) { cache.ref(x, y, z) /= val; } +}; + +} // dim3 +} // internal
} // zfp + +#endif diff --git a/include/zfp/internal/array/view4.hpp b/include/zfp/internal/array/view4.hpp new file mode 100644 index 00000000..5888a305 --- /dev/null +++ b/include/zfp/internal/array/view4.hpp @@ -0,0 +1,679 @@ +#ifndef ZFP_VIEW4_HPP +#define ZFP_VIEW4_HPP + +// 4D array views + +namespace zfp { +namespace internal { +namespace dim4 { + +// abstract view of 4D array (base class) +template <class Container> +class preview {public: + typedef Container container_type; + typedef typename
container_type::value_type value_type; + + // rate in bits per value + double rate() const { return array->rate(); } + + // dimensions of (sub)array + size_t size() const { return nx * ny * nz * nw; } + + // local to global array indices + size_t global_x(size_t i) const { return x + i; } + size_t global_y(size_t j) const { return y + j; } + size_t global_z(size_t k) const { return z + k; } + size_t global_w(size_t l) const { return w + l; } + +protected: + // construction and assignment--perform shallow copy of (sub)array + explicit preview(container_type* array) : array(array), x(0), y(0), z(0), w(0), nx(array->size_x()), ny(array->size_y()), nz(array->size_z()), nw(array->size_w()) {} + explicit preview(container_type* array, size_t x, size_t y, size_t z, size_t w, size_t nx, size_t ny, size_t nz, size_t nw) : array(array), x(x), y(y), z(z), w(w), nx(nx), ny(ny), nz(nz), nw(nw) {} + preview& operator=(container_type* a) + { + array = a; + x = y = z = w = 0; + nx = a->nx; + ny = a->ny; + nz = a->nz; + nw = a->nw; + return *this; + } + + // global index bounds for iterators + size_t min_x() const { return x; } + size_t max_x() const { return x + nx; } + size_t min_y() const { return y; } + size_t max_y() const { return y + ny; } + size_t min_z() const { return z; } + size_t max_z() const { return z + nz; } + size_t min_w() const { return w; } + size_t max_w() const { return w + nw; } + + container_type* array; // underlying container + size_t x, y, z, w; // offset into array + size_t nx, ny, nz, nw; // dimensions of subarray +}; + +// generic read-only view into a rectangular subset of a 4D array +template +class const_view : public preview { +public: + typedef Container container_type; + typedef typename container_type::value_type value_type; + typedef typename zfp::internal::dim4::const_reference const_reference; + typedef typename zfp::internal::dim4::const_pointer const_pointer; + typedef typename zfp::internal::dim4::const_iterator const_iterator; + + // 
construction--perform shallow copy of (sub)array + const_view(container_type* array) : preview(array) {} + const_view(container_type* array, size_t x, size_t y, size_t z, size_t w, size_t nx, size_t ny, size_t nz, size_t nw) : preview(array, x, y, z, w, nx, ny, nz, nw) {} + + // dimensions of (sub)array + size_t size_x() const { return nx; } + size_t size_y() const { return ny; } + size_t size_z() const { return nz; } + size_t size_w() const { return nw; } + + // (i, j, k, l) inspector + const_reference operator()(size_t i, size_t j, size_t k, size_t l) const { return const_reference(this, x + i, y + j, z + k, w + l); } + + // random access iterators + const_iterator cbegin() const { return const_iterator(this, x, y, z, w); } + const_iterator cend() const { return const_iterator(this, x, y, z, w + nw); } + const_iterator begin() const { return cbegin(); } + const_iterator end() const { return cend(); } + +protected: + friend class zfp::internal::dim4::const_handle; + friend class zfp::internal::dim4::const_pointer; + friend class zfp::internal::dim4::const_iterator; + + using preview::min_x; + using preview::max_x; + using preview::min_y; + using preview::max_y; + using preview::min_z; + using preview::max_z; + using preview::min_w; + using preview::max_w; + using preview::array; + using preview::x; + using preview::y; + using preview::z; + using preview::w; + using preview::nx; + using preview::ny; + using preview::nz; + using preview::nw; + + // inspector + value_type get(size_t x, size_t y, size_t z, size_t w) const { return array->get(x, y, z, w); } +}; + +// generic read-write view into a rectangular subset of a 4D array +template +class view : public const_view { +public: + typedef Container container_type; + typedef typename container_type::value_type value_type; + typedef typename zfp::internal::dim4::const_reference const_reference; + typedef typename zfp::internal::dim4::const_pointer const_pointer; + typedef typename zfp::internal::dim4::const_iterator 
const_iterator; + typedef typename zfp::internal::dim4::reference reference; + typedef typename zfp::internal::dim4::pointer pointer; + typedef typename zfp::internal::dim4::iterator iterator; + + // construction--perform shallow copy of (sub)array + view(container_type* array) : const_view(array) {} + view(container_type* array, size_t x, size_t y, size_t z, size_t w, size_t nx, size_t ny, size_t nz, size_t nw) : const_view(array, x, y, z, w, nx, ny, nz, nw) {} + + // (i, j, k, l) inspector + const_reference operator()(size_t i, size_t j, size_t k, size_t l) const { return const_reference(this, x + i, y + j, z + k, w + l); } + + // (i, j, k, l) mutator + reference operator()(size_t i, size_t j, size_t k, size_t l) { return reference(this, x + i, y + j, z + k, w + l); } + + // random access iterators + const_iterator cbegin() const { return const_iterator(this, x, y, z, w); } + const_iterator cend() const { return const_iterator(this, x, y, z, w + nw); } + const_iterator begin() const { return cbegin(); } + const_iterator end() const { return cend(); } + iterator begin() { return iterator(this, x, y, z, w); } + iterator end() { return iterator(this, x, y, z, w + nw); } + +protected: + friend class zfp::internal::dim4::const_handle; + friend class zfp::internal::dim4::const_pointer; + friend class zfp::internal::dim4::const_iterator; + friend class zfp::internal::dim4::reference; + friend class zfp::internal::dim4::pointer; + friend class zfp::internal::dim4::iterator; + + using const_view::min_x; + using const_view::max_x; + using const_view::min_y; + using const_view::max_y; + using const_view::min_z; + using const_view::max_z; + using const_view::min_w; + using const_view::max_w; + using const_view::get; + using const_view::array; + using const_view::x; + using const_view::y; + using const_view::z; + using const_view::w; + using const_view::nx; + using const_view::ny; + using const_view::nz; + using const_view::nw; + + // mutator + void set(size_t x, size_t y, 
size_t z, size_t w, value_type val) { array->set(x, y, z, w, val); } + + // in-place updates + void add(size_t x, size_t y, size_t z, size_t w, value_type val) { array->add(x, y, z, w, val); } + void sub(size_t x, size_t y, size_t z, size_t w, value_type val) { array->sub(x, y, z, w, val); } + void mul(size_t x, size_t y, size_t z, size_t w, value_type val) { array->mul(x, y, z, w, val); } + void div(size_t x, size_t y, size_t z, size_t w, value_type val) { array->div(x, y, z, w, val); } +}; + +// flat view of 4D array (operator[] returns scalar) +template +class flat_view : public view { +public: + typedef Container container_type; + typedef typename container_type::value_type value_type; + typedef typename zfp::internal::dim4::const_reference const_reference; + typedef typename zfp::internal::dim4::const_pointer const_pointer; + typedef typename zfp::internal::dim4::reference reference; + typedef typename zfp::internal::dim4::pointer pointer; + + // construction--perform shallow copy of (sub)array + flat_view(container_type* array) : view(array) {} + flat_view(container_type* array, size_t x, size_t y, size_t z, size_t w, size_t nx, size_t ny, size_t nz, size_t nw) : view(array, x, y, z, w, nx, ny, nz, nw) {} + + // convert (i, j, k, l) index to flat index + size_t index(size_t i, size_t j, size_t k, size_t l) const { return i + nx * (j + ny * (k + nz * l)); } + + // convert flat index to (i, j, k, l) index + void ijkl(size_t& i, size_t& j, size_t& k, size_t& l, size_t index) const + { + i = index % nx; index /= nx; + j = index % ny; index /= ny; + k = index % nz; index /= nz; + l = index; + } + + // flat index [] inspector + const_reference operator[](size_t index) const + { + size_t i, j, k, l; + ijkl(i, j, k, l, index); + return const_reference(this, x + i, y + j, z + k, w + l); + } + + // flat index [] mutator + reference operator[](size_t index) + { + size_t i, j, k, l; + ijkl(i, j, k, l, index); + return reference(this, x + i, y + j, z + k, w + l); + } + + 
// (i, j, k, l) inspector + const_reference operator()(size_t i, size_t j, size_t k, size_t l) const { return const_reference(this, x + i, y + j, z + k, w + l); } + + // (i, j, k, l) mutator + reference operator()(size_t i, size_t j, size_t k, size_t l) { return reference(this, x + i, y + j, z + k, w + l); } + +protected: + friend class zfp::internal::dim4::const_handle; + friend class zfp::internal::dim4::const_pointer; + friend class zfp::internal::dim4::reference; + friend class zfp::internal::dim4::pointer; + + using view::array; + using view::x; + using view::y; + using view::z; + using view::w; + using view::nx; + using view::ny; + using view::nz; + using view::nw; + + // inspector + value_type get(size_t x, size_t y, size_t z, size_t w) const { return array->get(x, y, z, w); } + + // mutator + void set(size_t x, size_t y, size_t z, size_t w, value_type val) { array->set(x, y, z, w, val); } + + // in-place updates + void add(size_t x, size_t y, size_t z, size_t w, value_type val) { array->add(x, y, z, w, val); } + void sub(size_t x, size_t y, size_t z, size_t w, value_type val) { array->sub(x, y, z, w, val); } + void mul(size_t x, size_t y, size_t z, size_t w, value_type val) { array->mul(x, y, z, w, val); } + void div(size_t x, size_t y, size_t z, size_t w, value_type val) { array->div(x, y, z, w, val); } +}; + +// forward declaration of friends +template class nested_view1; +template class nested_view2; +template class nested_view3; +template class nested_view4; + +// nested view into a 1D rectangular subset of a 4D array +template +class nested_view1 : public preview { +public: + typedef Container container_type; + typedef typename container_type::value_type value_type; + typedef typename zfp::internal::dim4::const_reference const_reference; + typedef typename zfp::internal::dim4::const_pointer const_pointer; + typedef typename zfp::internal::dim4::reference reference; + typedef typename zfp::internal::dim4::pointer pointer; + + // dimensions of (sub)array 
+ size_t size_x() const { return nx; } + + // [i] inspector and mutator + const_reference operator[](size_t index) const { return const_reference(this, x + index, y, z, w); } + reference operator[](size_t index) { return reference(this, x + index, y, z, w); } + + // (i) inspector and mutator + const_reference operator()(size_t i) const { return const_reference(this, x + i, y, z, w); } + reference operator()(size_t i) { return reference(this, x + i, y, z, w); } + +protected: + friend class zfp::internal::dim4::const_handle; + friend class zfp::internal::dim4::const_pointer; + friend class zfp::internal::dim4::reference; + friend class zfp::internal::dim4::pointer; + + using preview::array; + using preview::x; + using preview::y; + using preview::z; + using preview::w; + using preview::nx; + using preview::ny; + using preview::nz; + using preview::nw; + + // construction--perform shallow copy of (sub)array + friend class nested_view2; + explicit nested_view1(container_type* array) : preview(array) {} + explicit nested_view1(container_type* array, size_t x, size_t y, size_t z, size_t w, size_t nx, size_t ny, size_t nz, size_t nw) : preview(array, x, y, z, w, nx, ny, nz, nw) {} + + // inspector + value_type get(size_t x, size_t y, size_t z, size_t w) const { return array->get(x, y, z, w); } + + // mutator + void set(size_t x, size_t y, size_t z, size_t w, value_type val) { array->set(x, y, z, w, val); } + + // in-place updates + void add(size_t x, size_t y, size_t z, size_t w, value_type val) { array->add(x, y, z, w, val); } + void sub(size_t x, size_t y, size_t z, size_t w, value_type val) { array->sub(x, y, z, w, val); } + void mul(size_t x, size_t y, size_t z, size_t w, value_type val) { array->mul(x, y, z, w, val); } + void div(size_t x, size_t y, size_t z, size_t w, value_type val) { array->div(x, y, z, w, val); } +}; + +// nested view into a 2D rectangular subset of a 4D array +template +class nested_view2 : public preview { +public: + typedef Container 
container_type; + typedef typename container_type::value_type value_type; + typedef typename zfp::internal::dim4::const_reference const_reference; + typedef typename zfp::internal::dim4::const_pointer const_pointer; + typedef typename zfp::internal::dim4::reference reference; + typedef typename zfp::internal::dim4::pointer pointer; + + // construction--perform shallow copy of (sub)array + nested_view2(container_type* array) : preview(array) {} + nested_view2(container_type* array, size_t x, size_t y, size_t z, size_t w, size_t nx, size_t ny, size_t nz, size_t nw) : preview(array, x, y, z, w, nx, ny, nz, nw) {} + + // dimensions of (sub)array + size_t size_x() const { return nx; } + size_t size_y() const { return ny; } + + // 1D view + nested_view1 operator[](size_t index) const { return nested_view1(array, x, y + index, z, w, nx, 1, 1, 1); } + + // (i, j) inspector and mutator + const_reference operator()(size_t i, size_t j) const { return const_reference(this, x + i, y + j, z, w); } + reference operator()(size_t i, size_t j) { return reference(this, x + i, y + j, z, w); } + +protected: + friend class zfp::internal::dim4::const_handle; + friend class zfp::internal::dim4::const_pointer; + friend class zfp::internal::dim4::reference; + friend class zfp::internal::dim4::pointer; + + using preview::array; + using preview::x; + using preview::y; + using preview::z; + using preview::w; + using preview::nx; + using preview::ny; + using preview::nz; + using preview::nw; + + // inspector + value_type get(size_t x, size_t y, size_t z, size_t w) const { return array->get(x, y, z, w); } + + // mutator + void set(size_t x, size_t y, size_t z, size_t w, value_type val) { array->set(x, y, z, w, val); } + + // in-place updates + void add(size_t x, size_t y, size_t z, size_t w, value_type val) { array->add(x, y, z, w, val); } + void sub(size_t x, size_t y, size_t z, size_t w, value_type val) { array->sub(x, y, z, w, val); } + void mul(size_t x, size_t y, size_t z, size_t w, 
value_type val) { array->mul(x, y, z, w, val); } + void div(size_t x, size_t y, size_t z, size_t w, value_type val) { array->div(x, y, z, w, val); } +}; + +// nested view into a 3D rectangular subset of a 4D array +template +class nested_view3 : public preview { +public: + typedef Container container_type; + typedef typename container_type::value_type value_type; + typedef typename zfp::internal::dim4::const_reference const_reference; + typedef typename zfp::internal::dim4::const_pointer const_pointer; + typedef typename zfp::internal::dim4::reference reference; + typedef typename zfp::internal::dim4::pointer pointer; + + // construction--perform shallow copy of (sub)array + nested_view3(container_type* array) : preview(array) {} + nested_view3(container_type* array, size_t x, size_t y, size_t z, size_t w, size_t nx, size_t ny, size_t nz, size_t nw) : preview(array, x, y, z, w, nx, ny, nz, nw) {} + + // dimensions of (sub)array + size_t size_x() const { return nx; } + size_t size_y() const { return ny; } + size_t size_z() const { return nz; } + + // 2D view + nested_view2 operator[](size_t index) const { return nested_view2(array, x, y, z + index, w, nx, ny, 1, 1); } + + // (i, j, k) inspector and mutator + const_reference operator()(size_t i, size_t j, size_t k) const { return const_reference(this, x + i, y + j, z + k, w); } + reference operator()(size_t i, size_t j, size_t k) { return reference(this, x + i, y + j, z + k, w); } + +protected: + friend class zfp::internal::dim4::const_handle; + friend class zfp::internal::dim4::const_pointer; + friend class zfp::internal::dim4::reference; + friend class zfp::internal::dim4::pointer; + + using preview::array; + using preview::x; + using preview::y; + using preview::z; + using preview::w; + using preview::nx; + using preview::ny; + using preview::nz; + using preview::nw; + + // inspector + value_type get(size_t x, size_t y, size_t z, size_t w) const { return array->get(x, y, z, w); } + + // mutator + void set(size_t 
x, size_t y, size_t z, size_t w, value_type val) { array->set(x, y, z, w, val); } + + // in-place updates + void add(size_t x, size_t y, size_t z, size_t w, value_type val) { array->add(x, y, z, w, val); } + void sub(size_t x, size_t y, size_t z, size_t w, value_type val) { array->sub(x, y, z, w, val); } + void mul(size_t x, size_t y, size_t z, size_t w, value_type val) { array->mul(x, y, z, w, val); } + void div(size_t x, size_t y, size_t z, size_t w, value_type val) { array->div(x, y, z, w, val); } +}; + +// nested view into a 4D rectangular subset of a 4D array +template +class nested_view4 : public preview { +public: + typedef Container container_type; + typedef typename container_type::value_type value_type; + typedef typename zfp::internal::dim4::const_reference const_reference; + typedef typename zfp::internal::dim4::const_pointer const_pointer; + typedef typename zfp::internal::dim4::reference reference; + typedef typename zfp::internal::dim4::pointer pointer; + + // construction--perform shallow copy of (sub)array + nested_view4(container_type* array) : preview(array) {} + nested_view4(container_type* array, size_t x, size_t y, size_t z, size_t w, size_t nx, size_t ny, size_t nz, size_t nw) : preview(array, x, y, z, w, nx, ny, nz, nw) {} + + // dimensions of (sub)array + size_t size_x() const { return nx; } + size_t size_y() const { return ny; } + size_t size_z() const { return nz; } + size_t size_w() const { return nw; } + + // 3D view + nested_view3 operator[](size_t index) const { return nested_view3(array, x, y, z, w + index, nx, ny, nz, 1); } + + // (i, j, k, l) inspector and mutator + const_reference operator()(size_t i, size_t j, size_t k, size_t l) const { return const_reference(this, x + i, y + j, z + k, w + l); } + reference operator()(size_t i, size_t j, size_t k, size_t l) { return reference(this, x + i, y + j, z + k, w + l); } + +protected: + friend class zfp::internal::dim4::const_handle; + friend class zfp::internal::dim4::const_pointer; + 
friend class zfp::internal::dim4::reference; + friend class zfp::internal::dim4::pointer; + + using preview::array; + using preview::x; + using preview::y; + using preview::z; + using preview::w; + using preview::nx; + using preview::ny; + using preview::nz; + using preview::nw; + + // inspector + value_type get(size_t x, size_t y, size_t z, size_t w) const { return array->get(x, y, z, w); } + + // mutator + void set(size_t x, size_t y, size_t z, size_t w, value_type val) { array->set(x, y, z, w, val); } + + // in-place updates + void add(size_t x, size_t y, size_t z, size_t w, value_type val) { array->add(x, y, z, w, val); } + void sub(size_t x, size_t y, size_t z, size_t w, value_type val) { array->sub(x, y, z, w, val); } + void mul(size_t x, size_t y, size_t z, size_t w, value_type val) { array->mul(x, y, z, w, val); } + void div(size_t x, size_t y, size_t z, size_t w, value_type val) { array->div(x, y, z, w, val); } +}; + +// thread-safe read-only view of 4D (sub)array with private cache +template +class private_const_view : public preview { +public: + typedef Container container_type; + typedef typename container_type::value_type value_type; + typedef typename container_type::store_type store_type; + typedef typename zfp::internal::dim4::const_reference const_reference; + typedef typename zfp::internal::dim4::const_pointer const_pointer; + typedef typename zfp::internal::dim4::const_iterator const_iterator; + + // construction--perform shallow copy of (sub)array + private_const_view(container_type* array, size_t cache_size = 0) : + preview(array), + cache(array->store, cache_size ? cache_size : array->cache.size()) + { + array->store.reference(); + } + private_const_view(container_type* array, size_t x, size_t y, size_t z, size_t w, size_t nx, size_t ny, size_t nz, size_t nw, size_t cache_size = 0) : + preview(array, x, y, z, w, nx, ny, nz, nw), + cache(array->store, cache_size ? 
cache_size : array->cache.size()) + { + array->store.reference(); + } + + // destructor + ~private_const_view() + { + array->store.unreference(); + } + + // dimensions of (sub)array + size_t size_x() const { return nx; } + size_t size_y() const { return ny; } + size_t size_z() const { return nz; } + size_t size_w() const { return nw; } + + // cache size in number of bytes + size_t cache_size() const { return cache.size(); } + + // set minimum cache size in bytes (array dimensions must be known) + void set_cache_size(size_t bytes) { cache.resize(bytes); } + + // empty cache without compressing modified cached blocks + void clear_cache() const { cache.clear(); } + + // (i, j, k, l) inspector + const_reference operator()(size_t i, size_t j, size_t k, size_t l) const { return const_reference(this, x + i, y + j, z + k, w + l); } + + // random access iterators + const_iterator cbegin() const { return const_iterator(this, x, y, z, w); } + const_iterator cend() const { return const_iterator(this, x, y, z, w + nw); } + const_iterator begin() const { return cbegin(); } + const_iterator end() const { return cend(); } + +protected: + friend class zfp::internal::dim4::const_handle; + friend class zfp::internal::dim4::const_pointer; + friend class zfp::internal::dim4::const_iterator; + + using preview::min_x; + using preview::max_x; + using preview::min_y; + using preview::max_y; + using preview::min_z; + using preview::max_z; + using preview::min_w; + using preview::max_w; + using preview::array; + using preview::x; + using preview::y; + using preview::z; + using preview::w; + using preview::nx; + using preview::ny; + using preview::nz; + using preview::nw; + + // inspector + value_type get(size_t x, size_t y, size_t z, size_t w) const { return cache.get(x, y, z, w); } + + BlockCache4 cache; // cache of decompressed blocks +}; + +// thread-safe read-write view of private 4D (sub)array +template +class private_view : public private_const_view { +public: + typedef Container 
container_type; + typedef typename container_type::value_type value_type; + typedef typename zfp::internal::dim4::const_reference const_reference; + typedef typename zfp::internal::dim4::const_pointer const_pointer; + typedef typename zfp::internal::dim4::const_iterator const_iterator; + typedef typename zfp::internal::dim4::reference reference; + typedef typename zfp::internal::dim4::pointer pointer; + typedef typename zfp::internal::dim4::iterator iterator; + + // construction--perform shallow copy of (sub)array + private_view(container_type* array, size_t cache_size = 0) : private_const_view(array, cache_size) {} + private_view(container_type* array, size_t x, size_t y, size_t z, size_t w, size_t nx, size_t ny, size_t nz, size_t nw, size_t cache_size = 0) : private_const_view(array, x, y, z, w, nx, ny, nz, nw, cache_size) {} + + // partition view into count block-aligned pieces, with 0 <= index < count + void partition(size_t index, size_t count) + { + if (std::max(nx, ny) > std::max(nz, nw)) { + if (nx > ny) + partition(x, nx, index, count); + else + partition(y, ny, index, count); + } + else { + if (nz > nw) + partition(z, nz, index, count); + else + partition(w, nw, index, count); + } + } + + // flush cache by compressing all modified cached blocks + void flush_cache() const { cache.flush(); } + + // (i, j, k, l) inspector + const_reference operator()(size_t i, size_t j, size_t k, size_t l) const { return const_reference(this, x + i, y + j, z + k, w + l); } + + // (i, j, k, l) mutator + reference operator()(size_t i, size_t j, size_t k, size_t l) { return reference(this, x + i, y + j, z + k, w + l); } + + // random access iterators + const_iterator cbegin() const { return const_iterator(this, x, y, z, w); } + const_iterator cend() const { return const_iterator(this, x, y, z, w + nw); } + const_iterator begin() const { return cbegin(); } + const_iterator end() const { return cend(); } + iterator begin() { return iterator(this, x, y, z, w); } + iterator end() { 
return iterator(this, x, y, z, w + nw); } + +protected: + friend class zfp::internal::dim4::const_handle; + friend class zfp::internal::dim4::const_pointer; + friend class zfp::internal::dim4::const_iterator; + friend class zfp::internal::dim4::reference; + friend class zfp::internal::dim4::pointer; + friend class zfp::internal::dim4::iterator; + + using private_const_view::min_x; + using private_const_view::max_x; + using private_const_view::min_y; + using private_const_view::max_y; + using private_const_view::min_z; + using private_const_view::max_z; + using private_const_view::min_w; + using private_const_view::max_w; + using private_const_view::get; + using private_const_view::array; + using private_const_view::x; + using private_const_view::y; + using private_const_view::z; + using private_const_view::w; + using private_const_view::nx; + using private_const_view::ny; + using private_const_view::nz; + using private_const_view::nw; + using private_const_view::cache; + + // block-aligned partition of [offset, offset + size): index out of count + static void partition(size_t& offset, size_t& size, size_t index, size_t count) + { + size_t bmin = offset / 4; + size_t bmax = (offset + size + 3) / 4; + size_t xmin = std::max(offset + 0, 4 * (bmin + (bmax - bmin) * (index + 0) / count)); + size_t xmax = std::min(offset + size, 4 * (bmin + (bmax - bmin) * (index + 1) / count)); + offset = xmin; + size = xmax - xmin; + } + + // mutator + void set(size_t x, size_t y, size_t z, size_t w, value_type val) { cache.set(x, y, z, w, val); } + + // in-place updates + void add(size_t x, size_t y, size_t z, size_t w, value_type val) { cache.ref(x, y, z, w) += val; } + void sub(size_t x, size_t y, size_t z, size_t w, value_type val) { cache.ref(x, y, z, w) -= val; } + void mul(size_t x, size_t y, size_t z, size_t w, value_type val) { cache.ref(x, y, z, w) *= val; } + void div(size_t x, size_t y, size_t z, size_t w, value_type val) { cache.ref(x, y, z, w) /= val; } +}; + +} // dim4 
+} // internal +} // zfp + +#endif diff --git a/include/zfp/internal/cfp/array1d.h b/include/zfp/internal/cfp/array1d.h new file mode 100644 index 00000000..fb20d3a7 --- /dev/null +++ b/include/zfp/internal/cfp/array1d.h @@ -0,0 +1,141 @@ +#ifndef CFP_ARRAY_1D_H +#define CFP_ARRAY_1D_H + +#include <stddef.h> +#include "zfp.h" + +typedef struct { + void* object; +} cfp_array1d; + +typedef struct { + cfp_array1d array; + size_t x; +} cfp_ref1d; + +typedef struct { + cfp_ref1d reference; +} cfp_ptr1d; + +typedef struct { + cfp_array1d array; + size_t x; +} cfp_iter1d; + +typedef struct { + /* member functions */ + double (*get)(const cfp_ref1d self); + void (*set)(cfp_ref1d self, double val); + cfp_ptr1d (*ptr)(cfp_ref1d self); + void (*copy)(cfp_ref1d self, const cfp_ref1d src); +} cfp_ref1d_api; + +typedef struct { + /* member functions */ + double (*get)(const cfp_ptr1d self); + double (*get_at)(const cfp_ptr1d self, ptrdiff_t d); + void (*set)(cfp_ptr1d self, double val); + void (*set_at)(cfp_ptr1d self, ptrdiff_t d, double val); + cfp_ref1d (*ref)(cfp_ptr1d self); + cfp_ref1d (*ref_at)(cfp_ptr1d self, ptrdiff_t d); + /* non-member functions */ + zfp_bool (*lt)(const cfp_ptr1d lhs, const cfp_ptr1d rhs); + zfp_bool (*gt)(const cfp_ptr1d lhs, const cfp_ptr1d rhs); + zfp_bool (*leq)(const cfp_ptr1d lhs, const cfp_ptr1d rhs); + zfp_bool (*geq)(const cfp_ptr1d lhs, const cfp_ptr1d rhs); + zfp_bool (*eq)(const cfp_ptr1d lhs, const cfp_ptr1d rhs); + zfp_bool (*neq)(const cfp_ptr1d lhs, const cfp_ptr1d rhs); + ptrdiff_t (*distance)(const cfp_ptr1d first, const cfp_ptr1d last); + cfp_ptr1d (*next)(const cfp_ptr1d p, ptrdiff_t d); + cfp_ptr1d (*prev)(const cfp_ptr1d p, ptrdiff_t d); + cfp_ptr1d (*inc)(const cfp_ptr1d p); + cfp_ptr1d (*dec)(const cfp_ptr1d p); +} cfp_ptr1d_api; + +typedef struct { + /* member functions */ + double (*get)(const cfp_iter1d self); + double (*get_at)(const cfp_iter1d self, ptrdiff_t d); + void (*set)(cfp_iter1d self, double val); + void 
(*set_at)(cfp_iter1d self, ptrdiff_t d, double val); + cfp_ref1d (*ref)(cfp_iter1d self); + cfp_ref1d (*ref_at)(cfp_iter1d self, ptrdiff_t d); + cfp_ptr1d (*ptr)(cfp_iter1d self); + cfp_ptr1d (*ptr_at)(cfp_iter1d self, ptrdiff_t d); + size_t (*i)(const cfp_iter1d self); + /* non-member functions */ + zfp_bool (*lt)(const cfp_iter1d lhs, const cfp_iter1d rhs); + zfp_bool (*gt)(const cfp_iter1d lhs, const cfp_iter1d rhs); + zfp_bool (*leq)(const cfp_iter1d lhs, const cfp_iter1d rhs); + zfp_bool (*geq)(const cfp_iter1d lhs, const cfp_iter1d rhs); + zfp_bool (*eq)(const cfp_iter1d lhs, const cfp_iter1d rhs); + zfp_bool (*neq)(const cfp_iter1d lhs, const cfp_iter1d rhs); + ptrdiff_t (*distance)(const cfp_iter1d first, const cfp_iter1d last); + cfp_iter1d (*next)(const cfp_iter1d it, ptrdiff_t d); + cfp_iter1d (*prev)(const cfp_iter1d it, ptrdiff_t d); + cfp_iter1d (*inc)(const cfp_iter1d it); + cfp_iter1d (*dec)(const cfp_iter1d it); +} cfp_iter1d_api; + +typedef struct { + /* constructor/destructor */ + cfp_header (*ctor)(const cfp_array1d a); + cfp_header (*ctor_buffer)(const void* data, size_t size); + void (*dtor)(cfp_header self); + /* array metadata */ + zfp_type (*scalar_type)(const cfp_header self); + uint (*dimensionality)(const cfp_header self); + size_t (*size_x)(const cfp_header self); + size_t (*size_y)(const cfp_header self); + size_t (*size_z)(const cfp_header self); + size_t (*size_w)(const cfp_header self); + double (*rate)(const cfp_header self); + /* header payload: data pointer and byte size */ + const void* (*data)(const cfp_header self); + size_t (*size_bytes)(const cfp_header self, uint mask); +} cfp_header1d_api; + +typedef struct { + cfp_array1d (*ctor_default)(void); + cfp_array1d (*ctor)(size_t n, double rate, const double* p, size_t cache_size); + cfp_array1d (*ctor_copy)(const cfp_array1d src); + cfp_array1d (*ctor_header)(const cfp_header h, const void* buffer, size_t buffer_size_bytes); + void (*dtor)(cfp_array1d self); + + void 
(*deep_copy)(cfp_array1d self, const cfp_array1d src); + + double (*rate)(const cfp_array1d self); + double (*set_rate)(cfp_array1d self, double rate); + size_t (*cache_size)(const cfp_array1d self); + void (*set_cache_size)(cfp_array1d self, size_t bytes); + void (*clear_cache)(const cfp_array1d self); + void (*flush_cache)(const cfp_array1d self); + size_t (*size_bytes)(const cfp_array1d self, uint mask); + size_t (*compressed_size)(const cfp_array1d self); + void* (*compressed_data)(const cfp_array1d self); + size_t (*size)(const cfp_array1d self); + void (*resize)(cfp_array1d self, size_t n, zfp_bool clear); + + void (*get_array)(const cfp_array1d self, double* p); + void (*set_array)(cfp_array1d self, const double* p); + double (*get_flat)(const cfp_array1d self, size_t i); + void (*set_flat)(cfp_array1d self, size_t i, double val); + double (*get)(const cfp_array1d self, size_t i); + void (*set)(cfp_array1d self, size_t i, double val); + + cfp_ref1d (*ref)(cfp_array1d self, size_t i); + cfp_ref1d (*ref_flat)(cfp_array1d self, size_t i); + + cfp_ptr1d (*ptr)(cfp_array1d self, size_t i); + cfp_ptr1d (*ptr_flat)(cfp_array1d self, size_t i); + + cfp_iter1d (*begin)(cfp_array1d self); + cfp_iter1d (*end)(cfp_array1d self); + + cfp_ref1d_api reference; + cfp_ptr1d_api pointer; + cfp_iter1d_api iterator; + cfp_header1d_api header; +} cfp_array1d_api; + +#endif diff --git a/include/zfp/internal/cfp/array1f.h b/include/zfp/internal/cfp/array1f.h new file mode 100644 index 00000000..6ca593d0 --- /dev/null +++ b/include/zfp/internal/cfp/array1f.h @@ -0,0 +1,141 @@ +#ifndef CFP_ARRAY_1F_H +#define CFP_ARRAY_1F_H + +#include <stddef.h> +#include "zfp.h" + +typedef struct { + void* object; +} cfp_array1f; + +typedef struct { + cfp_array1f array; + size_t x; +} cfp_ref1f; + +typedef struct { + cfp_ref1f reference; +} cfp_ptr1f; + +typedef struct { + cfp_array1f array; + size_t x; +} cfp_iter1f; + +typedef struct { + /* member functions */ + float (*get)(const cfp_ref1f self); + void 
(*set)(cfp_ref1f self, float val); + cfp_ptr1f (*ptr)(cfp_ref1f self); + void (*copy)(cfp_ref1f self, const cfp_ref1f src); +} cfp_ref1f_api; + +typedef struct { + /* member functions */ + float (*get)(const cfp_ptr1f self); + float (*get_at)(const cfp_ptr1f self, ptrdiff_t d); + void (*set)(cfp_ptr1f self, float val); + void (*set_at)(cfp_ptr1f self, ptrdiff_t d, float val); + cfp_ref1f (*ref)(cfp_ptr1f self); + cfp_ref1f (*ref_at)(cfp_ptr1f self, ptrdiff_t d); + /* non-member functions */ + zfp_bool (*lt)(const cfp_ptr1f lhs, const cfp_ptr1f rhs); + zfp_bool (*gt)(const cfp_ptr1f lhs, const cfp_ptr1f rhs); + zfp_bool (*leq)(const cfp_ptr1f lhs, const cfp_ptr1f rhs); + zfp_bool (*geq)(const cfp_ptr1f lhs, const cfp_ptr1f rhs); + zfp_bool (*eq)(const cfp_ptr1f lhs, const cfp_ptr1f rhs); + zfp_bool (*neq)(const cfp_ptr1f lhs, const cfp_ptr1f rhs); + ptrdiff_t (*distance)(const cfp_ptr1f first, const cfp_ptr1f last); + cfp_ptr1f (*next)(const cfp_ptr1f p, ptrdiff_t d); + cfp_ptr1f (*prev)(const cfp_ptr1f p, ptrdiff_t d); + cfp_ptr1f (*inc)(const cfp_ptr1f p); + cfp_ptr1f (*dec)(const cfp_ptr1f p); +} cfp_ptr1f_api; + +typedef struct { + /* member functions */ + float (*get)(const cfp_iter1f self); + float (*get_at)(const cfp_iter1f self, ptrdiff_t d); + void (*set)(cfp_iter1f self, float val); + void (*set_at)(cfp_iter1f self, ptrdiff_t d, float val); + cfp_ref1f (*ref)(cfp_iter1f self); + cfp_ref1f (*ref_at)(cfp_iter1f self, ptrdiff_t d); + cfp_ptr1f (*ptr)(cfp_iter1f self); + cfp_ptr1f (*ptr_at)(cfp_iter1f self, ptrdiff_t d); + size_t (*i)(const cfp_iter1f self); + /* non-member functions */ + zfp_bool (*lt)(const cfp_iter1f lhs, const cfp_iter1f rhs); + zfp_bool (*gt)(const cfp_iter1f lhs, const cfp_iter1f rhs); + zfp_bool (*leq)(const cfp_iter1f lhs, const cfp_iter1f rhs); + zfp_bool (*geq)(const cfp_iter1f lhs, const cfp_iter1f rhs); + zfp_bool (*eq)(const cfp_iter1f lhs, const cfp_iter1f rhs); + zfp_bool (*neq)(const cfp_iter1f lhs, const cfp_iter1f rhs); + 
ptrdiff_t (*distance)(const cfp_iter1f first, const cfp_iter1f last); + cfp_iter1f (*next)(const cfp_iter1f it, ptrdiff_t d); + cfp_iter1f (*prev)(const cfp_iter1f it, ptrdiff_t d); + cfp_iter1f (*inc)(const cfp_iter1f it); + cfp_iter1f (*dec)(const cfp_iter1f it); +} cfp_iter1f_api; + +typedef struct { + /* constructor/destructor */ + cfp_header (*ctor)(const cfp_array1f a); + cfp_header (*ctor_buffer)(const void* data, size_t size); + void (*dtor)(cfp_header self); + /* array metadata */ + zfp_type (*scalar_type)(const cfp_header self); + uint (*dimensionality)(const cfp_header self); + size_t (*size_x)(const cfp_header self); + size_t (*size_y)(const cfp_header self); + size_t (*size_z)(const cfp_header self); + size_t (*size_w)(const cfp_header self); + double (*rate)(const cfp_header self); + /* header payload: data pointer and byte size */ + const void* (*data)(const cfp_header self); + size_t (*size_bytes)(const cfp_header self, uint mask); +} cfp_header1f_api; + +typedef struct { + cfp_array1f (*ctor_default)(void); + cfp_array1f (*ctor)(size_t n, double rate, const float* p, size_t cache_size); + cfp_array1f (*ctor_copy)(const cfp_array1f src); + cfp_array1f (*ctor_header)(const cfp_header h, const void* buffer, size_t buffer_size_bytes); + void (*dtor)(cfp_array1f self); + + void (*deep_copy)(cfp_array1f self, const cfp_array1f src); + + double (*rate)(const cfp_array1f self); + double (*set_rate)(cfp_array1f self, double rate); + size_t (*cache_size)(const cfp_array1f self); + void (*set_cache_size)(cfp_array1f self, size_t bytes); + void (*clear_cache)(const cfp_array1f self); + void (*flush_cache)(const cfp_array1f self); + size_t (*size_bytes)(const cfp_array1f self, uint mask); + size_t (*compressed_size)(const cfp_array1f self); + void* (*compressed_data)(const cfp_array1f self); + size_t (*size)(const cfp_array1f self); + void (*resize)(cfp_array1f self, size_t n, zfp_bool clear); + + void (*get_array)(const cfp_array1f self, float* p); + void 
(*set_array)(cfp_array1f self, const float* p); + float (*get_flat)(const cfp_array1f self, size_t i); + void (*set_flat)(cfp_array1f self, size_t i, float val); + float (*get)(const cfp_array1f self, size_t i); + void (*set)(cfp_array1f self, size_t i, float val); + + cfp_ref1f (*ref)(cfp_array1f self, size_t i); + cfp_ref1f (*ref_flat)(cfp_array1f self, size_t i); + + cfp_ptr1f (*ptr)(cfp_array1f self, size_t i); + cfp_ptr1f (*ptr_flat)(cfp_array1f self, size_t i); + + cfp_iter1f (*begin)(cfp_array1f self); + cfp_iter1f (*end)(cfp_array1f self); + + cfp_ref1f_api reference; + cfp_ptr1f_api pointer; + cfp_iter1f_api iterator; + cfp_header1f_api header; +} cfp_array1f_api; + +#endif diff --git a/include/zfp/internal/cfp/array2d.h b/include/zfp/internal/cfp/array2d.h new file mode 100644 index 00000000..b0e078af --- /dev/null +++ b/include/zfp/internal/cfp/array2d.h @@ -0,0 +1,144 @@ +#ifndef CFP_ARRAY_2D_H +#define CFP_ARRAY_2D_H + +#include <stddef.h> +#include "zfp.h" + +typedef struct { + void* object; +} cfp_array2d; + +typedef struct { + cfp_array2d array; + size_t x, y; +} cfp_ref2d; + +typedef struct { + cfp_ref2d reference; +} cfp_ptr2d; + +typedef struct { + cfp_array2d array; + size_t x, y; +} cfp_iter2d; + +typedef struct { + /* member functions */ + double (*get)(const cfp_ref2d self); + void (*set)(cfp_ref2d self, double val); + cfp_ptr2d (*ptr)(cfp_ref2d self); + void (*copy)(cfp_ref2d self, const cfp_ref2d src); +} cfp_ref2d_api; + +typedef struct { + /* member functions */ + double (*get)(const cfp_ptr2d self); + double (*get_at)(const cfp_ptr2d self, ptrdiff_t d); + void (*set)(cfp_ptr2d self, double val); + void (*set_at)(cfp_ptr2d self, ptrdiff_t d, double val); + cfp_ref2d (*ref)(cfp_ptr2d self); + cfp_ref2d (*ref_at)(cfp_ptr2d self, ptrdiff_t d); + /* non-member functions */ + zfp_bool (*lt)(const cfp_ptr2d lhs, const cfp_ptr2d rhs); + zfp_bool (*gt)(const cfp_ptr2d lhs, const cfp_ptr2d rhs); + zfp_bool (*leq)(const cfp_ptr2d lhs, const cfp_ptr2d rhs); + 
zfp_bool (*geq)(const cfp_ptr2d lhs, const cfp_ptr2d rhs); + zfp_bool (*eq)(const cfp_ptr2d lhs, const cfp_ptr2d rhs); + zfp_bool (*neq)(const cfp_ptr2d lhs, const cfp_ptr2d rhs); + ptrdiff_t (*distance)(const cfp_ptr2d first, const cfp_ptr2d last); + cfp_ptr2d (*next)(const cfp_ptr2d p, ptrdiff_t d); + cfp_ptr2d (*prev)(const cfp_ptr2d p, ptrdiff_t d); + cfp_ptr2d (*inc)(const cfp_ptr2d p); + cfp_ptr2d (*dec)(const cfp_ptr2d p); +} cfp_ptr2d_api; + +typedef struct { + /* member functions */ + double (*get)(const cfp_iter2d self); + double (*get_at)(const cfp_iter2d self, ptrdiff_t d); + void (*set)(cfp_iter2d self, double value); + void (*set_at)(cfp_iter2d self, ptrdiff_t d, double value); + cfp_ref2d (*ref)(cfp_iter2d self); + cfp_ref2d (*ref_at)(cfp_iter2d self, ptrdiff_t d); + cfp_ptr2d (*ptr)(cfp_iter2d self); + cfp_ptr2d (*ptr_at)(cfp_iter2d self, ptrdiff_t d); + size_t (*i)(const cfp_iter2d self); + size_t (*j)(const cfp_iter2d self); + /* non-member functions */ + zfp_bool (*lt)(const cfp_iter2d lhs, const cfp_iter2d rhs); + zfp_bool (*gt)(const cfp_iter2d lhs, const cfp_iter2d rhs); + zfp_bool (*leq)(const cfp_iter2d lhs, const cfp_iter2d rhs); + zfp_bool (*geq)(const cfp_iter2d lhs, const cfp_iter2d rhs); + zfp_bool (*eq)(const cfp_iter2d lhs, const cfp_iter2d rhs); + zfp_bool (*neq)(const cfp_iter2d lhs, const cfp_iter2d rhs); + ptrdiff_t (*distance)(const cfp_iter2d first, const cfp_iter2d last); + cfp_iter2d (*next)(const cfp_iter2d it, ptrdiff_t d); + cfp_iter2d (*prev)(const cfp_iter2d it, ptrdiff_t d); + cfp_iter2d (*inc)(const cfp_iter2d it); + cfp_iter2d (*dec)(const cfp_iter2d it); +} cfp_iter2d_api; + +typedef struct { + /* constructor/destructor */ + cfp_header (*ctor)(const cfp_array2d a); + cfp_header (*ctor_buffer)(const void* data, size_t size); + void (*dtor)(cfp_header self); + /* array metadata */ + zfp_type (*scalar_type)(const cfp_header self); + uint (*dimensionality)(const cfp_header self); + size_t (*size_x)(const cfp_header self); 
+ size_t (*size_y)(const cfp_header self); + size_t (*size_z)(const cfp_header self); + size_t (*size_w)(const cfp_header self); + double (*rate)(const cfp_header self); + /* header payload: data pointer and byte size */ + const void* (*data)(const cfp_header self); + size_t (*size_bytes)(const cfp_header self, uint mask); +} cfp_header2d_api; + +typedef struct { + cfp_array2d (*ctor_default)(void); + cfp_array2d (*ctor)(size_t nx, size_t ny, double rate, const double* p, size_t cache_size); + cfp_array2d (*ctor_copy)(const cfp_array2d src); + cfp_array2d (*ctor_header)(const cfp_header h, const void* buffer, size_t buffer_size_bytes); + void (*dtor)(cfp_array2d self); + + void (*deep_copy)(cfp_array2d self, const cfp_array2d src); + + double (*rate)(const cfp_array2d self); + double (*set_rate)(cfp_array2d self, double rate); + size_t (*cache_size)(const cfp_array2d self); + void (*set_cache_size)(cfp_array2d self, size_t bytes); + void (*clear_cache)(const cfp_array2d self); + void (*flush_cache)(const cfp_array2d self); + size_t (*size_bytes)(const cfp_array2d self, uint mask); + size_t (*compressed_size)(const cfp_array2d self); + void* (*compressed_data)(const cfp_array2d self); + size_t (*size)(const cfp_array2d self); + size_t (*size_x)(const cfp_array2d self); + size_t (*size_y)(const cfp_array2d self); + void (*resize)(cfp_array2d self, size_t nx, size_t ny, zfp_bool clear); + + void (*get_array)(const cfp_array2d self, double* p); + void (*set_array)(cfp_array2d self, const double* p); + double (*get_flat)(const cfp_array2d self, size_t i); + void (*set_flat)(cfp_array2d self, size_t i, double val); + double (*get)(const cfp_array2d self, size_t i, size_t j); + void (*set)(cfp_array2d self, size_t i, size_t j, double val); + + cfp_ref2d (*ref)(cfp_array2d self, size_t i, size_t j); + cfp_ref2d (*ref_flat)(cfp_array2d self, size_t i); + + cfp_ptr2d (*ptr)(cfp_array2d self, size_t i, size_t j); + cfp_ptr2d (*ptr_flat)(cfp_array2d self, size_t i); + + 
cfp_iter2d (*begin)(cfp_array2d self); + cfp_iter2d (*end)(cfp_array2d self); + + cfp_ref2d_api reference; + cfp_ptr2d_api pointer; + cfp_iter2d_api iterator; + cfp_header2d_api header; +} cfp_array2d_api; + +#endif diff --git a/include/zfp/internal/cfp/array2f.h b/include/zfp/internal/cfp/array2f.h new file mode 100644 index 00000000..0137b609 --- /dev/null +++ b/include/zfp/internal/cfp/array2f.h @@ -0,0 +1,144 @@ +#ifndef CFP_ARRAY_2F_H +#define CFP_ARRAY_2F_H + +#include <stddef.h> +#include "zfp.h" + +typedef struct { + void* object; +} cfp_array2f; + +typedef struct { + cfp_array2f array; + size_t x, y; +} cfp_ref2f; + +typedef struct { + cfp_ref2f reference; +} cfp_ptr2f; + +typedef struct { + cfp_array2f array; + size_t x, y; +} cfp_iter2f; + +typedef struct { + /* member functions */ + float (*get)(const cfp_ref2f self); + void (*set)(cfp_ref2f self, float val); + cfp_ptr2f (*ptr)(cfp_ref2f self); + void (*copy)(cfp_ref2f self, const cfp_ref2f src); +} cfp_ref2f_api; + +typedef struct { + /* member functions */ + float (*get)(const cfp_ptr2f self); + float (*get_at)(const cfp_ptr2f self, ptrdiff_t d); + void (*set)(cfp_ptr2f self, float val); + void (*set_at)(cfp_ptr2f self, ptrdiff_t d, float val); + cfp_ref2f (*ref)(cfp_ptr2f self); + cfp_ref2f (*ref_at)(cfp_ptr2f self, ptrdiff_t d); + /* non-member functions */ + zfp_bool (*lt)(const cfp_ptr2f lhs, const cfp_ptr2f rhs); + zfp_bool (*gt)(const cfp_ptr2f lhs, const cfp_ptr2f rhs); + zfp_bool (*leq)(const cfp_ptr2f lhs, const cfp_ptr2f rhs); + zfp_bool (*geq)(const cfp_ptr2f lhs, const cfp_ptr2f rhs); + zfp_bool (*eq)(const cfp_ptr2f lhs, const cfp_ptr2f rhs); + zfp_bool (*neq)(const cfp_ptr2f lhs, const cfp_ptr2f rhs); + ptrdiff_t (*distance)(const cfp_ptr2f first, const cfp_ptr2f last); + cfp_ptr2f (*next)(const cfp_ptr2f p, ptrdiff_t d); + cfp_ptr2f (*prev)(const cfp_ptr2f p, ptrdiff_t d); + cfp_ptr2f (*inc)(const cfp_ptr2f p); + cfp_ptr2f (*dec)(const cfp_ptr2f p); +} cfp_ptr2f_api; + +typedef struct { + /* 
member functions */ + float (*get)(const cfp_iter2f self); + float (*get_at)(const cfp_iter2f self, ptrdiff_t d); + void (*set)(cfp_iter2f self, float val); + void (*set_at)(cfp_iter2f self, ptrdiff_t d, float val); + cfp_ref2f (*ref)(cfp_iter2f self); + cfp_ref2f (*ref_at)(cfp_iter2f self, ptrdiff_t d); + cfp_ptr2f (*ptr)(cfp_iter2f self); + cfp_ptr2f (*ptr_at)(cfp_iter2f self, ptrdiff_t d); + size_t (*i)(const cfp_iter2f self); + size_t (*j)(const cfp_iter2f self); + /* non-member functions */ + zfp_bool (*lt)(const cfp_iter2f lhs, const cfp_iter2f rhs); + zfp_bool (*gt)(const cfp_iter2f lhs, const cfp_iter2f rhs); + zfp_bool (*leq)(const cfp_iter2f lhs, const cfp_iter2f rhs); + zfp_bool (*geq)(const cfp_iter2f lhs, const cfp_iter2f rhs); + zfp_bool (*eq)(const cfp_iter2f lhs, const cfp_iter2f rhs); + zfp_bool (*neq)(const cfp_iter2f lhs, const cfp_iter2f rhs); + ptrdiff_t (*distance)(const cfp_iter2f first, const cfp_iter2f last); + cfp_iter2f (*next)(const cfp_iter2f it, ptrdiff_t d); + cfp_iter2f (*prev)(const cfp_iter2f it, ptrdiff_t d); + cfp_iter2f (*inc)(const cfp_iter2f it); + cfp_iter2f (*dec)(const cfp_iter2f it); +} cfp_iter2f_api; + +typedef struct { + /* constructor/destructor */ + cfp_header (*ctor)(const cfp_array2f a); + cfp_header (*ctor_buffer)(const void* data, size_t size); + void (*dtor)(cfp_header self); + /* array metadata */ + zfp_type (*scalar_type)(const cfp_header self); + uint (*dimensionality)(const cfp_header self); + size_t (*size_x)(const cfp_header self); + size_t (*size_y)(const cfp_header self); + size_t (*size_z)(const cfp_header self); + size_t (*size_w)(const cfp_header self); + double (*rate)(const cfp_header self); + /* header payload: data pointer and byte size */ + const void* (*data)(const cfp_header self); + size_t (*size_bytes)(const cfp_header self, uint mask); +} cfp_header2f_api; + +typedef struct { + cfp_array2f (*ctor_default)(void); + cfp_array2f (*ctor)(size_t nx, size_t ny, double rate, const float* p, size_t 
cache_size); + cfp_array2f (*ctor_copy)(const cfp_array2f src); + cfp_array2f (*ctor_header)(const cfp_header h, const void* buffer, size_t buffer_size_bytes); + void (*dtor)(cfp_array2f self); + + void (*deep_copy)(cfp_array2f self, const cfp_array2f src); + + double (*rate)(const cfp_array2f self); + double (*set_rate)(cfp_array2f self, double rate); + size_t (*cache_size)(const cfp_array2f self); + void (*set_cache_size)(cfp_array2f self, size_t bytes); + void (*clear_cache)(const cfp_array2f self); + void (*flush_cache)(const cfp_array2f self); + size_t (*size_bytes)(const cfp_array2f self, uint mask); + size_t (*compressed_size)(const cfp_array2f self); + void* (*compressed_data)(const cfp_array2f self); + size_t (*size)(const cfp_array2f self); + size_t (*size_x)(const cfp_array2f self); + size_t (*size_y)(const cfp_array2f self); + void (*resize)(cfp_array2f self, size_t nx, size_t ny, zfp_bool clear); + + void (*get_array)(const cfp_array2f self, float* p); + void (*set_array)(cfp_array2f self, const float* p); + float (*get_flat)(const cfp_array2f self, size_t i); + void (*set_flat)(cfp_array2f self, size_t i, float val); + float (*get)(const cfp_array2f self, size_t i, size_t j); + void (*set)(cfp_array2f self, size_t i, size_t j, float val); + + cfp_ref2f (*ref)(cfp_array2f self, size_t i, size_t j); + cfp_ref2f (*ref_flat)(cfp_array2f self, size_t i); + + cfp_ptr2f (*ptr)(cfp_array2f self, size_t i, size_t j); + cfp_ptr2f (*ptr_flat)(cfp_array2f self, size_t i); + + cfp_iter2f (*begin)(cfp_array2f self); + cfp_iter2f (*end)(cfp_array2f self); + + cfp_ref2f_api reference; + cfp_ptr2f_api pointer; + cfp_iter2f_api iterator; + cfp_header2f_api header; +} cfp_array2f_api; + +#endif diff --git a/include/zfp/internal/cfp/array3d.h b/include/zfp/internal/cfp/array3d.h new file mode 100644 index 00000000..9c4a654a --- /dev/null +++ b/include/zfp/internal/cfp/array3d.h @@ -0,0 +1,146 @@ +#ifndef CFP_ARRAY_3D_H +#define CFP_ARRAY_3D_H + +#include <stddef.h> +#include 
"zfp.h" + +typedef struct { + void* object; +} cfp_array3d; + +typedef struct { + cfp_array3d array; + size_t x, y, z; +} cfp_ref3d; + +typedef struct { + cfp_ref3d reference; +} cfp_ptr3d; + +typedef struct { + cfp_array3d array; + size_t x, y, z; +} cfp_iter3d; + +typedef struct { + /* member functions */ + double (*get)(const cfp_ref3d self); + void (*set)(cfp_ref3d self, double val); + cfp_ptr3d (*ptr)(cfp_ref3d self); + void (*copy)(cfp_ref3d self, const cfp_ref3d src); +} cfp_ref3d_api; + +typedef struct { + /* member functions */ + double (*get)(const cfp_ptr3d self); + double (*get_at)(const cfp_ptr3d self, ptrdiff_t d); + void (*set)(cfp_ptr3d self, double val); + void (*set_at)(cfp_ptr3d self, ptrdiff_t d, double val); + cfp_ref3d (*ref)(cfp_ptr3d self); + cfp_ref3d (*ref_at)(cfp_ptr3d self, ptrdiff_t d); + /* non-member functions */ + zfp_bool (*lt)(const cfp_ptr3d lhs, const cfp_ptr3d rhs); + zfp_bool (*gt)(const cfp_ptr3d lhs, const cfp_ptr3d rhs); + zfp_bool (*leq)(const cfp_ptr3d lhs, const cfp_ptr3d rhs); + zfp_bool (*geq)(const cfp_ptr3d lhs, const cfp_ptr3d rhs); + zfp_bool (*eq)(const cfp_ptr3d lhs, const cfp_ptr3d rhs); + zfp_bool (*neq)(const cfp_ptr3d lhs, const cfp_ptr3d rhs); + ptrdiff_t (*distance)(const cfp_ptr3d first, const cfp_ptr3d last); + cfp_ptr3d (*next)(const cfp_ptr3d p, ptrdiff_t d); + cfp_ptr3d (*prev)(const cfp_ptr3d p, ptrdiff_t d); + cfp_ptr3d (*inc)(const cfp_ptr3d p); + cfp_ptr3d (*dec)(const cfp_ptr3d p); +} cfp_ptr3d_api; + +typedef struct { + /* member functions */ + double (*get)(const cfp_iter3d self); + double (*get_at)(const cfp_iter3d self, ptrdiff_t d); + void (*set)(cfp_iter3d self, double val); + void (*set_at)(cfp_iter3d self, ptrdiff_t d, double val); + cfp_ref3d (*ref)(cfp_iter3d self); + cfp_ref3d (*ref_at)(cfp_iter3d self, ptrdiff_t d); + cfp_ptr3d (*ptr)(cfp_iter3d self); + cfp_ptr3d (*ptr_at)(cfp_iter3d self, ptrdiff_t d); + size_t (*i)(const cfp_iter3d self); + size_t (*j)(const cfp_iter3d self); + 
size_t (*k)(const cfp_iter3d self); + /* non-member functions */ + zfp_bool (*lt)(const cfp_iter3d lhs, const cfp_iter3d rhs); + zfp_bool (*gt)(const cfp_iter3d lhs, const cfp_iter3d rhs); + zfp_bool (*leq)(const cfp_iter3d lhs, const cfp_iter3d rhs); + zfp_bool (*geq)(const cfp_iter3d lhs, const cfp_iter3d rhs); + zfp_bool (*eq)(const cfp_iter3d lhs, const cfp_iter3d rhs); + zfp_bool (*neq)(const cfp_iter3d lhs, const cfp_iter3d rhs); + ptrdiff_t (*distance)(const cfp_iter3d first, const cfp_iter3d last); + cfp_iter3d (*next)(const cfp_iter3d it, ptrdiff_t d); + cfp_iter3d (*prev)(const cfp_iter3d it, ptrdiff_t d); + cfp_iter3d (*inc)(const cfp_iter3d it); + cfp_iter3d (*dec)(const cfp_iter3d it); +} cfp_iter3d_api; + +typedef struct { + /* constructor/destructor */ + cfp_header (*ctor)(const cfp_array3d a); + cfp_header (*ctor_buffer)(const void* data, size_t size); + void (*dtor)(cfp_header self); + /* array metadata */ + zfp_type (*scalar_type)(const cfp_header self); + uint (*dimensionality)(const cfp_header self); + size_t (*size_x)(const cfp_header self); + size_t (*size_y)(const cfp_header self); + size_t (*size_z)(const cfp_header self); + size_t (*size_w)(const cfp_header self); + double (*rate)(const cfp_header self); + /* header payload: data pointer and byte size */ + const void* (*data)(const cfp_header self); + size_t (*size_bytes)(const cfp_header self, uint mask); +} cfp_header3d_api; + +typedef struct { + cfp_array3d (*ctor_default)(void); + cfp_array3d (*ctor)(size_t nx, size_t ny, size_t nz, double rate, const double* p, size_t cache_size); + cfp_array3d (*ctor_copy)(const cfp_array3d src); + cfp_array3d (*ctor_header)(const cfp_header h, const void* buffer, size_t buffer_size_bytes); + void (*dtor)(cfp_array3d self); + + void (*deep_copy)(cfp_array3d self, const cfp_array3d src); + + double (*rate)(const cfp_array3d self); + double (*set_rate)(cfp_array3d self, double rate); + size_t (*cache_size)(const cfp_array3d self); + void 
(*set_cache_size)(cfp_array3d self, size_t bytes); + void (*clear_cache)(const cfp_array3d self); + void (*flush_cache)(const cfp_array3d self); + size_t (*size_bytes)(const cfp_array3d self, uint mask); + size_t (*compressed_size)(const cfp_array3d self); + void* (*compressed_data)(const cfp_array3d self); + size_t (*size)(const cfp_array3d self); + size_t (*size_x)(const cfp_array3d self); + size_t (*size_y)(const cfp_array3d self); + size_t (*size_z)(const cfp_array3d self); + void (*resize)(cfp_array3d self, size_t nx, size_t ny, size_t nz, zfp_bool clear); + + void (*get_array)(const cfp_array3d self, double* p); + void (*set_array)(cfp_array3d self, const double* p); + double (*get_flat)(const cfp_array3d self, size_t i); + void (*set_flat)(cfp_array3d self, size_t i, double val); + double (*get)(const cfp_array3d self, size_t i, size_t j, size_t k); + void (*set)(cfp_array3d self, size_t i, size_t j, size_t k, double val); + + cfp_ref3d (*ref)(cfp_array3d self, size_t i, size_t j, size_t k); + cfp_ref3d (*ref_flat)(cfp_array3d self, size_t i); + + cfp_ptr3d (*ptr)(cfp_array3d self, size_t i, size_t j, size_t k); + cfp_ptr3d (*ptr_flat)(cfp_array3d self, size_t i); + + cfp_iter3d (*begin)(cfp_array3d self); + cfp_iter3d (*end)(cfp_array3d self); + + cfp_ref3d_api reference; + cfp_ptr3d_api pointer; + cfp_iter3d_api iterator; + cfp_header3d_api header; +} cfp_array3d_api; + +#endif diff --git a/include/zfp/internal/cfp/array3f.h b/include/zfp/internal/cfp/array3f.h new file mode 100644 index 00000000..e0f3aba5 --- /dev/null +++ b/include/zfp/internal/cfp/array3f.h @@ -0,0 +1,146 @@ +#ifndef CFP_ARRAY_3F_H +#define CFP_ARRAY_3F_H + +#include <stddef.h> +#include "zfp.h" + +typedef struct { + void* object; +} cfp_array3f; + +typedef struct { + cfp_array3f array; + size_t x, y, z; +} cfp_ref3f; + +typedef struct { + cfp_ref3f reference; +} cfp_ptr3f; + +typedef struct { + cfp_array3f array; + size_t x, y, z; +} cfp_iter3f; + +typedef struct { + /* member functions */ + 
float (*get)(const cfp_ref3f self); + void (*set)(cfp_ref3f self, float val); + cfp_ptr3f (*ptr)(cfp_ref3f self); + void (*copy)(cfp_ref3f self, const cfp_ref3f src); +} cfp_ref3f_api; + +typedef struct { + /* member functions */ + float (*get)(const cfp_ptr3f self); + float (*get_at)(const cfp_ptr3f self, ptrdiff_t d); + void (*set)(cfp_ptr3f self, float val); + void (*set_at)(cfp_ptr3f self, ptrdiff_t d, float val); + cfp_ref3f (*ref)(cfp_ptr3f self); + cfp_ref3f (*ref_at)(cfp_ptr3f self, ptrdiff_t d); + /* non-member functions */ + zfp_bool (*lt)(const cfp_ptr3f lhs, const cfp_ptr3f rhs); + zfp_bool (*gt)(const cfp_ptr3f lhs, const cfp_ptr3f rhs); + zfp_bool (*leq)(const cfp_ptr3f lhs, const cfp_ptr3f rhs); + zfp_bool (*geq)(const cfp_ptr3f lhs, const cfp_ptr3f rhs); + zfp_bool (*eq)(const cfp_ptr3f lhs, const cfp_ptr3f rhs); + zfp_bool (*neq)(const cfp_ptr3f lhs, const cfp_ptr3f rhs); + ptrdiff_t (*distance)(const cfp_ptr3f first, const cfp_ptr3f last); + cfp_ptr3f (*next)(const cfp_ptr3f p, ptrdiff_t d); + cfp_ptr3f (*prev)(const cfp_ptr3f p, ptrdiff_t d); + cfp_ptr3f (*inc)(const cfp_ptr3f p); + cfp_ptr3f (*dec)(const cfp_ptr3f p); +} cfp_ptr3f_api; + +typedef struct { + /* member functions */ + float (*get)(const cfp_iter3f self); + float (*get_at)(const cfp_iter3f self, ptrdiff_t d); + void (*set)(cfp_iter3f self, float val); + void (*set_at)(cfp_iter3f self, ptrdiff_t d, float val); + cfp_ref3f (*ref)(cfp_iter3f self); + cfp_ref3f (*ref_at)(cfp_iter3f self, ptrdiff_t d); + cfp_ptr3f (*ptr)(cfp_iter3f self); + cfp_ptr3f (*ptr_at)(cfp_iter3f self, ptrdiff_t d); + size_t (*i)(const cfp_iter3f self); + size_t (*j)(const cfp_iter3f self); + size_t (*k)(const cfp_iter3f self); + /* non-member functions */ + zfp_bool (*lt)(const cfp_iter3f lhs, const cfp_iter3f rhs); + zfp_bool (*gt)(const cfp_iter3f lhs, const cfp_iter3f rhs); + zfp_bool (*leq)(const cfp_iter3f lhs, const cfp_iter3f rhs); + zfp_bool (*geq)(const cfp_iter3f lhs, const cfp_iter3f rhs); + zfp_bool 
(*eq)(const cfp_iter3f lhs, const cfp_iter3f rhs); + zfp_bool (*neq)(const cfp_iter3f lhs, const cfp_iter3f rhs); + ptrdiff_t (*distance)(const cfp_iter3f first, const cfp_iter3f last); + cfp_iter3f (*next)(const cfp_iter3f it, ptrdiff_t d); + cfp_iter3f (*prev)(const cfp_iter3f it, ptrdiff_t d); + cfp_iter3f (*inc)(const cfp_iter3f it); + cfp_iter3f (*dec)(const cfp_iter3f it); +} cfp_iter3f_api; + +typedef struct { + /* constructor/destructor */ + cfp_header (*ctor)(const cfp_array3f a); + cfp_header (*ctor_buffer)(const void* data, size_t size); + void (*dtor)(cfp_header self); + /* array metadata */ + zfp_type (*scalar_type)(const cfp_header self); + uint (*dimensionality)(const cfp_header self); + size_t (*size_x)(const cfp_header self); + size_t (*size_y)(const cfp_header self); + size_t (*size_z)(const cfp_header self); + size_t (*size_w)(const cfp_header self); + double (*rate)(const cfp_header self); + /* header payload: data pointer and byte size */ + const void* (*data)(const cfp_header self); + size_t (*size_bytes)(const cfp_header self, uint mask); +} cfp_header3f_api; + +typedef struct { + cfp_array3f (*ctor_default)(void); + cfp_array3f (*ctor)(size_t nx, size_t ny, size_t nz, double rate, const float* p, size_t cache_size); + cfp_array3f (*ctor_copy)(const cfp_array3f src); + cfp_array3f (*ctor_header)(const cfp_header h, const void* buffer, size_t buffer_size_bytes); + void (*dtor)(cfp_array3f self); + + void (*deep_copy)(cfp_array3f self, const cfp_array3f src); + + double (*rate)(const cfp_array3f self); + double (*set_rate)(cfp_array3f self, double rate); + size_t (*cache_size)(const cfp_array3f self); + void (*set_cache_size)(cfp_array3f self, size_t bytes); + void (*clear_cache)(const cfp_array3f self); + void (*flush_cache)(const cfp_array3f self); + size_t (*size_bytes)(const cfp_array3f self, uint mask); + size_t (*compressed_size)(const cfp_array3f self); + void* (*compressed_data)(const cfp_array3f self); + size_t (*size)(const 
cfp_array3f self); + size_t (*size_x)(const cfp_array3f self); + size_t (*size_y)(const cfp_array3f self); + size_t (*size_z)(const cfp_array3f self); + void (*resize)(cfp_array3f self, size_t nx, size_t ny, size_t nz, zfp_bool clear); + + void (*get_array)(const cfp_array3f self, float* p); + void (*set_array)(cfp_array3f self, const float* p); + float (*get_flat)(const cfp_array3f self, size_t i); + void (*set_flat)(cfp_array3f self, size_t i, float val); + float (*get)(const cfp_array3f self, size_t i, size_t j, size_t k); + void (*set)(cfp_array3f self, size_t i, size_t j, size_t k, float val); + + cfp_ref3f (*ref)(cfp_array3f self, size_t i, size_t j, size_t k); + cfp_ref3f (*ref_flat)(cfp_array3f self, size_t i); + + cfp_ptr3f (*ptr)(cfp_array3f self, size_t i, size_t j, size_t k); + cfp_ptr3f (*ptr_flat)(cfp_array3f self, size_t i); + + cfp_iter3f (*begin)(cfp_array3f self); + cfp_iter3f (*end)(cfp_array3f self); + + cfp_ref3f_api reference; + cfp_ptr3f_api pointer; + cfp_iter3f_api iterator; + cfp_header3f_api header; +} cfp_array3f_api; + +#endif diff --git a/include/zfp/internal/cfp/array4d.h b/include/zfp/internal/cfp/array4d.h new file mode 100644 index 00000000..44d1ecf0 --- /dev/null +++ b/include/zfp/internal/cfp/array4d.h @@ -0,0 +1,148 @@ +#ifndef CFP_ARRAY_4D_H +#define CFP_ARRAY_4D_H + +#include <stddef.h> +#include "zfp.h" + +typedef struct { + void* object; +} cfp_array4d; + +typedef struct { + cfp_array4d array; + size_t x, y, z, w; +} cfp_ref4d; + +typedef struct { + cfp_ref4d reference; +} cfp_ptr4d; + +typedef struct { + cfp_array4d array; + size_t x, y, z, w; +} cfp_iter4d; + +typedef struct { + /* member functions */ + double (*get)(const cfp_ref4d self); + void (*set)(cfp_ref4d self, double val); + cfp_ptr4d (*ptr)(cfp_ref4d self); + void (*copy)(cfp_ref4d self, const cfp_ref4d src); +} cfp_ref4d_api; + +typedef struct { + /* member functions */ + double (*get)(const cfp_ptr4d self); + double (*get_at)(const cfp_ptr4d self, ptrdiff_t d); + void 
(*set)(cfp_ptr4d self, double val); + void (*set_at)(cfp_ptr4d self, ptrdiff_t d, double val); + cfp_ref4d (*ref)(cfp_ptr4d self); + cfp_ref4d (*ref_at)(cfp_ptr4d self, ptrdiff_t d); + /* non-member functions */ + zfp_bool (*lt)(const cfp_ptr4d lhs, const cfp_ptr4d rhs); + zfp_bool (*gt)(const cfp_ptr4d lhs, const cfp_ptr4d rhs); + zfp_bool (*leq)(const cfp_ptr4d lhs, const cfp_ptr4d rhs); + zfp_bool (*geq)(const cfp_ptr4d lhs, const cfp_ptr4d rhs); + zfp_bool (*eq)(const cfp_ptr4d lhs, const cfp_ptr4d rhs); + zfp_bool (*neq)(const cfp_ptr4d lhs, const cfp_ptr4d rhs); + ptrdiff_t (*distance)(const cfp_ptr4d first, const cfp_ptr4d last); + cfp_ptr4d (*next)(const cfp_ptr4d p, ptrdiff_t d); + cfp_ptr4d (*prev)(const cfp_ptr4d p, ptrdiff_t d); + cfp_ptr4d (*inc)(const cfp_ptr4d p); + cfp_ptr4d (*dec)(const cfp_ptr4d p); +} cfp_ptr4d_api; + +typedef struct { + /* member functions */ + double (*get)(const cfp_iter4d self); + double (*get_at)(const cfp_iter4d self, ptrdiff_t d); + void (*set)(cfp_iter4d self, double val); + void (*set_at)(cfp_iter4d self, ptrdiff_t d, double val); + cfp_ref4d (*ref)(cfp_iter4d self); + cfp_ref4d (*ref_at)(cfp_iter4d self, ptrdiff_t d); + cfp_ptr4d (*ptr)(cfp_iter4d self); + cfp_ptr4d (*ptr_at)(cfp_iter4d self, ptrdiff_t d); + size_t (*i)(const cfp_iter4d self); + size_t (*j)(const cfp_iter4d self); + size_t (*k)(const cfp_iter4d self); + size_t (*l)(const cfp_iter4d self); + /* non-member functions */ + zfp_bool (*lt)(const cfp_iter4d lhs, const cfp_iter4d rhs); + zfp_bool (*gt)(const cfp_iter4d lhs, const cfp_iter4d rhs); + zfp_bool (*leq)(const cfp_iter4d lhs, const cfp_iter4d rhs); + zfp_bool (*geq)(const cfp_iter4d lhs, const cfp_iter4d rhs); + zfp_bool (*eq)(const cfp_iter4d lhs, const cfp_iter4d rhs); + zfp_bool (*neq)(const cfp_iter4d lhs, const cfp_iter4d rhs); + ptrdiff_t (*distance)(const cfp_iter4d first, const cfp_iter4d last); + cfp_iter4d (*next)(const cfp_iter4d it, ptrdiff_t d); + cfp_iter4d (*prev)(const cfp_iter4d it, 
ptrdiff_t d); + cfp_iter4d (*inc)(const cfp_iter4d it); + cfp_iter4d (*dec)(const cfp_iter4d it); +} cfp_iter4d_api; + +typedef struct { + /* constructor/destructor */ + cfp_header (*ctor)(const cfp_array4d a); + cfp_header (*ctor_buffer)(const void* data, size_t size); + void (*dtor)(cfp_header self); + /* array metadata */ + zfp_type (*scalar_type)(const cfp_header self); + uint (*dimensionality)(const cfp_header self); + size_t (*size_x)(const cfp_header self); + size_t (*size_y)(const cfp_header self); + size_t (*size_z)(const cfp_header self); + size_t (*size_w)(const cfp_header self); + double (*rate)(const cfp_header self); + /* header payload: data pointer and byte size */ + const void* (*data)(const cfp_header self); + size_t (*size_bytes)(const cfp_header self, uint mask); +} cfp_header4d_api; + +typedef struct { + cfp_array4d (*ctor_default)(void); + cfp_array4d (*ctor)(size_t nx, size_t ny, size_t nz, size_t nw, double rate, const double* p, size_t cache_size); + cfp_array4d (*ctor_copy)(const cfp_array4d src); + cfp_array4d (*ctor_header)(const cfp_header h, const void* buffer, size_t buffer_size_bytes); + void (*dtor)(cfp_array4d self); + + void (*deep_copy)(cfp_array4d self, const cfp_array4d src); + + double (*rate)(const cfp_array4d self); + double (*set_rate)(cfp_array4d self, double rate); + size_t (*cache_size)(const cfp_array4d self); + void (*set_cache_size)(cfp_array4d self, size_t bytes); + void (*clear_cache)(const cfp_array4d self); + void (*flush_cache)(const cfp_array4d self); + size_t (*size_bytes)(const cfp_array4d self, uint mask); + size_t (*compressed_size)(const cfp_array4d self); + void* (*compressed_data)(const cfp_array4d self); + size_t (*size)(const cfp_array4d self); + size_t (*size_x)(const cfp_array4d self); + size_t (*size_y)(const cfp_array4d self); + size_t (*size_z)(const cfp_array4d self); + size_t (*size_w)(const cfp_array4d self); + void (*resize)(cfp_array4d self, size_t nx, size_t ny, size_t nz, size_t nw, zfp_bool 
clear); + + void (*get_array)(const cfp_array4d self, double* p); + void (*set_array)(cfp_array4d self, const double* p); + double (*get_flat)(const cfp_array4d self, size_t i); + void (*set_flat)(cfp_array4d self, size_t i, double val); + double (*get)(const cfp_array4d self, size_t i, size_t j, size_t k, size_t l); + void (*set)(cfp_array4d self, size_t i, size_t j, size_t k, size_t l, double val); + + cfp_ref4d (*ref)(cfp_array4d self, size_t i, size_t j, size_t k, size_t l); + cfp_ref4d (*ref_flat)(cfp_array4d self, size_t i); + + cfp_ptr4d (*ptr)(cfp_array4d self, size_t i, size_t j, size_t k, size_t l); + cfp_ptr4d (*ptr_flat)(cfp_array4d self, size_t i); + + cfp_iter4d (*begin)(cfp_array4d self); + cfp_iter4d (*end)(cfp_array4d self); + + cfp_ref4d_api reference; + cfp_ptr4d_api pointer; + cfp_iter4d_api iterator; + cfp_header4d_api header; +} cfp_array4d_api; + +#endif diff --git a/include/zfp/internal/cfp/array4f.h b/include/zfp/internal/cfp/array4f.h new file mode 100644 index 00000000..b336dffe --- /dev/null +++ b/include/zfp/internal/cfp/array4f.h @@ -0,0 +1,148 @@ +#ifndef CFP_ARRAY_4F_H +#define CFP_ARRAY_4F_H + +#include <stddef.h> +#include "zfp.h" + +typedef struct { + void* object; +} cfp_array4f; + +typedef struct { + cfp_array4f array; + size_t x, y, z, w; +} cfp_ref4f; + +typedef struct { + cfp_ref4f reference; +} cfp_ptr4f; + +typedef struct { + cfp_array4f array; + size_t x, y, z, w; +} cfp_iter4f; + +typedef struct { + /* member functions */ + float (*get)(const cfp_ref4f self); + void (*set)(cfp_ref4f self, float val); + cfp_ptr4f (*ptr)(cfp_ref4f self); + void (*copy)(cfp_ref4f self, const cfp_ref4f src); +} cfp_ref4f_api; + +typedef struct { + /* member functions */ + float (*get)(const cfp_ptr4f self); + float (*get_at)(const cfp_ptr4f self, ptrdiff_t d); + void (*set)(cfp_ptr4f self, float val); + void (*set_at)(cfp_ptr4f self, ptrdiff_t d, float val); + cfp_ref4f (*ref)(cfp_ptr4f self); + cfp_ref4f (*ref_at)(cfp_ptr4f self, ptrdiff_t d); + /* 
non-member functions */ + zfp_bool (*lt)(const cfp_ptr4f lhs, const cfp_ptr4f rhs); + zfp_bool (*gt)(const cfp_ptr4f lhs, const cfp_ptr4f rhs); + zfp_bool (*leq)(const cfp_ptr4f lhs, const cfp_ptr4f rhs); + zfp_bool (*geq)(const cfp_ptr4f lhs, const cfp_ptr4f rhs); + zfp_bool (*eq)(const cfp_ptr4f lhs, const cfp_ptr4f rhs); + zfp_bool (*neq)(const cfp_ptr4f lhs, const cfp_ptr4f rhs); + ptrdiff_t (*distance)(const cfp_ptr4f first, const cfp_ptr4f last); + cfp_ptr4f (*next)(const cfp_ptr4f p, ptrdiff_t d); + cfp_ptr4f (*prev)(const cfp_ptr4f p, ptrdiff_t d); + cfp_ptr4f (*inc)(const cfp_ptr4f p); + cfp_ptr4f (*dec)(const cfp_ptr4f p); +} cfp_ptr4f_api; + +typedef struct { + /* member functions */ + float (*get)(const cfp_iter4f self); + float (*get_at)(const cfp_iter4f self, ptrdiff_t d); + void (*set)(cfp_iter4f self, float val); + void (*set_at)(cfp_iter4f self, ptrdiff_t d, float val); + cfp_ref4f (*ref)(cfp_iter4f self); + cfp_ref4f (*ref_at)(cfp_iter4f self, ptrdiff_t d); + cfp_ptr4f (*ptr)(cfp_iter4f self); + cfp_ptr4f (*ptr_at)(cfp_iter4f self, ptrdiff_t d); + size_t (*i)(const cfp_iter4f self); + size_t (*j)(const cfp_iter4f self); + size_t (*k)(const cfp_iter4f self); + size_t (*l)(const cfp_iter4f self); + /* non-member functions */ + zfp_bool (*lt)(const cfp_iter4f lhs, const cfp_iter4f rhs); + zfp_bool (*gt)(const cfp_iter4f lhs, const cfp_iter4f rhs); + zfp_bool (*leq)(const cfp_iter4f lhs, const cfp_iter4f rhs); + zfp_bool (*geq)(const cfp_iter4f lhs, const cfp_iter4f rhs); + zfp_bool (*eq)(const cfp_iter4f lhs, const cfp_iter4f rhs); + zfp_bool (*neq)(const cfp_iter4f lhs, const cfp_iter4f rhs); + ptrdiff_t (*distance)(const cfp_iter4f first, const cfp_iter4f last); + cfp_iter4f (*next)(const cfp_iter4f it, ptrdiff_t d); + cfp_iter4f (*prev)(const cfp_iter4f it, ptrdiff_t d); + cfp_iter4f (*inc)(const cfp_iter4f it); + cfp_iter4f (*dec)(const cfp_iter4f it); +} cfp_iter4f_api; + +typedef struct { + /* constructor/destructor */ + cfp_header 
(*ctor)(const cfp_array4f a); + cfp_header (*ctor_buffer)(const void* data, size_t size); + void (*dtor)(cfp_header self); + /* array metadata */ + zfp_type (*scalar_type)(const cfp_header self); + uint (*dimensionality)(const cfp_header self); + size_t (*size_x)(const cfp_header self); + size_t (*size_y)(const cfp_header self); + size_t (*size_z)(const cfp_header self); + size_t (*size_w)(const cfp_header self); + double (*rate)(const cfp_header self); + /* header payload: data pointer and byte size */ + const void* (*data)(const cfp_header self); + size_t (*size_bytes)(const cfp_header self, uint mask); +} cfp_header4f_api; + +typedef struct { + cfp_array4f (*ctor_default)(void); + cfp_array4f (*ctor)(size_t nx, size_t ny, size_t nz, size_t nw, double rate, const float* p, size_t cache_size); + cfp_array4f (*ctor_copy)(const cfp_array4f src); + cfp_array4f (*ctor_header)(const cfp_header h, const void* buffer, size_t buffer_size_bytes); + void (*dtor)(cfp_array4f self); + + void (*deep_copy)(cfp_array4f self, const cfp_array4f src); + + double (*rate)(const cfp_array4f self); + double (*set_rate)(cfp_array4f self, double rate); + size_t (*cache_size)(const cfp_array4f self); + void (*set_cache_size)(cfp_array4f self, size_t bytes); + void (*clear_cache)(const cfp_array4f self); + void (*flush_cache)(const cfp_array4f self); + size_t (*size_bytes)(const cfp_array4f self, uint mask); + size_t (*compressed_size)(const cfp_array4f self); + void* (*compressed_data)(const cfp_array4f self); + size_t (*size)(const cfp_array4f self); + size_t (*size_x)(const cfp_array4f self); + size_t (*size_y)(const cfp_array4f self); + size_t (*size_z)(const cfp_array4f self); + size_t (*size_w)(const cfp_array4f self); + void (*resize)(cfp_array4f self, size_t nx, size_t ny, size_t nz, size_t nw, zfp_bool clear); + + void (*get_array)(const cfp_array4f self, float* p); + void (*set_array)(cfp_array4f self, const float* p); + float (*get_flat)(const cfp_array4f self, size_t i); + void 
(*set_flat)(cfp_array4f self, size_t i, float val); + float (*get)(const cfp_array4f self, size_t i, size_t j, size_t k, size_t l); + void (*set)(cfp_array4f self, size_t i, size_t j, size_t k, size_t l, float val); + + cfp_ref4f (*ref)(cfp_array4f self, size_t i, size_t j, size_t k, size_t l); + cfp_ref4f (*ref_flat)(cfp_array4f self, size_t i); + + cfp_ptr4f (*ptr)(cfp_array4f self, size_t i, size_t j, size_t k, size_t l); + cfp_ptr4f (*ptr_flat)(cfp_array4f self, size_t i); + + cfp_iter4f (*begin)(cfp_array4f self); + cfp_iter4f (*end)(cfp_array4f self); + + cfp_ref4f_api reference; + cfp_ptr4f_api pointer; + cfp_iter4f_api iterator; + cfp_header4f_api header; +} cfp_array4f_api; + +#endif diff --git a/include/zfp/internal/cfp/header.h b/include/zfp/internal/cfp/header.h new file mode 100644 index 00000000..01d78ba5 --- /dev/null +++ b/include/zfp/internal/cfp/header.h @@ -0,0 +1,8 @@ +#ifndef CFP_HEADER_H +#define CFP_HEADER_H + +typedef struct { + void* object; +} cfp_header; + +#endif diff --git a/include/zfp/internal/codec/genheader.hpp b/include/zfp/internal/codec/genheader.hpp new file mode 100644 index 00000000..8beec088 --- /dev/null +++ b/include/zfp/internal/codec/genheader.hpp @@ -0,0 +1,76 @@ +// zfp::codec::generic_base::header +class header : public zfp::array::header {public: + // serialization: construct header from array + header(const zfp::array& a) : + zfp::array::header(a), + bit_rate(static_cast<size_t>(a.rate())) + { + buffer[0] = magic; + buffer[1] = 0; // TODO: codec identifier (dimensionality, internal type) + buffer[2] = static_cast<uint64>(bit_rate); + buffer[3] = static_cast<uint64>(type); + buffer[4] = static_cast<uint64>(nx); + buffer[5] = static_cast<uint64>(ny); + buffer[6] = static_cast<uint64>(nz); + buffer[7] = static_cast<uint64>(nw); + } + + // deserialization: construct header from memory buffer of optional size + header(const void* data, size_t bytes = 0) : + bit_rate(0) + { + // ensure byte size matches + if (bytes && bytes != byte_size) + throw zfp::exception("zfp generic
header length does not match expectations"); + else { + // copy and parse header + std::memcpy(buffer, data, byte_size); + if (buffer[0] != magic) + throw zfp::exception("zfp generic header is corrupt"); + bit_rate = static_cast<size_t>(buffer[2]); + type = static_cast<zfp_type>(buffer[3]); + nx = static_cast<size_t>(buffer[4]); + ny = static_cast<size_t>(buffer[5]); + nz = static_cast<size_t>(buffer[6]); + nw = static_cast<size_t>(buffer[7]); + } + } + + virtual ~header() {} + + // rate in bits per value + double rate() const { return static_cast<double>(bit_rate); } + + // header data + const void* data() const { return buffer; } + + // header byte size + size_t size_bytes(uint mask = ZFP_DATA_HEADER) const + { + size_t size = 0; + if (mask & ZFP_DATA_META) + size += sizeof(*this) - byte_size; + if (mask & ZFP_DATA_HEADER) + size += byte_size; + return size; + } + +protected: + // magic word + static const uint64 magic = UINT64C(0x000000008570667a); + + // header size measured in bits, bytes, and 64-bit words + static const size_t word_size = 8; + static const size_t byte_size = word_size * sizeof(uint64); + static const size_t bit_size = byte_size * CHAR_BIT; + + using zfp::array::header::type; + using zfp::array::header::nx; + using zfp::array::header::ny; + using zfp::array::header::nz; + using zfp::array::header::nw; + + size_t bit_rate; // array rate in bits per value + uint64 buffer[word_size]; // header data +}; diff --git a/include/zfp/internal/codec/zfpheader.hpp b/include/zfp/internal/codec/zfpheader.hpp new file mode 100644 index 00000000..6823e049 --- /dev/null +++ b/include/zfp/internal/codec/zfpheader.hpp @@ -0,0 +1,129 @@ +// zfp::codec::zfp_base::header +class header : public zfp::array::header {public: + // serialization: construct header from array + header(const zfp::array& a) : + zfp::array::header(a), + bit_rate(a.rate()) + { + std::string error; + + // set up zfp stream and field for generating header + bitstream* stream = stream_open(buffer, sizeof(buffer)); + zfp_stream* zfp =
zfp_stream_open(stream); + bit_rate = zfp_stream_set_rate(zfp, bit_rate, type, dimensionality(), zfp_true); + if (zfp_stream_mode(zfp) > ZFP_MODE_SHORT_MAX) + error = "zfp serialization supports only short headers"; + else { + // set up field + zfp_field* field = 0; + switch (dimensionality()) { + case 1: + field = zfp_field_1d(0, type, nx); + break; + case 2: + field = zfp_field_2d(0, type, nx, ny); + break; + case 3: + field = zfp_field_3d(0, type, nx, ny, nz); + break; + case 4: + field = zfp_field_4d(0, type, nx, ny, nz, nw); + break; + default: + error = "zfp serialization supports only 1D, 2D, 3D, and 4D arrays"; + break; + } + + if (field) { + // write header to buffer + size_t bits = zfp_write_header(zfp, field, ZFP_HEADER_FULL); + if (bits != bit_size) + error = "zfp header length does not match expected length"; + zfp_stream_flush(zfp); + zfp_field_free(field); + } + } + + zfp_stream_close(zfp); + stream_close(stream); + + if (!error.empty()) + throw zfp::exception(error); + } + + // deserialization: construct header from memory buffer of optional size + header(const void* data, size_t bytes = 0) : + bit_rate(0) + { + std::string error; + + // ensure byte size matches + if (bytes && bytes != byte_size) + error = "zfp header length does not match expectations"; + else { + // copy and parse header + std::fill(buffer, buffer + word_size, 0); + std::memcpy(buffer, data, byte_size); + bitstream* stream = stream_open(buffer, sizeof(buffer)); + zfp_stream* zfp = zfp_stream_open(stream); + zfp_field field; + size_t bits = zfp_read_header(zfp, &field, ZFP_HEADER_FULL); + if (!bits) + error = "zfp header is corrupt"; + else if (bits != bit_size) + error = "zfp deserialization supports only short headers"; + else if (zfp_stream_compression_mode(zfp) != zfp_mode_fixed_rate) + error = "zfp deserialization supports only fixed-rate mode"; + else { + // success; initialize fields + type = field.type; + nx = field.nx; + ny = field.ny; + nz = field.nz; + nw = field.nw; + 
bit_rate = double(zfp->maxbits) / (1u << (2 * dimensionality())); + } + zfp_stream_close(zfp); + stream_close(stream); + } + + // throw exception upon error + if (!error.empty()) + throw zfp::exception(error); + } + + virtual ~header() {} + + // rate in bits per value + double rate() const { return bit_rate; } + + // header data + const void* data() const { return buffer; } + + // header byte size + size_t size_bytes(uint mask = ZFP_DATA_HEADER) const + { + size_t size = 0; + if (mask & ZFP_DATA_META) + size += sizeof(*this) - byte_size; + if (mask & ZFP_DATA_HEADER) + size += byte_size; + return size; + } + +protected: + // header size measured in bits, bytes, and 64-bit words + static const size_t bit_size = ZFP_MAGIC_BITS + ZFP_META_BITS + ZFP_MODE_SHORT_BITS; + static const size_t byte_size = (bit_size + CHAR_BIT - 1) / CHAR_BIT; + static const size_t word_size = (byte_size + sizeof(uint64) - 1) / sizeof(uint64); + + using zfp::array::header::type; + using zfp::array::header::nx; + using zfp::array::header::ny; + using zfp::array::header::nz; + using zfp::array::header::nw; + + double bit_rate; // array rate in bits per value + uint64 buffer[word_size]; // header data +}; diff --git a/src/inline/inline.h b/include/zfp/internal/zfp/inline.h similarity index 77% rename from src/inline/inline.h rename to include/zfp/internal/zfp/inline.h index e9ade3f1..bb10673b 100644 --- a/src/inline/inline.h +++ b/include/zfp/internal/zfp/inline.h @@ -1,5 +1,5 @@ -#ifndef INLINE_H -#define INLINE_H +#ifndef ZFP_INLINE_H +#define ZFP_INLINE_H #ifndef inline_ #if __STDC_VERSION__ >= 199901L diff --git a/include/zfp/macros.h b/include/zfp/internal/zfp/macros.h similarity index 100% rename from include/zfp/macros.h rename to include/zfp/internal/zfp/macros.h diff --git a/include/zfp/system.h b/include/zfp/internal/zfp/system.h similarity index 59% rename from include/zfp/system.h rename to include/zfp/internal/zfp/system.h index 53941964..23c49360 100644 --- a/include/zfp/system.h 
+++ b/include/zfp/internal/zfp/system.h @@ -1,15 +1,16 @@ #ifndef ZFP_SYSTEM_H #define ZFP_SYSTEM_H -#if __STDC_VERSION__ >= 199901L +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + /* C99: use restrict */ #define restrict_ restrict #else + /* C89: no restrict keyword */ #define restrict_ #endif /* macros for exporting and importing symbols */ -#ifdef _MSC_VER - #define export_ __declspec(dllexport) +#if defined(_MSC_VER) && defined(ZFP_SHARED_LIBS) /* export (import) symbols when ZFP_SOURCE is (is not) defined */ #ifdef ZFP_SOURCE #ifdef __cplusplus @@ -24,8 +25,7 @@ #define extern_ extern __declspec(dllimport) #endif #endif -#else /* !_MSC_VER */ - #define export_ +#else /* !(_MSC_VER && ZFP_SHARED_LIBS) */ #ifdef __cplusplus #define extern_ extern "C" #else @@ -33,13 +33,13 @@ #endif #endif -#ifdef __GNUC__ - /* L1 cache line size for alignment purposes */ - #ifndef ZFP_CACHE_LINE_SIZE - #define ZFP_CACHE_LINE_SIZE 0x100 - #endif - #define align_(n) __attribute__((aligned(n))) - #define cache_align_(x) x align_(ZFP_CACHE_LINE_SIZE) +/* L1 cache line size for alignment purposes */ +#ifndef ZFP_CACHE_LINE_SIZE + #define ZFP_CACHE_LINE_SIZE 0x100 +#endif +/* ZFP_CACHE_LINE_SIZE=0 disables alignment */ +#if defined(__GNUC__) && ZFP_CACHE_LINE_SIZE + #define cache_align_(x) x __attribute__((aligned(ZFP_CACHE_LINE_SIZE))) #else #define cache_align_(x) x #endif diff --git a/include/zfp/types.h b/include/zfp/internal/zfp/types.h similarity index 77% rename from include/zfp/types.h rename to include/zfp/internal/zfp/types.h index f57e1f89..5c882367 100644 --- a/include/zfp/types.h +++ b/include/zfp/internal/zfp/types.h @@ -4,8 +4,33 @@ typedef unsigned char uchar; typedef unsigned short ushort; typedef unsigned int uint; +typedef unsigned long ulong; -#if __STDC_VERSION__ >= 199901L +#if defined(__cplusplus) && __cplusplus >= 201103L + /* C++11: use standard integer types */ + #include <cstdint> + #include <cinttypes> + #define INT64C(x) INT64_C(x) + #define UINT64C(x)
UINT64_C(x) + #define INT64PRId PRId64 + #define INT64PRIi PRIi64 + #define UINT64PRIo PRIo64 + #define UINT64PRIu PRIu64 + #define UINT64PRIx PRIx64 + #define INT64SCNd SCNd64 + #define INT64SCNi SCNi64 + #define UINT64SCNo SCNo64 + #define UINT64SCNu SCNu64 + #define UINT64SCNx SCNx64 + typedef std::int8_t int8; + typedef std::uint8_t uint8; + typedef std::int16_t int16; + typedef std::uint16_t uint16; + typedef std::int32_t int32; + typedef std::uint32_t uint32; + typedef std::int64_t int64; + typedef std::uint64_t uint64; +#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99: use standard integer types */ #include <stdint.h> #include <inttypes.h> @@ -41,7 +66,7 @@ typedef unsigned int uint; typedef unsigned int uint32; /* determine 64-bit data model */ - #if defined(_WIN32) || defined(_WIN64) + #if defined(_WIN32) /* assume ILP32 or LLP64 (MSVC, MinGW) */ #define ZFP_LLP64 1 #else diff --git a/include/zfp/version.h b/include/zfp/version.h new file mode 100644 index 00000000..1390bb36 --- /dev/null +++ b/include/zfp/version.h @@ -0,0 +1,49 @@ +#ifndef ZFP_VERSION_H +#define ZFP_VERSION_H + +/* library version information */ +#define ZFP_VERSION_MAJOR 1 /* library major version number */ +#define ZFP_VERSION_MINOR 0 /* library minor version number */ +#define ZFP_VERSION_PATCH 1 /* library patch version number */ +#define ZFP_VERSION_TWEAK 0 /* library tweak version number */ + +/* codec version number (see also zfp_codec_version) */ +#define ZFP_CODEC 5 + +/* stringification */ +#define _zfp_str_(x) # x +#define _zfp_str(x) _zfp_str_(x) + +/* macro for generating an integer version identifier */ +#define ZFP_MAKE_VERSION(major, minor, patch, tweak) \ + (((major) << 12) + \ + ((minor) << 8) + \ + ((patch) << 4) + \ + ((tweak) << 0)) + +/* macros for generating a version string */ +#define ZFP_MAKE_VERSION_STRING(major, minor, patch) \ + _zfp_str(major) "." \ + _zfp_str(minor) "."
\ + _zfp_str(patch) + +#define ZFP_MAKE_FULLVERSION_STRING(major, minor, patch, tweak) \ + _zfp_str(major) "." \ + _zfp_str(minor) "." \ + _zfp_str(patch) "." \ + _zfp_str(tweak) + +/* library version number (see also zfp_library_version) */ +#define ZFP_VERSION \ + ZFP_MAKE_VERSION(ZFP_VERSION_MAJOR, ZFP_VERSION_MINOR, ZFP_VERSION_PATCH, ZFP_VERSION_TWEAK) + +/* library version string (see also zfp_version_string) */ +#if ZFP_VERSION_TWEAK == 0 + #define ZFP_VERSION_STRING \ + ZFP_MAKE_VERSION_STRING(ZFP_VERSION_MAJOR, ZFP_VERSION_MINOR, ZFP_VERSION_PATCH) +#else + #define ZFP_VERSION_STRING \ + ZFP_MAKE_FULLVERSION_STRING(ZFP_VERSION_MAJOR, ZFP_VERSION_MINOR, ZFP_VERSION_PATCH, ZFP_VERSION_TWEAK) +#endif + +#endif diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 905dc97e..9410ddd7 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -1,5 +1,8 @@ +if (CMAKE_VERSION VERSION_GREATER_EQUAL 3.27.0) + cmake_policy(SET CMP0148 OLD) +endif () + set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_LIST_DIR}/scikit-build-cmake) -set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_LIST_DIR}/eyescale-cmake) include(UseCython) include(FindPythonExtensions) include(FindNumPy) @@ -7,20 +10,21 @@ include(FindNumPy) find_package(PythonInterp REQUIRED) find_package(PythonLibs REQUIRED) find_package(PythonExtensions REQUIRED) -find_package(Cython REQUIRED) +find_package(Cython 0.28 REQUIRED) # >= v0.28 required for const memoryview support find_package(NumPy REQUIRED) include_directories(${ZFP_SOURCE_DIR}/include) -include_directories(${PYTHON_NUMPY_INCLUDE_DIR}) +include_directories(${NumPy_INCLUDE_DIR}) -add_cython_target(zfpy zfpy.pyx C) +add_cython_target(zfpy zfpy.pyx C PY3) add_library(zfpy MODULE ${zfpy}) target_link_libraries(zfpy zfp) python_extension_module(zfpy) -# Build to the currrent binary dir to avoid conflicts with other libraries named zfp -set(PYLIB_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/lib" CACHE PATH "Directory where 
zfp python library will be built") +# Build to the current binary dir to avoid conflicts with other libraries named zfp +set(PYLIB_BUILD_DIR "${CMAKE_BINARY_DIR}/bin" CACHE PATH "Directory where zfp python library will be built") set_target_properties(zfpy PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${PYLIB_BUILD_DIR}) + # Install to the typical python module directory set(python_install_lib_dir "lib/python${PYTHON_VERSION_MAJOR}.${PYTHON_VERSION_MINOR}/site-packages/") install(TARGETS zfpy LIBRARY DESTINATION ${python_install_lib_dir}) diff --git a/python/eyescale-cmake/FindNumPy.cmake b/python/eyescale-cmake/FindNumPy.cmake deleted file mode 100644 index 8aba4e69..00000000 --- a/python/eyescale-cmake/FindNumPy.cmake +++ /dev/null @@ -1,41 +0,0 @@ -# Find the Python NumPy package -# PYTHON_NUMPY_INCLUDE_DIR -# PYTHON_NUMPY_FOUND -# will be set by this script - -# cmake_minimum_required(VERSION 2.6) - -if(NOT PYTHON_EXECUTABLE) - if(NumPy_FIND_QUIETLY) - find_package(PythonInterp QUIET) - else() - find_package(PythonInterp) - set(__numpy_out 1) - endif() -endif() - -if (PYTHON_EXECUTABLE) - # Find out the include path - execute_process( - COMMAND "${PYTHON_EXECUTABLE}" -c - "from __future__ import print_function\ntry: import numpy; print(numpy.get_include(), end='')\nexcept:pass\n" - OUTPUT_VARIABLE __numpy_path) - # And the version - execute_process( - COMMAND "${PYTHON_EXECUTABLE}" -c - "from __future__ import print_function\ntry: import numpy; print(numpy.__version__, end='')\nexcept:pass\n" - OUTPUT_VARIABLE __numpy_version) -elseif(__numpy_out) - message(STATUS "Python executable not found.") -endif(PYTHON_EXECUTABLE) - -find_path(PYTHON_NUMPY_INCLUDE_DIR numpy/arrayobject.h - HINTS "${__numpy_path}" "${PYTHON_INCLUDE_PATH}" NO_DEFAULT_PATH) - -if(PYTHON_NUMPY_INCLUDE_DIR) - set(PYTHON_NUMPY_FOUND 1 CACHE INTERNAL "Python numpy found") -endif(PYTHON_NUMPY_INCLUDE_DIR) - -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(NumPy REQUIRED_VARS 
PYTHON_NUMPY_INCLUDE_DIR - VERSION_VAR __numpy_version) diff --git a/python/eyescale-cmake/LICENSE.txt b/python/eyescale-cmake/LICENSE.txt deleted file mode 100644 index 307d54e5..00000000 --- a/python/eyescale-cmake/LICENSE.txt +++ /dev/null @@ -1,26 +0,0 @@ -Unless otherwise noted in the file, all files in this directory are -licensed under the BSD license, reproduced below. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -- Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. -- Neither the name of Eyescale Software GmbH nor the names of its - contributors may be used to endorse or promote products derived from this - software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. 
diff --git a/python/requirements.txt b/python/requirements.txt index 7f361298..849962b2 100644 --- a/python/requirements.txt +++ b/python/requirements.txt @@ -1,3 +1,2 @@ cython>=0.22 numpy>=1.8.0 - diff --git a/python/scikit-build-cmake/FindCython.cmake b/python/scikit-build-cmake/FindCython.cmake index 3d58c4f0..c8de1311 100644 --- a/python/scikit-build-cmake/FindCython.cmake +++ b/python/scikit-build-cmake/FindCython.cmake @@ -13,7 +13,7 @@ # ``CYTHON_FOUND`` # true if the program was found # -# For more information on the Cython project, see http://cython.org/. +# For more information on the Cython project, see https://cython.org/. # # *Cython is a language that makes writing C extensions for the Python language # as easy as Python itself.* @@ -36,9 +36,15 @@ # Use the Cython executable that lives next to the Python executable # if it is a local installation. -find_package(PythonInterp) -if(PYTHONINTERP_FOUND) +if(Python_EXECUTABLE) + get_filename_component(_python_path ${Python_EXECUTABLE} PATH) +elseif(Python3_EXECUTABLE) + get_filename_component(_python_path ${Python3_EXECUTABLE} PATH) +elseif(DEFINED PYTHON_EXECUTABLE) get_filename_component(_python_path ${PYTHON_EXECUTABLE} PATH) +endif() + +if(DEFINED _python_path) find_program(CYTHON_EXECUTABLE NAMES cython cython.bat cython3 HINTS ${_python_path} @@ -56,7 +62,8 @@ if(CYTHON_EXECUTABLE) OUTPUT_VARIABLE CYTHON_version_output ERROR_VARIABLE CYTHON_version_error RESULT_VARIABLE CYTHON_version_result - OUTPUT_STRIP_TRAILING_WHITESPACE) + OUTPUT_STRIP_TRAILING_WHITESPACE + ERROR_STRIP_TRAILING_WHITESPACE) if(NOT ${CYTHON_version_result} EQUAL 0) set(_error_msg "Command \"${CYTHON_version_command}\" failed with") @@ -65,6 +72,10 @@ if(CYTHON_EXECUTABLE) else() if("${CYTHON_version_output}" MATCHES "^[Cc]ython version ([^,]+)") set(CYTHON_VERSION "${CMAKE_MATCH_1}") + else() + if("${CYTHON_version_error}" MATCHES "^[Cc]ython version ([^,]+)") + set(CYTHON_VERSION "${CMAKE_MATCH_1}") + endif() endif() endif() 
endif() diff --git a/python/scikit-build-cmake/FindNumPy.cmake b/python/scikit-build-cmake/FindNumPy.cmake new file mode 100644 index 00000000..275ae1be --- /dev/null +++ b/python/scikit-build-cmake/FindNumPy.cmake @@ -0,0 +1,104 @@ +#.rst: +# +# Find the include directory for ``numpy/arrayobject.h`` as well as other NumPy tools like ``conv-template`` and +# ``from-template``. +# +# This module sets the following variables: +# +# ``NumPy_FOUND`` +# True if NumPy was found. +# ``NumPy_INCLUDE_DIRS`` +# The include directories needed to use NumpPy. +# ``NumPy_VERSION`` +# The version of NumPy found. +# ``NumPy_CONV_TEMPLATE_EXECUTABLE`` +# Path to conv-template executable. +# ``NumPy_FROM_TEMPLATE_EXECUTABLE`` +# Path to from-template executable. +# +# The module will also explicitly define one cache variable: +# +# ``NumPy_INCLUDE_DIR`` +# +# .. note:: +# +# To support NumPy < v0.15.0 where ``from-template`` and ``conv-template`` are not declared as entry points, +# the module emulates the behavior of standalone executables by setting the corresponding variables with the +# path the the python interpreter and the path to the associated script. 
For example: +# :: +# +# set(NumPy_CONV_TEMPLATE_EXECUTABLE /path/to/python /path/to/site-packages/numpy/distutils/conv_template.py CACHE STRING "Command executing conv-template program" FORCE) +# +# set(NumPy_FROM_TEMPLATE_EXECUTABLE /path/to/python /path/to/site-packages/numpy/distutils/from_template.py CACHE STRING "Command executing from-template program" FORCE) +# + +if(NOT NumPy_FOUND) + set(_find_extra_args) + if(NumPy_FIND_REQUIRED) + list(APPEND _find_extra_args REQUIRED) + endif() + if(NumPy_FIND_QUIET) + list(APPEND _find_extra_args QUIET) + endif() + + find_program(NumPy_CONV_TEMPLATE_EXECUTABLE NAMES conv-template) + find_program(NumPy_FROM_TEMPLATE_EXECUTABLE NAMES from-template) + + if(PYTHON_EXECUTABLE) + execute_process(COMMAND "${PYTHON_EXECUTABLE}" + -c "import numpy; print(numpy.get_include())" + OUTPUT_VARIABLE _numpy_include_dir + OUTPUT_STRIP_TRAILING_WHITESPACE + ERROR_QUIET + ) + execute_process(COMMAND "${PYTHON_EXECUTABLE}" + -c "import numpy; print(numpy.__version__)" + OUTPUT_VARIABLE NumPy_VERSION + OUTPUT_STRIP_TRAILING_WHITESPACE + ERROR_QUIET + ) + + # XXX This is required to support NumPy < v0.15.0. See note in module documentation above. + if(NOT NumPy_CONV_TEMPLATE_EXECUTABLE) + execute_process(COMMAND "${PYTHON_EXECUTABLE}" + -c "from numpy.distutils import conv_template; print(conv_template.__file__)" + OUTPUT_VARIABLE _numpy_conv_template_file + OUTPUT_STRIP_TRAILING_WHITESPACE + ERROR_QUIET + ) + set(NumPy_CONV_TEMPLATE_EXECUTABLE "${PYTHON_EXECUTABLE}" "${_numpy_conv_template_file}" CACHE STRING "Command executing conv-template program" FORCE) + endif() + + # XXX This is required to support NumPy < v0.15.0. See note in module documentation above. 
+ if(NOT NumPy_FROM_TEMPLATE_EXECUTABLE) + execute_process(COMMAND "${PYTHON_EXECUTABLE}" + -c "from numpy.distutils import from_template; print(from_template.__file__)" + OUTPUT_VARIABLE _numpy_from_template_file + OUTPUT_STRIP_TRAILING_WHITESPACE + ERROR_QUIET + ) + set(NumPy_FROM_TEMPLATE_EXECUTABLE "${PYTHON_EXECUTABLE}" "${_numpy_from_template_file}" CACHE STRING "Command executing from-template program" FORCE) + endif() + endif() +endif() + +find_path(NumPy_INCLUDE_DIR + numpy/arrayobject.h + PATHS "${_numpy_include_dir}" "${PYTHON_INCLUDE_DIR}" + PATH_SUFFIXES numpy/core/include + ) + +set(NumPy_INCLUDE_DIRS ${NumPy_INCLUDE_DIR}) + +# handle the QUIETLY and REQUIRED arguments and set NumPy_FOUND to TRUE if +# all listed variables are TRUE +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(NumPy + REQUIRED_VARS + NumPy_INCLUDE_DIR + NumPy_CONV_TEMPLATE_EXECUTABLE + NumPy_FROM_TEMPLATE_EXECUTABLE + VERSION_VAR NumPy_VERSION + ) + +mark_as_advanced(NumPy_INCLUDE_DIR) diff --git a/python/scikit-build-cmake/FindPythonExtensions.cmake b/python/scikit-build-cmake/FindPythonExtensions.cmake index 9a3d76a0..59b30c2a 100644 --- a/python/scikit-build-cmake/FindPythonExtensions.cmake +++ b/python/scikit-build-cmake/FindPythonExtensions.cmake @@ -104,9 +104,10 @@ # [HEADER_OUTPUT_VAR ] # [INCLUDE_DIR_OUTPUT_VAR ]) # +# without the extension is used as the logical name. If only ```` is +# # If only ```` is provided, and it ends in the ".h" extension, then it # is assumed to be the ````. The filename of the header file -# without the extension is used as the logical name. If only ```` is # provided, and it does not end in the ".h" extension, then the # ```` is assumed to ``.h``. # @@ -167,8 +168,6 @@ # # .. 
code-block:: cmake # -# find_package(PythonInterp) -# find_package(PythonLibs) # find_package(PythonExtensions) # find_package(Cython) # find_package(Boost COMPONENTS python) @@ -200,7 +199,7 @@ # FORWARD_DECL_MODULES_VAR fdecl_module_list) # # # module2 -- dynamically linked -# include_directories({Boost_INCLUDE_DIRS}) +# include_directories(${Boost_INCLUDE_DIRS}) # add_library(module2 SHARED boost_module2.cxx) # target_link_libraries(module2 ${Boost_LIBRARIES}) # python_extension_module(module2 @@ -209,7 +208,7 @@ # # # module3 -- loaded at runtime # add_cython_target(module3a.pyx) -# add_library(module1 MODULE ${module3a} module3b.cxx) +# add_library(module3 MODULE ${module3a} module3b.cxx) # target_link_libraries(module3 ${Boost_LIBRARIES}) # python_extension_module(module3 # LINKED_MODULES_VAR linked_module_list @@ -244,7 +243,14 @@ #============================================================================= find_package(PythonInterp REQUIRED) -find_package(PythonLibs) +if(SKBUILD AND NOT PYTHON_LIBRARY) + set(PYTHON_LIBRARY "no-library-required") + find_package(PythonLibs) + unset(PYTHON_LIBRARY) + unset(PYTHON_LIBRARIES) +else() + find_package(PythonLibs) +endif() include(targetLinkLibrariesWithDynamicLookup) set(_command " @@ -254,7 +260,6 @@ import os import os.path import site import sys -import sysconfig result = None rel_result = None @@ -288,7 +293,7 @@ sys.stdout.write(\";\".join(( sys.prefix, result, rel_result, - sysconfig.get_config_var('SO') + distutils.sysconfig.get_config_var('EXT_SUFFIX') ))) ") @@ -332,16 +337,33 @@ function(_set_python_extension_symbol_visibility _target) set_target_properties(${_target} PROPERTIES LINK_FLAGS "/EXPORT:${_modinit_prefix}${_target}" ) - elseif("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU") + elseif("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU" AND NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin") + # Option to not run version script. 
See https://github.com/scikit-build/scikit-build/issues/668 + if(NOT DEFINED SKBUILD_GNU_SKIP_LOCAL_SYMBOL_EXPORT_OVERRIDE) + set(SKBUILD_GNU_SKIP_LOCAL_SYMBOL_EXPORT_OVERRIDE FALSE) + endif() set(_script_path ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${_target}-version-script.map ) - file(WRITE ${_script_path} - "{global: ${_modinit_prefix}${_target}; local: *; };" - ) - set_property(TARGET ${_target} APPEND_STRING PROPERTY LINK_FLAGS - " -Wl,--version-script=${_script_path}" - ) + # Export all symbols. See https://github.com/scikit-build/scikit-build/issues/668 + if(SKBUILD_GNU_SKIP_LOCAL_SYMBOL_EXPORT_OVERRIDE) + file(WRITE ${_script_path} + "{global: ${_modinit_prefix}${_target};};" + ) + else() + file(WRITE ${_script_path} + "{global: ${_modinit_prefix}${_target}; local: *;};" + ) + endif() + if(NOT ${CMAKE_SYSTEM_NAME} MATCHES "SunOS") + set_property(TARGET ${_target} APPEND_STRING PROPERTY LINK_FLAGS + " -Wl,--version-script=\"${_script_path}\"" + ) + else() + set_property(TARGET ${_target} APPEND_STRING PROPERTY LINK_FLAGS + " -Wl,-M \"${_script_path}\"" + ) + endif() endif() endfunction() @@ -423,14 +445,14 @@ function(python_extension_module _target) target_link_libraries_with_dynamic_lookup(${_target} ${PYTHON_LIBRARIES}) if(_is_module_lib) - #_set_python_extension_symbol_visibility(${_altname}) + _set_python_extension_symbol_visibility(${_target}) endif() endif() endfunction() function(python_standalone_executable _target) include_directories(${PYTHON_INCLUDE_DIRS}) - target_link_libraries(${_target} ${PYTHON_LIBRARIES}) + target_link_libraries(${_target} ${SKBUILD_LINK_LIBRARIES_KEYWORD} ${PYTHON_LIBRARIES}) endfunction() function(python_modules_header _name) @@ -571,3 +593,5 @@ function(python_modules_header _name) endif() set(${_include_dirs_var} ${CMAKE_CURRENT_BINARY_DIR} PARENT_SCOPE) endfunction() + +include(UsePythonExtensions) diff --git a/python/scikit-build-cmake/LICENSE b/python/scikit-build-cmake/LICENSE index 73a9db0f..3a85dcff 100644 --- 
a/python/scikit-build-cmake/LICENSE +++ b/python/scikit-build-cmake/LICENSE @@ -1,6 +1,3 @@ -Unless otherwise noted in the file, all files in this directory are -licensed under the MIT license, reproduced below. - The MIT License (MIT) Copyright (c) 2014 Mike Sarahan diff --git a/python/scikit-build-cmake/UseCython.cmake b/python/scikit-build-cmake/UseCython.cmake index 9a596648..4e0fa790 100644 --- a/python/scikit-build-cmake/UseCython.cmake +++ b/python/scikit-build-cmake/UseCython.cmake @@ -43,7 +43,7 @@ # ``PY2 | PY3`` # Force compilation using either Python-2 or Python-3 syntax and code # semantics. By default, Python-2 syntax and semantics are used if the major -# version of Python found is 2. Otherwise, Python-3 syntax and sematics are +# version of Python found is 2. Otherwise, Python-3 syntax and semantics are # used. # # ``OUTPUT_VAR `` @@ -56,13 +56,13 @@ # ```` # The path of the generated source file. # -# Cache variables that effect the behavior include: +# Cache variables that affect the behavior include: # # ``CYTHON_ANNOTATE`` -# whether to create an annotated .html file when compiling +# Whether to create an annotated .html file when compiling. # # ``CYTHON_FLAGS`` -# additional flags to pass to the Cython compiler +# Additional flags to pass to the Cython compiler. 
# # Example usage # ^^^^^^^^^^^^^ @@ -101,9 +101,6 @@ set(CYTHON_ANNOTATE OFF set(CYTHON_FLAGS "" CACHE STRING "Extra flags to the cython compiler.") mark_as_advanced(CYTHON_ANNOTATE CYTHON_FLAGS) -string(REGEX REPLACE " " ";" CYTHON_FLAGS_LIST "${CYTHON_FLAGS}") - -find_package(PythonLibs REQUIRED) set(CYTHON_CXX_EXTENSION "cxx") set(CYTHON_C_EXTENSION "c") @@ -138,10 +135,12 @@ function(add_cython_target _name) set(_embed_main FALSE) - if("${PYTHONLIBS_VERSION_STRING}" MATCHES "^2.") - set(_input_syntax "PY2") + if("C" IN_LIST languages) + set(_output_syntax "C") + elseif("CXX" IN_LIST languages) + set(_output_syntax "CXX") else() - set(_input_syntax "PY3") + message(FATAL_ERROR "Either C or CXX must be enabled to use Cython") endif() if(_args_EMBED_MAIN) @@ -156,6 +155,10 @@ function(add_cython_target _name) set(_output_syntax "CXX") endif() + # Doesn't select an input syntax - Cython + # defaults to 2 for Cython 2 and 3 for Cython 3 + set(_input_syntax "default") + if(_args_PY2) set(_input_syntax "PY2") endif() @@ -201,15 +204,15 @@ function(add_cython_target _name) set(c_header_dependencies "") # Get the include directories. - get_source_file_property(pyx_location ${_source_file} LOCATION) - get_filename_component(pyx_path ${pyx_location} PATH) get_directory_property(cmake_include_directories - DIRECTORY ${pyx_path} + DIRECTORY ${CMAKE_CURRENT_LIST_DIR} INCLUDE_DIRECTORIES) list(APPEND cython_include_directories ${cmake_include_directories}) # Determine dependencies. # Add the pxd file with the same basename as the given pyx file. 
+ get_source_file_property(pyx_location ${_source_file} LOCATION) + get_filename_component(pyx_path ${pyx_location} PATH) get_filename_component(pyx_file_basename ${_source_file} NAME_WE) unset(corresponding_pxd_file CACHE) find_file(corresponding_pxd_file ${pyx_file_basename}.pxd @@ -323,21 +326,11 @@ function(add_cython_target _name) set(annotate_arg "--annotate") endif() - set(no_docstrings_arg "") - set(embed_signature_arg "") - if(CMAKE_BUILD_TYPE STREQUAL "MinSizeRel") - set(no_docstrings_arg "--no-docstrings") - else() - set(embed_signature_arg "-Xembedsignature=True") - endif() - set(cython_debug_arg "") - set(embed_pos_arg "") set(line_directives_arg "") if(CMAKE_BUILD_TYPE STREQUAL "Debug" OR CMAKE_BUILD_TYPE STREQUAL "RelWithDebInfo") set(cython_debug_arg "--gdb") - set(embed_pos_arg "--embed-positions") set(line_directives_arg "--line-directives") endif() @@ -352,12 +345,13 @@ function(add_cython_target _name) list(REMOVE_DUPLICATES pxd_dependencies) list(REMOVE_DUPLICATES c_header_dependencies) + string(REGEX REPLACE " " ";" CYTHON_FLAGS_LIST "${CYTHON_FLAGS}") + # Add the command to run the compiler. add_custom_command(OUTPUT ${generated_file} COMMAND ${CYTHON_EXECUTABLE} ARGS ${cxx_arg} ${include_directory_arg} ${py_version_arg} - ${embed_arg} ${annotate_arg} ${no_docstrings_arg} - ${cython_debug_arg} ${embed_pos_arg} ${embed_signature_arg} + ${embed_arg} ${annotate_arg} ${cython_debug_arg} ${line_directives_arg} ${CYTHON_FLAGS_LIST} ${pyx_location} --output-file ${generated_file} DEPENDS ${_source_file} diff --git a/python/scikit-build-cmake/UsePythonExtensions.cmake b/python/scikit-build-cmake/UsePythonExtensions.cmake new file mode 100644 index 00000000..c411e20c --- /dev/null +++ b/python/scikit-build-cmake/UsePythonExtensions.cmake @@ -0,0 +1,320 @@ +#.rst: +# +# The following functions are defined: +# +# .. 
cmake:command:: add_python_library +# +# Add a library that contains a mix of C, C++, Fortran, Cython, F2PY, Template, +# and Tempita sources. The required targets are automatically generated to +# "lower" source files from their high-level representation to a file that the +# compiler can accept. +# +# +# add_python_library( +# SOURCES [source1 [source2 ...]] +# [INCLUDE_DIRECTORIES [dir1 [dir2 ...]] +# [LINK_LIBRARIES [lib1 [lib2 ...]] +# [DEPENDS [source1 [source2 ...]]]) +# +# +# Example usage +# ^^^^^^^^^^^^^ +# +# .. code-block:: cmake +# +# find_package(PythonExtensions) +# +# file(GLOB arpack_sources ARPACK/SRC/*.f ARPACK/UTIL/*.f) +# +# add_python_library(arpack_scipy +# SOURCES ${arpack_sources} +# ${g77_wrapper_sources} +# INCLUDE_DIRECTORIES ARPACK/SRC +# ) +# +# .. cmake:command:: add_python_extension +# +# Add an extension that contains a mix of C, C++, Fortran, Cython, F2PY, Template, +# and Tempita sources. The required targets are automatically generated to +# "lower" source files from their high-level representation to a file that the +# compiler can accept. +# +# +# add_python_extension( +# SOURCES [source1 [source2 ...]] +# [INCLUDE_DIRECTORIES [dir1 [dir2 ...]] +# [LINK_LIBRARIES [lib1 [lib2 ...]] +# [DEPENDS [source1 [source2 ...]]]) +# +# +# Example usage +# ^^^^^^^^^^^^^ +# +# .. code-block:: cmake +# +# find_package(PythonExtensions) +# +# file(GLOB arpack_sources ARPACK/SRC/*.f ARPACK/UTIL/*.f) +# +# add_python_extension(arpack_scipy +# SOURCES ${arpack_sources} +# ${g77_wrapper_sources} +# INCLUDE_DIRECTORIES ARPACK/SRC +# ) +# +# +#============================================================================= +# Copyright 2011 Kitware, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +#============================================================================= + +macro(_remove_whitespace _output) + string(REGEX REPLACE "[ \r\n\t]+" " " ${_output} "${${_output}}") + string(STRIP "${${_output}}" ${_output}) +endmacro() + +function(add_python_library _name) + set(options STATIC SHARED MODULE) + set(multiValueArgs SOURCES INCLUDE_DIRECTORIES LINK_LIBRARIES COMPILE_DEFINITIONS DEPENDS) + cmake_parse_arguments(_args "${options}" "" "${multiValueArgs}" ${ARGN} ) + + # Validate arguments to allow simpler debugging + if(NOT _args_SOURCES) + message( + FATAL_ERROR + "You have called add_python_library for library ${_name} without " + "any source files. 
This typically indicates a problem with " + "your CMakeLists.txt file" + ) + endif() + + # Initialize the list of sources + set(_sources ${_args_SOURCES}) + + # Generate targets for all *.src files + set(_processed ) + foreach(_source IN LISTS _sources) + if(${_source} MATCHES ".pyf.src$" OR ${_source} MATCHES "\\.f\\.src$") + if(NOT NumPy_FOUND) + message( + FATAL_ERROR + "NumPy is required to process *.src Template files" + ) + endif() + string(REGEX REPLACE "\\.[^.]*$" "" _source_we ${_source}) + add_custom_command( + OUTPUT ${_source_we} + COMMAND ${NumPy_FROM_TEMPLATE_EXECUTABLE} + ${CMAKE_CURRENT_SOURCE_DIR}/${_source} + ${CMAKE_CURRENT_BINARY_DIR}/${_source_we} + DEPENDS ${_source} ${_args_DEPENDS} + COMMENT "Generating ${_source_we} from template ${_source}" + ) + list(APPEND _processed ${_source_we}) + elseif(${_source} MATCHES "\\.c\\.src$") + if(NOT NumPy_FOUND) + message( + FATAL_ERROR + "NumPy is required to process *.src Template files" + ) + endif() + string(REGEX REPLACE "\\.[^.]*$" "" _source_we ${_source}) + add_custom_command( + OUTPUT ${_source_we} + COMMAND ${NumPy_CONV_TEMPLATE_EXECUTABLE} + ${CMAKE_CURRENT_SOURCE_DIR}/${_source} + ${CMAKE_CURRENT_BINARY_DIR}/${_source_we} + DEPENDS ${_source} ${_args_DEPENDS} + COMMENT "Generating ${_source_we} from template ${_source}" + ) + list(APPEND _processed ${_source_we}) + elseif(${_source} MATCHES "\\.pyx\\.in$") + if(NOT Cython_FOUND) + message( + FATAL_ERROR + "Cython is required to process *.in Tempita files" + ) + endif() + string(REGEX REPLACE "\\.[^.]*$" "" _source_we ${_source}) + configure_file( + ${CMAKE_CURRENT_SOURCE_DIR}/${_source} + ${CMAKE_CURRENT_BINARY_DIR}/${_source} + COPYONLY + ) + set(_tempita_command + " + import os; + import sys; + from Cython.Tempita import Template; + cwd = os.getcwd(); + open(os.path.join(cwd, '${_source_we}'), 'w+') + .write( + Template.from_filename(os.path.join(cwd, '${_source}'), + encoding=sys.getdefaultencoding()).substitute() + ) + " + ) + 
_remove_whitespace(_tempita_command) + add_custom_command( + OUTPUT ${_source_we} + COMMAND ${PYTHON_EXECUTABLE} -c "${_tempita_command}" + DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/${_source}" + ${_args_DEPENDS} + ) + list(APPEND _processed ${_source_we}) + else() + list(APPEND _processed ${_source}) + endif() + endforeach() + set(_sources ${_processed}) + + # If we're building a Python extension and we're given only Fortran sources, + # We can conclude that we need to generate a Fortran interface file + list(FILTER _processed EXCLUDE REGEX "(\\.f|\\.f90)$") + if(NOT _processed AND _args_MODULE) + if(NOT NumPy_FOUND) + message( + FATAL_ERROR + "NumPy is required to process *.pyf F2PY files" + ) + endif() + set(_sources_abs ) + foreach(_source IN LISTS _sources) + if(NOT IS_ABSOLUTE ${_source}) + set(_source ${CMAKE_CURRENT_SOURCE_DIR}/${_source}) + endif() + list(APPEND _sources_abs ${_source}) + endforeach() + add_custom_command( + OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${_name}.pyf + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + COMMAND ${F2PY_EXECUTABLE} + ARGS -h ${_name}.pyf -m ${_name} --overwrite-signature + ${_sources_abs} + DEPENDS ${_sources} ${_args_DEPENDS} + COMMENT "Generating ${_name} Fortran interface file" + ) + list(APPEND _sources ${_name}.pyf) + endif() + + # Are there F2PY targets? 
+ set(_has_f2py_targets OFF) + set(_has_cython_targets OFF) + + # Generate targets for all *.pyx and *.pyf files + set(_processed ) + foreach(_source IN LISTS _sources) + if(${_source} MATCHES \\.pyx$) + if(NOT Cython_FOUND) + message( + FATAL_ERROR + "Cython is required to process *.pyx Cython files" + ) + endif() + string(REGEX REPLACE "\\.[^.]*$" "" _pyx_target_name ${_source}) + set(_has_cython_targets ON) + add_cython_target(${_pyx_target_name} + ${_source} + OUTPUT_VAR _pyx_target_output + DEPENDS ${_args_DEPENDS} + ) + list(APPEND _processed ${_pyx_target_output}) + elseif(${_source} MATCHES \\.pyf$) + if(NOT NumPy_FOUND) + message( + FATAL_ERROR + "NumPy is required to process *.pyf F2PY files" + ) + endif() + string(REGEX REPLACE "\\.[^.]*$" "" _pyf_target_name ${_source}) + set(_has_f2py_targets ON) + add_f2py_target(${_pyf_target_name} + ${_source} + OUTPUT_VAR _pyf_target_output + DEPENDS ${_args_DEPENDS} + ) + list(APPEND _processed ${_pyf_target_output}) + else() + list(APPEND _processed ${_source}) + endif() + endforeach() + set(_sources ${_processed}) + + if(_args_SHARED) + add_library(${_name} SHARED ${_sources}) + elseif(_args_MODULE) + add_library(${_name} MODULE ${_sources}) + else() + # Assume static + add_library(${_name} STATIC ${_sources}) + endif() + + target_include_directories(${_name} PRIVATE ${_args_INCLUDE_DIRECTORIES}) + target_link_libraries(${_name} ${SKBUILD_LINK_LIBRARIES_KEYWORD} ${_args_LINK_LIBRARIES}) + + if(_has_f2py_targets) + target_include_directories(${_name} PRIVATE ${F2PY_INCLUDE_DIRS}) + target_link_libraries(${_name} ${SKBUILD_LINK_LIBRARIES_KEYWORD} ${F2PY_LIBRARIES}) + endif() + + if(_args_COMPILE_DEFINITIONS) + target_compile_definitions(${_name} PRIVATE ${_args_COMPILE_DEFINITIONS}) + endif() + + if(_args_DEPENDS) + add_custom_target( + "${_name}_depends" + DEPENDS ${_args_DEPENDS} + ) + add_dependencies(${_name} "${_name}_depends") + endif() +endfunction() + +function(add_python_extension _name) + # FIXME: make 
sure that extensions with the same name can happen + # in multiple directories + + set(multiValueArgs SOURCES INCLUDE_DIRECTORIES LINK_LIBRARIES COMPILE_DEFINITIONS DEPENDS) + cmake_parse_arguments(_args "" "" "${multiValueArgs}" ${ARGN} ) + + # Validate arguments to allow simpler debugging + if(NOT _args_SOURCES) + message( + FATAL_ERROR + "You have called add_python_extension for library ${_name} without " + "any source files. This typically indicates a problem with " + "your CMakeLists.txt file" + ) + endif() + + add_python_library(${_name} MODULE + SOURCES ${_args_SOURCES} + INCLUDE_DIRECTORIES ${_args_INCLUDE_DIRECTORIES} + LINK_LIBRARIES ${_args_LINK_LIBRARIES} + COMPILE_DEFINITIONS ${_args_COMPILE_DEFINITIONS} + DEPENDS ${_args_DEPENDS} + ) + python_extension_module(${_name}) + + file(RELATIVE_PATH _relative "${CMAKE_SOURCE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}") + if(_relative STREQUAL "") + set(_relative ".") + endif() + + install( + TARGETS ${_name} + LIBRARY DESTINATION "${_relative}" + RUNTIME DESTINATION "${_relative}" + ) +endfunction() diff --git a/python/scikit-build-cmake/targetLinkLibrariesWithDynamicLookup.cmake b/python/scikit-build-cmake/targetLinkLibrariesWithDynamicLookup.cmake index 020fc404..a583f42c 100644 --- a/python/scikit-build-cmake/targetLinkLibrariesWithDynamicLookup.cmake +++ b/python/scikit-build-cmake/targetLinkLibrariesWithDynamicLookup.cmake @@ -198,6 +198,28 @@ function(_test_weak_link_project set(osx_dynamic_lookup "-undefined dynamic_lookup") set(no_flag "") + if(CMAKE_CROSSCOMPILING) + set(link_flag_spec "no_flag") + set(link_flag "${${link_flag_spec}}") + set(test_skipping_reason "") + set(test_pass FALSE) + + if(APPLE AND NOT CMAKE_CROSSCOMPILING_EMULATOR) + set(link_flag_spec "osx_dynamic_lookup") + set(link_flag "${${link_flag_spec}}") + set(test_skipping_reason " (Cross compiling without emulator on macOS)") + set(test_pass TRUE) + endif() + + if(test_pass) + set(test_description "Weak Link ${target_type} -> ${lib_type} 
(${link_flag_spec})") + message(STATUS "Performing Test ${test_description} - Assuming Success${test_skipping_reason}") + set(${can_weak_link_var} ${test_pass} PARENT_SCOPE) + set(${project_name} ${link_flag} PARENT_SCOPE) + return() + endif() + endif() + foreach(link_flag_spec gnu_ld_ignore osx_dynamic_lookup no_flag) set(link_flag "${${link_flag_spec}}") @@ -248,7 +270,7 @@ function(_test_weak_link_project if(link_mod_lib) file(APPEND "${test_project_src_dir}/CMakeLists.txt" " - target_link_libraries(counter number) + target_link_libraries(counter ${SKBUILD_LINK_LIBRARIES_KEYWORD} number) ") elseif(NOT link_flag STREQUAL "") file(APPEND "${test_project_src_dir}/CMakeLists.txt" " @@ -262,21 +284,21 @@ function(_test_weak_link_project if(link_exe_lib) file(APPEND "${test_project_src_dir}/CMakeLists.txt" " - target_link_libraries(main number) + target_link_libraries(main ${SKBUILD_LINK_LIBRARIES_KEYWORD} number) ") elseif(NOT link_flag STREQUAL "") file(APPEND "${test_project_src_dir}/CMakeLists.txt" " - target_link_libraries(main \"${link_flag}\") + target_link_libraries(main ${SKBUILD_LINK_LIBRARIES_KEYWORD} \"${link_flag}\") ") endif() if(link_exe_mod) file(APPEND "${test_project_src_dir}/CMakeLists.txt" " - target_link_libraries(main counter) + target_link_libraries(main ${SKBUILD_LINK_LIBRARIES_KEYWORD} counter) ") else() file(APPEND "${test_project_src_dir}/CMakeLists.txt" " - target_link_libraries(main \"${CMAKE_DL_LIBS}\") + target_link_libraries(main ${SKBUILD_LINK_LIBRARIES_KEYWORD} \"${CMAKE_DL_LIBS}\") ") endif() @@ -362,7 +384,7 @@ function(_test_weak_link_project file(APPEND "${test_project_src_dir}/main.c" " goto done; error: - fprintf(stderr, \"Error occured:\\n %s\\n\", dlerror()); + fprintf(stderr, \"Error occurred:\\n %s\\n\", dlerror()); result = 1; done: @@ -492,21 +514,15 @@ function(_check_dynamic_lookup endif() if(NOT DEFINED ${cache_var}) - set(skip_test FALSE) - if(CMAKE_CROSSCOMPILING AND NOT CMAKE_CROSSCOMPILING_EMULATOR) + 
if(CMAKE_CROSSCOMPILING AND NOT CMAKE_CROSSCOMPILING_EMULATOR) set(skip_test TRUE) endif() - if(skip_test) - set(has_dynamic_lookup FALSE) - set(link_flags) - else() - _test_weak_link_project(${target_type} - ${lib_type} - has_dynamic_lookup - link_flags) - endif() + _test_weak_link_project(${target_type} + ${lib_type} + has_dynamic_lookup + link_flags) set(caveat " (when linking ${target_type} against ${lib_type})") @@ -576,6 +592,6 @@ function(target_link_libraries_with_dynamic_lookup target) set(links "${link_items}" "${link_libs}") if(links) - target_link_libraries(${target} "${links}") + target_link_libraries(${target} ${SKBUILD_LINK_LIBRARIES_KEYWORD} "${links}") endif() endfunction() diff --git a/python/zfpy.pxd b/python/zfpy.pxd index f812aed6..c92c2983 100644 --- a/python/zfpy.pxd +++ b/python/zfpy.pxd @@ -1,11 +1,12 @@ import cython cimport libc.stdint as stdint +from libc.stddef cimport ptrdiff_t -cdef extern from "bitstream.h": +cdef extern from "zfp/bitstream.h": cdef struct bitstream: pass - bitstream* stream_open(void* data, size_t); - void stream_close(bitstream* stream); + bitstream* stream_open(void* data, size_t) + void stream_close(bitstream* stream) cdef extern from "zfp.h": # enums @@ -21,51 +22,58 @@ cdef extern from "zfp.h": zfp_mode_expert = 1, zfp_mode_fixed_rate = 2, zfp_mode_fixed_precision = 3, - zfp_mode_fixed_accuracy = 4 + zfp_mode_fixed_accuracy = 4, + zfp_mode_reversible = 5 # structs ctypedef struct zfp_field: zfp_type _type "type" - cython.uint nx, ny, nz, nw - int sx, sy, sz, sw + size_t nx, ny, nz, nw + ptrdiff_t sx, sy, sz, sw void* data ctypedef struct zfp_stream: pass + ctypedef int zfp_bool + # include #define's cython.uint ZFP_HEADER_MAGIC cython.uint ZFP_HEADER_META cython.uint ZFP_HEADER_MODE cython.uint ZFP_HEADER_FULL - # function definitions - zfp_stream* zfp_stream_open(bitstream* stream); - void zfp_stream_close(zfp_stream* stream); - size_t zfp_stream_maximum_size(const zfp_stream* stream, const zfp_field* field); 
- void zfp_stream_set_bit_stream(zfp_stream* stream, bitstream* bs); - cython.uint zfp_stream_set_precision(zfp_stream* stream, cython.uint precision); - double zfp_stream_set_accuracy(zfp_stream* stream, double tolerance); - double zfp_stream_set_rate(zfp_stream* stream, double rate, zfp_type type, cython.uint dims, int wra); - void zfp_stream_set_reversible(zfp_stream* stream); - stdint.uint64_t zfp_stream_mode(const zfp_stream* zfp); - zfp_mode zfp_stream_set_mode(zfp_stream* stream, stdint.uint64_t mode); - zfp_field* zfp_field_alloc(); - zfp_field* zfp_field_1d(void* pointer, zfp_type, cython.uint nx); - zfp_field* zfp_field_2d(void* pointer, zfp_type, cython.uint nx, cython.uint ny); - zfp_field* zfp_field_3d(void* pointer, zfp_type, cython.uint nx, cython.uint ny, cython.uint nz); - zfp_field* zfp_field_4d(void* pointer, zfp_type, cython.uint nx, cython.uint ny, cython.uint nz, cython.uint nw); - void zfp_field_set_stride_1d(zfp_field* field, int sx); - void zfp_field_set_stride_2d(zfp_field* field, int sx, int sy); - void zfp_field_set_stride_3d(zfp_field* field, int sx, int sy, int sz); - void zfp_field_set_stride_4d(zfp_field* field, int sx, int sy, int sz, int sw); - int zfp_field_stride(const zfp_field* field, int* stride) - void zfp_field_free(zfp_field* field); - zfp_type zfp_field_set_type(zfp_field* field, zfp_type type); - size_t zfp_compress(zfp_stream* stream, const zfp_field* field) nogil; - size_t zfp_decompress(zfp_stream* stream, zfp_field* field) nogil; - size_t zfp_write_header(zfp_stream* stream, const zfp_field* field, cython.uint mask); - size_t zfp_read_header(zfp_stream* stream, zfp_field* field, cython.uint mask); - void zfp_stream_rewind(zfp_stream* stream); - void zfp_field_set_pointer(zfp_field* field, void* pointer) nogil; - + # function declarations + zfp_stream* zfp_stream_open(bitstream* stream) + void zfp_stream_close(zfp_stream* stream) + stdint.uint64_t zfp_stream_mode(const zfp_stream* zfp) + size_t 
zfp_stream_maximum_size(const zfp_stream* stream, const zfp_field* field) + void zfp_stream_rewind(zfp_stream* stream) + void zfp_stream_set_bit_stream(zfp_stream* stream, bitstream* bs) + void zfp_stream_set_reversible(zfp_stream* stream) + double zfp_stream_set_rate(zfp_stream* stream, double rate, zfp_type type, cython.uint dims, zfp_bool align) + cython.uint zfp_stream_set_precision(zfp_stream* stream, cython.uint precision) + double zfp_stream_set_accuracy(zfp_stream* stream, double tolerance) + zfp_mode zfp_stream_set_mode(zfp_stream* stream, stdint.uint64_t mode) + zfp_mode zfp_stream_compression_mode(zfp_stream* stream) + double zfp_stream_accuracy(zfp_stream* stream) + double zfp_stream_rate(zfp_stream* stream, cython.uint dims) + cython.uint zfp_stream_precision(const zfp_stream* stream) + zfp_field* zfp_field_alloc() + zfp_field* zfp_field_1d(void* pointer, zfp_type, size_t nx) + zfp_field* zfp_field_2d(void* pointer, zfp_type, size_t nx, size_t ny) + zfp_field* zfp_field_3d(void* pointer, zfp_type, size_t nx, size_t ny, size_t nz) + zfp_field* zfp_field_4d(void* pointer, zfp_type, size_t nx, size_t ny, size_t nz, size_t nw) + void zfp_field_free(zfp_field* field) + zfp_bool zfp_field_stride(const zfp_field* field, ptrdiff_t* stride) + void zfp_field_set_pointer(zfp_field* field, void* pointer) nogil + zfp_type zfp_field_set_type(zfp_field* field, zfp_type type) + void zfp_field_set_stride_1d(zfp_field* field, ptrdiff_t sx) + void zfp_field_set_stride_2d(zfp_field* field, ptrdiff_t sx, ptrdiff_t sy) + void zfp_field_set_stride_3d(zfp_field* field, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz) + void zfp_field_set_stride_4d(zfp_field* field, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw) + size_t zfp_compress(zfp_stream* stream, const zfp_field* field) nogil + size_t zfp_decompress(zfp_stream* stream, zfp_field* field) nogil + size_t zfp_write_header(zfp_stream* stream, const zfp_field* field, cython.uint mask) + size_t zfp_read_header(zfp_stream* 
stream, zfp_field* field, cython.uint mask) + void zfp_stream_params(zfp_stream* stream, cython.uint* minbits, cython.uint* maxbits, cython.uint* maxprec, int* minexp); cdef gen_padded_int_list(orig_array, pad=*, length=*) diff --git a/python/zfpy.pyx b/python/zfpy.pyx index 1f38697f..0b7a19e1 100644 --- a/python/zfpy.pyx +++ b/python/zfpy.pyx @@ -4,8 +4,7 @@ import functools import cython from libc.stdlib cimport malloc, free from cython cimport view -from cpython cimport array -import array +from libc.stdint cimport uint8_t import itertools if sys.version_info[0] == 2: @@ -51,11 +50,11 @@ cpdef dtype_to_ztype(dtype): cpdef dtype_to_format(dtype): # format characters detailed here: - # https://docs.python.org/2/library/array.html#module-array + # https://docs.python.org/3/library/array.html if dtype == np.int32: return 'i' # signed int elif dtype == np.int64: - return 'l' # signed long + return 'q' # signed long long elif dtype == np.float32: return 'f' # float elif dtype == np.float64: @@ -75,7 +74,21 @@ cpdef ztype_to_dtype(zfp_type ztype): except KeyError: raise ValueError("Unsupported zfp_type {}".format(ztype)) -cdef zfp_field* _init_field(np.ndarray arr): +zfp_mode_map = { + zfp_mode_null: "null", + zfp_mode_expert: "expert", + zfp_mode_reversible: "reversible", + zfp_mode_fixed_accuracy: "tolerance", + zfp_mode_fixed_precision: "precision", + zfp_mode_fixed_rate: "rate", +} +cpdef zmode_to_str(zfp_mode zmode): + try: + return zfp_mode_map[zmode] + except KeyError: + raise ValueError("Unsupported zfp_mode {}".format(zmode)) + +cdef zfp_field* _init_field(np.ndarray arr) except NULL: shape = arr.shape cdef int ndim = arr.ndim cdef zfp_type ztype = dtype_to_ztype(arr.dtype) @@ -142,8 +155,8 @@ cpdef bytes compress_numpy( cdef zfp_field* field = _init_field(arr) cdef zfp_stream* stream = zfp_stream_open(NULL) - cdef zfp_type ztype = zfp_type_none; - cdef int ndim = arr.ndim; + cdef zfp_type ztype = zfp_type_none + cdef int ndim = arr.ndim 
_set_compression_mode(stream, ztype, ndim, tolerance, rate, precision) # Allocate space based on the maximum size potentially required by zfp to @@ -245,7 +258,7 @@ cdef _validate_4d_list(in_list, list_name): ) cpdef np.ndarray _decompress( - bytes compressed_data, + const uint8_t[::1] compressed_data, zfp_type ztype, shape, out=None, @@ -253,17 +266,16 @@ cpdef np.ndarray _decompress( double rate = -1, int precision = -1, ): - if compressed_data is None: raise TypeError("compressed_data cannot be None") if compressed_data is out: raise ValueError("Cannot decompress in-place") _validate_4d_list(shape, "shape") - cdef char* comp_data_pointer = compressed_data + cdef const void* comp_data_pointer = &compressed_data[0] cdef zfp_field* field = zfp_field_alloc() cdef bitstream* bstream = stream_open( - comp_data_pointer, + comp_data_pointer, len(compressed_data) ) cdef zfp_stream* stream = zfp_stream_open(bstream) @@ -329,15 +341,15 @@ cpdef np.ndarray _decompress( return output cpdef np.ndarray decompress_numpy( - bytes compressed_data, + const uint8_t[::1] compressed_data, ): if compressed_data is None: raise TypeError("compressed_data cannot be None") - cdef char* comp_data_pointer = compressed_data + cdef const void* comp_data_pointer = &compressed_data[0] cdef zfp_field* field = zfp_field_alloc() cdef bitstream* bstream = stream_open( - comp_data_pointer, + comp_data_pointer, len(compressed_data) ) cdef zfp_stream* stream = zfp_stream_open(bstream) @@ -353,3 +365,59 @@ cpdef np.ndarray decompress_numpy( stream_close(bstream) return output + +cpdef dict header(const uint8_t[::1] compressed_data): + """Return stream header information in a python dict.""" + if compressed_data is None: + raise TypeError("compressed_data cannot be None") + + cdef const void* comp_data_pointer = &compressed_data[0] + cdef zfp_field* field = zfp_field_alloc() + cdef bitstream* bstream = stream_open( + comp_data_pointer, + len(compressed_data) + ) + cdef zfp_stream* stream = 
zfp_stream_open(bstream) + cdef zfp_mode mode + + cdef unsigned int minbits = 0 + cdef unsigned int maxbits = 0 + cdef unsigned int maxprec = 0 + cdef int minexp = 0 + + try: + if zfp_read_header(stream, field, HEADER_FULL) == 0: + raise ValueError("Failed to read required zfp header") + + mode = zfp_stream_compression_mode(stream) + + ndim = 0 + for dim in [field.nx, field.ny, field.nz, field.nw]: + ndim += int(dim > 0) + + zfp_stream_params(stream, &minbits, &maxbits, &maxprec, &minexp) + + return { + "nx": int(field.nx), + "ny": int(field.ny), + "nz": int(field.nz), + "nw": int(field.nw), + "type": ztype_to_dtype(field._type), + "mode": zmode_to_str(mode), + "config": { + "mode": int(mode), + "tolerance": float(zfp_stream_accuracy(stream)), + "rate": float(zfp_stream_rate(stream, ndim)), + "precision": int(zfp_stream_precision(stream)), + "expert": { + "minbits": int(minbits), + "maxbits": int(maxbits), + "maxprec": int(maxprec), + "minexp": int(minexp), + }, + }, + } + finally: + zfp_field_free(field) + zfp_stream_close(stream) + stream_close(bstream) diff --git a/setup.py b/setup.py new file mode 100644 index 00000000..fa2da6ef --- /dev/null +++ b/setup.py @@ -0,0 +1,15 @@ +from setuptools import setup, Extension +import numpy as np + +setup( + name="zfpy", + version="1.0.1", + author="Peter Lindstrom, Danielle Asher", + author_email="zfp@llnl.gov", + url="https://zfp.llnl.gov", + description="zfp compression in Python", + long_description="zfp is a compressed format for representing multidimensional floating-point and integer arrays. zfp provides compressed-array classes that support high throughput read and write random access to individual array elements. zfp also supports serial and parallel compression of whole arrays using both lossless and lossy compression with error tolerances.
zfp is primarily written in C and C++ but also includes Python and Fortran bindings.", + ext_modules=[Extension("zfpy", ["build/python/zfpy.c"], + include_dirs=["include", np.get_include()], + libraries=["zfp"], library_dirs=["build/lib64", "build/lib/Release"], language_level = "3")] +) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 46887588..fd5702e5 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,11 +1,11 @@ if(ZFP_WITH_CUDA) - SET(CMAKE_CXX_FLAGS_PREVIOUS ${CMAKE_CXX_FLAGS}) - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC" ) + set(CMAKE_CXX_FLAGS_PREVIOUS ${CMAKE_CXX_FLAGS}) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC" ) add_subdirectory(cuda_zfp) cuda_include_directories(${PROJECT_SOURCE_DIR}/include) - cuda_wrap_srcs(zfp OBJ zfp_cuda_backend_obj cuda_zfp/cuZFP.cu) - SET(CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS_PREVIOUS}) + cuda_wrap_srcs(zfp OBJ zfp_cuda_backend_obj cuda_zfp/cuZFP.cu OPTIONS ${CMAKE_CUDA_FLAGS}) + set(CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS_PREVIOUS}) add_definitions(-DZFP_WITH_CUDA) endif() @@ -28,16 +28,16 @@ add_library(zfp ${zfp_source} add_library(zfp::zfp ALIAS zfp) if(ZFP_WITH_OPENMP) - target_compile_options(zfp PRIVATE ${OpenMP_C_FLAGS}) - target_link_libraries(zfp PRIVATE ${OpenMP_C_LIBRARIES}) + target_link_libraries(zfp PRIVATE OpenMP::OpenMP_C) endif() if(HAVE_LIBM_MATH) target_link_libraries(zfp PRIVATE m) endif() -if(WIN32) +if(WIN32 AND BUILD_SHARED_LIBS) # Define ZFP_SOURCE when compiling libzfp to export symbols to Windows DLL + list(APPEND zfp_public_defs ZFP_SHARED_LIBS) list(APPEND zfp_private_defs ZFP_SOURCE) endif() @@ -54,8 +54,7 @@ target_include_directories(zfp PUBLIC $ $ - INTERFACE - $) +) set_property(TARGET zfp PROPERTY VERSION ${ZFP_VERSION}) set_property(TARGET zfp PROPERTY SOVERSION ${ZFP_VERSION_MAJOR}) diff --git a/src/Makefile b/src/Makefile index 227a7803..239261fb 100644 --- a/src/Makefile +++ b/src/Makefile @@ -21,4 +21,4 @@ $(LIBDIR)/libzfp.so: $(OBJECTS) $(CC) $(CFLAGS) -shared $^ -o
 $@
 
 .c.o:
-	$(CC) $(CFLAGS) -c $<
+	$(CC) $(CFLAGS) -I../include -c $<
diff --git a/src/bitstream.c b/src/bitstream.c
index 05094c6d..29a4543a 100644
--- a/src/bitstream.c
+++ b/src/bitstream.c
@@ -1,4 +1,4 @@
-#include "bitstream.h"
-#include "inline/bitstream.c"
+#include "zfp/bitstream.h"
+#include "zfp/bitstream.inl"
 
-export_ const size_t stream_word_bits = wsize;
+const size_t stream_word_bits = wsize;
diff --git a/src/cuda_zfp/CMakeLists.txt b/src/cuda_zfp/CMakeLists.txt
index 2fe402fa..b19546d8 100644
--- a/src/cuda_zfp/CMakeLists.txt
+++ b/src/cuda_zfp/CMakeLists.txt
@@ -18,7 +18,6 @@ set(cuZFP_sources
   type_info.cuh)
 
 set(cuZFP_headers
-  constant_setup.cuh
   shared.h
   cuZFP.h
   ErrorCheck.h)
diff --git a/src/cuda_zfp/constant_setup.cuh b/src/cuda_zfp/constant_setup.cuh
deleted file mode 100644
index 1c1221ad..00000000
--- a/src/cuda_zfp/constant_setup.cuh
+++ /dev/null
@@ -1,39 +0,0 @@
-#ifndef cuZFP_CONSTANT_SETUP
-#define cuZFP_CONSTANT_SETUP
-
-#include "constants.h"
-#include "shared.h"
-#include "ErrorCheck.h"
-#include "type_info.cuh"
-
-namespace cuZFP {
-
-class ConstantSetup
-{
-public:
-  static void setup_3d()
-  {
-    ErrorCheck ec;
-    cudaMemcpyToSymbol(c_perm, perm_3d, sizeof(unsigned char) * 64, 0);
-    ec.chk("setupConst: c_perm");
-  }
-
-  static void setup_2d()
-  {
-    ErrorCheck ec;
-    cudaMemcpyToSymbol(c_perm_2, perm_2, sizeof(unsigned char) * 16, 0);
-    ec.chk("setupConst: c_perm_2");
-  }
-
-  static void setup_1d()
-  {
-    ErrorCheck ec;
-    cudaMemcpyToSymbol(c_perm_1, perm_1, sizeof(unsigned char) * 4, 0);
-    ec.chk("setupConst: c_perm_1");
-  }
-};
-
-
-} //namespace
-
-#endif
diff --git a/src/cuda_zfp/constants.h b/src/cuda_zfp/constants.h
index 423ac91c..a03eb6f8 100644
--- a/src/cuda_zfp/constants.h
+++ b/src/cuda_zfp/constants.h
@@ -5,7 +5,7 @@ namespace cuZFP {
 
 #define index_3d(x, y, z) ((x) + 4 * ((y) + 4 * (z)))
 
-static const unsigned char
+__device__ static const unsigned char
 perm_3d[64] = {
 
   index_3d(0, 0, 0), // 0 : 0
@@ -94,7 +94,7 @@ perm_3d[64] = {
 
 #undef index_3d
 
-static const unsigned char perm_1[4] =
+__device__ static const unsigned char perm_1[4] =
 {
   0, 1, 2, 3
 };
@@ -102,7 +102,7 @@ static const unsigned char perm_1[4] =
 
 #define index(i, j) ((i) + 4 * (j))
 
 /* order coefficients (i, j) by i + j, then i^2 + j^2 */
-static const unsigned char perm_2[16] = {
+__device__ static const unsigned char perm_2[16] = {
   index(0, 0), /* 0 : 0 */
   index(1, 0), /* 1 : 1 */
diff --git a/src/cuda_zfp/cuZFP.cu b/src/cuda_zfp/cuZFP.cu
index ffbb9933..e1de467f 100644
--- a/src/cuda_zfp/cuZFP.cu
+++ b/src/cuda_zfp/cuZFP.cu
@@ -12,7 +12,6 @@
 
 #include "ErrorCheck.h"
 
-#include "constant_setup.cuh"
 #include "pointers.cuh"
 #include "type_info.cuh"
 #include 
@@ -24,7 +23,7 @@
 #define inline_ inline
 #endif
 
-#include "../inline/bitstream.c"
+#include "zfp/bitstream.inl"
 
 namespace internal {
@@ -119,7 +118,6 @@ size_t encode(uint dims[3], int3 stride, int bits_per_block, T *d_data, Word *d_
   {
     int dim = dims[0];
     int sx = stride.x;
-    cuZFP::ConstantSetup::setup_1d();
     stream_size = cuZFP::encode1(dim, sx, d_data, d_stream, bits_per_block);
   }
   else if(d == 2)
@@ -128,7 +126,6 @@ size_t encode(uint dims[3], int3 stride, int bits_per_block, T *d_data, Word *d_
     int2 s;
     s.x = stride.x;
     s.y = stride.y;
-    cuZFP::ConstantSetup::setup_2d();
     stream_size = cuZFP::encode2(ndims, s, d_data, d_stream, bits_per_block);
   }
   else if(d == 3)
@@ -138,7 +135,6 @@ size_t encode(uint dims[3], int3 stride, int bits_per_block, T *d_data, Word *d_
     s.y = stride.y;
     s.z = stride.z;
     uint3 ndims = make_uint3(dims[0], dims[1], dims[2]);
-    cuZFP::ConstantSetup::setup_3d();
     stream_size = cuZFP::encode(ndims, s, d_data, d_stream, bits_per_block);
   }
 
@@ -172,7 +168,6 @@ size_t decode(uint ndims[3], int3 stride, int bits_per_block, Word *stream, T *o
     s.y = stride.y;
     s.z = stride.z;
 
-    cuZFP::ConstantSetup::setup_3d();
     stream_bytes = cuZFP::decode3(dims, s, stream, out, bits_per_block);
   }
   else if(d == 1)
@@ -180,7 +175,6 @@ size_t decode(uint ndims[3], int3 stride, int bits_per_block, Word *stream, T *o
     uint dim = ndims[0];
     int sx = stride.x;
-    cuZFP::ConstantSetup::setup_1d();
     stream_bytes = cuZFP::decode1(dim, sx, stream, out, bits_per_block);
   }
@@ -194,7 +188,6 @@ size_t decode(uint ndims[3], int3 stride, int bits_per_block, Word *stream, T *o
     s.x = stride.x;
     s.y = stride.y;
 
-    cuZFP::ConstantSetup::setup_2d();
     stream_bytes = cuZFP::decode2(dims, s, stream, out, bits_per_block);
   }
   else std::cerr<<" d == "<stream->begin);
-  assert(sizeof(word) == sizeof(Word)); // "CUDA version currently only supports 64bit words");
+  assert(sizeof(bitstream_word) == sizeof(Word)); // "CUDA version currently only supports 64bit words");
 
   if(stream_device)
   {
     return (Word*) stream->stream->begin;
-  }
+  }
 
   Word *d_stream = NULL;
-  // TODO: we we have a real stream we can just ask it how big it is
   size_t max_size = zfp_stream_maximum_size(stream, field);
   cudaMalloc(&d_stream, max_size);
-  cudaMemcpy(d_stream, stream->stream->begin, max_size, cudaMemcpyHostToDevice);
+  return d_stream;
+}
+
+Word *setup_device_stream_decompress(zfp_stream *stream,const zfp_field *field)
+{
+  bool stream_device = cuZFP::is_gpu_ptr(stream->stream->begin);
+  assert(sizeof(bitstream_word) == sizeof(Word)); // "CUDA version currently only supports 64bit words");
+
+  if(stream_device)
+  {
+    return (Word*) stream->stream->begin;
+  }
+
+  Word *d_stream = NULL;
+  //TODO: change maximum_size to compressed stream size
+  size_t size = zfp_stream_maximum_size(stream, field);
+  cudaMalloc(&d_stream, size);
+  cudaMemcpy(d_stream, stream->stream->begin, size, cudaMemcpyHostToDevice);
   return d_stream;
 }
@@ -246,7 +255,7 @@ void * offset_void(zfp_type type, void *ptr, long long int offset)
   return offset_ptr;
 }
 
-void *setup_device_field(const zfp_field *field, const int3 &stride, long long int &offset)
+void *setup_device_field_compress(const zfp_field *field, const int3 &stride, long long int &offset)
 {
   bool field_device = cuZFP::is_gpu_ptr(field->data);
 
@@ -287,6 +296,43 @@ void *setup_device_field(const zfp_field *field, const int3 &stride, long long i
   return offset_void(field->type, d_data, -offset);
 }
 
+void *setup_device_field_decompress(const zfp_field *field, const int3 &stride, long long int &offset)
+{
+  bool field_device = cuZFP::is_gpu_ptr(field->data);
+
+  if(field_device)
+  {
+    offset = 0;
+    return field->data;
+  }
+
+  uint dims[3];
+  dims[0] = field->nx;
+  dims[1] = field->ny;
+  dims[2] = field->nz;
+
+  size_t type_size = zfp_type_size(field->type);
+
+  size_t field_size = 1;
+  for(int i = 0; i < 3; ++i)
+  {
+    if(dims[i] != 0)
+    {
+      field_size *= dims[i];
+    }
+  }
+
+  bool contig = internal::is_contigous(dims, stride, offset);
+
+  void *d_data = NULL;
+  if(contig)
+  {
+    size_t field_bytes = type_size * field_size;
+    cudaMalloc(&d_data, field_bytes);
+  }
+  return offset_void(field->type, d_data, -offset);
+}
+
 void cleanup_device_ptr(void *orig_ptr, void *d_ptr, size_t bytes, long long int offset, zfp_type type)
 {
   bool device = cuZFP::is_gpu_ptr(orig_ptr);
@@ -323,7 +369,7 @@ cuda_compress(zfp_stream *stream, const zfp_field *field)
   size_t stream_bytes = 0;
   long long int offset = 0;
-  void *d_data = internal::setup_device_field(field, stride, offset);
+  void *d_data = internal::setup_device_field_compress(field, stride, offset);
 
   if(d_data == NULL)
   {
@@ -331,7 +377,7 @@ cuda_compress(zfp_stream *stream, const zfp_field *field)
     return 0;
   }
 
-  Word *d_stream = internal::setup_device_stream(stream, field);
+  Word *d_stream = internal::setup_device_stream_compress(stream, field);
 
   if(field->type == zfp_type_float)
   {
@@ -382,7 +428,7 @@ cuda_decompress(zfp_stream *stream, zfp_field *field)
   size_t decoded_bytes = 0;
   long long int offset = 0;
-  void *d_data = internal::setup_device_field(field, stride, offset);
+  void *d_data = internal::setup_device_field_decompress(field, stride, offset);
 
   if(d_data == NULL)
   {
@@ -390,7 +436,7 @@ cuda_decompress(zfp_stream *stream, zfp_field *field)
     return;
   }
 
-  Word *d_stream = internal::setup_device_stream(stream, field);
+  Word *d_stream = internal::setup_device_stream_decompress(stream, field);
 
   if(field->type == zfp_type_float)
   {
@@ -437,7 +483,7 @@ cuda_decompress(zfp_stream *stream, zfp_field *field)
   internal::cleanup_device_ptr(stream->stream->begin, d_stream, 0, 0, field->type);
   internal::cleanup_device_ptr(field->data, d_data, bytes, offset, field->type);
 
-  // this is how zfp determins if this was a success
+  // this is how zfp determines if this was a success
   size_t words_read = decoded_bytes / sizeof(Word);
   stream->stream->bits = wsize;
   // set stream pointer to end of stream
diff --git a/src/cuda_zfp/decode.cuh b/src/cuda_zfp/decode.cuh
index d3d08772..636de7d4 100644
--- a/src/cuda_zfp/decode.cuh
+++ b/src/cuda_zfp/decode.cuh
@@ -6,17 +6,34 @@
 
 namespace cuZFP {
 
-/* map two's complement signed integer to negabinary unsigned integer */
+#if ZFP_ROUNDING_MODE == ZFP_ROUND_LAST
+// bias values such that truncation is equivalent to round to nearest
+template
+__device__
+static void
+inv_round(UInt* ublock, uint m, uint prec)
+{
+  // add 1/6 ulp to unbias errors
+  if (prec < (uint)(CHAR_BIT * sizeof(UInt) - 1)) {
+    // the first m values (0 <= m <= n) have one more bit of precision
+    uint n = BlockSize - m;
+    while (m--) *ublock++ += (((UInt)NBMASK >> 2) >> prec);
+    while (n--) *ublock++ += (((UInt)NBMASK >> 1) >> prec);
+  }
+}
+#endif
+
+// map two's complement signed integer to negabinary unsigned integer
 inline __device__
 long long int uint2int(unsigned long long int x)
 {
-  return (x ^0xaaaaaaaaaaaaaaaaull) - 0xaaaaaaaaaaaaaaaaull;
+  return (x ^ 0xaaaaaaaaaaaaaaaaull) - 0xaaaaaaaaaaaaaaaaull;
 }
 
 inline __device__
 int uint2int(unsigned int x)
 {
-  return (x ^0xaaaaaaaau) - 0xaaaaaaaau;
+  return (x ^ 0xaaaaaaaau) - 0xaaaaaaaau;
 }
 
 template
@@ -40,10 +57,10 @@ public:
     : m_maxbits(maxbits), m_valid_block(true)
   {
     if(block_idx >= num_blocks) m_valid_block = false;
-    int word_index = (block_idx * maxbits) / (sizeof(Word) * 8);
+    size_t word_index = ((size_t)block_idx * maxbits) / (sizeof(Word) * 8);
     m_words = b + word_index;
     m_buffer = *m_words;
-    m_current_bit = (block_idx * maxbits) % (sizeof(Word) * 8);
+    m_current_bit = ((size_t)block_idx * maxbits) % (sizeof(Word) * 8);
     m_buffer >>= m_current_bit;
     m_block_idx = block_idx;
@@ -95,7 +112,7 @@ public:
       next_read = n_bits - first_read;
     }
 
-    // this is basically a no-op when first read constained
+    // this is basically a no-op when first read contained
     // all the bits. TODO: if we have aligned reads, this could
     // be a conditional without divergence
     mask = ((Word)1<<((next_read)))-1;
@@ -107,38 +124,59 @@
 }; // block reader
 
-template
+template
 inline __device__
-void decode_ints(BlockReader &reader, uint &max_bits, UInt *data)
+void decode_ints(BlockReader &reader, uint maxbits, UInt *data)
 {
   const int intprec = get_precision();
-  memset(data, 0, sizeof(UInt) * Size);
-  uint64 x;
   // maxprec = 64;
   const uint kmin = 0; //= intprec > maxprec ? intprec - maxprec : 0;
-  int bits = max_bits;
-  for (uint k = intprec, n = 0; bits && k-- > kmin;)
-  {
-    // read bit plane
-    uint m = MIN(n, bits);
+  uint bits = maxbits;
+  uint k, m, n;
+
+  // initialize data array to all zeros
+  memset(data, 0, size * sizeof(UInt));
+
+  // decode one bit plane at a time from MSB to LSB
+  for (k = intprec, m = n = 0; bits && (m = 0, k-- > kmin);) {
+    // step 1: decode first n bits of bit plane #k
+    m = min(n, bits);
     bits -= m;
-    x = reader.read_bits(m);
-    for (; n < Size && bits && (bits--, reader.read_bit()); x += (Word) 1 << n++)
-      for (; n < (Size - 1) && bits && (bits--, !reader.read_bit()); n++);
-
-    // deposit bit plane
+    uint64 x = reader.read_bits(m);
+    // step 2: unary run-length decode remainder of bit plane
+    for (; bits && n < size; n++, m = n) {
+      bits--;
+      if (reader.read_bit()) {
+        // positive group test; scan for next one-bit
+        for (; bits && n < size - 1; n++) {
+          bits--;
+          if (reader.read_bit())
+            break;
+        }
+        // set bit and continue decoding bit plane
+        x += (uint64)1 << n;
+      }
+      else {
+        // negative group test; done with bit plane
+        m = size;
+        break;
+      }
+    }
+    // step 3: deposit bit plane from x
#if (CUDART_VERSION < 8000)
    #pragma unroll
#else
-    #pragma unroll Size
+    #pragma unroll size
#endif
-    for (int i = 0; i < Size; i++, x >>= 1)
-    {
+    for (uint i = 0; i < size; i++, x >>= 1)
       data[i] += (UInt)(x & 1u) << k;
-    }
-  }
-}
+  }
+#if ZFP_ROUNDING_MODE == ZFP_ROUND_LAST
+  // bias values to achieve proper rounding
+  inv_round(data, m, intprec - k);
+#endif
+}
 
 template struct inv_transform;
 
 template<>
 struct inv_transform<64>
@@ -149,21 +187,19 @@ struct inv_transform<64>
   template
   __device__ void inv_xform(Int *p)
   {
-    uint x, y, z;
-    /* transform along z */
-    for (y = 0; y < 4; y++)
-      for (x = 0; x < 4; x++)
-        inv_lift(p + 1 * x + 4 * y);
-    /* transform along y */
-    for (x = 0; x < 4; x++)
-      for (z = 0; z < 4; z++)
-        inv_lift(p + 16 * z + 1 * x);
-    /* transform along x */
-    for (z = 0; z < 4; z++)
-      for (y = 0; y < 4; y++)
-        inv_lift(p + 4 * y + 16 * z);
+    // transform along z
+    for (uint y = 0; y < 4; y++)
+      for (uint x = 0; x < 4; x++)
+        inv_lift(p + 1 * x + 4 * y);
+    // transform along y
+    for (uint x = 0; x < 4; x++)
+      for (uint z = 0; z < 4; z++)
+        inv_lift(p + 16 * z + 1 * x);
+    // transform along x
+    for (uint z = 0; z < 4; z++)
+      for (uint y = 0; y < 4; y++)
+        inv_lift(p + 4 * y + 16 * z);
   }
-
 };
 
 template<>
@@ -172,17 +208,11 @@ struct inv_transform<16>
   template
   __device__ void inv_xform(Int *p)
   {
-
-    for(int x = 0; x < 4; ++x)
-    {
-      inv_lift(p + 1 * x);
-    }
-    for(int y = 0; y < 4; ++y)
-    {
-      inv_lift(p + 4 * y);
-    }
+    for (uint x = 0; x < 4; ++x)
+      inv_lift(p + 1 * x);
+    for (uint y = 0; y < 4; ++y)
+      inv_lift(p + 4 * y);
   }
-
 };
 
 template<>
@@ -191,9 +221,8 @@ struct inv_transform<4>
   template
   __device__ void inv_xform(Int *p)
   {
-    inv_lift(p);
+    inv_lift(p);
   }
-
 };
 
 template
@@ -227,39 +256,34 @@ __device__ void zfp_decode(BlockReader &reader, Scalar *fblock, 
     uint ebits = 0;
   }
 
-  maxbits -= ebits;
-  
+  maxbits -= ebits;
+
   UInt ublock[BlockSize];
   decode_ints(reader, maxbits, ublock);
 
   Int iblock[BlockSize];
-  unsigned char *perm = get_perm();
+  const unsigned char *perm = get_perm();
#if (CUDART_VERSION < 8000)
   #pragma unroll
#else
   #pragma unroll BlockSize
#endif
-  for(int i = 0; i < BlockSize; ++i)
-  {
-    iblock[perm[i]] = uint2int(ublock[i]);
-  }
+  for (int i = 0; i < BlockSize; ++i)
+    iblock[perm[i]] = uint2int(ublock[i]);
 
   inv_transform trans;
   trans.inv_xform(iblock);
 
-  Scalar inv_w = dequantize(1, emax);
+  Scalar inv_w = dequantize(1, emax);
#if (CUDART_VERSION < 8000)
   #pragma unroll
#else
   #pragma unroll BlockSize
#endif
-  for(int i = 0; i < BlockSize; ++i)
-  {
-    fblock[i] = inv_w * (Scalar)iblock[i];
-  }
-
+  for (int i = 0; i < BlockSize; ++i)
+    fblock[i] = inv_w * (Scalar)iblock[i];
 }
 
 }
diff --git a/src/cuda_zfp/decode1.cuh b/src/cuda_zfp/decode1.cuh
index 996d9ed1..6d357f63 100644
--- a/src/cuda_zfp/decode1.cuh
+++ b/src/cuda_zfp/decode1.cuh
@@ -13,8 +13,8 @@ __device__ __host__ inline
 void scatter_partial1(const Scalar* q, Scalar* p, int nx, int sx)
 {
   uint x;
-  for (x = 0; x < nx; x++, p += sx)
-    *p = *q++;
+  for (x = 0; x < 4; x++)
+    if (x < nx) p[x * sx] = q[x];
 }
 
 template
@@ -127,9 +127,9 @@ size_t decode1launch(uint dim, 
   cudaEventSynchronize(stop);
   cudaStreamSynchronize(0);
 
-  float miliseconds = 0;
-  cudaEventElapsedTime(&miliseconds, start, stop);
-  float seconds = miliseconds / 1000.f;
+  float milliseconds = 0;
+  cudaEventElapsedTime(&milliseconds, start, stop);
+  float seconds = milliseconds / 1000.f;
   float rate = (float(dim) * sizeof(Scalar) ) / seconds;
   rate /= 1024.f;
   rate /= 1024.f;
diff --git a/src/cuda_zfp/decode2.cuh b/src/cuda_zfp/decode2.cuh
index 41e112b5..fa60a82f 100644
--- a/src/cuda_zfp/decode2.cuh
+++ b/src/cuda_zfp/decode2.cuh
@@ -12,9 +12,15 @@ __device__ __host__ inline
 void scatter_partial2(const Scalar* q, Scalar* p, int nx, int ny, int sx, int sy)
 {
   uint x, y;
-  for (y = 0; y < ny; y++, p += sy - nx * sx)
-    for (x = 0; x < nx; x++, p += sx, q++)
-      *p = *q;
+  for (y = 0; y < 4; y++)
+    if (y < ny) {
+      for (x = 0; x < 4; x++)
+        if (x < nx) {
+          *p = q[4 * y + x];
+          p += sx;
+        }
+      p += sy - nx * sx;
+    }
 }
 
 template
@@ -144,9 +150,9 @@ size_t decode2launch(uint2 dims, 
   cudaEventSynchronize(stop);
   cudaStreamSynchronize(0);
 
-  float miliseconds = 0;
-  cudaEventElapsedTime(&miliseconds, start, stop);
-  float seconds = miliseconds / 1000.f;
+  float milliseconds = 0;
+  cudaEventElapsedTime(&milliseconds, start, stop);
+  float seconds = milliseconds / 1000.f;
   float rate = (float(dims.x * dims.y) * sizeof(Scalar) ) / seconds;
   rate /= 1024.f;
   rate /= 1024.f;
diff --git a/src/cuda_zfp/decode3.cuh b/src/cuda_zfp/decode3.cuh
index 2a3ef008..9f2a98a8 100644
--- a/src/cuda_zfp/decode3.cuh
+++ b/src/cuda_zfp/decode3.cuh
@@ -12,10 +12,19 @@ __device__ __host__ inline
 void scatter_partial3(const Scalar* q, Scalar* p, int nx, int ny, int nz, int sx, int sy, int sz)
 {
   uint x, y, z;
-  for (z = 0; z < nz; z++, p += sz - ny * sy, q += 4 * (4 - ny))
-    for (y = 0; y < ny; y++, p += sy - nx * sx, q += 4 - nx)
-      for (x = 0; x < nx; x++, p += sx, q++)
-        *p = *q;
+  for (z = 0; z < 4; z++)
+    if (z < nz) {
+      for (y = 0; y < 4; y++)
+        if (y < ny) {
+          for (x = 0; x < 4; x++)
+            if (x < nx) {
+              *p = q[16 * z + 4 * y + x];
+              p += sx;
+            }
+          p += sy - nx * sx;
+        }
+      p += sz - ny * sy;
+    }
 }
 
 template
@@ -154,9 +163,9 @@ size_t decode3launch(uint3 dims, 
   cudaEventSynchronize(stop);
   cudaStreamSynchronize(0);
 
-  float miliseconds = 0;
-  cudaEventElapsedTime(&miliseconds, start, stop);
-  float seconds = miliseconds / 1000.f;
+  float milliseconds = 0;
+  cudaEventElapsedTime(&milliseconds, start, stop);
+  float seconds = milliseconds / 1000.f;
   float rate = (float(dims.x * dims.y * dims.z) * sizeof(Scalar) ) / seconds;
   rate /= 1024.f;
   rate /= 1024.f;
diff --git a/src/cuda_zfp/encode.cuh b/src/cuda_zfp/encode.cuh
index c65bd356..995c9c32 100644
--- a/src/cuda_zfp/encode.cuh
+++ b/src/cuda_zfp/encode.cuh
@@ -11,7 +11,7 @@ __device__ static int
 precision(int maxexp, int maxprec, int minexp)
 {
-  return MIN(maxprec, MAX(0, maxexp - minexp + 8));
+  return min(maxprec, max(0, maxexp - minexp + 8));
 }
 
 template
@@ -42,13 +42,19 @@ __device__
 static int
 exponent(Scalar x)
 {
+  int e = -get_ebias();
+#ifdef ZFP_WITH_DAZ
+  // treat subnormals as zero; resolves issue #119 by avoiding overflow
+  if (x >= get_scalar_min())
+    frexp(x, &e);
+#else
   if (x > 0) {
-    int e;
     frexp(x, &e);
-    // clamp exponent in case x is denormalized
-    return max(e, 1 - get_ebias());
+    // clamp exponent in case x is subnormal; may still result in overflow
+    e = max(e, 1 - get_ebias());
   }
-  return -get_ebias();
+#endif
+  return e;
 }
 
 template
@@ -57,10 +63,9 @@ static int
 max_exponent(const Scalar* p)
 {
   Scalar max_val = 0;
-  for(int i = 0; i < BlockSize; ++i)
-  {
+  for (int i = 0; i < BlockSize; ++i) {
     Scalar f = fabs(p[i]);
-    max_val = max(max_val,f);
+    max_val = max(max_val, f);
   }
   return exponent(max_val);
 }
@@ -93,6 +98,25 @@ fwd_lift(Int* p)
   p -= s; *p = x;
 }
 
+#if ZFP_ROUNDING_MODE == ZFP_ROUND_FIRST
+// bias values such that truncation is equivalent to round to nearest
+template
+__device__
+static void
+fwd_round(Int* iblock, uint maxprec)
+{
+  // add or subtract 1/6 ulp to unbias errors
+  if (maxprec < (uint)(CHAR_BIT * sizeof(Int))) {
+    Int bias = (static_cast::UInt>(NBMASK) >> 2) >> maxprec;
+    uint n = BlockSize;
+    if (maxprec & 1u)
+      do *iblock++ += bias; while (--n);
+    else
+      do *iblock++ -= bias; while (--n);
+  }
+}
+#endif
+
 template
 Scalar
 inline __device__
@@ -103,7 +127,7 @@
 float
 inline __device__
 quantize_factor(const int &exponent, float)
 {
-  return LDEXP(1.0, get_precision() - 2 - exponent);
+  return LDEXP(1.0, get_precision() - 2 - exponent);
 }
 
 template<>
@@ -111,13 +135,13 @@
 double
 inline __device__
 quantize_factor(const int &exponent, double)
 {
-  return LDEXP(1.0, get_precision() - 2 - exponent);
+  return LDEXP(1.0, get_precision() - 2 - exponent);
 }
 
 template
 void __device__ fwd_cast(Int *iblock, const Scalar *fblock, int emax)
 {
-  Scalar s = quantize_factor(emax, Scalar());
+  Scalar s = quantize_factor(emax, Scalar());
   for(int i = 0; i < BlockSize; ++i)
   {
     iblock[i] = (Int) (s * fblock[i]);
@@ -133,7 +157,6 @@ struct transform<64>
   template
   __device__ void fwd_xform(Int *p)
   {
-
     uint x, y, z;
     /* transform along x */
     for (z = 0; z < 4; z++)
@@ -149,7 +172,6 @@ struct transform<64>
         fwd_lift(p + 1 * x + 4 * y);
   }
-
 };
 
 template<>
@@ -158,16 +180,14 @@ struct transform<16>
   template
   __device__ void fwd_xform(Int *p)
   {
-
     uint x, y;
     /* transform along x */
     for (y = 0; y < 4; y++)
-      fwd_lift(p + 4 * y);
+      fwd_lift(p + 4 * y);
 
     /* transform along y */
     for (x = 0; x < 4; x++)
       fwd_lift(p + 1 * x);
-  }
-
+  }
 };
 
 template<>
@@ -178,14 +198,14 @@ struct transform<4>
   {
     fwd_lift(p);
   }
-
 };
 
 template
 __device__ void fwd_order(UInt *ublock, const Int *iblock)
 {
-  unsigned char *perm = get_perm();
-  for(int i = 0; i < BlockSize; ++i)
+  const unsigned char *perm = get_perm();
+
+  for (int i = 0; i < BlockSize; ++i)
   {
     ublock[i] = int2uint(iblock[perm[i]]);
   }
@@ -206,8 +226,8 @@ struct BlockWriter
     m_maxbits(maxbits),
     m_stream(stream)
   {
-    m_word_index = (block_idx * maxbits) / (sizeof(Word) * 8);
-    m_start_bit = uint((block_idx * maxbits) % (sizeof(Word) * 8));
+    m_word_index = ((size_t)block_idx * maxbits) / (sizeof(Word) * 8);
+    m_start_bit = uint(((size_t)block_idx * maxbits) % (sizeof(Word) * 8));
   }
 
   template
@@ -289,41 +309,39 @@
 void inline __device__ encode_block(BlockWriter &stream, int maxprec, Int *iblock)
 {
+  // perform decorrelating transform
   transform tform;
   tform.fwd_xform(iblock);
 
+#if ZFP_ROUNDING_MODE == ZFP_ROUND_FIRST
+  // bias values to achieve proper rounding
+  fwd_round(iblock, maxprec);
+#endif
+
+  // reorder signed coefficients and convert to unsigned integer
   typedef typename zfp_traits::UInt UInt;
   UInt ublock[BlockSize];
   fwd_order(ublock, iblock);
 
-  uint intprec = CHAR_BIT * (uint)sizeof(UInt);
+  // encode integer coefficients
+  uint intprec = (uint)(CHAR_BIT * sizeof(UInt));
   uint kmin = intprec > maxprec ? intprec - maxprec : 0;
   uint bits = maxbits;
-  uint i, k, m, n;
-  uint64 x;
-  for (k = intprec, n = 0; bits && k-- > kmin;) {
-    /* step 1: extract bit plane #k to x */
-    x = 0;
-    for (i = 0; i < BlockSize; i++)
-    {
+  for (uint k = intprec, n = 0; bits && k-- > kmin;) {
+    // step 1: extract bit plane #k to x
+    uint64 x = 0;
+    for (uint i = 0; i < BlockSize; i++)
       x += (uint64)((ublock[i] >> k) & 1u) << i;
-    }
-    /* step 2: encode first n bits of bit plane */
-    m = min(n, bits);
-    //uint temp = bits;
+    // step 2: encode first n bits of bit plane
+    uint m = min(n, bits);
     bits -= m;
     x = stream.write_bits(x, m);
-
-    /* step 3: unary run-length encode remainder of bit plane */
+    // step 3: unary run-length encode remainder of bit plane
     for (; n < BlockSize && bits && (bits--, stream.write_bit(!!x)); x >>= 1, n++)
-    {
       for (; n < BlockSize - 1 && bits && (bits--, !stream.write_bit(x & 1u)); x >>= 1, n++)
-      {
-      }
-    }
+        ;
   }
-
 }
 
 template
diff --git a/src/cuda_zfp/encode1.cuh b/src/cuda_zfp/encode1.cuh
index 9353f8c0..98ce5a75 100644
--- a/src/cuda_zfp/encode1.cuh
+++ b/src/cuda_zfp/encode1.cuh
@@ -17,8 +17,8 @@ __device__ __host__ inline
 void gather_partial1(Scalar* q, const Scalar* p, int nx, int sx)
 {
   uint x;
-  for (x = 0; x < nx; x++, p += sx)
-    q[x] = *p;
+  for (x = 0; x < 4; x++)
+    if (x < nx) q[x] = p[x * sx];
   pad_block(q, nx, 1);
 }
 
@@ -131,7 +131,7 @@ size_t encode1launch(uint dim, 
   cudaEventRecord(start);
#endif
 
-  cudaEncode1 << > >
+  cudaEncode1 <<>>
     (maxbits,
      d_data,
      stream,
@@ -145,9 +145,9 @@ size_t encode1launch(uint dim, 
   cudaEventSynchronize(stop);
   cudaStreamSynchronize(0);
 
-  float miliseconds = 0.f;
-  cudaEventElapsedTime(&miliseconds, start, stop);
-  float seconds = miliseconds / 1000.f;
+  float milliseconds = 0.f;
+  cudaEventElapsedTime(&milliseconds, start, stop);
+  float seconds = milliseconds / 1000.f;
   float gb = (float(dim) * float(sizeof(Scalar))) / (1024.f * 1024.f * 1024.f);
   float rate = gb / seconds;
   printf("Encode elapsed time: %.5f (s)\n", seconds);
diff --git a/src/cuda_zfp/encode2.cuh b/src/cuda_zfp/encode2.cuh
index 7d9ebfe0..0d577d51 100644
--- a/src/cuda_zfp/encode2.cuh
+++ b/src/cuda_zfp/encode2.cuh
@@ -17,11 +17,16 @@ __device__ __host__ inline
 void gather_partial2(Scalar* q, const Scalar* p, int nx, int ny, int sx, int sy)
 {
   uint x, y;
-  for (y = 0; y < ny; y++, p += sy - nx * sx) {
-    for (x = 0; x < nx; x++, p += sx)
-      q[4 * y + x] = *p;
+  for (y = 0; y < 4; y++)
+    if (y < ny) {
+      for (x = 0; x < 4; x++)
+        if (x < nx) {
+          q[4 * y + x] = *p;//[x * sx];
+          p += sx;
+        }
       pad_block(q + 4 * y, nx, 1);
-  }
+      p += sy - nx * sx;
+    }
   for (x = 0; x < 4; x++)
     pad_block(q + x, ny, 4);
 }
@@ -143,7 +148,7 @@ size_t encode2launch(uint2 dims, 
   cudaEventRecord(start);
#endif
 
-  cudaEncode2 << > >
+  cudaEncode2 <<>>
     (maxbits,
      d_data,
      stream,
@@ -158,9 +163,9 @@ size_t encode2launch(uint2 dims, 
   cudaEventSynchronize(stop);
   cudaStreamSynchronize(0);
 
-  float miliseconds = 0.f;
-  cudaEventElapsedTime(&miliseconds, start, stop);
-  float seconds = miliseconds / 1000.f;
+  float milliseconds = 0.f;
+  cudaEventElapsedTime(&milliseconds, start, stop);
+  float seconds = milliseconds / 1000.f;
   float mb = (float(dims.x * dims.y) * sizeof(Scalar)) / (1024.f * 1024.f *1024.f);
   float rate = mb / seconds;
   printf("Encode elapsed time: %.5f (s)\n", seconds);
diff --git a/src/cuda_zfp/encode3.cuh b/src/cuda_zfp/encode3.cuh
index 9fe7ddd2..1edee9e9 100644
--- a/src/cuda_zfp/encode3.cuh
+++ b/src/cuda_zfp/encode3.cuh
@@ -14,15 +14,22 @@ __device__ __host__ inline
 void gather_partial3(Scalar* q, const Scalar* p, int nx, int ny, int nz, int sx, int sy, int sz)
 {
   uint x, y, z;
-  for (z = 0; z < nz; z++, p += sz - ny * sy) {
-    for (y = 0; y < ny; y++, p += sy - nx * sx) {
-      for (x = 0; x < nx; x++, p += sx)
-        q[16 * z + 4 * y + x] = *p;
-      pad_block(q + 16 * z + 4 * y, nx, 1);
+  for (z = 0; z < 4; z++)
+    if (z < nz) {
+      for (y = 0; y < 4; y++)
+        if (y < ny) {
+          for (x = 0; x < 4; x++)
+            if (x < nx) {
+              q[16 * z + 4 * y + x] = *p;
+              p += sx;
+            }
+          p += sy - nx * sx;
+          pad_block(q + 16 * z + 4 * y, nx, 1);
+        }
+      for (x = 0; x < 4; x++)
+        pad_block(q + 16 * z + x, ny, 4);
+      p += sz - ny * sy;
     }
-    for (x = 0; x < 4; x++)
-      pad_block(q + 16 * z + x, ny, 4);
-  }
   for (y = 0; y < 4; y++)
     for (x = 0; x < 4; x++)
       pad_block(q + 4 * y + x, nz, 16);
@@ -150,7 +157,7 @@ size_t encode3launch(uint3 dims, 
   cudaEventRecord(start);
#endif
 
-  cudaEncode << > >
+  cudaEncode <<>>
     (maxbits,
      d_data,
      stream,
@@ -164,9 +171,9 @@ size_t encode3launch(uint3 dims, 
   cudaEventSynchronize(stop);
   cudaStreamSynchronize(0);
 
-  float miliseconds = 0;
-  cudaEventElapsedTime(&miliseconds, start, stop);
-  float seconds = miliseconds / 1000.f;
+  float milliseconds = 0;
+  cudaEventElapsedTime(&milliseconds, start, stop);
+  float seconds = milliseconds / 1000.f;
   float rate = (float(dims.x * dims.y * dims.z) * sizeof(Scalar) ) / seconds;
   rate /= 1024.f;
   rate /= 1024.f;
diff --git a/src/cuda_zfp/shared.h b/src/cuda_zfp/shared.h
index 52de03ad..27df25be 100644
--- a/src/cuda_zfp/shared.h
+++ b/src/cuda_zfp/shared.h
@@ -7,20 +7,17 @@ typedef unsigned long long Word;
 
 #include "type_info.cuh"
 #include "zfp.h"
+#include "constants.h"
 #include 
 
 #define MAX(x, y) ((x) > (y) ? (x) : (y))
 #define MIN(x, y) ((x) < (y) ? (x) : (y))
-#define bitsize(x) (CHAR_BIT * (uint)sizeof(x))
+#define bitsize(x) ((uint)(CHAR_BIT * sizeof(x)))
 
 #define LDEXP(x, e) ldexp(x, e)
 
 #define NBMASK 0xaaaaaaaaaaaaaaaaull
 
-__constant__ unsigned char c_perm_1[4];
-__constant__ unsigned char c_perm_2[16];
-__constant__ unsigned char c_perm[64];
-
 namespace cuZFP
 {
 
@@ -87,9 +84,17 @@ size_t calc_device_mem3d(const uint3 encoded_dims, 
 dim3 get_max_grid_dims()
 {
-  cudaDeviceProp prop;
-  int device = 0;
-  cudaGetDeviceProperties(&prop, device);
+  static cudaDeviceProp prop;
+  static bool firstTime = true;
+
+  if( firstTime )
+  {
+    firstTime = false;
+
+    int device = 0;
+    cudaGetDeviceProperties(&prop, device);
+  }
+
   dim3 grid_dims;
   grid_dims.x = prop.maxGridSize[0];
   grid_dims.y = prop.maxGridSize[1];
@@ -126,7 +131,7 @@ dim3 calculate_grid_size(size_t size, size_t cuda_block_size)
   if(dims == 2)
   {
     float sq_r = sqrt((float)grids);
-    float intpart = 0.;
+    float intpart = 0;
     modf(sq_r,&intpart);
     uint base = intpart;
     grid_size.x = base;
@@ -141,7 +146,7 @@ dim3 calculate_grid_size(size_t size, size_t cuda_block_size)
   if(dims == 3)
   {
     float cub_r = pow((float)grids, 1.f/3.f);;
-    float intpart = 0.;
+    float intpart = 0;
     modf(cub_r,&intpart);
     int base = intpart;
     grid_size.x = base;
@@ -185,7 +190,7 @@ __device__
 double dequantize(const long long int &x, const int &e)
 {
-  return LDEXP((double)x, e - (CHAR_BIT * scalar_sizeof() - 2));
+  return LDEXP((double)x, e - ((int)(CHAR_BIT * scalar_sizeof()) - 2));
 }
 
 template<>
@@ -193,7 +198,7 @@ __device__
 float dequantize(const int &x, const int &e)
 {
-  return LDEXP((float)x, e - ((int)(CHAR_BIT * scalar_sizeof()) - 2));
+  return LDEXP((float)x, e - ((int)(CHAR_BIT * scalar_sizeof()) - 2));
 }
 
 template<>
@@ -245,28 +250,28 @@ inv_lift(Int* p)
 
 template
-__device__
-unsigned char* get_perm();
+__device__ inline
+const unsigned char* get_perm();
 
 template<>
-__device__
-unsigned char* get_perm<64>()
+__device__ inline
+const unsigned char* get_perm<64>()
 {
-  return c_perm;
+  return perm_3d;
 }
 
 template<>
-__device__
-unsigned char* get_perm<16>()
+__device__ inline
+const unsigned char* get_perm<16>()
 {
-  return c_perm_2;
+  return perm_2;
 }
 
 template<>
-__device__
-unsigned char* get_perm<4>()
+__device__ inline
+const unsigned char* get_perm<4>()
 {
-  return c_perm_1;
+  return perm_1;
 }
diff --git a/src/cuda_zfp/type_info.cuh b/src/cuda_zfp/type_info.cuh
index 969f5532..25d76922 100644
--- a/src/cuda_zfp/type_info.cuh
+++ b/src/cuda_zfp/type_info.cuh
@@ -1,6 +1,8 @@
 #ifndef cuZFP_TYPE_INFO
 #define cuZFP_TYPE_INFO
 
+#include 
+
 namespace cuZFP {
 
 template inline __host__ __device__ int get_ebias();
@@ -27,13 +29,22 @@
 template<> inline __host__ __device__ int get_min_exp() { return -1074; }
 template<> inline __host__ __device__ int get_min_exp() { return 0; }
 template<> inline __host__ __device__ int get_min_exp() { return 0; }
 
-template inline __host__ __device__ int scalar_sizeof();
+template inline __host__ __device__ T get_scalar_min();
+template<> inline __host__ __device__ float get_scalar_min() { return FLT_MIN; }
+template<> inline __host__ __device__ double get_scalar_min() { return DBL_MIN; }
+template<> inline __host__ __device__ long long int get_scalar_min() { return 0; }
+template<> inline __host__ __device__ int get_scalar_min() { return 0; }
 
+template inline __host__ __device__ int scalar_sizeof();
 template<> inline __host__ __device__ int scalar_sizeof() { return 8; }
 template<> inline __host__ __device__ int scalar_sizeof() { return 8; }
 template<> inline __host__ __device__ int scalar_sizeof() { return 4; }
 template<> inline __host__ __device__ int scalar_sizeof() { return 4; }
 
+template inline __host__ __device__ T get_nbmask();
+template<> inline __host__ __device__ unsigned int get_nbmask() { return 0xaaaaaaaau; }
+template<> inline __host__ __device__ unsigned long long int get_nbmask() { return 0xaaaaaaaaaaaaaaaaull; }
+
 template struct zfp_traits;
 
 template<> struct zfp_traits
@@ -75,6 +86,7 @@ template<> inline __host__ __device__ bool is_int()
   return true;
 }
 
+#if 0
 template struct block_traits;
 
 template<> struct block_traits<1>
@@ -86,7 +98,8 @@ template<> struct block_traits<2>
 {
   typedef unsigned short PlaneType;
 };
-
+#endif
 } // namespace cuZFP
+
 #endif
diff --git a/src/decode1d.c b/src/decode1d.c
index 436515a9..b95995fa 100644
--- a/src/decode1d.c
+++ b/src/decode1d.c
@@ -1,11 +1,12 @@
-#include "inline/inline.h"
+#include "zfp/internal/zfp/inline.h"
 #include "zfp.h"
-#include "zfp/macros.h"
+#include "zfp/internal/zfp/macros.h"
 #include "block1.h"
 #include "traitsd.h"
 #include "template/template.h"
 #include "template/codec.h"
-#include "inline/bitstream.c"
+#include "zfp/bitstream.inl"
+#include "template/codec.c"
 #include "template/codecf.c"
 #include "template/codec1.c"
 #include "template/decode.c"
diff --git a/src/decode1f.c b/src/decode1f.c
index 443b8522..f08119f7 100644
--- a/src/decode1f.c
+++ b/src/decode1f.c
@@ -1,11 +1,12 @@
-#include "inline/inline.h"
+#include "zfp/internal/zfp/inline.h"
 #include "zfp.h"
-#include "zfp/macros.h"
+#include "zfp/internal/zfp/macros.h"
 #include "block1.h"
 #include "traitsf.h"
 #include "template/template.h"
 #include "template/codec.h"
-#include "inline/bitstream.c"
+#include "zfp/bitstream.inl"
+#include "template/codec.c"
 #include "template/codecf.c"
 #include "template/codec1.c"
 #include "template/decode.c"
diff --git a/src/decode1i.c b/src/decode1i.c
index 73f58e6c..b148641e 100644
--- a/src/decode1i.c
+++ b/src/decode1i.c
@@ -1,11 +1,12 @@
-#include "inline/inline.h"
+#include "zfp/internal/zfp/inline.h"
 #include "zfp.h"
-#include "zfp/macros.h"
+#include "zfp/internal/zfp/macros.h"
 #include "block1.h"
 #include "traitsi.h"
 #include "template/template.h"
 #include "template/codec.h"
-#include "inline/bitstream.c"
+#include "zfp/bitstream.inl"
+#include "template/codec.c"
 #include "template/codec1.c"
 #include "template/decode.c"
 #include "template/decodei.c"
diff --git a/src/decode1l.c b/src/decode1l.c
index cedcc532..d79e8e46 100644
--- a/src/decode1l.c
+++ b/src/decode1l.c
@@ -1,11 +1,12 @@
-#include "inline/inline.h"
+#include "zfp/internal/zfp/inline.h"
 #include "zfp.h"
-#include "zfp/macros.h"
+#include "zfp/internal/zfp/macros.h"
 #include "block1.h"
 #include "traitsl.h"
 #include "template/template.h"
 #include "template/codec.h"
-#include "inline/bitstream.c"
+#include "zfp/bitstream.inl"
+#include "template/codec.c"
 #include "template/codec1.c"
 #include "template/decode.c"
 #include "template/decodei.c"
diff --git a/src/decode2d.c b/src/decode2d.c
index 8c3a994d..d7f3a77c 100644
--- a/src/decode2d.c
+++ b/src/decode2d.c
@@ -1,11 +1,12 @@
-#include "inline/inline.h"
+#include "zfp/internal/zfp/inline.h"
 #include "zfp.h"
-#include "zfp/macros.h"
+#include "zfp/internal/zfp/macros.h"
 #include "block2.h"
 #include "traitsd.h"
 #include "template/template.h"
 #include "template/codec.h"
-#include "inline/bitstream.c"
+#include "zfp/bitstream.inl"
+#include "template/codec.c"
 #include "template/codecf.c"
 #include "template/codec2.c"
 #include "template/decode.c"
diff --git a/src/decode2f.c b/src/decode2f.c
index 7b3c35cf..5d44e072 100644
--- a/src/decode2f.c
+++ b/src/decode2f.c
@@ -1,11 +1,12 @@
-#include "inline/inline.h"
+#include "zfp/internal/zfp/inline.h"
 #include "zfp.h"
-#include "zfp/macros.h"
+#include "zfp/internal/zfp/macros.h"
 #include "block2.h"
 #include "traitsf.h"
 #include "template/template.h"
 #include "template/codec.h"
-#include "inline/bitstream.c"
+#include "zfp/bitstream.inl"
+#include "template/codec.c"
 #include "template/codecf.c"
 #include "template/codec2.c"
 #include "template/decode.c"
diff --git a/src/decode2i.c b/src/decode2i.c
index 70a4a5a2..579eaa82 100644
--- a/src/decode2i.c
+++ b/src/decode2i.c
@@ -1,11 +1,12 @@
-#include "inline/inline.h"
+#include "zfp/internal/zfp/inline.h"
 #include "zfp.h"
-#include "zfp/macros.h"
+#include "zfp/internal/zfp/macros.h"
 #include "block2.h"
 #include "traitsi.h"
 #include "template/template.h"
 #include "template/codec.h"
-#include "inline/bitstream.c"
+#include "zfp/bitstream.inl"
+#include "template/codec.c"
 #include "template/codec2.c"
 #include "template/decode.c"
 #include "template/decodei.c"
diff --git a/src/decode2l.c b/src/decode2l.c
index 93a2cf83..b4d871f5 100644
--- a/src/decode2l.c
+++ b/src/decode2l.c
@@ -1,11 +1,12 @@
-#include "inline/inline.h"
+#include "zfp/internal/zfp/inline.h"
 #include "zfp.h"
-#include "zfp/macros.h"
+#include "zfp/internal/zfp/macros.h"
 #include "block2.h"
 #include "traitsl.h"
 #include "template/template.h"
 #include "template/codec.h"
-#include "inline/bitstream.c"
+#include "zfp/bitstream.inl"
+#include "template/codec.c"
 #include "template/codec2.c"
 #include "template/decode.c"
 #include "template/decodei.c"
diff --git a/src/decode3d.c b/src/decode3d.c
index b8cb9d18..e9291aa4 100644
--- a/src/decode3d.c
+++ b/src/decode3d.c
@@ -1,11 +1,12 @@
-#include "inline/inline.h"
+#include "zfp/internal/zfp/inline.h"
 #include "zfp.h"
-#include "zfp/macros.h"
+#include "zfp/internal/zfp/macros.h"
 #include "block3.h"
 #include "traitsd.h"
 #include "template/template.h"
 #include "template/codec.h"
-#include "inline/bitstream.c"
+#include "zfp/bitstream.inl"
+#include "template/codec.c"
 #include "template/codecf.c"
 #include "template/codec3.c"
 #include "template/decode.c"
diff --git a/src/decode3f.c b/src/decode3f.c
index 914c4999..cc517b13 100644
--- a/src/decode3f.c
+++ b/src/decode3f.c
@@ -1,11 +1,12 @@
-#include "inline/inline.h"
+#include "zfp/internal/zfp/inline.h"
 #include "zfp.h"
-#include "zfp/macros.h"
+#include "zfp/internal/zfp/macros.h"
 #include "block3.h"
 #include "traitsf.h"
 #include "template/template.h"
 #include "template/codec.h"
-#include "inline/bitstream.c"
+#include "zfp/bitstream.inl"
+#include "template/codec.c"
 #include "template/codecf.c"
 #include "template/codec3.c"
 #include "template/decode.c"
diff --git a/src/decode3i.c b/src/decode3i.c
index 46af93e0..0eb05dea 100644
--- a/src/decode3i.c
+++ b/src/decode3i.c
@@ -1,11 +1,12 @@
-#include "inline/inline.h"
+#include "zfp/internal/zfp/inline.h"
 #include "zfp.h"
-#include "zfp/macros.h"
+#include "zfp/internal/zfp/macros.h"
 #include "block3.h"
 #include "traitsi.h"
 #include "template/template.h"
 #include "template/codec.h"
-#include "inline/bitstream.c"
+#include "zfp/bitstream.inl"
+#include "template/codec.c"
 #include "template/codec3.c"
 #include "template/decode.c"
 #include "template/decodei.c"
diff --git a/src/decode3l.c b/src/decode3l.c
index 1e76d171..d895d0e7 100644
--- a/src/decode3l.c
+++ b/src/decode3l.c
@@ -1,11 +1,12 @@
-#include "inline/inline.h"
+#include "zfp/internal/zfp/inline.h"
 #include "zfp.h"
-#include "zfp/macros.h"
+#include "zfp/internal/zfp/macros.h"
 #include "block3.h"
 #include "traitsl.h"
 #include "template/template.h"
 #include "template/codec.h"
-#include "inline/bitstream.c"
+#include "zfp/bitstream.inl"
+#include "template/codec.c"
 #include "template/codec3.c"
 #include "template/decode.c"
 #include "template/decodei.c"
diff --git a/src/decode4d.c b/src/decode4d.c
index ee5b31fc..38861b5d 100644
--- a/src/decode4d.c
+++ b/src/decode4d.c
@@ -1,11 +1,12 @@
-#include "inline/inline.h"
+#include "zfp/internal/zfp/inline.h"
 #include "zfp.h"
-#include "zfp/macros.h"
+#include "zfp/internal/zfp/macros.h"
 #include "block4.h"
 #include "traitsd.h"
 #include "template/template.h"
 #include "template/codec.h"
-#include "inline/bitstream.c"
+#include "zfp/bitstream.inl"
+#include "template/codec.c"
 #include "template/codecf.c"
 #include "template/codec4.c"
 #include "template/decode.c"
diff --git a/src/decode4f.c b/src/decode4f.c
index 5eb3b900..7ef87f10 100644
--- a/src/decode4f.c
+++ b/src/decode4f.c
@@ -1,11 +1,12 @@
-#include "inline/inline.h"
+#include "zfp/internal/zfp/inline.h"
 #include "zfp.h"
-#include "zfp/macros.h"
+#include "zfp/internal/zfp/macros.h"
 #include "block4.h"
 #include "traitsf.h"
 #include "template/template.h"
 #include "template/codec.h"
-#include "inline/bitstream.c"
+#include "zfp/bitstream.inl"
+#include 
"template/codec.c" #include "template/codecf.c" #include "template/codec4.c" #include "template/decode.c" diff --git a/src/decode4i.c b/src/decode4i.c index b871eba4..ade99493 100644 --- a/src/decode4i.c +++ b/src/decode4i.c @@ -1,11 +1,12 @@ -#include "inline/inline.h" +#include "zfp/internal/zfp/inline.h" #include "zfp.h" -#include "zfp/macros.h" +#include "zfp/internal/zfp/macros.h" #include "block4.h" #include "traitsi.h" #include "template/template.h" #include "template/codec.h" -#include "inline/bitstream.c" +#include "zfp/bitstream.inl" +#include "template/codec.c" #include "template/codec4.c" #include "template/decode.c" #include "template/decodei.c" diff --git a/src/decode4l.c b/src/decode4l.c index b37e47e1..bbbdefbb 100644 --- a/src/decode4l.c +++ b/src/decode4l.c @@ -1,11 +1,12 @@ -#include "inline/inline.h" +#include "zfp/internal/zfp/inline.h" #include "zfp.h" -#include "zfp/macros.h" +#include "zfp/internal/zfp/macros.h" #include "block4.h" #include "traitsl.h" #include "template/template.h" #include "template/codec.h" -#include "inline/bitstream.c" +#include "zfp/bitstream.inl" +#include "template/codec.c" #include "template/codec4.c" #include "template/decode.c" #include "template/decodei.c" diff --git a/src/encode1d.c b/src/encode1d.c index 84b9ac8c..43f5101c 100644 --- a/src/encode1d.c +++ b/src/encode1d.c @@ -1,11 +1,12 @@ -#include "inline/inline.h" +#include "zfp/internal/zfp/inline.h" #include "zfp.h" -#include "zfp/macros.h" +#include "zfp/internal/zfp/macros.h" #include "block1.h" #include "traitsd.h" #include "template/template.h" #include "template/codec.h" -#include "inline/bitstream.c" +#include "zfp/bitstream.inl" +#include "template/codec.c" #include "template/codecf.c" #include "template/codec1.c" #include "template/encode.c" diff --git a/src/encode1f.c b/src/encode1f.c index a57a7cf7..ae509d53 100644 --- a/src/encode1f.c +++ b/src/encode1f.c @@ -1,11 +1,12 @@ -#include "inline/inline.h" +#include "zfp/internal/zfp/inline.h" #include 
"zfp.h" -#include "zfp/macros.h" +#include "zfp/internal/zfp/macros.h" #include "block1.h" #include "traitsf.h" #include "template/template.h" #include "template/codec.h" -#include "inline/bitstream.c" +#include "zfp/bitstream.inl" +#include "template/codec.c" #include "template/codecf.c" #include "template/codec1.c" #include "template/encode.c" diff --git a/src/encode1i.c b/src/encode1i.c index dcd9aa64..ea3593cd 100644 --- a/src/encode1i.c +++ b/src/encode1i.c @@ -1,11 +1,12 @@ -#include "inline/inline.h" +#include "zfp/internal/zfp/inline.h" #include "zfp.h" -#include "zfp/macros.h" +#include "zfp/internal/zfp/macros.h" #include "block1.h" #include "traitsi.h" #include "template/template.h" #include "template/codec.h" -#include "inline/bitstream.c" +#include "zfp/bitstream.inl" +#include "template/codec.c" #include "template/codec1.c" #include "template/encode.c" #include "template/encodei.c" diff --git a/src/encode1l.c b/src/encode1l.c index 032c3de6..e9415e20 100644 --- a/src/encode1l.c +++ b/src/encode1l.c @@ -1,11 +1,12 @@ -#include "inline/inline.h" +#include "zfp/internal/zfp/inline.h" #include "zfp.h" -#include "zfp/macros.h" +#include "zfp/internal/zfp/macros.h" #include "block1.h" #include "traitsl.h" #include "template/template.h" #include "template/codec.h" -#include "inline/bitstream.c" +#include "zfp/bitstream.inl" +#include "template/codec.c" #include "template/codec1.c" #include "template/encode.c" #include "template/encodei.c" diff --git a/src/encode2d.c b/src/encode2d.c index 50e8dd83..8f445892 100644 --- a/src/encode2d.c +++ b/src/encode2d.c @@ -1,11 +1,12 @@ -#include "inline/inline.h" +#include "zfp/internal/zfp/inline.h" #include "zfp.h" -#include "zfp/macros.h" +#include "zfp/internal/zfp/macros.h" #include "block2.h" #include "traitsd.h" #include "template/template.h" #include "template/codec.h" -#include "inline/bitstream.c" +#include "zfp/bitstream.inl" +#include "template/codec.c" #include "template/codecf.c" #include 
"template/codec2.c" #include "template/encode.c" diff --git a/src/encode2f.c b/src/encode2f.c index 713a74e4..814a18a2 100644 --- a/src/encode2f.c +++ b/src/encode2f.c @@ -1,11 +1,12 @@ -#include "inline/inline.h" +#include "zfp/internal/zfp/inline.h" #include "zfp.h" -#include "zfp/macros.h" +#include "zfp/internal/zfp/macros.h" #include "block2.h" #include "traitsf.h" #include "template/template.h" #include "template/codec.h" -#include "inline/bitstream.c" +#include "zfp/bitstream.inl" +#include "template/codec.c" #include "template/codecf.c" #include "template/codec2.c" #include "template/encode.c" diff --git a/src/encode2i.c b/src/encode2i.c index d0b4b54c..8417031f 100644 --- a/src/encode2i.c +++ b/src/encode2i.c @@ -1,11 +1,12 @@ -#include "inline/inline.h" +#include "zfp/internal/zfp/inline.h" #include "zfp.h" -#include "zfp/macros.h" +#include "zfp/internal/zfp/macros.h" #include "block2.h" #include "traitsi.h" #include "template/template.h" #include "template/codec.h" -#include "inline/bitstream.c" +#include "zfp/bitstream.inl" +#include "template/codec.c" #include "template/codec2.c" #include "template/encode.c" #include "template/encodei.c" diff --git a/src/encode2l.c b/src/encode2l.c index d834cfa1..87f5a2f3 100644 --- a/src/encode2l.c +++ b/src/encode2l.c @@ -1,11 +1,12 @@ -#include "inline/inline.h" +#include "zfp/internal/zfp/inline.h" #include "zfp.h" -#include "zfp/macros.h" +#include "zfp/internal/zfp/macros.h" #include "block2.h" #include "traitsl.h" #include "template/template.h" #include "template/codec.h" -#include "inline/bitstream.c" +#include "zfp/bitstream.inl" +#include "template/codec.c" #include "template/codec2.c" #include "template/encode.c" #include "template/encodei.c" diff --git a/src/encode3d.c b/src/encode3d.c index 16c385e2..55f55d62 100644 --- a/src/encode3d.c +++ b/src/encode3d.c @@ -1,11 +1,12 @@ -#include "inline/inline.h" +#include "zfp/internal/zfp/inline.h" #include "zfp.h" -#include "zfp/macros.h" +#include 
"zfp/internal/zfp/macros.h" #include "block3.h" #include "traitsd.h" #include "template/template.h" #include "template/codec.h" -#include "inline/bitstream.c" +#include "zfp/bitstream.inl" +#include "template/codec.c" #include "template/codecf.c" #include "template/codec3.c" #include "template/encode.c" diff --git a/src/encode3f.c b/src/encode3f.c index 1668aff8..de3bbaf4 100644 --- a/src/encode3f.c +++ b/src/encode3f.c @@ -1,11 +1,12 @@ -#include "inline/inline.h" +#include "zfp/internal/zfp/inline.h" #include "zfp.h" -#include "zfp/macros.h" +#include "zfp/internal/zfp/macros.h" #include "block3.h" #include "traitsf.h" #include "template/template.h" #include "template/codec.h" -#include "inline/bitstream.c" +#include "zfp/bitstream.inl" +#include "template/codec.c" #include "template/codecf.c" #include "template/codec3.c" #include "template/encode.c" diff --git a/src/encode3i.c b/src/encode3i.c index c92a1a69..257a1ecd 100644 --- a/src/encode3i.c +++ b/src/encode3i.c @@ -1,11 +1,12 @@ -#include "inline/inline.h" +#include "zfp/internal/zfp/inline.h" #include "zfp.h" -#include "zfp/macros.h" +#include "zfp/internal/zfp/macros.h" #include "block3.h" #include "traitsi.h" #include "template/template.h" #include "template/codec.h" -#include "inline/bitstream.c" +#include "zfp/bitstream.inl" +#include "template/codec.c" #include "template/codec3.c" #include "template/encode.c" #include "template/encodei.c" diff --git a/src/encode3l.c b/src/encode3l.c index 4d53304e..c6269699 100644 --- a/src/encode3l.c +++ b/src/encode3l.c @@ -1,11 +1,12 @@ -#include "inline/inline.h" +#include "zfp/internal/zfp/inline.h" #include "zfp.h" -#include "zfp/macros.h" +#include "zfp/internal/zfp/macros.h" #include "block3.h" #include "traitsl.h" #include "template/template.h" #include "template/codec.h" -#include "inline/bitstream.c" +#include "zfp/bitstream.inl" +#include "template/codec.c" #include "template/codec3.c" #include "template/encode.c" #include "template/encodei.c" diff --git 
a/src/encode4d.c b/src/encode4d.c index c82d19a1..346f1747 100644 --- a/src/encode4d.c +++ b/src/encode4d.c @@ -1,11 +1,12 @@ -#include "inline/inline.h" +#include "zfp/internal/zfp/inline.h" #include "zfp.h" -#include "zfp/macros.h" +#include "zfp/internal/zfp/macros.h" #include "block4.h" #include "traitsd.h" #include "template/template.h" #include "template/codec.h" -#include "inline/bitstream.c" +#include "zfp/bitstream.inl" +#include "template/codec.c" #include "template/codecf.c" #include "template/codec4.c" #include "template/encode.c" diff --git a/src/encode4f.c b/src/encode4f.c index e0ce0146..b855262b 100644 --- a/src/encode4f.c +++ b/src/encode4f.c @@ -1,11 +1,12 @@ -#include "inline/inline.h" +#include "zfp/internal/zfp/inline.h" #include "zfp.h" -#include "zfp/macros.h" +#include "zfp/internal/zfp/macros.h" #include "block4.h" #include "traitsf.h" #include "template/template.h" #include "template/codec.h" -#include "inline/bitstream.c" +#include "zfp/bitstream.inl" +#include "template/codec.c" #include "template/codecf.c" #include "template/codec4.c" #include "template/encode.c" diff --git a/src/encode4i.c b/src/encode4i.c index ab82e0e2..5bed6cdf 100644 --- a/src/encode4i.c +++ b/src/encode4i.c @@ -1,11 +1,12 @@ -#include "inline/inline.h" +#include "zfp/internal/zfp/inline.h" #include "zfp.h" -#include "zfp/macros.h" +#include "zfp/internal/zfp/macros.h" #include "block4.h" #include "traitsi.h" #include "template/template.h" #include "template/codec.h" -#include "inline/bitstream.c" +#include "zfp/bitstream.inl" +#include "template/codec.c" #include "template/codec4.c" #include "template/encode.c" #include "template/encodei.c" diff --git a/src/encode4l.c b/src/encode4l.c index 805ee01a..fd84e5a1 100644 --- a/src/encode4l.c +++ b/src/encode4l.c @@ -1,11 +1,12 @@ -#include "inline/inline.h" +#include "zfp/internal/zfp/inline.h" #include "zfp.h" -#include "zfp/macros.h" +#include "zfp/internal/zfp/macros.h" #include "block4.h" #include "traitsl.h" 
#include "template/template.h" #include "template/codec.h" -#include "inline/bitstream.c" +#include "zfp/bitstream.inl" +#include "template/codec.c" #include "template/codec4.c" #include "template/encode.c" #include "template/encodei.c" diff --git a/src/share/omp.c b/src/share/omp.c index 9ee26b9a..02507e56 100644 --- a/src/share/omp.c +++ b/src/share/omp.c @@ -1,11 +1,13 @@ #ifdef _OPENMP +#include <limits.h> #include <omp.h> +#include "zfp.h" /* number of omp threads to use */ -static int +static uint thread_count_omp(const zfp_stream* stream) { - int count = stream->exec.params.omp.threads; + uint count = zfp_stream_omp_threads(stream); /* if no thread count is specified, use default number of threads */ if (!count) count = omp_get_max_threads(); @@ -13,13 +15,17 @@ thread_count_omp(const zfp_stream* stream) } /* number of chunks to partition array into */ -static uint -chunk_count_omp(const zfp_stream* stream, uint blocks, uint threads) +static size_t +chunk_count_omp(const zfp_stream* stream, size_t blocks, uint threads) { - uint chunk_size = stream->exec.params.omp.chunk_size; + size_t chunk_size = (size_t)zfp_stream_omp_chunk_size(stream); /* if no chunk size is specified, assign one chunk per thread */ - uint chunks = chunk_size ? (blocks + chunk_size - 1) / chunk_size : threads; - return MIN(chunks, blocks); + size_t chunks = chunk_size ?
(blocks + chunk_size - 1) / chunk_size : threads; + /* each chunk must contain at least one block */ + chunks = MIN(chunks, blocks); + /* OpenMP 2.0 loop counters must be ints */ + chunks = MIN(chunks, INT_MAX); + return chunks; } #endif diff --git a/src/share/parallel.c b/src/share/parallel.c index e778ac7c..1ae36526 100644 --- a/src/share/parallel.c +++ b/src/share/parallel.c @@ -1,41 +1,42 @@ #ifdef _OPENMP /* block index at which chunk begins */ -static uint -chunk_offset(uint blocks, uint chunks, uint chunk) +static size_t +chunk_offset(size_t blocks, size_t chunks, size_t chunk) { - return (uint)((blocks * (uint64)chunk) / chunks); + return (size_t)(((uint64)blocks * (uint64)chunk) / chunks); } /* initialize per-thread bit streams for parallel compression */ static bitstream** -compress_init_par(zfp_stream* stream, const zfp_field* field, uint chunks, uint blocks) +compress_init_par(zfp_stream* stream, const zfp_field* field, size_t chunks, size_t blocks) { bitstream** bs; + zfp_bool copy; + size_t n = 4 * (blocks + chunks - 1) / chunks; size_t size; - int copy = 0; - uint i; + size_t chunk; /* determine maximum size buffer needed per thread */ zfp_field f = *field; switch (zfp_field_dimensionality(field)) { case 1: - f.nx = 4 * (blocks + chunks - 1) / chunks; + f.nx = n; break; case 2: f.nx = 4; - f.ny = 4 * (blocks + chunks - 1) / chunks; + f.ny = n; break; case 3: f.nx = 4; f.ny = 4; - f.nz = 4 * (blocks + chunks - 1) / chunks; + f.nz = n; break; case 4: f.nx = 4; f.ny = 4; f.nz = 4; - f.nw = 4 * (blocks + chunks - 1) / chunks; + f.nw = n; break; default: return NULL; @@ -43,27 +44,27 @@ compress_init_par(zfp_stream* stream, const zfp_field* field, uint chunks, uint size = zfp_stream_maximum_size(stream, &f); /* avoid copies in fixed-rate mode when each bitstream is word aligned */ - copy |= stream->minbits != stream->maxbits; - copy |= (stream->maxbits % stream_word_bits) != 0; - copy |= (stream_wtell(stream->stream) % stream_word_bits) != 0; + copy = 
(stream->minbits != stream->maxbits) || + (stream->maxbits % stream_word_bits != 0) || + (stream_wtell(stream->stream) % stream_word_bits != 0); /* set up buffer for each thread to compress to */ bs = (bitstream**)malloc(chunks * sizeof(bitstream*)); if (!bs) return NULL; - for (i = 0; i < chunks; i++) { - uint block = chunk_offset(blocks, chunks, i); - void* buffer = copy ? malloc(size) : (uchar*)stream_data(stream->stream) + stream_size(stream->stream) + block * stream->maxbits / CHAR_BIT; + for (chunk = 0; chunk < chunks; chunk++) { + size_t block = chunk_offset(blocks, chunks, chunk); + void* buffer = copy ? malloc(size) : (uchar*)stream_data(stream->stream) + stream_size(stream->stream) + block * (stream->maxbits / CHAR_BIT); if (!buffer) break; - bs[i] = stream_open(buffer, size); + bs[chunk] = stream_open(buffer, size); } /* handle memory allocation failure */ - if (copy && i < chunks) { - while (i--) { - free(stream_data(bs[i])); - stream_close(bs[i]); + if (copy && chunk < chunks) { + while (chunk--) { + free(stream_data(bs[chunk])); + stream_close(bs[chunk]); } free(bs); bs = NULL; @@ -74,24 +75,27 @@ compress_init_par(zfp_stream* stream, const zfp_field* field, uint chunks, uint /* flush and concatenate bit streams if needed */ static void -compress_finish_par(zfp_stream* stream, bitstream** src, uint chunks) +compress_finish_par(zfp_stream* stream, bitstream** src, size_t chunks) { bitstream* dst = zfp_stream_bit_stream(stream); - int copy = (stream_data(dst) != stream_data(*src)); - size_t offset = stream_wtell(dst); - uint i; - for (i = 0; i < chunks; i++) { - size_t bits = stream_wtell(src[i]); + zfp_bool copy = (stream_data(dst) != stream_data(*src)); + bitstream_offset offset = stream_wtell(dst); + size_t chunk; + + /* flush each stream and concatenate if necessary */ + for (chunk = 0; chunk < chunks; chunk++) { + bitstream_size bits = stream_wtell(src[chunk]); offset += bits; - stream_flush(src[i]); + stream_flush(src[chunk]); /* concatenate 
streams if they are not already contiguous */ if (copy) { - stream_rewind(src[i]); - stream_copy(dst, src[i], bits); - free(stream_data(src[i])); + stream_rewind(src[chunk]); + stream_copy(dst, src[chunk], bits); + free(stream_data(src[chunk])); } - stream_close(src[i]); + stream_close(src[chunk]); } + free(src); if (!copy) stream_wseek(dst, offset); diff --git a/src/template/codec.c b/src/template/codec.c new file mode 100644 index 00000000..539bca98 --- /dev/null +++ b/src/template/codec.c @@ -0,0 +1,6 @@ +/* true if max compressed size exceeds maxbits */ +static int +with_maxbits(uint maxbits, uint maxprec, uint size) +{ + return (maxprec + 1) * size - 1 > maxbits; +} diff --git a/src/template/codecf.c b/src/template/codecf.c index bc2cc808..50929fa7 100644 --- a/src/template/codecf.c +++ b/src/template/codecf.c @@ -5,14 +5,18 @@ static uint precision(int maxexp, uint maxprec, int minexp, int dims) { - return MIN(maxprec, (uint)MAX(0, maxexp - minexp + 2 * (dims + 1))); +#if (ZFP_ROUNDING_MODE != ZFP_ROUND_NEVER) && defined(ZFP_WITH_TIGHT_ERROR) + return MIN(maxprec, (uint)MAX(0, maxexp - minexp + 2 * dims + 1)); +#else + return MIN(maxprec, (uint)MAX(0, maxexp - minexp + 2 * dims + 2)); +#endif } /* map integer x relative to exponent e to floating-point number */ static Scalar _t1(dequantize, Scalar)(Int x, int e) { - return LDEXP((Scalar)x, e - (CHAR_BIT * (int)sizeof(Scalar) - 2)); + return LDEXP((Scalar)x, e - ((int)(CHAR_BIT * sizeof(Scalar)) - 2)); } /* inverse block-floating-point transform from signed integers */ diff --git a/src/template/compress.c b/src/template/compress.c index 3bef658d..74983c56 100644 --- a/src/template/compress.c +++ b/src/template/compress.c @@ -3,9 +3,9 @@ static void _t2(compress, Scalar, 1)(zfp_stream* stream, const zfp_field* field) { const Scalar* data = (const Scalar*)field->data; - uint nx = field->nx; - uint mx = nx & ~3u; - uint x; + size_t nx = field->nx; + size_t mx = nx & ~3u; + size_t x; /* compress array one block of 
4 values at a time */ for (x = 0; x < mx; x += 4, data += 4) @@ -19,9 +19,9 @@ static void _t2(compress_strided, Scalar, 1)(zfp_stream* stream, const zfp_field* field) { const Scalar* data = field->data; - uint nx = field->nx; - int sx = field->sx ? field->sx : 1; - uint x; + size_t nx = field->nx; + ptrdiff_t sx = field->sx ? field->sx : 1; + size_t x; /* compress array one block of 4 values at a time */ for (x = 0; x < nx; x += 4) { @@ -38,11 +38,11 @@ static void _t2(compress_strided, Scalar, 2)(zfp_stream* stream, const zfp_field* field) { const Scalar* data = (const Scalar*)field->data; - uint nx = field->nx; - uint ny = field->ny; - int sx = field->sx ? field->sx : 1; - int sy = field->sy ? field->sy : (int)nx; - uint x, y; + size_t nx = field->nx; + size_t ny = field->ny; + ptrdiff_t sx = field->sx ? field->sx : 1; + ptrdiff_t sy = field->sy ? field->sy : (ptrdiff_t)nx; + size_t x, y; /* compress array one block of 4x4 values at a time */ for (y = 0; y < ny; y += 4) @@ -60,13 +60,13 @@ static void _t2(compress_strided, Scalar, 3)(zfp_stream* stream, const zfp_field* field) { const Scalar* data = (const Scalar*)field->data; - uint nx = field->nx; - uint ny = field->ny; - uint nz = field->nz; - int sx = field->sx ? field->sx : 1; - int sy = field->sy ? field->sy : (int)nx; - int sz = field->sz ? field->sz : (int)(nx * ny); - uint x, y, z; + size_t nx = field->nx; + size_t ny = field->ny; + size_t nz = field->nz; + ptrdiff_t sx = field->sx ? field->sx : 1; + ptrdiff_t sy = field->sy ? field->sy : (ptrdiff_t)nx; + ptrdiff_t sz = field->sz ? field->sz : (ptrdiff_t)(nx * ny); + size_t x, y, z; /* compress array one block of 4x4x4 values at a time */ for (z = 0; z < nz; z += 4) @@ -85,15 +85,15 @@ static void _t2(compress_strided, Scalar, 4)(zfp_stream* stream, const zfp_field* field) { const Scalar* data = field->data; - uint nx = field->nx; - uint ny = field->ny; - uint nz = field->nz; - uint nw = field->nw; - int sx = field->sx ? 
field->sx : 1; - int sy = field->sy ? field->sy : (int)nx; - int sz = field->sz ? field->sz : (int)(nx * ny); - int sw = field->sw ? field->sw : (int)(nx * ny * nz); - uint x, y, z, w; + size_t nx = field->nx; + size_t ny = field->ny; + size_t nz = field->nz; + size_t nw = field->nw; + ptrdiff_t sx = field->sx ? field->sx : 1; + ptrdiff_t sy = field->sy ? field->sy : (ptrdiff_t)nx; + ptrdiff_t sz = field->sz ? field->sz : (ptrdiff_t)(nx * ny); + ptrdiff_t sw = field->sw ? field->sw : (ptrdiff_t)(nx * ny * nz); + size_t x, y, z, w; /* compress array one block of 4x4x4x4 values at a time */ for (w = 0; w < nw; w += 4) diff --git a/src/template/cudacompress.c b/src/template/cudacompress.c index 1d685c92..8249beb9 100644 --- a/src/template/cudacompress.c +++ b/src/template/cudacompress.c @@ -5,40 +5,32 @@ static void _t2(compress_cuda, Scalar, 1)(zfp_stream* stream, const zfp_field* field) { - if(zfp_stream_compression_mode(stream) == zfp_mode_fixed_rate) - { + if (zfp_stream_compression_mode(stream) == zfp_mode_fixed_rate) cuda_compress(stream, field); - } } /* compress 1d strided array */ static void _t2(compress_strided_cuda, Scalar, 1)(zfp_stream* stream, const zfp_field* field) { - if(zfp_stream_compression_mode(stream) == zfp_mode_fixed_rate) - { + if (zfp_stream_compression_mode(stream) == zfp_mode_fixed_rate) cuda_compress(stream, field); - } } /* compress 2d strided array */ static void _t2(compress_strided_cuda, Scalar, 2)(zfp_stream* stream, const zfp_field* field) { - if(zfp_stream_compression_mode(stream) == zfp_mode_fixed_rate) - { + if (zfp_stream_compression_mode(stream) == zfp_mode_fixed_rate) cuda_compress(stream, field); - } } /* compress 3d strided array */ static void _t2(compress_strided_cuda, Scalar, 3)(zfp_stream* stream, const zfp_field* field) { - if(zfp_stream_compression_mode(stream) == zfp_mode_fixed_rate) - { + if (zfp_stream_compression_mode(stream) == zfp_mode_fixed_rate) cuda_compress(stream, field); - } } #endif diff --git 
a/src/template/cudadecompress.c b/src/template/cudadecompress.c index 4ea4e5bf..1dc918a6 100644 --- a/src/template/cudadecompress.c +++ b/src/template/cudadecompress.c @@ -5,40 +5,32 @@ static void _t2(decompress_cuda, Scalar, 1)(zfp_stream* stream, zfp_field* field) { - if(zfp_stream_compression_mode(stream) == zfp_mode_fixed_rate) - { + if (zfp_stream_compression_mode(stream) == zfp_mode_fixed_rate) cuda_decompress(stream, field); - } } /* compress 1d strided array */ static void _t2(decompress_strided_cuda, Scalar, 1)(zfp_stream* stream, zfp_field* field) { - if(zfp_stream_compression_mode(stream) == zfp_mode_fixed_rate) - { + if (zfp_stream_compression_mode(stream) == zfp_mode_fixed_rate) cuda_decompress(stream, field); - } } /* compress 2d strided array */ static void _t2(decompress_strided_cuda, Scalar, 2)(zfp_stream* stream, zfp_field* field) { - if(zfp_stream_compression_mode(stream) == zfp_mode_fixed_rate) - { + if (zfp_stream_compression_mode(stream) == zfp_mode_fixed_rate) cuda_decompress(stream, field); - } } /* compress 3d strided array */ static void _t2(decompress_strided_cuda, Scalar, 3)(zfp_stream* stream, zfp_field* field) { - if(zfp_stream_compression_mode(stream) == zfp_mode_fixed_rate) - { + if (zfp_stream_compression_mode(stream) == zfp_mode_fixed_rate) cuda_decompress(stream, field); - } } #endif diff --git a/src/template/decode.c b/src/template/decode.c index e2a2f276..2f39bebd 100644 --- a/src/template/decode.c +++ b/src/template/decode.c @@ -6,7 +6,7 @@ static void _t2(inv_xform, Int, DIMS)(Int* p); /* inverse lifting transform of 4-vector */ static void -_t1(inv_lift, Int)(Int* p, uint s) +_t1(inv_lift, Int)(Int* p, ptrdiff_t s) { Int x, y, z, w; x = *p; p += s; @@ -33,6 +33,21 @@ _t1(inv_lift, Int)(Int* p, uint s) p -= s; *p = x; } +#if ZFP_ROUNDING_MODE == ZFP_ROUND_LAST +/* bias values such that truncation is equivalent to round to nearest */ +static void +_t1(inv_round, UInt)(UInt* ublock, uint n, uint m, uint prec) +{ + /* add 1/6 
ulp to unbias errors */ + if (prec < (uint)(CHAR_BIT * sizeof(UInt) - 1)) { + /* the first m values (0 <= m <= n) have one more bit of precision */ + n -= m; + while (m--) *ublock++ += ((NBMASK >> 2) >> prec); + while (n--) *ublock++ += ((NBMASK >> 1) >> prec); + } +} +#endif + /* map two's complement signed integer to negabinary unsigned integer */ static Int _t1(uint2int, UInt)(UInt x) @@ -49,13 +64,13 @@ _t1(inv_order, Int)(const UInt* ublock, Int* iblock, const uchar* perm, uint n) while (--n); } -/* decompress sequence of size unsigned integers */ +/* decompress sequence of size <= 64 unsigned integers */ static uint -_t1(decode_ints, UInt)(bitstream* restrict_ stream, uint maxbits, uint maxprec, UInt* restrict_ data, uint size) +_t1(decode_few_ints, UInt)(bitstream* restrict_ stream, uint maxbits, uint maxprec, UInt* restrict_ data, uint size) { /* make a copy of bit stream to avoid aliasing */ bitstream s = *stream; - uint intprec = CHAR_BIT * (uint)sizeof(UInt); + uint intprec = (uint)(CHAR_BIT * sizeof(UInt)); uint kmin = intprec > maxprec ? 
intprec - maxprec : 0; uint bits = maxbits; uint i, k, m, n; @@ -66,20 +81,40 @@ _t1(decode_ints, UInt)(bitstream* restrict_ stream, uint maxbits, uint maxprec, data[i] = 0; /* decode one bit plane at a time from MSB to LSB */ - for (k = intprec, n = 0; bits && k-- > kmin;) { - /* decode first n bits of bit plane #k */ + for (k = intprec, m = n = 0; bits && (m = 0, k-- > kmin);) { + /* step 1: decode first n bits of bit plane #k */ m = MIN(n, bits); bits -= m; x = stream_read_bits(&s, m); - /* unary run-length decode remainder of bit plane */ - for (; n < size && bits && (bits--, stream_read_bit(&s)); x += (uint64)1 << n++) - for (; n < size - 1 && bits && (bits--, !stream_read_bit(&s)); n++) - ; - /* deposit bit plane from x */ + /* step 2: unary run-length decode remainder of bit plane */ + for (; bits && n < size; n++, m = n) { + bits--; + if (stream_read_bit(&s)) { + /* positive group test; scan for next one-bit */ + for (; bits && n < size - 1; n++) { + bits--; + if (stream_read_bit(&s)) + break; + } + /* set bit and continue decoding bit plane */ + x += (uint64)1 << n; + } + else { + /* negative group test; done with bit plane */ + m = size; + break; + } + } + /* step 3: deposit bit plane from x */ for (i = 0; x; i++, x >>= 1) data[i] += (UInt)(x & 1u) << k; } +#if ZFP_ROUNDING_MODE == ZFP_ROUND_LAST + /* bias values to achieve proper rounding */ + _t1(inv_round, UInt)(data, size, m, intprec - k); +#endif + *stream = s; return maxbits - bits; } @@ -90,7 +125,7 @@ _t1(decode_many_ints, UInt)(bitstream* restrict_ stream, uint maxbits, uint maxp { /* make a copy of bit stream to avoid aliasing */ bitstream s = *stream; - uint intprec = CHAR_BIT * (uint)sizeof(UInt); + uint intprec = (uint)(CHAR_BIT * sizeof(UInt)); uint kmin = intprec > maxprec ? 
intprec - maxprec : 0;
   uint bits = maxbits;
   uint i, k, m, n;
@@ -100,34 +135,145 @@ _t1(decode_many_ints, UInt)(bitstream* restrict_ stream, uint maxbits, uint maxp
     data[i] = 0;
 
   /* decode one bit plane at a time from MSB to LSB */
-  for (k = intprec, n = 0; bits && k-- > kmin;) {
-    /* decode first n bits of bit plane #k */
+  for (k = intprec, m = n = 0; bits && (m = 0, k-- > kmin);) {
+    /* step 1: decode first n bits of bit plane #k */
     m = MIN(n, bits);
     bits -= m;
     for (i = 0; i < m; i++)
       if (stream_read_bit(&s))
         data[i] += (UInt)1 << k;
-    /* unary run-length decode remainder of bit plane */
-    for (; n < size && bits && (--bits, stream_read_bit(&s)); data[n] += (UInt)1 << k, n++)
-      for (; n < size - 1 && bits && (--bits, !stream_read_bit(&s)); n++)
-        ;
+    /* step 2: unary run-length decode remainder of bit plane */
+    for (; bits && n < size; n++, m = n) {
+      bits--;
+      if (stream_read_bit(&s)) {
+        /* positive group test; scan for next one-bit */
+        for (; bits && n < size - 1; n++) {
+          bits--;
+          if (stream_read_bit(&s))
+            break;
+        }
+        /* set bit and continue decoding bit plane */
+        data[n] += (UInt)1 << k;
+      }
+      else {
+        /* negative group test; done with bit plane */
+        m = size;
+        break;
+      }
+    }
   }
 
+#if ZFP_ROUNDING_MODE == ZFP_ROUND_LAST
+  /* bias values to achieve proper rounding */
+  _t1(inv_round, UInt)(data, size, m, intprec - k);
+#endif
+
   *stream = s;
   return maxbits - bits;
 }
 
+/* decompress sequence of size <= 64 unsigned integers with no rate constraint */
+static uint
+_t1(decode_few_ints_prec, UInt)(bitstream* restrict_ stream, uint maxprec, UInt* restrict_ data, uint size)
+{
+  /* make a copy of bit stream to avoid aliasing */
+  bitstream s = *stream;
+  bitstream_offset offset = stream_rtell(&s);
+  uint intprec = (uint)(CHAR_BIT * sizeof(UInt));
+  uint kmin = intprec > maxprec ? intprec - maxprec : 0;
+  uint i, k, n;
+
+  /* initialize data array to all zeros */
+  for (i = 0; i < size; i++)
+    data[i] = 0;
+
+  /* decode one bit plane at a time from MSB to LSB */
+  for (k = intprec, n = 0; k-- > kmin;) {
+    /* step 1: decode first n bits of bit plane #k */
+    uint64 x = stream_read_bits(&s, n);
+    /* step 2: unary run-length decode remainder of bit plane */
+    for (; n < size && stream_read_bit(&s); x += (uint64)1 << n, n++)
+      for (; n < size - 1 && !stream_read_bit(&s); n++)
+        ;
+    /* step 3: deposit bit plane from x */
+    for (i = 0; x; i++, x >>= 1)
+      data[i] += (UInt)(x & 1u) << k;
+  }
+
+#if ZFP_ROUNDING_MODE == ZFP_ROUND_LAST
+  /* bias values to achieve proper rounding */
+  _t1(inv_round, UInt)(data, size, 0, intprec - k);
+#endif
+
+  *stream = s;
+  return (uint)(stream_rtell(&s) - offset);
+}
+
+/* decompress sequence of size > 64 unsigned integers with no rate constraint */
+static uint
+_t1(decode_many_ints_prec, UInt)(bitstream* restrict_ stream, uint maxprec, UInt* restrict_ data, uint size)
+{
+  /* make a copy of bit stream to avoid aliasing */
+  bitstream s = *stream;
+  bitstream_offset offset = stream_rtell(&s);
+  uint intprec = (uint)(CHAR_BIT * sizeof(UInt));
+  uint kmin = intprec > maxprec ? intprec - maxprec : 0;
+  uint i, k, n;
+
+  /* initialize data array to all zeros */
+  for (i = 0; i < size; i++)
+    data[i] = 0;
+
+  /* decode one bit plane at a time from MSB to LSB */
+  for (k = intprec, n = 0; k-- > kmin;) {
+    /* step 1: decode first n bits of bit plane #k */
+    for (i = 0; i < n; i++)
+      if (stream_read_bit(&s))
+        data[i] += (UInt)1 << k;
+    /* step 2: unary run-length decode remainder of bit plane */
+    for (; n < size && stream_read_bit(&s); data[n] += (UInt)1 << k, n++)
+      for (; n < size - 1 && !stream_read_bit(&s); n++)
+        ;
+  }
+
+#if ZFP_ROUNDING_MODE == ZFP_ROUND_LAST
+  /* bias values to achieve proper rounding */
+  _t1(inv_round, UInt)(data, size, 0, intprec - k);
+#endif
+
+  *stream = s;
+  return (uint)(stream_rtell(&s) - offset);
+}
+
+/* decompress sequence of size unsigned integers */
+static uint
+_t1(decode_ints, UInt)(bitstream* restrict_ stream, uint maxbits, uint maxprec, UInt* restrict_ data, uint size)
+{
+  /* use fastest available decoder implementation */
+  if (with_maxbits(maxbits, maxprec, size)) {
+    /* rate constrained path: decode partial bit planes */
+    if (size <= 64)
+      return _t1(decode_few_ints, UInt)(stream, maxbits, maxprec, data, size); /* 1D, 2D, 3D blocks */
+    else
+      return _t1(decode_many_ints, UInt)(stream, maxbits, maxprec, data, size); /* 4D blocks */
+  }
+  else {
+    /* variable-rate path: decode whole bit planes */
+    if (size <= 64)
+      return _t1(decode_few_ints_prec, UInt)(stream, maxprec, data, size); /* 1D, 2D, 3D blocks */
+    else
+      return _t1(decode_many_ints_prec, UInt)(stream, maxprec, data, size); /* 4D blocks */
+  }
+}
+
 /* decode block of integers */
 static uint
-_t2(decode_block, Int, DIMS)(bitstream* stream, int minbits, int maxbits, int maxprec, Int* iblock)
+_t2(decode_block, Int, DIMS)(bitstream* stream, uint minbits, uint maxbits, uint maxprec, Int* iblock)
 {
-  int bits;
+  uint bits;
   cache_align_(UInt ublock[BLOCK_SIZE]);
 
   /* decode integer coefficients */
-  if (BLOCK_SIZE <= 64)
-    bits = _t1(decode_ints, UInt)(stream, maxbits, maxprec, ublock, BLOCK_SIZE);
-  else
-    bits = _t1(decode_many_ints, UInt)(stream, maxbits, maxprec, ublock, BLOCK_SIZE);
+  bits = _t1(decode_ints, UInt)(stream, maxbits, maxprec, ublock, BLOCK_SIZE);
 
   /* read at least minbits bits */
   if (bits < minbits) {
     stream_skip(stream, minbits - bits);
diff --git a/src/template/decode1.c b/src/template/decode1.c
index 68ee0793..76444d86 100644
--- a/src/template/decode1.c
+++ b/src/template/decode1.c
@@ -2,7 +2,7 @@
 
 /* scatter 4-value block to strided array */
 static void
-_t2(scatter, Scalar, 1)(const Scalar* q, Scalar* p, int sx)
+_t2(scatter, Scalar, 1)(const Scalar* q, Scalar* p, ptrdiff_t sx)
 {
   uint x;
   for (x = 0; x < 4; x++, p += sx)
@@ -11,11 +11,11 @@ _t2(scatter, Scalar, 1)(const Scalar* q, Scalar* p, int sx)
 
 /* scatter nx-value block to strided array */
 static void
-_t2(scatter_partial, Scalar, 1)(const Scalar* q, Scalar* p, uint nx, int sx)
+_t2(scatter_partial, Scalar, 1)(const Scalar* q, Scalar* p, size_t nx, ptrdiff_t sx)
 {
-  uint x;
+  size_t x;
   for (x = 0; x < nx; x++, p += sx)
-    *p = *q++;
+    *p = *q++;
 }
 
 /* inverse decorrelating 1D transform */
@@ -28,26 +28,26 @@ _t2(inv_xform, Int, 1)(Int* p)
 
 /* public functions -------------------------------------------------------- */
 
-/* decode 4-value floating-point block and store at p using stride sx */
-uint
-_t2(zfp_decode_block_strided, Scalar, 1)(zfp_stream* stream, Scalar* p, int sx)
+/* decode 4-value block and store at p using stride sx */
+size_t
+_t2(zfp_decode_block_strided, Scalar, 1)(zfp_stream* stream, Scalar* p, ptrdiff_t sx)
 {
   /* decode contiguous block */
-  cache_align_(Scalar fblock[4]);
-  uint bits = _t2(zfp_decode_block, Scalar, 1)(stream, fblock);
+  cache_align_(Scalar block[4]);
+  size_t bits = _t2(zfp_decode_block, Scalar, 1)(stream, block);
   /* scatter block to strided array */
-  _t2(scatter, Scalar, 1)(fblock, p, sx);
+  _t2(scatter, Scalar, 1)(block, p, sx);
   return bits;
 }
 
-/* decode nx-value floating-point block and store at p using stride sx */
-uint
-_t2(zfp_decode_partial_block_strided, Scalar, 1)(zfp_stream* stream, Scalar* p, uint nx, int sx)
+/* decode nx-value block and store at p using stride sx */
+size_t
+_t2(zfp_decode_partial_block_strided, Scalar, 1)(zfp_stream* stream, Scalar* p, size_t nx, ptrdiff_t sx)
 {
   /* decode contiguous block */
-  cache_align_(Scalar fblock[4]);
-  uint bits = _t2(zfp_decode_block, Scalar, 1)(stream, fblock);
+  cache_align_(Scalar block[4]);
+  size_t bits = _t2(zfp_decode_block, Scalar, 1)(stream, block);
   /* scatter block to strided array */
-  _t2(scatter_partial, Scalar, 1)(fblock, p, nx, sx);
+  _t2(scatter_partial, Scalar, 1)(block, p, nx, sx);
   return bits;
 }
diff --git a/src/template/decode2.c b/src/template/decode2.c
index 23e1892c..4d3d5bc2 100644
--- a/src/template/decode2.c
+++ b/src/template/decode2.c
@@ -2,7 +2,7 @@
 
 /* scatter 4*4 block to strided array */
 static void
-_t2(scatter, Scalar, 2)(const Scalar* q, Scalar* p, int sx, int sy)
+_t2(scatter, Scalar, 2)(const Scalar* q, Scalar* p, ptrdiff_t sx, ptrdiff_t sy)
 {
   uint x, y;
   for (y = 0; y < 4; y++, p += sy - 4 * sx)
@@ -12,9 +12,9 @@ _t2(scatter, Scalar, 2)(const Scalar* q, Scalar* p, int sx, int sy)
 
 /* scatter nx*ny block to strided array */
 static void
-_t2(scatter_partial, Scalar, 2)(const Scalar* q, Scalar* p, uint nx, uint ny, int sx, int sy)
+_t2(scatter_partial, Scalar, 2)(const Scalar* q, Scalar* p, size_t nx, size_t ny, ptrdiff_t sx, ptrdiff_t sy)
 {
-  uint x, y;
+  size_t x, y;
   for (y = 0; y < ny; y++, p += sy - (ptrdiff_t)nx * sx, q += 4 - nx)
     for (x = 0; x < nx; x++, p += sx, q++)
       *p = *q;
@@ -35,26 +35,26 @@ _t2(inv_xform, Int, 2)(Int* p)
 
 /* public functions -------------------------------------------------------- */
 
-/* decode 4*4 floating-point block and store at p using strides (sx, sy) */
-uint
-_t2(zfp_decode_block_strided, Scalar, 2)(zfp_stream* stream, Scalar* p, int sx, int sy)
+/* decode 4*4 block and store at p using strides (sx, sy) */
+size_t
+_t2(zfp_decode_block_strided, Scalar, 2)(zfp_stream* stream, Scalar* p, ptrdiff_t sx, ptrdiff_t sy)
 {
   /* decode contiguous block */
-  cache_align_(Scalar fblock[16]);
-  uint bits = _t2(zfp_decode_block, Scalar, 2)(stream, fblock);
+  cache_align_(Scalar block[16]);
+  size_t bits = _t2(zfp_decode_block, Scalar, 2)(stream, block);
   /* scatter block to strided array */
-  _t2(scatter, Scalar, 2)(fblock, p, sx, sy);
+  _t2(scatter, Scalar, 2)(block, p, sx, sy);
   return bits;
 }
 
-/* decode nx*ny floating-point block and store at p using strides (sx, sy) */
-uint
-_t2(zfp_decode_partial_block_strided, Scalar, 2)(zfp_stream* stream, Scalar* p, uint nx, uint ny, int sx, int sy)
+/* decode nx*ny block and store at p using strides (sx, sy) */
+size_t
+_t2(zfp_decode_partial_block_strided, Scalar, 2)(zfp_stream* stream, Scalar* p, size_t nx, size_t ny, ptrdiff_t sx, ptrdiff_t sy)
 {
   /* decode contiguous block */
-  cache_align_(Scalar fblock[16]);
-  uint bits = _t2(zfp_decode_block, Scalar, 2)(stream, fblock);
+  cache_align_(Scalar block[16]);
+  size_t bits = _t2(zfp_decode_block, Scalar, 2)(stream, block);
   /* scatter block to strided array */
-  _t2(scatter_partial, Scalar, 2)(fblock, p, nx, ny, sx, sy);
+  _t2(scatter_partial, Scalar, 2)(block, p, nx, ny, sx, sy);
   return bits;
 }
diff --git a/src/template/decode3.c b/src/template/decode3.c
index b4841182..c9232fcd 100644
--- a/src/template/decode3.c
+++ b/src/template/decode3.c
@@ -2,7 +2,7 @@
 
 /* scatter 4*4*4 block to strided array */
 static void
-_t2(scatter, Scalar, 3)(const Scalar* q, Scalar* p, int sx, int sy, int sz)
+_t2(scatter, Scalar, 3)(const Scalar* q, Scalar* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz)
 {
   uint x, y, z;
   for (z = 0; z < 4; z++, p += sz - 4 * sy)
@@ -13,9 +13,9 @@ _t2(scatter, Scalar, 3)(const Scalar* q, Scalar* p, int sx, int sy, int sz)
 
 /* scatter nx*ny*nz block to strided array */
 static void
-_t2(scatter_partial, Scalar, 3)(const Scalar* q, Scalar* p, uint nx, uint ny, uint nz, int sx, int sy, int sz)
+_t2(scatter_partial, Scalar, 3)(const Scalar* q, Scalar* p, size_t nx, size_t ny, size_t nz, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz)
 {
-  uint x, y, z;
+  size_t x, y, z;
   for (z = 0; z < nz; z++, p += sz - (ptrdiff_t)ny * sy, q += 4 * (4 - ny))
     for (y = 0; y < ny; y++, p += sy - (ptrdiff_t)nx * sx, q += 1 * (4 - nx))
       for (x = 0; x < nx; x++, p += sx, q++)
@@ -43,26 +43,26 @@ _t2(inv_xform, Int, 3)(Int* p)
 
 /* public functions -------------------------------------------------------- */
 
-/* decode 4*4*4 floating-point block and store at p using strides (sx, sy, sz) */
-uint
-_t2(zfp_decode_block_strided, Scalar, 3)(zfp_stream* stream, Scalar* p, int sx, int sy, int sz)
+/* decode 4*4*4 block and store at p using strides (sx, sy, sz) */
+size_t
+_t2(zfp_decode_block_strided, Scalar, 3)(zfp_stream* stream, Scalar* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz)
 {
   /* decode contiguous block */
-  cache_align_(Scalar fblock[64]);
-  uint bits = _t2(zfp_decode_block, Scalar, 3)(stream, fblock);
+  cache_align_(Scalar block[64]);
+  size_t bits = _t2(zfp_decode_block, Scalar, 3)(stream, block);
   /* scatter block to strided array */
-  _t2(scatter, Scalar, 3)(fblock, p, sx, sy, sz);
+  _t2(scatter, Scalar, 3)(block, p, sx, sy, sz);
   return bits;
 }
 
-/* decode nx*ny*nz floating-point block and store at p using strides (sx, sy, sz) */
-uint
-_t2(zfp_decode_partial_block_strided, Scalar, 3)(zfp_stream* stream, Scalar* p, uint nx, uint ny, uint nz, int sx, int sy, int sz)
+/* decode nx*ny*nz block and store at p using strides (sx, sy, sz) */
+size_t
+_t2(zfp_decode_partial_block_strided, Scalar, 3)(zfp_stream* stream, Scalar* p, size_t nx, size_t ny, size_t nz, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz)
 {
   /* decode contiguous block */
-  cache_align_(Scalar fblock[64]);
-  uint bits = _t2(zfp_decode_block, Scalar, 3)(stream, fblock);
+  cache_align_(Scalar block[64]);
+  size_t bits = _t2(zfp_decode_block, Scalar, 3)(stream, block);
   /* scatter block to strided array */
-  _t2(scatter_partial, Scalar, 3)(fblock, p, nx, ny, nz, sx, sy, sz);
+  _t2(scatter_partial, Scalar, 3)(block, p, nx, ny, nz, sx, sy, sz);
   return bits;
 }
diff --git a/src/template/decode4.c b/src/template/decode4.c
index 8d34abfc..3274b429 100644
--- a/src/template/decode4.c
+++ b/src/template/decode4.c
@@ -2,7 +2,7 @@
 
 /* scatter 4*4*4*4 block to strided array */
 static void
-_t2(scatter, Scalar, 4)(const Scalar* q, Scalar* p, int sx, int sy, int sz, int sw)
+_t2(scatter, Scalar, 4)(const Scalar* q, Scalar* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw)
 {
   uint x, y, z, w;
   for (w = 0; w < 4; w++, p += sw - 4 * sz)
@@ -14,9 +14,9 @@ _t2(scatter, Scalar, 4)(const Scalar* q, Scalar* p, int sx, int sy, int sz, int
 
 /* scatter nx*ny*nz*nw block to strided array */
 static void
-_t2(scatter_partial, Scalar, 4)(const Scalar* q, Scalar* p, uint nx, uint ny, uint nz, uint nw, int sx, int sy, int sz, int sw)
+_t2(scatter_partial, Scalar, 4)(const Scalar* q, Scalar* p, size_t nx, size_t ny, size_t nz, size_t nw, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw)
 {
-  uint x, y, z, w;
+  size_t x, y, z, w;
   for (w = 0; w < nw; w++, p += sw - (ptrdiff_t)nz * sz, q += 16 * (4 - nz))
     for (z = 0; z < nz; z++, p += sz - (ptrdiff_t)ny * sy, q += 4 * (4 - ny))
       for (y = 0; y < ny; y++, p += sy - (ptrdiff_t)nx * sx, q += 1 * (4 - nx))
@@ -53,26 +53,26 @@ _t2(inv_xform, Int, 4)(Int* p)
 
 /* public functions -------------------------------------------------------- */
 
-/* decode 4*4*4*4 floating-point block and store at p using strides (sx, sy, sz, sw) */
-uint
-_t2(zfp_decode_block_strided, Scalar, 4)(zfp_stream* stream, Scalar* p, int sx, int sy, int sz, int sw)
+/* decode 4*4*4*4 block and store at p using strides (sx, sy, sz, sw) */
+size_t
+_t2(zfp_decode_block_strided, Scalar, 4)(zfp_stream* stream, Scalar* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw)
 {
   /* decode contiguous block */
-  cache_align_(Scalar fblock[256]);
-  uint bits = _t2(zfp_decode_block, Scalar, 4)(stream, fblock);
+  cache_align_(Scalar block[256]);
+  size_t bits = _t2(zfp_decode_block, Scalar, 4)(stream, block);
   /* scatter block to strided array */
-  _t2(scatter, Scalar, 4)(fblock, p, sx, sy, sz, sw);
+  _t2(scatter, Scalar, 4)(block, p, sx, sy, sz, sw);
   return bits;
 }
 
-/* decode nx*ny*nz*nw floating-point block and store at p using strides (sx, sy, sz, sw) */
-uint
-_t2(zfp_decode_partial_block_strided, Scalar, 4)(zfp_stream* stream, Scalar* p, uint nx, uint ny, uint nz, uint nw, int sx, int sy, int sz, int sw)
+/* decode nx*ny*nz*nw block and store at p using strides (sx, sy, sz, sw) */
+size_t
+_t2(zfp_decode_partial_block_strided, Scalar, 4)(zfp_stream* stream, Scalar* p, size_t nx, size_t ny, size_t nz, size_t nw, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw)
 {
   /* decode contiguous block */
-  cache_align_(Scalar fblock[256]);
-  uint bits = _t2(zfp_decode_block, Scalar, 4)(stream, fblock);
+  cache_align_(Scalar block[256]);
+  size_t bits = _t2(zfp_decode_block, Scalar, 4)(stream, block);
   /* scatter block to strided array */
-  _t2(scatter_partial, Scalar, 4)(fblock, p, nx, ny, nz, nw, sx, sy, sz, sw);
+  _t2(scatter_partial, Scalar, 4)(block, p, nx, ny, nz, nw, sx, sy, sz, sw);
   return bits;
 }
diff --git a/src/template/decodef.c b/src/template/decodef.c
index 5df15638..e8fa40c8 100644
--- a/src/template/decodef.c
+++ b/src/template/decodef.c
@@ -10,12 +10,14 @@ _t2(decode_block, Scalar, DIMS)(zfp_stream* zfp, Scalar* fblock)
   /* test if block has nonzero values */
   if (stream_read_bit(zfp->stream)) {
     cache_align_(Int iblock[BLOCK_SIZE]);
+    uint maxprec;
+    int emax;
     /* decode common exponent */
     bits += EBITS;
-    int emax = (int)stream_read_bits(zfp->stream, EBITS) - EBIAS;
-    int maxprec = precision(emax, zfp->maxprec, zfp->minexp, DIMS);
+    emax = (int)stream_read_bits(zfp->stream, EBITS) - EBIAS;
+    maxprec = precision(emax, zfp->maxprec, zfp->minexp, DIMS);
     /* decode integer block */
-    bits += _t2(decode_block, Int, DIMS)(zfp->stream, zfp->minbits - bits, zfp->maxbits - bits, maxprec, iblock);
+    bits += _t2(decode_block, Int, DIMS)(zfp->stream, zfp->minbits - MIN(bits, zfp->minbits), zfp->maxbits - bits, maxprec, iblock);
     /* perform inverse block-floating-point transform */
     _t1(inv_cast, Scalar)(iblock, fblock, BLOCK_SIZE, emax);
   }
@@ -35,7 +37,7 @@ _t2(decode_block, Scalar, DIMS)(zfp_stream* zfp, Scalar* fblock)
 /* public functions -------------------------------------------------------- */
 
 /* decode contiguous floating-point block */
-uint
+size_t
 _t2(zfp_decode_block, Scalar, DIMS)(zfp_stream* zfp, Scalar* fblock)
 {
   return REVERSIBLE(zfp) ? _t2(rev_decode_block, Scalar, DIMS)(zfp, fblock) : _t2(decode_block, Scalar, DIMS)(zfp, fblock);
diff --git a/src/template/decodei.c b/src/template/decodei.c
index 12f62a98..3cea9651 100644
--- a/src/template/decodei.c
+++ b/src/template/decodei.c
@@ -1,9 +1,9 @@
-static uint _t2(rev_decode_block, Int, DIMS)(bitstream* stream, int minbits, int maxbits, Int* iblock);
+static uint _t2(rev_decode_block, Int, DIMS)(bitstream* stream, uint minbits, uint maxbits, Int* iblock);
 
 /* public functions -------------------------------------------------------- */
 
 /* decode contiguous integer block */
-uint
+size_t
 _t2(zfp_decode_block, Int, DIMS)(zfp_stream* zfp, Int* iblock)
 {
   return REVERSIBLE(zfp) ?
     _t2(rev_decode_block, Int, DIMS)(zfp->stream, zfp->minbits, zfp->maxbits, iblock) :
     _t2(decode_block, Int, DIMS)(zfp->stream, zfp->minbits, zfp->maxbits, zfp->maxprec, iblock);
diff --git a/src/template/decompress.c b/src/template/decompress.c
index 22313f81..72610773 100644
--- a/src/template/decompress.c
+++ b/src/template/decompress.c
@@ -3,9 +3,9 @@
 static void
 _t2(decompress, Scalar, 1)(zfp_stream* stream, zfp_field* field)
 {
   Scalar* data = (Scalar*)field->data;
-  uint nx = field->nx;
-  uint mx = nx & ~3u;
-  uint x;
+  size_t nx = field->nx;
+  size_t mx = nx & ~3u;
+  size_t x;
 
   /* decompress array one block of 4 values at a time */
   for (x = 0; x < mx; x += 4, data += 4)
@@ -19,9 +19,9 @@
 static void
 _t2(decompress_strided, Scalar, 1)(zfp_stream* stream, zfp_field* field)
 {
   Scalar* data = field->data;
-  uint nx = field->nx;
-  int sx = field->sx ? field->sx : 1;
-  uint x;
+  size_t nx = field->nx;
+  ptrdiff_t sx = field->sx ? field->sx : 1;
+  size_t x;
 
   /* decompress array one block of 4 values at a time */
   for (x = 0; x < nx; x += 4) {
@@ -38,11 +38,11 @@
 static void
 _t2(decompress_strided, Scalar, 2)(zfp_stream* stream, zfp_field* field)
 {
   Scalar* data = (Scalar*)field->data;
-  uint nx = field->nx;
-  uint ny = field->ny;
-  int sx = field->sx ? field->sx : 1;
-  int sy = field->sy ? field->sy : (int)nx;
-  uint x, y;
+  size_t nx = field->nx;
+  size_t ny = field->ny;
+  ptrdiff_t sx = field->sx ? field->sx : 1;
+  ptrdiff_t sy = field->sy ? field->sy : (ptrdiff_t)nx;
+  size_t x, y;
 
   /* decompress array one block of 4x4 values at a time */
   for (y = 0; y < ny; y += 4)
@@ -60,13 +60,13 @@
 static void
 _t2(decompress_strided, Scalar, 3)(zfp_stream* stream, zfp_field* field)
 {
   Scalar* data = (Scalar*)field->data;
-  uint nx = field->nx;
-  uint ny = field->ny;
-  uint nz = field->nz;
-  int sx = field->sx ? field->sx : 1;
-  int sy = field->sy ? field->sy : (int)nx;
-  int sz = field->sz ? field->sz : (int)(nx * ny);
-  uint x, y, z;
+  size_t nx = field->nx;
+  size_t ny = field->ny;
+  size_t nz = field->nz;
+  ptrdiff_t sx = field->sx ? field->sx : 1;
+  ptrdiff_t sy = field->sy ? field->sy : (ptrdiff_t)nx;
+  ptrdiff_t sz = field->sz ? field->sz : (ptrdiff_t)(nx * ny);
+  size_t x, y, z;
 
   /* decompress array one block of 4x4x4 values at a time */
   for (z = 0; z < nz; z += 4)
@@ -85,15 +85,15 @@
 static void
 _t2(decompress_strided, Scalar, 4)(zfp_stream* stream, zfp_field* field)
 {
   Scalar* data = field->data;
-  uint nx = field->nx;
-  uint ny = field->ny;
-  uint nz = field->nz;
-  uint nw = field->nw;
-  int sx = field->sx ? field->sx : 1;
-  int sy = field->sy ? field->sy : (int)nx;
-  int sz = field->sz ? field->sz : (int)(nx * ny);
-  int sw = field->sw ? field->sw : (int)(nx * ny * nz);
-  uint x, y, z, w;
+  size_t nx = field->nx;
+  size_t ny = field->ny;
+  size_t nz = field->nz;
+  size_t nw = field->nw;
+  ptrdiff_t sx = field->sx ? field->sx : 1;
+  ptrdiff_t sy = field->sy ? field->sy : (ptrdiff_t)nx;
+  ptrdiff_t sz = field->sz ? field->sz : (ptrdiff_t)(nx * ny);
+  ptrdiff_t sw = field->sw ? field->sw : (ptrdiff_t)(nx * ny * nz);
+  size_t x, y, z, w;
 
   /* decompress array one block of 4x4x4x4 values at a time */
   for (w = 0; w < nw; w += 4)
diff --git a/src/template/encode.c b/src/template/encode.c
index bba18f60..c085a4ab 100644
--- a/src/template/encode.c
+++ b/src/template/encode.c
@@ -6,7 +6,7 @@ static void _t2(fwd_xform, Int, DIMS)(Int* p);
 
 /* pad partial block of width n <= 4 and stride s */
 static void
-_t1(pad_block, Scalar)(Scalar* p, uint n, uint s)
+_t1(pad_block, Scalar)(Scalar* p, size_t n, ptrdiff_t s)
 {
   switch (n) {
     case 0:
@@ -28,7 +28,7 @@ _t1(pad_block, Scalar)(Scalar* p, uint n, uint s)
 
 /* forward lifting transform of 4-vector */
 static void
-_t1(fwd_lift, Int)(Int* p, uint s)
+_t1(fwd_lift, Int)(Int* p, ptrdiff_t s)
 {
   Int x, y, z, w;
   x = *p; p += s;
@@ -55,6 +55,22 @@ _t1(fwd_lift, Int)(Int* p, uint s)
   p -= s; *p = x;
 }
 
+#if ZFP_ROUNDING_MODE == ZFP_ROUND_FIRST
+/* bias values such that truncation is equivalent to round to nearest */
+static void
+_t1(fwd_round, Int)(Int* iblock, uint n, uint maxprec)
+{
+  /* add or subtract 1/6 ulp to unbias errors */
+  if (maxprec < (uint)(CHAR_BIT * sizeof(Int))) {
+    Int bias = (NBMASK >> 2) >> maxprec;
+    if (maxprec & 1u)
+      do *iblock++ += bias; while (--n);
+    else
+      do *iblock++ -= bias; while (--n);
+  }
+}
+#endif
+
 /* map two's complement signed integer to negabinary unsigned integer */
 static UInt
 _t1(int2uint, Int)(Int x)
@@ -71,13 +87,13 @@ _t1(fwd_order, Int)(UInt* ublock, const Int* iblock, const uchar* perm, uint n)
   while (--n);
 }
 
-/* compress sequence of size unsigned integers */
+/* compress sequence of size <= 64 unsigned integers */
 static uint
-_t1(encode_ints, UInt)(bitstream* restrict_ stream, uint maxbits, uint maxprec, const UInt* restrict_ data, uint size)
+_t1(encode_few_ints, UInt)(bitstream* restrict_ stream, uint maxbits, uint maxprec, const UInt* restrict_ data, uint size)
 {
   /* make a copy of bit stream to avoid aliasing */
   bitstream s = *stream;
-  uint intprec = CHAR_BIT * (uint)sizeof(UInt);
+  uint intprec = (uint)(CHAR_BIT * sizeof(UInt));
   uint kmin = intprec > maxprec ? intprec - maxprec : 0;
   uint bits = maxbits;
   uint i, k, m, n;
@@ -94,9 +110,21 @@ _t1(encode_ints, UInt)(bitstream* restrict_ stream, uint maxbits, uint maxprec,
     bits -= m;
     x = stream_write_bits(&s, x, m);
     /* step 3: unary run-length encode remainder of bit plane */
-    for (; n < size && bits && (bits--, stream_write_bit(&s, !!x)); x >>= 1, n++)
-      for (; n < size - 1 && bits && (bits--, !stream_write_bit(&s, x & 1u)); x >>= 1, n++)
-        ;
+    for (; bits && n < size; x >>= 1, n++) {
+      bits--;
+      if (stream_write_bit(&s, !!x)) {
+        /* positive group test (x != 0); scan for one-bit */
+        for (; bits && n < size - 1; x >>= 1, n++) {
+          bits--;
+          if (stream_write_bit(&s, x & 1u))
+            break;
+        }
+      }
+      else {
+        /* negative group test (x == 0); done with bit plane */
+        break;
+      }
+    }
   }
 
   *stream = s;
@@ -109,7 +137,7 @@ _t1(encode_many_ints, UInt)(bitstream* restrict_ stream, uint maxbits, uint maxp
 {
   /* make a copy of bit stream to avoid aliasing */
   bitstream s = *stream;
-  uint intprec = CHAR_BIT * (uint)sizeof(UInt);
+  uint intprec = (uint)(CHAR_BIT * sizeof(UInt));
   uint kmin = intprec > maxprec ? intprec - maxprec : 0;
   uint bits = maxbits;
   uint i, k, m, n, c;
@@ -126,30 +154,123 @@ _t1(encode_many_ints, UInt)(bitstream* restrict_ stream, uint maxbits, uint maxp
     for (i = m; i < size; i++)
      c += (data[i] >> k) & 1u;
     /* step 3: unary run-length encode remainder of bit plane */
-    for (; n < size && bits && (--bits, stream_write_bit(&s, !!c)); c--, n++)
-      for (; n < size - 1 && bits && (--bits, !stream_write_bit(&s, (data[n] >> k) & 1u)); n++)
-        ;
+    for (; bits && n < size; n++) {
+      bits--;
+      if (stream_write_bit(&s, !!c)) {
+        /* positive group test (c > 0); scan for one-bit */
+        for (c--; bits && n < size - 1; n++) {
+          bits--;
+          if (stream_write_bit(&s, (data[n] >> k) & 1u))
+            break;
+        }
+      }
+      else {
+        /* negative group test (c == 0); done with bit plane */
+        break;
+      }
+    }
   }
 
   *stream = s;
   return maxbits - bits;
 }
 
+/* compress sequence of size <= 64 unsigned integers with no rate constraint */
+static uint
+_t1(encode_few_ints_prec, UInt)(bitstream* restrict_ stream, uint maxprec, const UInt* restrict_ data, uint size)
+{
+  /* make a copy of bit stream to avoid aliasing */
+  bitstream s = *stream;
+  bitstream_offset offset = stream_wtell(&s);
+  uint intprec = (uint)(CHAR_BIT * sizeof(UInt));
+  uint kmin = intprec > maxprec ? intprec - maxprec : 0;
+  uint i, k, n;
+
+  /* encode one bit plane at a time from MSB to LSB */
+  for (k = intprec, n = 0; k-- > kmin;) {
+    /* step 1: extract bit plane #k to x */
+    uint64 x = 0;
+    for (i = 0; i < size; i++)
+      x += (uint64)((data[i] >> k) & 1u) << i;
+    /* step 2: encode first n bits of bit plane */
+    x = stream_write_bits(&s, x, n);
+    /* step 3: unary run-length encode remainder of bit plane */
+    for (; n < size && stream_write_bit(&s, !!x); x >>= 1, n++)
+      for (; n < size - 1 && !stream_write_bit(&s, x & 1u); x >>= 1, n++)
+        ;
+  }
+
+  *stream = s;
+  return (uint)(stream_wtell(&s) - offset);
+}
+
+/* compress sequence of size > 64 unsigned integers with no rate constraint */
+static uint
+_t1(encode_many_ints_prec, UInt)(bitstream* restrict_ stream, uint maxprec, const UInt* restrict_ data, uint size)
+{
+  /* make a copy of bit stream to avoid aliasing */
+  bitstream s = *stream;
+  bitstream_offset offset = stream_wtell(&s);
+  uint intprec = (uint)(CHAR_BIT * sizeof(UInt));
+  uint kmin = intprec > maxprec ? intprec - maxprec : 0;
+  uint i, k, n, c;
+
+  /* encode one bit plane at a time from MSB to LSB */
+  for (k = intprec, n = 0; k-- > kmin;) {
+    /* step 1: encode first n bits of bit plane #k */
+    for (i = 0; i < n; i++)
+      stream_write_bit(&s, (data[i] >> k) & 1u);
+    /* step 2: count remaining one-bits in bit plane */
+    c = 0;
+    for (i = n; i < size; i++)
+      c += (data[i] >> k) & 1u;
+    /* step 3: unary run-length encode remainder of bit plane */
+    for (; n < size && stream_write_bit(&s, !!c); n++)
+      for (c--; n < size - 1 && !stream_write_bit(&s, (data[n] >> k) & 1u); n++)
+        ;
+  }
+
+  *stream = s;
+  return (uint)(stream_wtell(&s) - offset);
+}
+
+/* compress sequence of size unsigned integers */
+static uint
+_t1(encode_ints, UInt)(bitstream* restrict_ stream, uint maxbits, uint maxprec, const UInt* restrict_ data, uint size)
+{
+  /* use fastest available encoder implementation */
+  if (with_maxbits(maxbits, maxprec, size)) {
+    /* rate constrained path: encode partial bit planes */
+    if (size <= 64)
+      return _t1(encode_few_ints, UInt)(stream, maxbits, maxprec, data, size); /* 1D, 2D, 3D blocks */
+    else
+      return _t1(encode_many_ints, UInt)(stream, maxbits, maxprec, data, size); /* 4D blocks */
+  }
+  else {
+    /* variable-rate path: encode whole bit planes */
+    if (size <= 64)
+      return _t1(encode_few_ints_prec, UInt)(stream, maxprec, data, size); /* 1D, 2D, 3D blocks */
+    else
+      return _t1(encode_many_ints_prec, UInt)(stream, maxprec, data, size); /* 4D blocks */
+  }
+}
+
 /* encode block of integers */
 static uint
-_t2(encode_block, Int, DIMS)(bitstream* stream, int minbits, int maxbits, int maxprec, Int* iblock)
+_t2(encode_block, Int, DIMS)(bitstream* stream, uint minbits, uint maxbits, uint maxprec, Int* iblock)
 {
-  int bits;
+  uint bits;
   cache_align_(UInt ublock[BLOCK_SIZE]);
 
   /* perform decorrelating transform */
   _t2(fwd_xform, Int, DIMS)(iblock);
+#if ZFP_ROUNDING_MODE == ZFP_ROUND_FIRST
+  /* bias values to achieve proper rounding */
+  _t1(fwd_round, Int)(iblock, BLOCK_SIZE, maxprec);
+#endif
 
   /* reorder signed coefficients and convert to unsigned integer */
   _t1(fwd_order, Int)(ublock, iblock, PERM, BLOCK_SIZE);
 
   /* encode integer coefficients */
-  if (BLOCK_SIZE <= 64)
-    bits = _t1(encode_ints, UInt)(stream, maxbits, maxprec, ublock, BLOCK_SIZE);
-  else
-    bits = _t1(encode_many_ints, UInt)(stream, maxbits, maxprec, ublock, BLOCK_SIZE);
+  bits = _t1(encode_ints, UInt)(stream, maxbits, maxprec, ublock, BLOCK_SIZE);
 
   /* write at least minbits bits by padding with zeros */
   if (bits < minbits) {
     stream_pad(stream, minbits - bits);
diff --git a/src/template/encode1.c b/src/template/encode1.c
index c6184929..ff9d5c04 100644
--- a/src/template/encode1.c
+++ b/src/template/encode1.c
@@ -2,7 +2,7 @@
 
 /* gather 4-value block from strided array */
 static void
-_t2(gather, Scalar, 1)(Scalar* q, const Scalar* p, int sx)
+_t2(gather, Scalar, 1)(Scalar* q, const Scalar* p, ptrdiff_t sx)
 {
   uint x;
   for (x = 0; x < 4; x++, p += sx)
@@ -11,9 +11,9 @@ _t2(gather, Scalar, 1)(Scalar* q, const Scalar* p, int sx)
 
 /* gather nx-value block from strided array */
 static void
-_t2(gather_partial, Scalar, 1)(Scalar* q, const Scalar* p, uint nx, int sx)
+_t2(gather_partial, Scalar, 1)(Scalar* q, const Scalar* p, size_t nx, ptrdiff_t sx)
 {
-  uint x;
+  size_t x;
   for (x = 0; x < nx; x++, p += sx)
     q[x] = *p;
   _t1(pad_block, Scalar)(q, nx, 1);
@@ -29,24 +29,24 @@ _t2(fwd_xform, Int, 1)(Int* p)
 
 /* public functions -------------------------------------------------------- */
 
-/* encode 4-value floating-point block stored at p using stride sx */
-uint
-_t2(zfp_encode_block_strided, Scalar, 1)(zfp_stream* stream, const Scalar* p, int sx)
+/* encode 4-value block stored at p using stride sx */
+size_t
+_t2(zfp_encode_block_strided, Scalar, 1)(zfp_stream* stream, const Scalar* p, ptrdiff_t sx)
 {
   /* gather block from strided array */
-  cache_align_(Scalar fblock[4]);
-  _t2(gather, Scalar, 1)(fblock, p, sx);
-  /* encode floating-point block */
-  return _t2(zfp_encode_block, Scalar, 1)(stream, fblock);
+  cache_align_(Scalar block[4]);
+  _t2(gather, Scalar, 1)(block, p, sx);
+  /* encode block */
+  return _t2(zfp_encode_block, Scalar, 1)(stream, block);
 }
 
-/* encode nx-value floating-point block stored at p using stride sx */
-uint
-_t2(zfp_encode_partial_block_strided, Scalar, 1)(zfp_stream* stream, const Scalar* p, uint nx, int sx)
+/* encode nx-value block stored at p using stride sx */
+size_t
+_t2(zfp_encode_partial_block_strided, Scalar, 1)(zfp_stream* stream, const Scalar* p, size_t nx, ptrdiff_t sx)
 {
   /* gather block from strided array */
-  cache_align_(Scalar fblock[4]);
-  _t2(gather_partial, Scalar, 1)(fblock, p, nx, sx);
-  /* encode floating-point block */
-  return _t2(zfp_encode_block, Scalar, 1)(stream, fblock);
+  cache_align_(Scalar block[4]);
+  _t2(gather_partial, Scalar, 1)(block, p, nx, sx);
+  /* encode block */
+  return _t2(zfp_encode_block, Scalar, 1)(stream, block);
 }
diff --git a/src/template/encode2.c b/src/template/encode2.c
index 4bec256a..b77b4394 100644
--- a/src/template/encode2.c
+++ b/src/template/encode2.c
@@ -2,7 +2,7 @@
 
 /* gather 4*4 block from strided array */
 static void
-_t2(gather, Scalar, 2)(Scalar* q, const Scalar* p, int sx, int sy)
+_t2(gather, Scalar, 2)(Scalar* q, const Scalar* p, ptrdiff_t sx, ptrdiff_t sy)
 {
   uint x, y;
   for (y = 0; y < 4; y++, p += sy - 4 * sx)
@@ -12,9 +12,9 @@ _t2(gather, Scalar, 2)(Scalar* q, const Scalar* p, int sx, int sy)
 
 /* gather nx*ny block from strided array */
 static void
-_t2(gather_partial, Scalar, 2)(Scalar* q, const Scalar* p, uint nx, uint ny, int sx, int sy)
+_t2(gather_partial, Scalar, 2)(Scalar* q, const Scalar* p, size_t nx, size_t ny, ptrdiff_t sx, ptrdiff_t sy)
 {
-  uint x, y;
+  size_t x, y;
   for (y = 0; y < ny; y++, p += sy - (ptrdiff_t)nx * sx) {
     for (x = 0; x < nx; x++, p += sx)
       q[4 * y + x] = *p;
@@ -39,24 +39,24 @@ _t2(fwd_xform, Int, 2)(Int* p)
 
 /* public functions -------------------------------------------------------- */
 
-/* encode 4*4 floating-point block stored at p using strides (sx, sy) */
-uint
-_t2(zfp_encode_block_strided, Scalar, 2)(zfp_stream* stream, const Scalar* p, int sx, int sy)
+/* encode 4*4 block stored at p using strides (sx, sy) */
+size_t
+_t2(zfp_encode_block_strided, Scalar, 2)(zfp_stream* stream, const Scalar* p, ptrdiff_t sx, ptrdiff_t sy)
 {
   /* gather block from strided array */
-  cache_align_(Scalar fblock[16]);
-  _t2(gather, Scalar, 2)(fblock, p, sx, sy);
-  /* encode floating-point block */
-  return _t2(zfp_encode_block, Scalar, 2)(stream, fblock);
+  cache_align_(Scalar block[16]);
+  _t2(gather, Scalar, 2)(block, p, sx, sy);
+  /* encode block */
+  return _t2(zfp_encode_block, Scalar, 2)(stream, block);
 }
 
-/* encode nx*ny floating-point block stored at p using strides (sx, sy) */
-uint
-_t2(zfp_encode_partial_block_strided, Scalar, 2)(zfp_stream* stream, const Scalar* p, uint nx, uint ny, int sx, int sy)
+/* encode nx*ny block stored at p using strides (sx, sy) */
+size_t
+_t2(zfp_encode_partial_block_strided, Scalar, 2)(zfp_stream* stream, const Scalar* p, size_t nx, size_t ny, ptrdiff_t sx, ptrdiff_t sy)
 {
   /* gather block from strided array */
-  cache_align_(Scalar fblock[16]);
-  _t2(gather_partial, Scalar, 2)(fblock, p, nx, ny, sx, sy);
-  /* encode floating-point block */
-  return _t2(zfp_encode_block, Scalar, 2)(stream, fblock);
+  cache_align_(Scalar block[16]);
+  _t2(gather_partial, Scalar, 2)(block, p, nx, ny, sx, sy);
+  /* encode block */
+  return _t2(zfp_encode_block, Scalar, 2)(stream, block);
 }
diff --git a/src/template/encode3.c b/src/template/encode3.c
index a16a8add..3206060d 100644
--- a/src/template/encode3.c
+++ b/src/template/encode3.c
@@ -2,7 +2,7 @@
 
 /* gather 4*4*4 block from strided array */
 static void
-_t2(gather, Scalar, 3)(Scalar* q, const Scalar* p, int sx, int sy, int sz)
+_t2(gather, Scalar, 3)(Scalar* q, const Scalar* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz)
 {
   uint x, y, z;
   for (z = 0; z < 4; z++, p += sz - 4 * sy)
@@ -13,9 +13,9 @@ _t2(gather, Scalar, 3)(Scalar* q, const Scalar* p, int sx, int sy, int sz)
 
 /* gather nx*ny*nz block from strided array */
 static void
-_t2(gather_partial, Scalar, 3)(Scalar* q, const Scalar* p, uint nx, uint ny, uint nz, int sx, int sy, int sz)
+_t2(gather_partial, Scalar, 3)(Scalar* q, const Scalar* p, size_t nx, size_t ny, size_t nz, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz)
 {
-  uint x, y, z;
+  size_t x, y, z;
   for (z = 0; z < nz; z++, p += sz - (ptrdiff_t)ny * sy) {
     for (y = 0; y < ny; y++, p += sy - (ptrdiff_t)nx * sx) {
       for (x = 0; x < nx; x++, p += sx)
@@ -51,24 +51,24 @@ _t2(fwd_xform, Int, 3)(Int* p)
 
 /* public functions -------------------------------------------------------- */
 
-/* encode 4*4*4 floating-point block stored at p using strides (sx, sy, sz) */
-uint
-_t2(zfp_encode_block_strided, Scalar, 3)(zfp_stream* stream, const Scalar* p, int sx, int sy, int sz)
+/* encode 4*4*4 block stored at p using strides (sx, sy, sz) */
+size_t
+_t2(zfp_encode_block_strided, Scalar, 3)(zfp_stream* stream, const Scalar* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz)
 {
   /* gather block from strided array */
-  cache_align_(Scalar fblock[64]);
-  _t2(gather, Scalar, 3)(fblock, p, sx, sy, sz);
-  /* encode floating-point block */
-  return _t2(zfp_encode_block, Scalar, 3)(stream, fblock);
+  cache_align_(Scalar block[64]);
+  _t2(gather, Scalar, 3)(block, p, sx, sy, sz);
+  /* encode block */
+  return _t2(zfp_encode_block, Scalar, 3)(stream, block);
 }
 
-/* encode nx*ny*nz floating-point block stored at p using strides (sx, sy, sz) */
-uint
-_t2(zfp_encode_partial_block_strided, Scalar, 3)(zfp_stream* stream, const Scalar* p, uint nx, uint ny, uint nz, int sx, int sy, int sz)
+/* encode nx*ny*nz block stored at p using strides (sx, sy, sz) */
+size_t
+_t2(zfp_encode_partial_block_strided, Scalar, 3)(zfp_stream* stream, const Scalar* p, size_t nx, size_t ny, size_t nz, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz)
 {
   /* gather block from strided array */
-  cache_align_(Scalar fblock[64]);
-  _t2(gather_partial, Scalar, 3)(fblock, p, nx, ny, nz, sx, sy, sz);
-  /* encode floating-point block */
-  return _t2(zfp_encode_block, Scalar, 3)(stream, fblock);
+  cache_align_(Scalar block[64]);
+  _t2(gather_partial, Scalar, 3)(block, p, nx, ny, nz, sx, sy, sz);
+  /* encode block */
+  return _t2(zfp_encode_block, Scalar, 3)(stream, block);
 }
diff --git a/src/template/encode4.c b/src/template/encode4.c
index c9ed5425..90ca40a8 100644
--- a/src/template/encode4.c
+++ b/src/template/encode4.c
@@ -2,7 +2,7 @@
 
 /* gather 4*4*4*4 block from strided array */
 static void
-_t2(gather, Scalar, 4)(Scalar* q, const Scalar* p, int sx, int sy, int sz, int sw)
+_t2(gather, Scalar, 4)(Scalar* q, const Scalar* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw)
 {
   uint x, y, z, w;
   for (w = 0; w < 4; w++, p += sw - 4 * sz)
@@ -14,9 +14,9 @@ _t2(gather, Scalar, 4)(Scalar* q, const Scalar* p, int sx, int sy, int sz, int s
 
 /* gather nx*ny*nz*nw block from strided array */
 static void
-_t2(gather_partial, Scalar, 4)(Scalar* q, const Scalar* p, uint nx, uint ny, uint nz, uint nw, int sx, int sy, int sz, int sw)
+_t2(gather_partial, Scalar, 4)(Scalar* q, const Scalar* p, size_t nx, size_t ny, size_t nz, size_t nw, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw)
 {
-  uint x, y, z, w;
+  size_t x, y, z, w;
   for (w = 0; w < nw; w++, p += sw - (ptrdiff_t)nz * sz) {
     for (z = 0; z < nz; z++, p += sz - (ptrdiff_t)ny * sy) {
       for (y = 0; y < ny; y++, p += sy - (ptrdiff_t)nx * sx) {
@@ -66,24 +66,24 @@ _t2(fwd_xform, Int, 4)(Int* p)
 
 /* public functions -------------------------------------------------------- */
 
-/* encode 4*4*4*4 floating-point block stored at p using strides (sx, sy, sz, sw) */
-uint
-_t2(zfp_encode_block_strided, Scalar, 4)(zfp_stream* stream, const Scalar* p, int sx, int sy, int sz, int sw)
+/* encode 4*4*4*4 block stored at p using strides (sx, sy, sz, sw) */
+size_t
+_t2(zfp_encode_block_strided, Scalar, 4)(zfp_stream* stream, const Scalar* p, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw)
 {
   /* gather block from strided array */
-  cache_align_(Scalar fblock[256]);
-  _t2(gather, Scalar, 4)(fblock, p, sx, sy, sz, sw);
-  /* encode floating-point block */
-  return _t2(zfp_encode_block, Scalar, 4)(stream, fblock);
+  cache_align_(Scalar block[256]);
+  _t2(gather, Scalar, 4)(block, p, sx, sy, sz, sw);
+  /* encode block */
+  return _t2(zfp_encode_block, Scalar, 4)(stream, block);
 }
 
-/* encode nx*ny*nz*nw floating-point block stored at p using strides (sx, sy, sz, sw) */
-uint
-_t2(zfp_encode_partial_block_strided, Scalar, 4)(zfp_stream* stream, const Scalar* p, uint nx, uint ny, uint nz, uint nw, int sx, int sy, int sz, int sw)
+/* encode nx*ny*nz*nw block stored at p using strides (sx, sy, sz, sw) */
+size_t
+_t2(zfp_encode_partial_block_strided, Scalar, 4)(zfp_stream* stream, const Scalar* p, size_t nx, size_t ny, size_t nz, size_t nw, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw)
 {
   /* gather block from strided array */
-  cache_align_(Scalar fblock[256]);
-  _t2(gather_partial, Scalar, 4)(fblock, p, nx, ny, nz, nw, sx, sy, sz, sw);
-  /* encode floating-point block */
-  return _t2(zfp_encode_block, Scalar, 4)(stream, fblock);
+  cache_align_(Scalar block[256]);
+  _t2(gather_partial, Scalar, 4)(block, p, nx, ny, nz, nw, sx, sy, sz, sw);
+  /* encode block */
+  return _t2(zfp_encode_block, Scalar, 4)(stream, block);
 }
diff --git a/src/template/encodef.c b/src/template/encodef.c
index 5c6ec537..10e50438 100644
--- a/src/template/encodef.c
+++ b/src/template/encodef.c
@@ -1,3 +1,4 @@
+#include
 #include
 #include
 
@@ -9,13 +10,20 @@ static uint _t2(rev_encode_block, Scalar, DIMS)(zfp_stream* zfp, const Scalar* f
 static int
 _t1(exponent, Scalar)(Scalar x)
 {
+  /* use e = -EBIAS when x = 0 */
+  int e = -EBIAS;
+#ifdef ZFP_WITH_DAZ
+  /* treat subnormals as zero; resolves issue #119 by avoiding overflow */
+  if (x >= SCALAR_MIN)
+    FREXP(x, &e);
+#else
   if (x > 0) {
-    int e;
     FREXP(x, &e);
-    /* clamp exponent
in case x is denormal */ - return MAX(e, 1 - EBIAS); + /* clamp exponent in case x is subnormal; may still result in overflow */ + e = MAX(e, 1 - EBIAS); } - return -EBIAS; +#endif + return e; } /* compute maximum floating-point exponent in block of n values */ @@ -35,7 +43,7 @@ _t1(exponent_block, Scalar)(const Scalar* p, uint n) static Scalar _t1(quantize, Scalar)(Scalar x, int e) { - return LDEXP(x, (CHAR_BIT * (int)sizeof(Scalar) - 2) - e); + return LDEXP(x, ((int)(CHAR_BIT * sizeof(Scalar)) - 2) - e); } /* forward block-floating-point transform to signed integers */ @@ -57,8 +65,8 @@ _t2(encode_block, Scalar, DIMS)(zfp_stream* zfp, const Scalar* fblock) uint bits = 1; /* compute maximum exponent */ int emax = _t1(exponent_block, Scalar)(fblock, BLOCK_SIZE); - int maxprec = precision(emax, zfp->maxprec, zfp->minexp, DIMS); - uint e = maxprec ? emax + EBIAS : 0; + uint maxprec = precision(emax, zfp->maxprec, zfp->minexp, DIMS); + uint e = maxprec ? (uint)(emax + EBIAS) : 0; /* encode block only if biased exponent is nonzero */ if (e) { cache_align_(Int iblock[BLOCK_SIZE]); @@ -68,7 +76,7 @@ _t2(encode_block, Scalar, DIMS)(zfp_stream* zfp, const Scalar* fblock) /* perform forward block-floating-point transform */ _t1(fwd_cast, Scalar)(iblock, fblock, BLOCK_SIZE, emax); /* encode integer block */ - bits += _t2(encode_block, Int, DIMS)(zfp->stream, zfp->minbits - bits, zfp->maxbits - bits, maxprec, iblock); + bits += _t2(encode_block, Int, DIMS)(zfp->stream, zfp->minbits - MIN(bits, zfp->minbits), zfp->maxbits - bits, maxprec, iblock); } else { /* write single zero-bit to indicate that all values are zero */ @@ -84,7 +92,7 @@ _t2(encode_block, Scalar, DIMS)(zfp_stream* zfp, const Scalar* fblock) /* public functions -------------------------------------------------------- */ /* encode contiguous floating-point block */ -uint +size_t _t2(zfp_encode_block, Scalar, DIMS)(zfp_stream* zfp, const Scalar* fblock) { return REVERSIBLE(zfp) ? 
_t2(rev_encode_block, Scalar, DIMS)(zfp, fblock) : _t2(encode_block, Scalar, DIMS)(zfp, fblock); diff --git a/src/template/encodei.c b/src/template/encodei.c index 41d5fbd6..2aa4e7e3 100644 --- a/src/template/encodei.c +++ b/src/template/encodei.c @@ -1,9 +1,9 @@ -static uint _t2(rev_encode_block, Int, DIMS)(bitstream* stream, int minbits, int maxbits, int maxprec, Int* iblock); +static uint _t2(rev_encode_block, Int, DIMS)(bitstream* stream, uint minbits, uint maxbits, uint maxprec, Int* iblock); /* public functions -------------------------------------------------------- */ /* encode contiguous integer block */ -uint +size_t _t2(zfp_encode_block, Int, DIMS)(zfp_stream* zfp, const Int* iblock) { cache_align_(Int block[BLOCK_SIZE]); diff --git a/src/template/ompcompress.c b/src/template/ompcompress.c index b0f86d23..4e4365c7 100644 --- a/src/template/ompcompress.c +++ b/src/template/ompcompress.c @@ -6,12 +6,13 @@ _t2(compress_omp, Scalar, 1)(zfp_stream* stream, const zfp_field* field) { /* array metadata */ const Scalar* data = (const Scalar*)field->data; - uint nx = field->nx; + size_t nx = field->nx; /* number of omp threads, blocks, and chunks */ uint threads = thread_count_omp(stream); - uint blocks = (nx + 3) / 4; - uint chunks = chunk_count_omp(stream, blocks, threads); + size_t blocks = (nx + 3) / 4; + size_t chunks = chunk_count_omp(stream, blocks, threads); + int chunk; /* OpenMP 2.0 requires int loop counter */ /* allocate per-thread streams */ bitstream** bs = compress_init_par(stream, field, chunks, blocks); @@ -19,13 +20,12 @@ _t2(compress_omp, Scalar, 1)(zfp_stream* stream, const zfp_field* field) return; /* compress chunks of blocks in parallel */ - int chunk; #pragma omp parallel for num_threads(threads) for (chunk = 0; chunk < (int)chunks; chunk++) { /* determine range of block indices assigned to this thread */ - uint bmin = chunk_offset(blocks, chunks, chunk + 0); - uint bmax = chunk_offset(blocks, chunks, chunk + 1); - uint block; + size_t bmin 
= chunk_offset(blocks, chunks, chunk + 0); + size_t bmax = chunk_offset(blocks, chunks, chunk + 1); + size_t block; /* set up thread-local bit stream */ zfp_stream s = *stream; zfp_stream_set_bit_stream(&s, bs[chunk]); @@ -33,11 +33,11 @@ _t2(compress_omp, Scalar, 1)(zfp_stream* stream, const zfp_field* field) for (block = bmin; block < bmax; block++) { /* determine block origin x within array */ const Scalar* p = data; - uint x = 4 * block; + size_t x = 4 * block; p += x; /* compress partial or full block */ - if (nx - x < 4) - _t2(zfp_encode_partial_block_strided, Scalar, 1)(&s, p, MIN(nx - x, 4u), 1); + if (nx - x < 4u) + _t2(zfp_encode_partial_block_strided, Scalar, 1)(&s, p, nx - x, 1); else _t2(zfp_encode_block, Scalar, 1)(&s, p); } @@ -53,13 +53,14 @@ _t2(compress_strided_omp, Scalar, 1)(zfp_stream* stream, const zfp_field* field) { /* array metadata */ const Scalar* data = (const Scalar*)field->data; - uint nx = field->nx; - int sx = field->sx ? field->sx : 1; + size_t nx = field->nx; + ptrdiff_t sx = field->sx ? 
field->sx : 1; /* number of omp threads, blocks, and chunks */ uint threads = thread_count_omp(stream); - uint blocks = (nx + 3) / 4; - uint chunks = chunk_count_omp(stream, blocks, threads); + size_t blocks = (nx + 3) / 4; + size_t chunks = chunk_count_omp(stream, blocks, threads); + int chunk; /* OpenMP 2.0 requires int loop counter */ /* allocate per-thread streams */ bitstream** bs = compress_init_par(stream, field, chunks, blocks); @@ -67,13 +68,12 @@ _t2(compress_strided_omp, Scalar, 1)(zfp_stream* stream, const zfp_field* field) return; /* compress chunks of blocks in parallel */ - int chunk; #pragma omp parallel for num_threads(threads) for (chunk = 0; chunk < (int)chunks; chunk++) { /* determine range of block indices assigned to this thread */ - uint bmin = chunk_offset(blocks, chunks, chunk + 0); - uint bmax = chunk_offset(blocks, chunks, chunk + 1); - uint block; + size_t bmin = chunk_offset(blocks, chunks, chunk + 0); + size_t bmax = chunk_offset(blocks, chunks, chunk + 1); + size_t block; /* set up thread-local bit stream */ zfp_stream s = *stream; zfp_stream_set_bit_stream(&s, bs[chunk]); @@ -81,11 +81,11 @@ _t2(compress_strided_omp, Scalar, 1)(zfp_stream* stream, const zfp_field* field) for (block = bmin; block < bmax; block++) { /* determine block origin x within array */ const Scalar* p = data; - uint x = 4 * block; + size_t x = 4 * block; p += sx * (ptrdiff_t)x; /* compress partial or full block */ - if (nx - x < 4) - _t2(zfp_encode_partial_block_strided, Scalar, 1)(&s, p, MIN(nx - x, 4u), sx); + if (nx - x < 4u) + _t2(zfp_encode_partial_block_strided, Scalar, 1)(&s, p, nx - x, sx); else _t2(zfp_encode_block_strided, Scalar, 1)(&s, p, sx); } @@ -101,17 +101,18 @@ _t2(compress_strided_omp, Scalar, 2)(zfp_stream* stream, const zfp_field* field) { /* array metadata */ const Scalar* data = (const Scalar*)field->data; - uint nx = field->nx; - uint ny = field->ny; - int sx = field->sx ? field->sx : 1; - int sy = field->sy ? 
field->sy : (int)nx; + size_t nx = field->nx; + size_t ny = field->ny; + ptrdiff_t sx = field->sx ? field->sx : 1; + ptrdiff_t sy = field->sy ? field->sy : (ptrdiff_t)nx; /* number of omp threads, blocks, and chunks */ uint threads = thread_count_omp(stream); - uint bx = (nx + 3) / 4; - uint by = (ny + 3) / 4; - uint blocks = bx * by; - uint chunks = chunk_count_omp(stream, blocks, threads); + size_t bx = (nx + 3) / 4; + size_t by = (ny + 3) / 4; + size_t blocks = bx * by; + size_t chunks = chunk_count_omp(stream, blocks, threads); + int chunk; /* OpenMP 2.0 requires int loop counter */ /* allocate per-thread streams */ bitstream** bs = compress_init_par(stream, field, chunks, blocks); @@ -119,13 +120,12 @@ _t2(compress_strided_omp, Scalar, 2)(zfp_stream* stream, const zfp_field* field) return; /* compress chunks of blocks in parallel */ - int chunk; #pragma omp parallel for num_threads(threads) for (chunk = 0; chunk < (int)chunks; chunk++) { /* determine range of block indices assigned to this thread */ - uint bmin = chunk_offset(blocks, chunks, chunk + 0); - uint bmax = chunk_offset(blocks, chunks, chunk + 1); - uint block; + size_t bmin = chunk_offset(blocks, chunks, chunk + 0); + size_t bmax = chunk_offset(blocks, chunks, chunk + 1); + size_t block; /* set up thread-local bit stream */ zfp_stream s = *stream; zfp_stream_set_bit_stream(&s, bs[chunk]); @@ -133,13 +133,13 @@ _t2(compress_strided_omp, Scalar, 2)(zfp_stream* stream, const zfp_field* field) for (block = bmin; block < bmax; block++) { /* determine block origin (x, y) within array */ const Scalar* p = data; - uint b = block; - uint x, y; + size_t b = block; + size_t x, y; x = 4 * (b % bx); b /= bx; y = 4 * b; p += sx * (ptrdiff_t)x + sy * (ptrdiff_t)y; /* compress partial or full block */ - if (nx - x < 4 || ny - y < 4) + if (nx - x < 4u || ny - y < 4u) _t2(zfp_encode_partial_block_strided, Scalar, 2)(&s, p, MIN(nx - x, 4u), MIN(ny - y, 4u), sx, sy); else _t2(zfp_encode_block_strided, Scalar, 2)(&s, p, 
sx, sy); @@ -156,20 +156,21 @@ _t2(compress_strided_omp, Scalar, 3)(zfp_stream* stream, const zfp_field* field) { /* array metadata */ const Scalar* data = (const Scalar*)field->data; - uint nx = field->nx; - uint ny = field->ny; - uint nz = field->nz; - int sx = field->sx ? field->sx : 1; - int sy = field->sy ? field->sy : (int)nx; - int sz = field->sz ? field->sz : (int)(nx * ny); + size_t nx = field->nx; + size_t ny = field->ny; + size_t nz = field->nz; + ptrdiff_t sx = field->sx ? field->sx : 1; + ptrdiff_t sy = field->sy ? field->sy : (ptrdiff_t)nx; + ptrdiff_t sz = field->sz ? field->sz : (ptrdiff_t)(nx * ny); /* number of omp threads, blocks, and chunks */ uint threads = thread_count_omp(stream); - uint bx = (nx + 3) / 4; - uint by = (ny + 3) / 4; - uint bz = (nz + 3) / 4; - uint blocks = bx * by * bz; - uint chunks = chunk_count_omp(stream, blocks, threads); + size_t bx = (nx + 3) / 4; + size_t by = (ny + 3) / 4; + size_t bz = (nz + 3) / 4; + size_t blocks = bx * by * bz; + size_t chunks = chunk_count_omp(stream, blocks, threads); + int chunk; /* OpenMP 2.0 requires int loop counter */ /* allocate per-thread streams */ bitstream** bs = compress_init_par(stream, field, chunks, blocks); @@ -177,13 +178,12 @@ _t2(compress_strided_omp, Scalar, 3)(zfp_stream* stream, const zfp_field* field) return; /* compress chunks of blocks in parallel */ - int chunk; #pragma omp parallel for num_threads(threads) for (chunk = 0; chunk < (int)chunks; chunk++) { /* determine range of block indices assigned to this thread */ - uint bmin = chunk_offset(blocks, chunks, chunk + 0); - uint bmax = chunk_offset(blocks, chunks, chunk + 1); - uint block; + size_t bmin = chunk_offset(blocks, chunks, chunk + 0); + size_t bmax = chunk_offset(blocks, chunks, chunk + 1); + size_t block; /* set up thread-local bit stream */ zfp_stream s = *stream; zfp_stream_set_bit_stream(&s, bs[chunk]); @@ -191,14 +191,14 @@ _t2(compress_strided_omp, Scalar, 3)(zfp_stream* stream, const zfp_field* field) 
for (block = bmin; block < bmax; block++) { /* determine block origin (x, y, z) within array */ const Scalar* p = data; - uint b = block; - uint x, y, z; + size_t b = block; + size_t x, y, z; x = 4 * (b % bx); b /= bx; y = 4 * (b % by); b /= by; z = 4 * b; p += sx * (ptrdiff_t)x + sy * (ptrdiff_t)y + sz * (ptrdiff_t)z; /* compress partial or full block */ - if (nx - x < 4 || ny - y < 4 || nz - z < 4) + if (nx - x < 4u || ny - y < 4u || nz - z < 4u) _t2(zfp_encode_partial_block_strided, Scalar, 3)(&s, p, MIN(nx - x, 4u), MIN(ny - y, 4u), MIN(nz - z, 4u), sx, sy, sz); else _t2(zfp_encode_block_strided, Scalar, 3)(&s, p, sx, sy, sz); @@ -215,23 +215,24 @@ _t2(compress_strided_omp, Scalar, 4)(zfp_stream* stream, const zfp_field* field) { /* array metadata */ const Scalar* data = field->data; - uint nx = field->nx; - uint ny = field->ny; - uint nz = field->nz; - uint nw = field->nw; - int sx = field->sx ? field->sx : 1; - int sy = field->sy ? field->sy : (int)nx; - int sz = field->sz ? field->sz : (int)(nx * ny); - int sw = field->sw ? field->sw : (int)(nx * ny * nz); + size_t nx = field->nx; + size_t ny = field->ny; + size_t nz = field->nz; + size_t nw = field->nw; + ptrdiff_t sx = field->sx ? field->sx : 1; + ptrdiff_t sy = field->sy ? field->sy : (ptrdiff_t)nx; + ptrdiff_t sz = field->sz ? field->sz : (ptrdiff_t)(nx * ny); + ptrdiff_t sw = field->sw ? 
field->sw : (ptrdiff_t)(nx * ny * nz); /* number of omp threads, blocks, and chunks */ uint threads = thread_count_omp(stream); - uint bx = (nx + 3) / 4; - uint by = (ny + 3) / 4; - uint bz = (nz + 3) / 4; - uint bw = (nw + 3) / 4; - uint blocks = bx * by * bz * bw; - uint chunks = chunk_count_omp(stream, blocks, threads); + size_t bx = (nx + 3) / 4; + size_t by = (ny + 3) / 4; + size_t bz = (nz + 3) / 4; + size_t bw = (nw + 3) / 4; + size_t blocks = bx * by * bz * bw; + size_t chunks = chunk_count_omp(stream, blocks, threads); + int chunk; /* OpenMP 2.0 requires int loop counter */ /* allocate per-thread streams */ bitstream** bs = compress_init_par(stream, field, chunks, blocks); @@ -239,13 +240,12 @@ _t2(compress_strided_omp, Scalar, 4)(zfp_stream* stream, const zfp_field* field) return; /* compress chunks of blocks in parallel */ - int chunk; #pragma omp parallel for num_threads(threads) for (chunk = 0; chunk < (int)chunks; chunk++) { /* determine range of block indices assigned to this thread */ - uint bmin = chunk_offset(blocks, chunks, chunk + 0); - uint bmax = chunk_offset(blocks, chunks, chunk + 1); - uint block; + size_t bmin = chunk_offset(blocks, chunks, chunk + 0); + size_t bmax = chunk_offset(blocks, chunks, chunk + 1); + size_t block; /* set up thread-local bit stream */ zfp_stream s = *stream; zfp_stream_set_bit_stream(&s, bs[chunk]); @@ -253,15 +253,15 @@ _t2(compress_strided_omp, Scalar, 4)(zfp_stream* stream, const zfp_field* field) for (block = bmin; block < bmax; block++) { /* determine block origin (x, y, z, w) within array */ const Scalar* p = data; - uint b = block; - uint x, y, z, w; + size_t b = block; + size_t x, y, z, w; x = 4 * (b % bx); b /= bx; y = 4 * (b % by); b /= by; z = 4 * (b % bz); b /= bz; w = 4 * b; p += sx * (ptrdiff_t)x + sy * (ptrdiff_t)y + sz * (ptrdiff_t)z + sw * (ptrdiff_t)w; /* compress partial or full block */ - if (nx - x < 4 || ny - y < 4 || nz - z < 4 || nw - w < 4) + if (nx - x < 4u || ny - y < 4u || nz - z < 4u 
|| nw - w < 4u) _t2(zfp_encode_partial_block_strided, Scalar, 4)(&s, p, MIN(nx - x, 4u), MIN(ny - y, 4u), MIN(nz - z, 4u), MIN(nw - w, 4u), sx, sy, sz, sw); else _t2(zfp_encode_block_strided, Scalar, 4)(&s, p, sx, sy, sz, sw); diff --git a/src/template/revdecode.c b/src/template/revdecode.c index cde9877f..115b0a17 100644 --- a/src/template/revdecode.c +++ b/src/template/revdecode.c @@ -31,17 +31,14 @@ _t1(rev_inv_lift, Int)(Int* p, uint s) /* decode block of integers using reversible algorithm */ static uint -_t2(rev_decode_block, Int, DIMS)(bitstream* stream, int minbits, int maxbits, Int* iblock) +_t2(rev_decode_block, Int, DIMS)(bitstream* stream, uint minbits, uint maxbits, Int* iblock) { /* decode number of significant bits */ - int bits = PBITS; - int prec = (int)stream_read_bits(stream, PBITS) + 1; + uint bits = PBITS; + uint prec = (uint)stream_read_bits(stream, PBITS) + 1; cache_align_(UInt ublock[BLOCK_SIZE]); /* decode integer coefficients */ - if (BLOCK_SIZE <= 64) - bits += _t1(decode_ints, UInt)(stream, maxbits - bits, prec, ublock, BLOCK_SIZE); - else - bits += _t1(decode_many_ints, UInt)(stream, maxbits - bits, prec, ublock, BLOCK_SIZE); + bits += _t1(decode_ints, UInt)(stream, maxbits - bits, prec, ublock, BLOCK_SIZE); /* read at least minbits bits */ if (bits < minbits) { stream_skip(stream, minbits - bits); diff --git a/src/template/revdecodef.c b/src/template/revdecodef.c index 221a4b2e..5fafcefc 100644 --- a/src/template/revdecodef.c +++ b/src/template/revdecodef.c @@ -30,16 +30,17 @@ _t2(rev_decode_block, Scalar, DIMS)(zfp_stream* zfp, Scalar* fblock) bits++; if (stream_read_bit(zfp->stream)) { /* decode integer block */ - bits += _t2(rev_decode_block, Int, DIMS)(zfp->stream, zfp->minbits - bits, zfp->maxbits - bits, iblock); + bits += _t2(rev_decode_block, Int, DIMS)(zfp->stream, zfp->minbits - MIN(bits, zfp->minbits), zfp->maxbits - bits, iblock); /* reinterpret integers as floating values */ _t1(rev_inv_reinterpret, Scalar)(iblock, fblock, 
BLOCK_SIZE); } else { /* decode common exponent */ + int emax; bits += EBITS; - int emax = (int)stream_read_bits(zfp->stream, EBITS) - EBIAS; + emax = (int)stream_read_bits(zfp->stream, EBITS) - EBIAS; /* decode integer block */ - bits += _t2(rev_decode_block, Int, DIMS)(zfp->stream, zfp->minbits - bits, zfp->maxbits - bits, iblock); + bits += _t2(rev_decode_block, Int, DIMS)(zfp->stream, zfp->minbits - MIN(bits, zfp->minbits), zfp->maxbits - bits, iblock); /* perform inverse block-floating-point transform */ _t1(rev_inv_cast, Scalar)(iblock, fblock, BLOCK_SIZE, emax); } diff --git a/src/template/revencode.c b/src/template/revencode.c index f76238e9..fa162140 100644 --- a/src/template/revencode.c +++ b/src/template/revencode.c @@ -40,7 +40,7 @@ _t1(rev_precision, UInt)(const UInt* block, uint n) while (n--) m |= *block++; /* count trailing zeros via binary search */ - for (s = CHAR_BIT * (uint)sizeof(UInt); m; s /= 2) + for (s = (uint)(CHAR_BIT * sizeof(UInt)); m; s /= 2) if ((UInt)(m << (s - 1))) { m <<= s - 1; m <<= 1; @@ -51,10 +51,10 @@ _t1(rev_precision, UInt)(const UInt* block, uint n) /* encode block of integers using reversible algorithm */ static uint -_t2(rev_encode_block, Int, DIMS)(bitstream* stream, int minbits, int maxbits, int maxprec, Int* iblock) +_t2(rev_encode_block, Int, DIMS)(bitstream* stream, uint minbits, uint maxbits, uint maxprec, Int* iblock) { - int bits = PBITS; - int prec; + uint bits = PBITS; + uint prec; cache_align_(UInt ublock[BLOCK_SIZE]); /* perform decorrelating transform */ _t2(rev_fwd_xform, Int, DIMS)(iblock); @@ -66,10 +66,7 @@ _t2(rev_encode_block, Int, DIMS)(bitstream* stream, int minbits, int maxbits, in prec = MAX(prec, 1); stream_write_bits(stream, prec - 1, PBITS); /* encode integer coefficients */ - if (BLOCK_SIZE <= 64) - bits += _t1(encode_ints, UInt)(stream, maxbits - bits, prec, ublock, BLOCK_SIZE); - else - bits += _t1(encode_many_ints, UInt)(stream, maxbits - bits, prec, ublock, BLOCK_SIZE); + bits += 
_t1(encode_ints, UInt)(stream, maxbits - bits, prec, ublock, BLOCK_SIZE); /* write at least minbits bits by padding with zeros */ if (bits < minbits) { stream_pad(stream, minbits - bits); diff --git a/src/template/revencodef.c b/src/template/revencodef.c index 44ef3749..ee270aa7 100644 --- a/src/template/revencodef.c +++ b/src/template/revencodef.c @@ -53,7 +53,7 @@ _t2(rev_encode_block, Scalar, DIMS)(zfp_stream* zfp, const Scalar* fblock) /* test if block-floating-point transform is reversible */ if (_t1(rev_fwd_reversible, Scalar)(iblock, fblock, BLOCK_SIZE, emax)) { /* transform is reversible; test if block has any non-zeros */ - uint e = emax + EBIAS; + uint e = (uint)(emax + EBIAS); if (e) { /* encode common exponent */ bits += 2; @@ -71,10 +71,10 @@ _t2(rev_encode_block, Scalar, DIMS)(zfp_stream* zfp, const Scalar* fblock) else { /* transform is irreversible; reinterpret floating values as integers */ _t1(rev_fwd_reinterpret, Scalar)(iblock, fblock, BLOCK_SIZE); - bits++; + bits += 2; stream_write_bits(zfp->stream, 3, 2); } /* losslessly encode integers */ - bits += _t2(rev_encode_block, Int, DIMS)(zfp->stream, zfp->minbits - bits, zfp->maxbits - bits, zfp->maxprec, iblock); + bits += _t2(rev_encode_block, Int, DIMS)(zfp->stream, zfp->minbits - MIN(bits, zfp->minbits), zfp->maxbits - bits, zfp->maxprec, iblock); return bits; } diff --git a/src/traitsd.h b/src/traitsd.h index 4dfb271b..05110d55 100644 --- a/src/traitsd.h +++ b/src/traitsd.h @@ -7,6 +7,7 @@ #define PBITS 6 /* number of bits needed to encode precision */ #define NBMASK UINT64C(0xaaaaaaaaaaaaaaaa) /* negabinary mask */ #define TCMASK UINT64C(0x7fffffffffffffff) /* two's complement mask */ +#define SCALAR_MIN DBL_MIN /* smallest positive normal number */ #define FABS(x) fabs(x) #define FREXP(x, e) frexp(x, e) diff --git a/src/traitsf.h b/src/traitsf.h index 408337e1..7e85299d 100644 --- a/src/traitsf.h +++ b/src/traitsf.h @@ -7,6 +7,7 @@ #define PBITS 5 /* number of bits needed to encode precision 
*/ #define NBMASK 0xaaaaaaaau /* negabinary mask */ #define TCMASK 0x7fffffffu /* two's complement mask */ +#define SCALAR_MIN FLT_MIN /* smallest positive normal number */ #if __STDC_VERSION__ >= 199901L #define FABS(x) fabsf(x) diff --git a/src/zfp.c b/src/zfp.c index 54a0e5f2..6aa8b8f6 100644 --- a/src/zfp.c +++ b/src/zfp.c @@ -3,35 +3,42 @@ #include #include #include "zfp.h" -#include "zfp/macros.h" +#include "zfp/internal/zfp/macros.h" +#include "zfp/version.h" #include "template/template.h" /* public data ------------------------------------------------------------- */ -export_ const uint zfp_codec_version = ZFP_CODEC; -export_ const uint zfp_library_version = ZFP_VERSION; -export_ const char* const zfp_version_string = "zfp version " ZFP_VERSION_STRING " (May 5, 2019)"; +const uint zfp_codec_version = ZFP_CODEC; +const uint zfp_library_version = ZFP_VERSION; +const char* const zfp_version_string = "zfp version " ZFP_VERSION_STRING " (December 15, 2023)"; /* private functions ------------------------------------------------------- */ -static uint -type_precision(zfp_type type) +static size_t +field_index_span(const zfp_field* field, ptrdiff_t* min, ptrdiff_t* max) { - switch (type) { - case zfp_type_int32: - return CHAR_BIT * (uint)sizeof(int32); - case zfp_type_int64: - return CHAR_BIT * (uint)sizeof(int64); - case zfp_type_float: - return CHAR_BIT * (uint)sizeof(float); - case zfp_type_double: - return CHAR_BIT * (uint)sizeof(double); - default: - return 0; - } + /* compute strides */ + ptrdiff_t sx = field->sx ? field->sx : 1; + ptrdiff_t sy = field->sy ? field->sy : (ptrdiff_t)field->nx; + ptrdiff_t sz = field->sz ? field->sz : (ptrdiff_t)(field->nx * field->ny); + ptrdiff_t sw = field->sw ? field->sw : (ptrdiff_t)(field->nx * field->ny * field->nz); + /* compute largest offsets from base pointer */ + ptrdiff_t dx = field->nx ? sx * (ptrdiff_t)(field->nx - 1) : 0; + ptrdiff_t dy = field->ny ? 
sy * (ptrdiff_t)(field->ny - 1) : 0; + ptrdiff_t dz = field->nz ? sz * (ptrdiff_t)(field->nz - 1) : 0; + ptrdiff_t dw = field->nw ? sw * (ptrdiff_t)(field->nw - 1) : 0; + /* compute lowest and highest offset */ + ptrdiff_t imin = MIN(dx, 0) + MIN(dy, 0) + MIN(dz, 0) + MIN(dw, 0); + ptrdiff_t imax = MAX(dx, 0) + MAX(dy, 0) + MAX(dz, 0) + MAX(dw, 0); + if (min) + *min = imin; + if (max) + *max = imax; + return (size_t)(imax - imin + 1); } -static int +static zfp_bool is_reversible(const zfp_stream* zfp) { return zfp->minexp < ZFP_MIN_EXP; @@ -98,7 +105,7 @@ zfp_type_size(zfp_type type) /* public functions: fields ------------------------------------------------ */ zfp_field* -zfp_field_alloc() +zfp_field_alloc(void) { zfp_field* field = (zfp_field*)malloc(sizeof(zfp_field)); if (field) { @@ -111,7 +118,7 @@ zfp_field_alloc() } zfp_field* -zfp_field_1d(void* data, zfp_type type, uint nx) +zfp_field_1d(void* data, zfp_type type, size_t nx) { zfp_field* field = zfp_field_alloc(); if (field) { @@ -123,7 +130,7 @@ zfp_field_1d(void* data, zfp_type type, uint nx) } zfp_field* -zfp_field_2d(void* data, zfp_type type, uint nx, uint ny) +zfp_field_2d(void* data, zfp_type type, size_t nx, size_t ny) { zfp_field* field = zfp_field_alloc(); if (field) { @@ -136,7 +143,7 @@ zfp_field_2d(void* data, zfp_type type, uint nx, uint ny) } zfp_field* -zfp_field_3d(void* data, zfp_type type, uint nx, uint ny, uint nz) +zfp_field_3d(void* data, zfp_type type, size_t nx, size_t ny, size_t nz) { zfp_field* field = zfp_field_alloc(); if (field) { @@ -150,7 +157,7 @@ zfp_field_3d(void* data, zfp_type type, uint nx, uint ny, uint nz) } zfp_field* -zfp_field_4d(void* data, zfp_type type, uint nx, uint ny, uint nz, uint nw) +zfp_field_4d(void* data, zfp_type type, size_t nx, size_t ny, size_t nz, size_t nw) { zfp_field* field = zfp_field_alloc(); if (field) { @@ -176,6 +183,18 @@ zfp_field_pointer(const zfp_field* field) return field->data; } +void* +zfp_field_begin(const zfp_field* field) +{ + 
if (field->data) { + ptrdiff_t min; + field_index_span(field, &min, NULL); + return (void*)((uchar*)field->data + min * (ptrdiff_t)zfp_type_size(field->type)); + } + else + return NULL; +} + zfp_type zfp_field_type(const zfp_field* field) { @@ -185,7 +204,7 @@ zfp_field_type(const zfp_field* field) uint zfp_field_precision(const zfp_field* field) { - return type_precision(field->type); + return (uint)(CHAR_BIT * zfp_type_size(field->type)); } uint @@ -195,7 +214,7 @@ zfp_field_dimensionality(const zfp_field* field) } size_t -zfp_field_size(const zfp_field* field, uint* size) +zfp_field_size(const zfp_field* field, size_t* size) { if (size) switch (zfp_field_dimensionality(field)) { @@ -212,22 +231,44 @@ zfp_field_size(const zfp_field* field, uint* size) size[0] = field->nx; break; } - return (size_t)MAX(field->nx, 1u) * (size_t)MAX(field->ny, 1u) * (size_t)MAX(field->nz, 1u) * (size_t)MAX(field->nw, 1u); + return MAX(field->nx, 1u) * MAX(field->ny, 1u) * MAX(field->nz, 1u) * MAX(field->nw, 1u); +} + +size_t +zfp_field_size_bytes(const zfp_field* field) +{ + return field_index_span(field, NULL, NULL) * zfp_type_size(field->type); +} + +size_t +zfp_field_blocks(const zfp_field* field) +{ + size_t bx = (field->nx + 3) / 4; + size_t by = (field->ny + 3) / 4; + size_t bz = (field->nz + 3) / 4; + size_t bw = (field->nw + 3) / 4; + switch (zfp_field_dimensionality(field)) { + case 1: return bx; + case 2: return bx * by; + case 3: return bx * by * bz; + case 4: return bx * by * bz * bw; + default: return 0; + } } -int -zfp_field_stride(const zfp_field* field, int* stride) +zfp_bool +zfp_field_stride(const zfp_field* field, ptrdiff_t* stride) { if (stride) switch (zfp_field_dimensionality(field)) { case 4: - stride[3] = field->sw ? field->sw : (int)(field->nx * field->ny * field->nz); + stride[3] = field->sw ? field->sw : (ptrdiff_t)(field->nx * field->ny * field->nz); /* FALLTHROUGH */ case 3: - stride[2] = field->sz ? 
field->sz : (int)(field->nx * field->ny); + stride[2] = field->sz ? field->sz : (ptrdiff_t)(field->nx * field->ny); /* FALLTHROUGH */ case 2: - stride[1] = field->sy ? field->sy : (int)field->nx; + stride[1] = field->sy ? field->sy : (ptrdiff_t)field->nx; /* FALLTHROUGH */ case 1: stride[0] = field->sx ? field->sx : 1; @@ -236,6 +277,12 @@ zfp_field_stride(const zfp_field* field, int* stride) return field->sx || field->sy || field->sz || field->sw; } +zfp_bool +zfp_field_is_contiguous(const zfp_field* field) +{ + return field_index_span(field, NULL, NULL) == zfp_field_size(field, NULL); +} + uint64 zfp_field_metadata(const zfp_field* field) { @@ -304,7 +351,7 @@ zfp_field_set_type(zfp_field* field, zfp_type type) } void -zfp_field_set_size_1d(zfp_field* field, uint n) +zfp_field_set_size_1d(zfp_field* field, size_t n) { field->nx = n; field->ny = 0; @@ -313,7 +360,7 @@ zfp_field_set_size_1d(zfp_field* field, uint n) } void -zfp_field_set_size_2d(zfp_field* field, uint nx, uint ny) +zfp_field_set_size_2d(zfp_field* field, size_t nx, size_t ny) { field->nx = nx; field->ny = ny; @@ -322,7 +369,7 @@ zfp_field_set_size_2d(zfp_field* field, uint nx, uint ny) } void -zfp_field_set_size_3d(zfp_field* field, uint nx, uint ny, uint nz) +zfp_field_set_size_3d(zfp_field* field, size_t nx, size_t ny, size_t nz) { field->nx = nx; field->ny = ny; @@ -331,7 +378,7 @@ zfp_field_set_size_3d(zfp_field* field, uint nx, uint ny, uint nz) } void -zfp_field_set_size_4d(zfp_field* field, uint nx, uint ny, uint nz, uint nw) +zfp_field_set_size_4d(zfp_field* field, size_t nx, size_t ny, size_t nz, size_t nw) { field->nx = nx; field->ny = ny; @@ -340,7 +387,7 @@ zfp_field_set_size_4d(zfp_field* field, uint nx, uint ny, uint nz, uint nw) } void -zfp_field_set_stride_1d(zfp_field* field, int sx) +zfp_field_set_stride_1d(zfp_field* field, ptrdiff_t sx) { field->sx = sx; field->sy = 0; @@ -349,7 +396,7 @@ zfp_field_set_stride_1d(zfp_field* field, int sx) } void 
-zfp_field_set_stride_2d(zfp_field* field, int sx, int sy) +zfp_field_set_stride_2d(zfp_field* field, ptrdiff_t sx, ptrdiff_t sy) { field->sx = sx; field->sy = sy; @@ -358,7 +405,7 @@ zfp_field_set_stride_2d(zfp_field* field, int sx, int sy) } void -zfp_field_set_stride_3d(zfp_field* field, int sx, int sy, int sz) +zfp_field_set_stride_3d(zfp_field* field, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz) { field->sx = sx; field->sy = sy; @@ -367,7 +414,7 @@ zfp_field_set_stride_3d(zfp_field* field, int sx, int sy, int sz) } void -zfp_field_set_stride_4d(zfp_field* field, int sx, int sy, int sz, int sw) +zfp_field_set_stride_4d(zfp_field* field, ptrdiff_t sx, ptrdiff_t sy, ptrdiff_t sz, ptrdiff_t sw) { field->sx = sx; field->sy = sy; @@ -375,44 +422,113 @@ zfp_field_set_stride_4d(zfp_field* field, int sx, int sy, int sz, int sw) field->sw = sw; } -int +zfp_bool zfp_field_set_metadata(zfp_field* field, uint64 meta) { uint64 dims; /* ensure value is in range */ if (meta >> ZFP_META_BITS) - return 0; + return zfp_false; field->type = (zfp_type)((meta & 0x3u) + 1); meta >>= 2; dims = (meta & 0x3u) + 1; meta >>= 2; switch (dims) { case 1: /* currently dimensions are limited to 2^32 - 1 */ - field->nx = (meta & UINT64C(0x0000ffffffff)) + 1; meta >>= 48; + field->nx = (size_t)(meta & UINT64C(0x0000ffffffff)) + 1; meta >>= 48; field->ny = 0; field->nz = 0; field->nw = 0; break; case 2: - field->nx = (meta & UINT64C(0xffffff)) + 1; meta >>= 24; - field->ny = (meta & UINT64C(0xffffff)) + 1; meta >>= 24; + field->nx = (size_t)(meta & UINT64C(0xffffff)) + 1; meta >>= 24; + field->ny = (size_t)(meta & UINT64C(0xffffff)) + 1; meta >>= 24; field->nz = 0; field->nw = 0; break; case 3: - field->nx = (meta & UINT64C(0xffff)) + 1; meta >>= 16; - field->ny = (meta & UINT64C(0xffff)) + 1; meta >>= 16; - field->nz = (meta & UINT64C(0xffff)) + 1; meta >>= 16; + field->nx = (size_t)(meta & UINT64C(0xffff)) + 1; meta >>= 16; + field->ny = (size_t)(meta & UINT64C(0xffff)) + 1; meta >>= 16; + 
field->nz = (size_t)(meta & UINT64C(0xffff)) + 1; meta >>= 16; field->nw = 0; break; case 4: - field->nx = (meta & UINT64C(0xfff)) + 1; meta >>= 12; - field->ny = (meta & UINT64C(0xfff)) + 1; meta >>= 12; - field->nz = (meta & UINT64C(0xfff)) + 1; meta >>= 12; - field->nw = (meta & UINT64C(0xfff)) + 1; meta >>= 12; + field->nx = (size_t)(meta & UINT64C(0xfff)) + 1; meta >>= 12; + field->ny = (size_t)(meta & UINT64C(0xfff)) + 1; meta >>= 12; + field->nz = (size_t)(meta & UINT64C(0xfff)) + 1; meta >>= 12; + field->nw = (size_t)(meta & UINT64C(0xfff)) + 1; meta >>= 12; break; } field->sx = field->sy = field->sz = field->sw = 0; - return 1; + return zfp_true; +} + +/* public functions: compression mode and parameter settings --------------- */ + +zfp_config +zfp_config_none(void) +{ + zfp_config config; + config.mode = zfp_mode_null; + return config; +} + +zfp_config +zfp_config_rate( + double rate, + zfp_bool align +) +{ + zfp_config config; + config.mode = zfp_mode_fixed_rate; + config.arg.rate = align ? 
-rate : +rate; + return config; +} + +zfp_config +zfp_config_precision( + uint precision +) +{ + zfp_config config; + config.mode = zfp_mode_fixed_precision; + config.arg.precision = precision; + return config; +} + +zfp_config +zfp_config_accuracy( + double tolerance +) +{ + zfp_config config; + config.mode = zfp_mode_fixed_accuracy; + config.arg.tolerance = tolerance; + return config; +} + +zfp_config +zfp_config_reversible(void) +{ + zfp_config config; + config.mode = zfp_mode_reversible; + return config; +} + +zfp_config +zfp_config_expert( + uint minbits, + uint maxbits, + uint maxprec, + int minexp +) +{ + zfp_config config; + config.mode = zfp_mode_expert; + config.arg.expert.minbits = minbits; + config.arg.expert.maxbits = maxbits; + config.arg.expert.maxprec = maxprec; + config.arg.expert.minexp = minexp; + return config; } /* public functions: zfp compressed stream --------------------------------- */ @@ -428,6 +544,7 @@ zfp_stream_open(bitstream* stream) zfp->maxprec = ZFP_MAX_PREC; zfp->minexp = ZFP_MIN_EXP; zfp->exec.policy = zfp_exec_serial; + zfp->exec.params = NULL; } return zfp; } @@ -435,6 +552,8 @@ zfp_stream_open(bitstream* stream) void zfp_stream_close(zfp_stream* zfp) { + if (zfp->exec.params != NULL) + free(zfp->exec.params); free(zfp); } @@ -488,6 +607,30 @@ zfp_stream_compression_mode(const zfp_stream* zfp) return zfp_mode_expert; } +double +zfp_stream_rate(const zfp_stream* zfp, uint dims) +{ + return (zfp_stream_compression_mode(zfp) == zfp_mode_fixed_rate) + ? (double)zfp->maxbits / (1u << (2 * dims)) + : 0.0; +} + +uint +zfp_stream_precision(const zfp_stream* zfp) +{ + return (zfp_stream_compression_mode(zfp) == zfp_mode_fixed_precision) + ? zfp->maxprec + : 0; +} + +double +zfp_stream_accuracy(const zfp_stream* zfp) +{ + return (zfp_stream_compression_mode(zfp) == zfp_mode_fixed_accuracy) + ? 
ldexp(1.0, zfp->minexp) + : 0.0; +} + uint64 zfp_stream_mode(const zfp_stream* zfp) { @@ -520,7 +663,7 @@ zfp_stream_mode(const zfp_stream* zfp) /* minexp is [ZFP_MIN_EXP=-1074, 843] */ /* returns [2177, ZFP_MODE_SHORT_MAX=4094] */ /* +1 because skipped 2176 */ - return (zfp->minexp - ZFP_MIN_EXP) + (2048 + 128 + 1); + return (uint64)(zfp->minexp - ZFP_MIN_EXP) + (2048 + 128 + 1); else break; @@ -536,7 +679,7 @@ zfp_stream_mode(const zfp_stream* zfp) minbits = MAX(1, MIN(zfp->minbits, 0x8000u)) - 1; maxbits = MAX(1, MIN(zfp->maxbits, 0x8000u)) - 1; maxprec = MAX(1, MIN(zfp->maxprec, 0x0080u)) - 1; - minexp = MAX(0, MIN(zfp->minexp + 16495, 0x7fff)); + minexp = (uint)MAX(0, MIN(zfp->minexp + 16495, 0x7fff)); mode <<= 15; mode += minexp; mode <<= 7; mode += maxprec; mode <<= 15; mode += maxbits; @@ -568,34 +711,31 @@ zfp_stream_compressed_size(const zfp_stream* zfp) size_t zfp_stream_maximum_size(const zfp_stream* zfp, const zfp_field* field) { + zfp_bool reversible = is_reversible(zfp); uint dims = zfp_field_dimensionality(field); - uint mx = (MAX(field->nx, 1u) + 3) / 4; - uint my = (MAX(field->ny, 1u) + 3) / 4; - uint mz = (MAX(field->nz, 1u) + 3) / 4; - uint mw = (MAX(field->nw, 1u) + 3) / 4; - size_t blocks = (size_t)mx * (size_t)my * (size_t)mz * (size_t)mw; + size_t blocks = zfp_field_blocks(field); uint values = 1u << (2 * dims); - uint maxbits = 1; + uint maxbits = 0; if (!dims) return 0; switch (field->type) { - case zfp_type_none: - return 0; + case zfp_type_int32: + maxbits += reversible ? 5 : 0; + break; + case zfp_type_int64: + maxbits += reversible ? 6 : 0; + break; case zfp_type_float: - maxbits += 8; - if (is_reversible(zfp)) - maxbits += 5; + maxbits += reversible ? 1 + 1 + 8 + 5 : 1 + 8; break; case zfp_type_double: - maxbits += 11; - if (is_reversible(zfp)) - maxbits += 6; + maxbits += reversible ? 
1 + 1 + 11 + 6 : 1 + 11; break; default: - break; + return 0; } - maxbits += values - 1 + values * MIN(zfp->maxprec, type_precision(field->type)); + maxbits += values - 1 + values * MIN(zfp->maxprec, zfp_field_precision(field)); maxbits = MIN(maxbits, zfp->maxbits); maxbits = MAX(maxbits, zfp->minbits); return ((ZFP_HEADER_MAX_BITS + blocks * maxbits + stream_word_bits - 1) & ~(stream_word_bits - 1)) / CHAR_BIT; @@ -617,7 +757,7 @@ zfp_stream_set_reversible(zfp_stream* zfp) } double -zfp_stream_set_rate(zfp_stream* zfp, double rate, zfp_type type, uint dims, int wra) +zfp_stream_set_rate(zfp_stream* zfp, double rate, zfp_type type, uint dims, zfp_bool align) { uint n = 1u << (2 * dims); uint bits = (uint)floor(n * rate + 0.5); @@ -631,7 +771,7 @@ zfp_stream_set_rate(zfp_stream* zfp, double rate, zfp_type type, uint dims, int default: break; } - if (wra) { + if (align) { /* for write random access, round up to next multiple of stream word size */ bits += (uint)stream_word_bits - 1; bits &= ~(stream_word_bits - 1); @@ -703,15 +843,15 @@ zfp_stream_set_mode(zfp_stream* zfp, uint64 mode) minbits = ZFP_MIN_BITS; maxbits = ZFP_MAX_BITS; maxprec = ZFP_MAX_PREC; - minexp = (uint)mode + ZFP_MIN_EXP - (2048 + 128 + 1); + minexp = (int)mode + ZFP_MIN_EXP - (2048 + 128 + 1); } } else { /* 64-bit encoding */ - mode >>= 12; minbits = ((uint)mode & 0x7fffu) + 1; - mode >>= 15; maxbits = ((uint)mode & 0x7fffu) + 1; - mode >>= 15; maxprec = ((uint)mode & 0x007fu) + 1; - mode >>= 7; minexp = ((uint)mode & 0x7fffu) - 16495; + mode >>= 12; minbits = (uint)(mode & 0x7fffu) + 1; + mode >>= 15; maxbits = (uint)(mode & 0x7fffu) + 1; + mode >>= 15; maxprec = (uint)(mode & 0x007fu) + 1; + mode >>= 7; minexp = (int)(mode & 0x7fffu) - 16495; } if (!zfp_stream_set_params(zfp, minbits, maxbits, maxprec, minexp)) @@ -720,16 +860,16 @@ zfp_stream_set_mode(zfp_stream* zfp, uint64 mode) return zfp_stream_compression_mode(zfp); } -int +zfp_bool zfp_stream_set_params(zfp_stream* zfp, uint minbits, 
uint maxbits, uint maxprec, int minexp) { if (minbits > maxbits || !(0 < maxprec && maxprec <= 64)) - return 0; + return zfp_false; zfp->minbits = minbits; zfp->maxbits = maxbits; zfp->maxprec = maxprec; zfp->minexp = minexp; - return 1; + return zfp_true; } size_t @@ -761,58 +901,75 @@ zfp_stream_execution(const zfp_stream* zfp) uint zfp_stream_omp_threads(const zfp_stream* zfp) { - return zfp->exec.params.omp.threads; + if (zfp->exec.policy == zfp_exec_omp) + return ((zfp_exec_params_omp*)zfp->exec.params)->threads; + return 0u; } uint zfp_stream_omp_chunk_size(const zfp_stream* zfp) { - return zfp->exec.params.omp.chunk_size; + if (zfp->exec.policy == zfp_exec_omp) + return ((zfp_exec_params_omp*)zfp->exec.params)->chunk_size; + return 0u; } -int +zfp_bool zfp_stream_set_execution(zfp_stream* zfp, zfp_exec_policy policy) { switch (policy) { case zfp_exec_serial: + if (zfp->exec.policy != policy && zfp->exec.params != NULL) { + free(zfp->exec.params); + zfp->exec.params = NULL; + } break; #ifdef ZFP_WITH_CUDA case zfp_exec_cuda: + if (zfp->exec.policy != policy && zfp->exec.params != NULL) { + free(zfp->exec.params); + zfp->exec.params = NULL; + } break; #endif case zfp_exec_omp: #ifdef _OPENMP if (zfp->exec.policy != policy) { - zfp->exec.params.omp.threads = 0; - zfp->exec.params.omp.chunk_size = 0; + if (zfp->exec.params != NULL) { + free(zfp->exec.params); + } + zfp_exec_params_omp* params = malloc(sizeof(zfp_exec_params_omp)); + params->threads = 0; + params->chunk_size = 0; + zfp->exec.params = (void*)params; } break; #else - return 0; + return zfp_false; #endif default: - return 0; + return zfp_false; } zfp->exec.policy = policy; - return 1; + return zfp_true; } -int +zfp_bool zfp_stream_set_omp_threads(zfp_stream* zfp, uint threads) { if (!zfp_stream_set_execution(zfp, zfp_exec_omp)) - return 0; - zfp->exec.params.omp.threads = threads; - return 1; + return zfp_false; + ((zfp_exec_params_omp*)zfp->exec.params)->threads = threads; + return zfp_true; } -int 
+zfp_bool zfp_stream_set_omp_chunk_size(zfp_stream* zfp, uint chunk_size) { if (!zfp_stream_set_execution(zfp, zfp_exec_omp)) - return 0; - zfp->exec.params.omp.chunk_size = chunk_size; - return 1; + return zfp_false; + ((zfp_exec_params_omp*)zfp->exec.params)->chunk_size = chunk_size; + return zfp_true; } /* public functions: utility functions --------------------------------------*/ @@ -935,7 +1092,7 @@ zfp_compress(zfp_stream* zfp, const zfp_field* field) #endif }; uint exec = zfp->exec.policy; - uint strided = zfp_field_stride(field, NULL); + uint strided = (uint)zfp_field_stride(field, NULL); uint dims = zfp_field_dimensionality(field); uint type = field->type; void (*compress)(zfp_stream*, const zfp_field*); @@ -995,7 +1152,7 @@ zfp_decompress(zfp_stream* zfp, zfp_field* field) #endif }; uint exec = zfp->exec.policy; - uint strided = zfp_field_stride(field, NULL); + uint strided = (uint)zfp_field_stride(field, NULL); uint dims = zfp_field_dimensionality(field); uint type = field->type; void (*decompress)(zfp_stream*, zfp_field*); diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index d15246e5..4134a273 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -1,21 +1,158 @@ -add_executable(testzfp testzfp.cpp) -target_link_libraries(testzfp zfp) -target_compile_definitions(testzfp PRIVATE ${zfp_compressed_array_defs}) - -option(ZFP_BUILD_TESTING_SMALL "Enable small-sized array testing" ON) -if(ZFP_BUILD_TESTING_SMALL) - foreach(D IN ITEMS 1 2 3 4) - foreach(P IN ITEMS 32 64) - add_test(NAME small-arrays-${D}d-fp${P} COMMAND testzfp small ${D}d fp${P}) - endforeach() - endforeach() +if(BUILD_TESTING OR BUILD_TESTING_FULL) + # testzfp + add_executable(testzfp testzfp.cpp) + target_link_libraries(testzfp zfp) + target_compile_definitions(testzfp PRIVATE ${zfp_compressed_array_defs}) + add_test(NAME testzfp COMMAND testzfp) + + # testviews + add_executable(testviews testviews.cpp) + if(ZFP_WITH_OPENMP) + target_link_libraries(testviews zfp 
OpenMP::OpenMP_C) + else() + target_link_libraries(testviews zfp) + endif() + target_compile_definitions(testviews PRIVATE ${zfp_compressed_array_defs}) + add_test(NAME testviews COMMAND testviews) endif() -option(ZFP_BUILD_TESTING_LARGE "Enable large-sized array testing" OFF) -if(ZFP_BUILD_TESTING_LARGE) - foreach(D IN ITEMS 1 2 3 4) - foreach(P IN ITEMS 32 64) - add_test(NAME large-arrays-${D}d-fp${P} COMMAND testzfp large ${D}d fp${P}) +if(BUILD_TESTING_FULL) + set(CMAKE_CXX_STANDARD 11) + + # CMAKE_SH-NOTFOUND needed for mingw builds + if(MINGW) + list(APPEND CMOCKA_ARGS "-DCMAKE_SH=CMAKE_SH-NOTFOUND") + list(APPEND GTEST_ARGS "-DCMAKE_SH=CMAKE_SH-NOTFOUND") + endif() + + # clone cmocka 1.1.0 into /build + list(APPEND CMOCKA_ARGS "-DWITH_STATIC_LIB=ON;-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER};-DUNIT_TESTING=OFF") + + include(ExternalProject) + ExternalProject_Add( + cmocka_cloned + GIT_REPOSITORY https://gitlab.com/cmocka/cmocka.git + GIT_TAG cmocka-1.1.5 + SOURCE_DIR "${CMAKE_BINARY_DIR}/cmocka-src" + BINARY_DIR "${CMAKE_BINARY_DIR}/cmocka-build" + CMAKE_ARGS "${CMOCKA_ARGS}" + INSTALL_COMMAND "" + STEP_TARGETS build + EXCLUDE_FROM_ALL TRUE + ) + ExternalProject_Get_Property(cmocka_cloned source_dir binary_dir) + + # name static library cmocka, wire up against cmocka_cloned + add_library(cmocka STATIC IMPORTED GLOBAL) + + # choose proper library path & extension + if(MSVC) + set(IMPORTED_LOCATION_PATH "${binary_dir}/src/${CMAKE_BUILD_TYPE}/cmocka-static.lib") + else() + set(IMPORTED_LOCATION_PATH "${binary_dir}/src/libcmocka-static.a") + endif() + set_property(TARGET cmocka + PROPERTY + IMPORTED_LOCATION "${IMPORTED_LOCATION_PATH}" + ) + + add_dependencies(cmocka cmocka_cloned) + include_directories(${source_dir}/include) + + # include home dir so #include statements are clear in test files + include_directories(${ZFP_SOURCE_DIR} ${ZFP_SOURCE_DIR}/include) + # access to constants/ and utils/ + include_directories(${CMAKE_CURRENT_SOURCE_DIR}) + + # suppress 
warnings for all targets + if(CMAKE_C_COMPILER_ID STREQUAL "GNU" OR CMAKE_C_COMPILER_ID STREQUAL "Clang") + add_compile_options(-Wno-unused-function) + endif() + # -Wno-variadic-macros was not working for gcc...revisit + if(CMAKE_C_COMPILER_ID STREQUAL "Clang") + add_compile_options(-Wno-gnu-zero-variadic-macro-arguments) + endif() + # suppress googletest warning "conversion from 'float' to 'testing::internal::BiggestInt', possible loss of data" + if(MSVC) + add_compile_options(/wd4244) + endif() + + + add_subdirectory(utils) + add_subdirectory(src) + + if(BUILD_CFP) + add_subdirectory(cfp) + endif() + + if(BUILD_ZFORP) + add_subdirectory(fortran) + endif() + + # needed to compile gtest on MSVC + if(MSVC) + list(APPEND GTEST_ARGS "/D:_SILENCE_TR1_DEPRECATION_NAMESPACE_WARNING=1") + endif() + + # TODO: spend time getting googletest to compile on MinGW + # checksums are generated through C tests, no need to compile C++ tests + if((NOT MINGW) AND (NOT DEFINED ZFP_OMP_TESTS_ONLY) AND (NOT PRINT_CHECKSUMS)) + # clone googletest into build/ + configure_file(CMakeLists.txt.in ${ZFP_BINARY_DIR}/tests/googletest-download/CMakeLists.txt) + execute_process(COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" ${GTEST_ARGS} . + RESULT_VARIABLE result + WORKING_DIRECTORY ${ZFP_BINARY_DIR}/tests/googletest-download + ) + + if(result) + message(FATAL_ERROR "CMake step for googletest failed: ${result}") + endif() + # build gtest + execute_process(COMMAND ${CMAKE_COMMAND} --build . 
+ RESULT_VARIABLE result + WORKING_DIRECTORY ${ZFP_BINARY_DIR}/tests/googletest-download + ) + if(result) + message(FATAL_ERROR "Build step for googletest failed: ${result}") + endif() + + set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) + + add_subdirectory(${ZFP_BINARY_DIR}/tests/googletest-src + ${ZFP_BINARY_DIR}/tests/googletest-build + ) + + if(CMAKE_VERSION VERSION_LESS 2.8.11) + include_directories("${gtest_SOURCE_DIR}/include") + endif() + + # needed to compile zfp tests with gtest on MSVC + if(MSVC) + target_compile_definitions(gtest_main INTERFACE GTEST_LINKED_AS_SHARED_LIBRARY=1) + endif() + + add_subdirectory(array) + endif() + + option(ZFP_BUILD_TESTING_SMALL "Enable small-sized array testing" ON) + if(ZFP_BUILD_TESTING_SMALL) + foreach(D IN ITEMS 1 2 3 4) + foreach(P IN ITEMS 32 64) + add_test(NAME small-arrays-${D}d-fp${P} COMMAND testzfp small ${D}d fp${P}) + endforeach() + endforeach() + endif() + + option(ZFP_BUILD_TESTING_LARGE "Enable large-sized array testing" OFF) + if(ZFP_BUILD_TESTING_LARGE) + foreach(D IN ITEMS 1 2 3 4) + foreach(P IN ITEMS 32 64) + add_test(NAME large-arrays-${D}d-fp${P} COMMAND testzfp large ${D}d fp${P}) + endforeach() endforeach() - endforeach() + endif() + + if(BUILD_ZFPY) + add_subdirectory(python) + endif() endif() diff --git a/tests/CMakeLists.txt.in b/tests/CMakeLists.txt.in new file mode 100644 index 00000000..46e7b088 --- /dev/null +++ b/tests/CMakeLists.txt.in @@ -0,0 +1,16 @@ +cmake_minimum_required(VERSION 2.8.7) + +project(googletest-download NONE) + +include(ExternalProject) +ExternalProject_Add( + googletest + GIT_REPOSITORY https://github.com/google/googletest.git + GIT_TAG e2239ee6043f73722e7aa812a459f54a28552929 #703bd9caab50b139428cea1aaff9974ebee5742e + SOURCE_DIR "${ZFP_BINARY_DIR}/tests/googletest-src" + BINARY_DIR "${ZFP_BINARY_DIR}/tests/googletest-build" + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + INSTALL_COMMAND "" + TEST_COMMAND "" +) diff --git a/tests/Makefile b/tests/Makefile index 
2c496ee3..94339dc5 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -1,13 +1,17 @@
 include ../Config
 
 BINDIR = ../bin
-TARGETS = $(BINDIR)/testzfp
-CXXLIBS = -L../lib -lzfp
+TARGETS = $(BINDIR)/testzfp $(BINDIR)/testviews
+INCS = -I../include
+LIBS = -L../lib -lzfp $(LDFLAGS)
 
 all: $(TARGETS)
 
 $(BINDIR)/testzfp: testzfp.cpp ../lib/$(LIBZFP)
-	$(CXX) $(CXXFLAGS) -I../array testzfp.cpp $(CXXLIBS) -o $@
+	$(CXX) $(CXXFLAGS) $(INCS) testzfp.cpp $(LIBS) -o $@
+
+$(BINDIR)/testviews: testviews.cpp ../lib/$(LIBZFP)
+	$(CXX) $(CXXFLAGS) $(INCS) testviews.cpp $(LIBS) -o $@
 
 test: $(BINDIR)/testzfp
 	$(BINDIR)/testzfp
diff --git a/tests/array/CMakeLists.txt b/tests/array/CMakeLists.txt
new file mode 100644
index 00000000..81a6c253
--- /dev/null
+++ b/tests/array/CMakeLists.txt
@@ -0,0 +1,7 @@
+include_directories(${CMAKE_CURRENT_SOURCE_DIR})
+
+add_subdirectory(array)
+add_subdirectory(constArray)
+add_subdirectory(decode)
+add_subdirectory(encode)
+add_subdirectory(zfp)
diff --git a/tests/array/array/CMakeLists.txt b/tests/array/array/CMakeLists.txt
new file mode 100644
index 00000000..f47269bb
--- /dev/null
+++ b/tests/array/array/CMakeLists.txt
@@ -0,0 +1,74 @@
+function(zfp_add_cpp_tests dims type bits)
+  # test compressed array class
+  set(test_name testArray${dims}${type})
+  add_executable(${test_name} ${test_name}.cpp)
+  target_link_libraries(${test_name}
+    gtest gtest_main zfp zfpHashLib genSmoothRandNumsLib zfpChecksumsLib)
+  target_compile_definitions(${test_name} PRIVATE ${zfp_compressed_array_defs})
+  add_test(NAME ${test_name} COMMAND ${test_name})
+
+  # test class's references
+  set(test_name testArray${dims}${type}Refs)
+  add_executable(${test_name} ${test_name}.cpp)
+  target_link_libraries(${test_name}
+    gtest gtest_main zfp zfpHashLib rand${bits}Lib)
+  target_compile_definitions(${test_name} PRIVATE ${zfp_compressed_array_defs})
+  add_test(NAME ${test_name} COMMAND ${test_name})
+
+  # test class's pointers
+  set(test_name testArray${dims}${type}Ptrs)
+
add_executable(${test_name} ${test_name}.cpp) + target_link_libraries(${test_name} + gtest gtest_main zfp) + target_compile_definitions(${test_name} PRIVATE ${zfp_compressed_array_defs}) + add_test(NAME ${test_name} COMMAND ${test_name}) + + # test class's iterators + set(test_name testArray${dims}${type}Iters) + add_executable(${test_name} ${test_name}.cpp) + target_link_libraries(${test_name} + gtest gtest_main zfp) + target_compile_definitions(${test_name} PRIVATE ${zfp_compressed_array_defs}) + add_test(NAME ${test_name} COMMAND ${test_name}) + + # test class's views + set(test_name testArray${dims}${type}Views) + add_executable(${test_name} ${test_name}.cpp) + target_link_libraries(${test_name} + gtest gtest_main zfp) + target_compile_definitions(${test_name} PRIVATE ${zfp_compressed_array_defs}) + add_test(NAME ${test_name} COMMAND ${test_name}) + + # test class's view pointers + set(test_name testArray${dims}${type}ViewPtrs) + add_executable(${test_name} ${test_name}.cpp) + target_link_libraries(${test_name} + gtest gtest_main zfp) + target_compile_definitions(${test_name} PRIVATE ${zfp_compressed_array_defs}) + add_test(NAME ${test_name} COMMAND ${test_name}) + + # test class's view iterators + set(test_name testArray${dims}${type}ViewIters) + add_executable(${test_name} ${test_name}.cpp) + target_link_libraries(${test_name} + gtest gtest_main zfp) + target_compile_definitions(${test_name} PRIVATE ${zfp_compressed_array_defs}) + add_test(NAME ${test_name} COMMAND ${test_name}) + +endfunction() + +zfp_add_cpp_tests(1 f 32) +zfp_add_cpp_tests(2 f 32) +zfp_add_cpp_tests(3 f 32) +zfp_add_cpp_tests(4 f 32) +zfp_add_cpp_tests(1 d 64) +zfp_add_cpp_tests(2 d 64) +zfp_add_cpp_tests(3 d 64) +zfp_add_cpp_tests(4 d 64) + +# test zfp::array::construct() invalid cases +set(test_name testConstruct) +add_executable(testConstruct testConstruct.cpp) +target_link_libraries(testConstruct gtest gtest_main zfp) +target_compile_definitions(testConstruct PRIVATE 
${zfp_compressed_array_defs}) +add_test(NAME testConstruct COMMAND testConstruct) diff --git a/tests/array/array/testArray1Base.cpp b/tests/array/array/testArray1Base.cpp new file mode 100644 index 00000000..442b2e46 --- /dev/null +++ b/tests/array/array/testArray1Base.cpp @@ -0,0 +1,194 @@ +/* TODO: figure out templated tests (TYPED_TEST) */ + +/* const_view */ + +TEST_P(TEST_FIXTURE, when_construct1dCompressedArrayFromConstView_then_rateConserved) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, getRate(), inputDataArr); + ZFP_ARRAY_TYPE::const_view v(&arr, 1, 1); + ZFP_ARRAY_TYPE arr2(v); + + EXPECT_EQ(arr.rate(), arr2.rate()); +} + +TEST_P(TEST_FIXTURE, when_construct1dCompressedArrayFromConstView_then_sizeConserved) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, getRate(), inputDataArr); + + size_t offsetX = 5; + size_t viewLenX = 3; + EXPECT_LT(offsetX + viewLenX, arr.size_x()); + + /* create view and construct from it */ + ZFP_ARRAY_TYPE::const_view v(&arr, offsetX, viewLenX); + ZFP_ARRAY_TYPE arr2(v); + + EXPECT_EQ(v.size_x(), arr2.size_x()); +} + +TEST_P(TEST_FIXTURE, when_construct1dCompressedArrayFromConstView_then_performsDeepCopy) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, getRate(), inputDataArr); + + size_t offsetX = 5; + size_t viewLenX = 3; + EXPECT_LT(offsetX + viewLenX, arr.size_x()); + + /* create view and construct from it */ + ZFP_ARRAY_TYPE::const_view v(&arr, offsetX, viewLenX); + ZFP_ARRAY_TYPE arr2(v); + + /* verify array entries */ + for (size_t i = 0; i < viewLenX; i++) { + EXPECT_EQ(arr(offsetX + i), arr2(i)); + } + + /* verify it's a deep copy */ + arr(offsetX + 0) = 999.; + EXPECT_NE(arr(offsetX), arr2(0)); +} + +/* view */ + +TEST_P(TEST_FIXTURE, when_construct1dCompressedArrayFromView_then_rateConserved) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, getRate(), inputDataArr); + ZFP_ARRAY_TYPE::view v(&arr, 1, 1); + ZFP_ARRAY_TYPE arr2(v); + + EXPECT_EQ(arr.rate(), arr2.rate()); +} + +TEST_P(TEST_FIXTURE, 
when_construct1dCompressedArrayFromView_then_sizeConserved) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, getRate(), inputDataArr); + + size_t offsetX = 5; + size_t viewLenX = 3; + EXPECT_LT(offsetX + viewLenX, arr.size_x()); + + /* create view and construct from it */ + ZFP_ARRAY_TYPE::view v(&arr, offsetX, viewLenX); + ZFP_ARRAY_TYPE arr2(v); + + EXPECT_EQ(v.size_x(), arr2.size_x()); +} + +TEST_P(TEST_FIXTURE, when_construct1dCompressedArrayFromView_then_performsDeepCopy) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, getRate(), inputDataArr); + + size_t offsetX = 5; + size_t viewLenX = 3; + EXPECT_LT(offsetX + viewLenX, arr.size_x()); + + /* create view and construct from it */ + ZFP_ARRAY_TYPE::view v(&arr, offsetX, viewLenX); + ZFP_ARRAY_TYPE arr2(v); + + /* verify array entries */ + for (size_t i = 0; i < viewLenX; i++) { + EXPECT_EQ(arr(offsetX + i), arr2(i)); + } + + /* verify it's a deep copy */ + arr(offsetX + 0) = 999.; + EXPECT_NE(arr(offsetX), arr2(0)); +} + +/* private_const_view */ + +TEST_P(TEST_FIXTURE, when_construct1dCompressedArrayFromPrivateConstView_then_rateConserved) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, getRate(), inputDataArr); + ZFP_ARRAY_TYPE::private_const_view v(&arr, 1, 1); + ZFP_ARRAY_TYPE arr2(v); + + EXPECT_EQ(arr.rate(), arr2.rate()); +} + +TEST_P(TEST_FIXTURE, when_construct1dCompressedArrayFromPrivateConstView_then_sizeConserved) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, getRate(), inputDataArr); + + size_t offsetX = 5; + size_t viewLenX = 3; + EXPECT_LT(offsetX + viewLenX, arr.size_x()); + + /* create view and construct from it */ + ZFP_ARRAY_TYPE::private_const_view v(&arr, offsetX, viewLenX); + ZFP_ARRAY_TYPE arr2(v); + + EXPECT_EQ(v.size_x(), arr2.size_x()); +} + +TEST_P(TEST_FIXTURE, when_construct1dCompressedArrayFromPrivateConstView_then_performsDeepCopy) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, getRate(), inputDataArr); + + size_t offsetX = 5; + size_t viewLenX = 3; + EXPECT_LT(offsetX + viewLenX, arr.size_x()); + + /* 
create view and construct from it */ + ZFP_ARRAY_TYPE::private_const_view v(&arr, offsetX, viewLenX); + ZFP_ARRAY_TYPE arr2(v); + + /* verify array entries */ + for (size_t i = 0; i < viewLenX; i++) { + EXPECT_EQ(arr(offsetX + i), arr2(i)); + } + + /* verify it's a deep copy */ + arr(offsetX + 0) = 999.; + EXPECT_NE(arr(offsetX), arr2(0)); +} + +/* private_view */ + +TEST_P(TEST_FIXTURE, when_construct1dCompressedArrayFromPrivateView_then_rateConserved) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, getRate(), inputDataArr); + ZFP_ARRAY_TYPE::private_view v(&arr, 1, 1); + ZFP_ARRAY_TYPE arr2(v); + + EXPECT_EQ(arr.rate(), arr2.rate()); +} + +TEST_P(TEST_FIXTURE, when_construct1dCompressedArrayFromPrivateView_then_sizeConserved) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, getRate(), inputDataArr); + + size_t offsetX = 5; + size_t viewLenX = 3; + EXPECT_LT(offsetX + viewLenX, arr.size_x()); + + /* create view and construct from it */ + ZFP_ARRAY_TYPE::private_view v(&arr, offsetX, viewLenX); + ZFP_ARRAY_TYPE arr2(v); + + EXPECT_EQ(v.size_x(), arr2.size_x()); +} + +TEST_P(TEST_FIXTURE, when_construct1dCompressedArrayFromPrivateView_then_performsDeepCopy) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, getRate(), inputDataArr); + + size_t offsetX = 5; + size_t viewLenX = 3; + EXPECT_LT(offsetX + viewLenX, arr.size_x()); + + /* create view and construct from it */ + ZFP_ARRAY_TYPE::private_view v(&arr, offsetX, viewLenX); + ZFP_ARRAY_TYPE arr2(v); + + /* verify array entries */ + for (size_t i = 0; i < viewLenX; i++) { + EXPECT_EQ(arr(offsetX + i), arr2(i)); + } + + /* verify it's a deep copy */ + arr(offsetX + 0) = 999.; + EXPECT_NE(arr(offsetX), arr2(0)); +} + diff --git a/tests/array/array/testArray1ItersBase.cpp b/tests/array/array/testArray1ItersBase.cpp new file mode 100644 index 00000000..73c63ec9 --- /dev/null +++ b/tests/array/array/testArray1ItersBase.cpp @@ -0,0 +1,395 @@ +TEST_F(ARRAY_DIMS_SCALAR_TEST_ITERS, when_applyBrackets_then_returnsReferenceAtBracketPosition) +{ + 
size_t i = 1, i2 = 2; + arr[i] = VAL; + iter = arr.begin() + i2; + + EXPECT_EQ(VAL, iter[i - i2]); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_ITERS, when_postDecrementIterator_then_advancedAfterEval) +{ + arr[1] = VAL; + iter = arr.begin(); + iter++; + + SCALAR d = *iter--; + + EXPECT_EQ(VAL, d); + EXPECT_EQ(0u, iter.i()); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_ITERS, when_preDecrementIterator_then_advancedBeforeEval) +{ + arr[1] = VAL; + iter = arr.begin(); + iter++; + + EXPECT_EQ(0, *--iter); + EXPECT_EQ(0u, iter.i()); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_ITERS, when_iteratorPlusEquals_then_iterAdvanced) +{ + size_t i = 2; + arr[i] = VAL; + iter = arr.begin(); + + iter += i; + + EXPECT_EQ(i, iter.i()); + EXPECT_EQ(VAL, *iter); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_ITERS, when_iteratorMinusEquals_then_iterAdvanced) +{ + size_t iFromEnd = 2; + arr[ARRAY_SIZE - iFromEnd] = VAL; + iter = arr.end(); + + iter -= iFromEnd; + + EXPECT_EQ(ARRAY_SIZE - iFromEnd, iter.i()); + EXPECT_EQ(VAL, *iter); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_ITERS, when_incrementIterator_then_positionTraversesCorrectly) +{ + // force partial block + EXPECT_NE(0u, arr.size() % BLOCK_SIDE_LEN); + + iter = arr.begin(); + size_t totalBlocks = (arr.size() + 3) / 4; + for (size_t count = 0; count < totalBlocks; count++) { + // determine if block is complete or partial + size_t distanceFromEnd = arr.size() - iter.i(); + size_t blockLen = distanceFromEnd < BLOCK_SIDE_LEN ? 
distanceFromEnd : BLOCK_SIDE_LEN; + + // ensure entries lie in same block + size_t blockStartIndex = iter.i(); + + for (size_t i = 0; i < blockLen; i++) { + EXPECT_EQ(blockStartIndex + i, iter.i()); + iter++; + } + } + +// EXPECT_EQ(arr.end(), iter); // triggers googletest issue #742 + EXPECT_TRUE(arr.end() == iter); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_ITERS, when_decrementIterator_then_positionTraversesCorrectly) +{ + // force partial block + EXPECT_NE(0u, arr.size() % BLOCK_SIDE_LEN); + + iter = arr.end(); + size_t totalBlocks = (arr.size() + 3) / 4; + for (size_t count = 0; count < totalBlocks; count++) { + iter--; + + // determine if block is complete or partial + size_t blockEndIndex = iter.i(); + size_t blockLen = (blockEndIndex % BLOCK_SIDE_LEN) + 1; + + // ensure entries lie in same block + for (size_t i = 1; i < blockLen; i++) { + iter--; + EXPECT_EQ(blockEndIndex - i, iter.i()); + } + } + +// EXPECT_EQ(arr.begin(), iter); // triggers googletest issue #742 + EXPECT_TRUE(arr.begin() == iter); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_ITERS, when_subtractTwoIterators_then_resultIsDifference) +{ + iter = arr.begin(); + iter2 = arr.end(); + + EXPECT_EQ(ARRAY_SIZE, iter2 - iter); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_ITERS, when_addToIterator_then_returnsAdvancedIter) +{ + ptrdiff_t i = 2; + arr[i] = VAL; + iter = arr.begin(); + + EXPECT_EQ(VAL, *(iter + i)); + EXPECT_EQ(i, (iter + i).i()); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_ITERS, when_subtractFromIterator_then_returnsAdvancedIter) +{ + size_t iFromEnd = 1; + arr[ARRAY_SIZE - iFromEnd] = VAL; + iter = arr.end(); + + EXPECT_EQ(VAL, *(iter - iFromEnd)); + EXPECT_EQ(ARRAY_SIZE - iFromEnd, (iter - iFromEnd).i()); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_ITERS, given_sameArrayIteratorsWithSecondIndexedHigherThanFirst_when_compareFirstLessThanEqualToSecond_then_resultTrue) +{ + iter = arr.begin(); + iter2 = iter + 1; + + EXPECT_TRUE(iter <= iter2); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_ITERS, 
given_sameArrayAndIndexIterators_when_compareLessThanEqualTo_then_resultTrue) +{ + iter = arr.begin(); + iter2 = arr.begin(); + + EXPECT_TRUE(iter <= iter2); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_ITERS, given_sameArrayIteratorsWithFirstIndexedHigherThanSecond_when_compareFirstGreaterThanEqualToSecond_then_resultTrue) +{ + iter = arr.begin() + 1; + iter2 = arr.begin(); + + EXPECT_TRUE(iter >= iter2); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_ITERS, given_sameArrayAndIndexIterators_when_compareGreaterThanEqualTo_then_resultTrue) +{ + iter = arr.begin(); + iter2 = arr.begin(); + + EXPECT_TRUE(iter >= iter2); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_ITERS, given_sameArrayIteratorsWithSecondIndexedHigherThanFirst_when_compareFirstLessThanSecond_then_resultTrue) +{ + iter = arr.begin(); + iter2 = iter + 1; + + EXPECT_TRUE(iter < iter2); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_ITERS, given_sameArrayAndIndexIterators_when_compareLessThan_then_resultFalse) +{ + iter = arr.begin(); + iter2 = arr.begin(); + + EXPECT_FALSE(iter < iter2); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_ITERS, given_sameArrayIteratorsWithFirstIndexedHigherThanSecond_when_compareFirstGreaterThanSecond_then_resultTrue) +{ + iter = arr.begin() + 1; + iter2 = arr.begin(); + + EXPECT_TRUE(iter > iter2); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_ITERS, given_sameArrayAndIndexIterators_when_compareGreaterThan_then_resultFalse) +{ + iter = arr.begin(); + iter2 = arr.begin(); + + EXPECT_FALSE(iter > iter2); +} + +// const iterators + +TEST_F(ARRAY_DIMS_SCALAR_TEST_ITERS, when_applyBrackets_then_returnsConstReferenceAtBracketPosition) +{ + size_t i = 1, i2 = 2; + arr[i] = VAL; + citer = arr.cbegin() + i2; + + EXPECT_EQ(VAL, citer[i - i2]); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_ITERS, when_postDecrementConstIterator_then_advancedAfterEval) +{ + arr[1] = VAL; + citer = arr.cbegin(); + citer++; + + SCALAR d = *citer--; + + EXPECT_EQ(VAL, d); + EXPECT_EQ(0u, citer.i()); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_ITERS, 
when_preDecrementConstIterator_then_advancedBeforeEval) +{ + arr[1] = VAL; + citer = arr.cbegin(); + citer++; + + EXPECT_EQ(0, *--citer); + EXPECT_EQ(0u, citer.i()); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_ITERS, when_const_iteratorPlusEquals_then_iterAdvanced) +{ + size_t i = 2; + arr[i] = VAL; + citer = arr.cbegin(); + + citer += i; + + EXPECT_EQ(i, citer.i()); + EXPECT_EQ(VAL, *citer); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_ITERS, when_const_iteratorMinusEquals_then_iterAdvanced) +{ + size_t iFromEnd = 2; + arr[ARRAY_SIZE - iFromEnd] = VAL; + citer = arr.cend(); + + citer -= iFromEnd; + + EXPECT_EQ(ARRAY_SIZE - iFromEnd, citer.i()); + EXPECT_EQ(VAL, *citer); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_ITERS, when_incrementConstIterator_then_positionTraversesCorrectly) +{ + // force partial block + EXPECT_NE(0u, arr.size() % BLOCK_SIDE_LEN); + + citer = arr.cbegin(); + size_t totalBlocks = (arr.size() + 3) / 4; + for (size_t count = 0; count < totalBlocks; count++) { + // determine if block is complete or partial + size_t distanceFromEnd = arr.size() - citer.i(); + size_t blockLen = distanceFromEnd < BLOCK_SIDE_LEN ? 
distanceFromEnd : BLOCK_SIDE_LEN; + + // ensure entries lie in same block + size_t blockStartIndex = citer.i(); + + for (size_t i = 0; i < blockLen; i++) { + EXPECT_EQ(blockStartIndex + i, citer.i()); + citer++; + } + } + +// EXPECT_EQ(arr.cend(), citer); // triggers googletest issue #742 + EXPECT_TRUE(arr.cend() == citer); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_ITERS, when_decrementConstIterator_then_positionTraversesCorrectly) +{ + // force partial block + EXPECT_NE(0u, arr.size() % BLOCK_SIDE_LEN); + + citer = arr.cend(); + size_t totalBlocks = (arr.size() + 3) / 4; + for (size_t count = 0; count < totalBlocks; count++) { + citer--; + + // determine if block is complete or partial + size_t blockEndIndex = citer.i(); + size_t blockLen = (blockEndIndex % BLOCK_SIDE_LEN) + 1; + + // ensure entries lie in same block + for (size_t i = 1; i < blockLen; i++) { + citer--; + EXPECT_EQ(blockEndIndex - i, citer.i()); + } + } + +// EXPECT_EQ(arr.cbegin(), citer); // triggers googletest issue #742 + EXPECT_TRUE(arr.cbegin() == citer); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_ITERS, when_subtractTwoConstIterators_then_resultIsDifference) +{ + citer = arr.cbegin(); + citer2 = arr.cend(); + + EXPECT_EQ(ARRAY_SIZE, citer2 - citer); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_ITERS, when_addToConstIterator_then_returnsAdvancedIter) +{ + size_t i = 2; + arr[i] = VAL; + citer = arr.cbegin(); + + EXPECT_EQ(VAL, *(citer + i)); + EXPECT_EQ(i, (citer + i).i()); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_ITERS, when_subtractFromConstIterator_then_returnsAdvancedIter) +{ + size_t iFromEnd = 1; + arr[ARRAY_SIZE - iFromEnd] = VAL; + citer = arr.cend(); + + EXPECT_EQ(VAL, *(citer - iFromEnd)); + EXPECT_EQ(ARRAY_SIZE - iFromEnd, (citer - iFromEnd).i()); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_ITERS, given_sameArrayConstIteratorsWithSecondIndexedHigherThanFirst_when_compareFirstLessThanEqualToSecond_then_resultTrue) +{ + citer = arr.cbegin(); + citer2 = citer + 1; + + EXPECT_TRUE(citer <= citer2); +} + 
+TEST_F(ARRAY_DIMS_SCALAR_TEST_ITERS, given_sameArrayAndIndexConstIterators_when_compareLessThanEqualTo_then_resultTrue) +{ + citer = arr.cbegin(); + citer2 = arr.cbegin(); + + EXPECT_TRUE(citer <= citer2); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_ITERS, given_sameArrayConstIteratorsWithFirstIndexedHigherThanSecond_when_compareFirstGreaterThanEqualToSecond_then_resultTrue) +{ + citer = arr.cbegin() + 1; + citer2 = arr.cbegin(); + + EXPECT_TRUE(citer >= citer2); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_ITERS, given_sameArrayAndIndexConstIterators_when_compareGreaterThanEqualTo_then_resultTrue) +{ + citer = arr.cbegin(); + citer2 = arr.cbegin(); + + EXPECT_TRUE(citer >= citer2); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_ITERS, given_sameArrayConstIteratorsWithSecondIndexedHigherThanFirst_when_compareFirstLessThanSecond_then_resultTrue) +{ + citer = arr.cbegin(); + citer2 = citer + 1; + + EXPECT_TRUE(citer < citer2); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_ITERS, given_sameArrayAndIndexConstIterators_when_compareLessThan_then_resultFalse) +{ + citer = arr.cbegin(); + citer2 = arr.cbegin(); + + EXPECT_FALSE(citer < citer2); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_ITERS, given_sameArrayConstIteratorsWithFirstIndexedHigherThanSecond_when_compareFirstGreaterThanSecond_then_resultTrue) +{ + citer = arr.cbegin() + 1; + citer2 = arr.cbegin(); + + EXPECT_TRUE(citer > citer2); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_ITERS, given_sameArrayAndIndexConstIterators_when_compareGreaterThan_then_resultFalse) +{ + citer = arr.cbegin(); + citer2 = arr.cbegin(); + + EXPECT_FALSE(citer > citer2); +} diff --git a/tests/array/array/testArray1RefsBase.cpp b/tests/array/array/testArray1RefsBase.cpp new file mode 100644 index 00000000..7d361e47 --- /dev/null +++ b/tests/array/array/testArray1RefsBase.cpp @@ -0,0 +1,37 @@ +TEST_F(ARRAY_DIMS_SCALAR_TEST_REFS, when_resize_then_sizeChanges) +{ + EXPECT_EQ(ARRAY_SIZE, arr.size()); + + size_t newLen = ARRAY_SIZE + 1; + arr.resize(newLen); + + EXPECT_EQ(newLen, arr.size()); +} 
+ +TEST_F(ARRAY_DIMS_SCALAR_TEST_REFS, when_getIndexWithParentheses_then_refReturned) +{ + size_t i = 1; + arr(i) = VAL; + + EXPECT_EQ(VAL, arr(i)); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_REFS, given_constCompressedArray_when_getIndexWithBrackets_then_valReturned) +{ + size_t i = 1; + arr[i] = VAL; + + const array1 arrConst = arr; + + EXPECT_EQ(VAL, arrConst[i]); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_REFS, given_constCompressedArray_when_getIndexWithParentheses_then_valReturned) +{ + size_t i = 1; + arr[i] = VAL; + + const array1 arrConst = arr; + + EXPECT_EQ(VAL, arrConst(i)); +} diff --git a/tests/array/array/testArray1ViewsBase.cpp b/tests/array/array/testArray1ViewsBase.cpp new file mode 100644 index 00000000..fd3f360b --- /dev/null +++ b/tests/array/array/testArray1ViewsBase.cpp @@ -0,0 +1,192 @@ +/* preview */ + +/* this also tests const_view */ +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, when_previewFullConstructor1D_then_lengthAndOffsetSet) +{ + ZFP_ARRAY_TYPE::const_view v(&arr, offset, viewLen); + + EXPECT_EQ(viewLen, v.size()); + EXPECT_EQ(offset, v.global_x(0)); +} + +/* const_view */ + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, given_constView_when_sizeX_then_viewXLenReturned) +{ + ZFP_ARRAY_TYPE::const_view v(&arr, offset, viewLen); + + EXPECT_EQ(viewLen, v.size_x()); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, given_constView_when_accessorBrackets_then_correctEntriesReturned) +{ + ZFP_ARRAY_TYPE::const_view v(&arr, offset, viewLen); + + for (size_t i = 0; i < viewLen; i++) { + EXPECT_EQ(arr[offset + i], v[i]); + } +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, given_constView_when_accessorParens_then_correctEntriesReturned) +{ + ZFP_ARRAY_TYPE::const_view v(&arr, offset, viewLen); + + for (size_t i = 0; i < viewLen; i++) { + EXPECT_EQ(arr[offset + i], v(i)); + } +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, when_constViewFullConstructor_then_isShallowCopyOfCompressedArray) +{ + ZFP_ARRAY_TYPE::const_view v(&arr, 1, 1); + + /* indices of view and arr */ + size_t vI = 2; + 
size_t aI = v.global_x(vI); + + SCALAR oldVal = arr[aI]; + EXPECT_EQ(oldVal, v(vI)); + + arr[aI] += 1; + SCALAR newVal = arr[aI]; + EXPECT_NE(oldVal, newVal); + + EXPECT_EQ(newVal, v(vI)); +} + +/* view */ + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, when_viewFullConstructor_then_lengthAndOffsetSet) +{ + ZFP_ARRAY_TYPE::view v(&arr, offset, viewLen); + + EXPECT_EQ(viewLen, v.size_x()); + EXPECT_EQ(offset, v.global_x(0)); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, given_view_when_setEntryWithBrackets_then_originalArrayUpdated) +{ + ZFP_ARRAY_TYPE::view v(&arr, offset, viewLen); + size_t i = 1; + SCALAR val = 3.14; + + EXPECT_NE(val, arr(offset + i)); + v[i] = val; + + EXPECT_EQ(arr(offset + i), v(i)); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, when_viewFullConstructor_then_isShallowCopyOfCompressedArray) +{ + ZFP_ARRAY_TYPE::view v(&arr, 1, 1); + + /* indices of view and arr */ + size_t vI = 2; + size_t aI = v.global_x(vI); + + SCALAR oldVal = arr[aI]; + EXPECT_EQ(oldVal, v(vI)); + + arr[aI] += 1; + SCALAR newVal = arr[aI]; + EXPECT_NE(oldVal, newVal); + + EXPECT_EQ(newVal, v(vI)); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, given_view_when_setEntryWithParens_then_originalArrayUpdated) +{ + ZFP_ARRAY_TYPE::view v(&arr, offset, viewLen); + size_t i = 1; + SCALAR val = 3.14; + + EXPECT_NE(val, arr(offset + i)); + v(i) = val; + + EXPECT_EQ(arr(offset + i), v(i)); +} + +/* private_const_view */ + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, when_privateConstViewFullConstructor_then_lengthAndOffsetSet) +{ + ZFP_ARRAY_TYPE::private_const_view v(&arr, offset, viewLen); + + EXPECT_EQ(viewLen, v.size()); + EXPECT_EQ(viewLen, v.size_x()); + + EXPECT_EQ(offset, v.global_x(0)); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, given_privateConstView_when_sizeX_then_viewLenReturned) +{ + ZFP_ARRAY_TYPE::private_const_view v(&arr, offset, viewLen); + EXPECT_EQ(viewLen, v.size_x()); +} + +/* private_view */ + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, 
when_privateViewFullConstructor_then_lengthAndOffsetSet) +{ + ZFP_ARRAY_TYPE::private_view v(&arr, offset, viewLen); + + EXPECT_EQ(viewLen, v.size()); + EXPECT_EQ(viewLen, v.size_x()); + + EXPECT_EQ(offset, v.global_x(0)); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, given_privateView_when_partitionWithLimitOnCount_then_setsUniqueBlockBounds) +{ + size_t count = 3; + size_t prevOffset, prevLen, offset, len; + + /* partition such that each gets at least 1 block */ + size_t blockSideLen = 4; + size_t arrBlockCount = (arr.size_x() + (blockSideLen - 1)) / blockSideLen; + EXPECT_LE(count, arrBlockCount); + + /* base case */ + ZFP_ARRAY_TYPE::private_view v(&arr); + v.partition(0, count); + + /* expect to start at first index, zero */ + prevOffset = v.global_x(0); + EXPECT_EQ(0, prevOffset); + + /* expect to have at least 1 block */ + prevLen = v.size_x(); + EXPECT_LE(blockSideLen, prevLen); + + /* successive cases are compared to previous */ + for (size_t i = 1; i < count - 1; i++) { + ZFP_ARRAY_TYPE::private_view v2(&arr); + v2.partition(i, count); + + /* expect blocks continue where previous left off */ + offset = v2.global_x(0); + EXPECT_EQ(prevOffset + prevLen, offset); + + /* expect to have at least 1 block */ + len = v2.size_x(); + EXPECT_LE(blockSideLen, len); + + prevOffset = offset; + prevLen = len; + } + + /* last partition case */ + ZFP_ARRAY_TYPE::private_view v3(&arr); + v3.partition(count - 1, count); + + /* expect blocks continue where previous left off */ + offset = v3.global_x(0); + EXPECT_EQ(prevOffset + prevLen, offset); + + /* last partition could hold a partial block */ + len = v3.size_x(); + EXPECT_LT(0u, len); + + /* expect to end on final index */ + EXPECT_EQ(arr.size_x(), offset + len); +} diff --git a/tests/array/array/testArray1d.cpp b/tests/array/array/testArray1d.cpp new file mode 100644 index 00000000..fc192f03 --- /dev/null +++ b/tests/array/array/testArray1d.cpp @@ -0,0 +1,45 @@ +#include "zfp/array1.hpp" +#include "zfp/array3.hpp" 
+#include "zfp/array4.hpp" +#include "zfp/factory.hpp" +#include "zfp/array2.hpp" +using namespace zfp; + +extern "C" { + #include "constants/1dDouble.h" +} + +#include "gtest/gtest.h" +#include "utils/gtestDoubleEnv.h" +#include "utils/gtestBaseFixture.h" +#include "utils/predicates.h" + +class Array1dTestEnv : public ArrayDoubleTestEnv { +public: + virtual int getDims() { return 1; } +}; + +Array1dTestEnv* const testEnv = new Array1dTestEnv; + +class Array1dTest : public ArrayNdTestFixture {}; + +#define TEST_FIXTURE Array1dTest + +#define ZFP_ARRAY_TYPE array1d +#define ZFP_ARRAY_TYPE_WRONG_SCALAR array1f +#define ZFP_ARRAY_TYPE_WRONG_DIM array2d +#define ZFP_ARRAY_TYPE_WRONG_SCALAR_DIM array2f +#define ZFP_ARRAY_NOT_INCLUDED_TYPE array3d + +#define UINT uint64 +#define SCALAR double +#define DIMS 1 + +#include "testArrayBase.cpp" +#include "testArray1Base.cpp" + +int main(int argc, char* argv[]) { + ::testing::InitGoogleTest(&argc, argv); + static_cast<void>(::testing::AddGlobalTestEnvironment(testEnv)); + return RUN_ALL_TESTS(); +} diff --git a/tests/array/array/testArray1dIters.cpp b/tests/array/array/testArray1dIters.cpp new file mode 100644 index 00000000..18276a15 --- /dev/null +++ b/tests/array/array/testArray1dIters.cpp @@ -0,0 +1,10 @@ +#include "zfp/array1.hpp" +using namespace zfp; + +#define ARRAY_DIMS_SCALAR_TEST Array1dTest +#define ARRAY_DIMS_SCALAR_TEST_ITERS Array1dTestIters + +#include "utils/gtest1dTest.h" + +#include "testArrayItersBase.cpp" +#include "testArray1ItersBase.cpp" diff --git a/tests/array/array/testArray1dPtrs.cpp b/tests/array/array/testArray1dPtrs.cpp new file mode 100644 index 00000000..be7dada8 --- /dev/null +++ b/tests/array/array/testArray1dPtrs.cpp @@ -0,0 +1,9 @@ +#include "zfp/array1.hpp" +using namespace zfp; + +#define ARRAY_DIMS_SCALAR_TEST Array1dTest +#define ARRAY_DIMS_SCALAR_TEST_PTRS Array1dTestPtrs + +#include "utils/gtest1dTest.h" + +#include "testArrayPtrsBase.cpp" diff --git a/tests/array/array/testArray1dRefs.cpp
b/tests/array/array/testArray1dRefs.cpp new file mode 100644 index 00000000..6206dbb5 --- /dev/null +++ b/tests/array/array/testArray1dRefs.cpp @@ -0,0 +1,14 @@ +#include "zfp/array1.hpp" +using namespace zfp; + +extern "C" { + #include "utils/rand64.h" +} + +#define ARRAY_DIMS_SCALAR_TEST Array1dTest +#define ARRAY_DIMS_SCALAR_TEST_REFS Array1dTestRefs + +#include "utils/gtest1dTest.h" + +#include "testArrayRefsBase.cpp" +#include "testArray1RefsBase.cpp" diff --git a/tests/array/array/testArray1dViewIters.cpp b/tests/array/array/testArray1dViewIters.cpp new file mode 100644 index 00000000..b00a9bce --- /dev/null +++ b/tests/array/array/testArray1dViewIters.cpp @@ -0,0 +1,17 @@ +#include "zfp/array1.hpp" +using namespace zfp; + +extern "C" { + #include "utils/rand64.h" +} + +#define ARRAY_DIMS_SCALAR_TEST Array1dTest +#define ARRAY_DIMS_SCALAR_TEST_VIEW_ITERS Array1dTestViewIters + +#include "utils/gtest1dTest.h" + +#define ZFP_ARRAY_TYPE array1d +#define SCALAR double +#define DIMS 1 + +#include "testArrayViewItersBase.cpp" diff --git a/tests/array/array/testArray1dViewPtrs.cpp b/tests/array/array/testArray1dViewPtrs.cpp new file mode 100644 index 00000000..df2b35ca --- /dev/null +++ b/tests/array/array/testArray1dViewPtrs.cpp @@ -0,0 +1,17 @@ +#include "zfp/array1.hpp" +using namespace zfp; + +extern "C" { + #include "utils/rand64.h" +} + +#define ARRAY_DIMS_SCALAR_TEST Array1dTest +#define ARRAY_DIMS_SCALAR_TEST_VIEW_PTRS Array1dTestViewPtrs + +#include "utils/gtest1dTest.h" + +#define ZFP_ARRAY_TYPE array1d +#define SCALAR double +#define DIMS 1 + +#include "testArrayViewPtrsBase.cpp" diff --git a/tests/array/array/testArray1dViews.cpp b/tests/array/array/testArray1dViews.cpp new file mode 100644 index 00000000..3741a121 --- /dev/null +++ b/tests/array/array/testArray1dViews.cpp @@ -0,0 +1,18 @@ +#include "zfp/array1.hpp" +using namespace zfp; + +extern "C" { + #include "utils/rand64.h" +} + +#define ARRAY_DIMS_SCALAR_TEST Array1dTest +#define 
ARRAY_DIMS_SCALAR_TEST_VIEWS Array1dTestViews + +#include "utils/gtest1dTest.h" + +#define ZFP_ARRAY_TYPE array1d +#define SCALAR double +#define DIMS 1 + +#include "testArrayViewsBase.cpp" +#include "testArray1ViewsBase.cpp" diff --git a/tests/array/array/testArray1f.cpp b/tests/array/array/testArray1f.cpp new file mode 100644 index 00000000..9176090c --- /dev/null +++ b/tests/array/array/testArray1f.cpp @@ -0,0 +1,45 @@ +#include "zfp/array1.hpp" +#include "zfp/array3.hpp" +#include "zfp/array4.hpp" +#include "zfp/factory.hpp" +#include "zfp/array2.hpp" +using namespace zfp; + +extern "C" { + #include "constants/1dFloat.h" +} + +#include "gtest/gtest.h" +#include "utils/gtestFloatEnv.h" +#include "utils/gtestBaseFixture.h" +#include "utils/predicates.h" + +class Array1fTestEnv : public ArrayFloatTestEnv { +public: + virtual int getDims() { return 1; } +}; + +Array1fTestEnv* const testEnv = new Array1fTestEnv; + +class Array1fTest : public ArrayNdTestFixture {}; + +#define TEST_FIXTURE Array1fTest + +#define ZFP_ARRAY_TYPE array1f +#define ZFP_ARRAY_TYPE_WRONG_SCALAR array1d +#define ZFP_ARRAY_TYPE_WRONG_DIM array2f +#define ZFP_ARRAY_TYPE_WRONG_SCALAR_DIM array2d +#define ZFP_ARRAY_NOT_INCLUDED_TYPE array3f + +#define UINT uint32 +#define SCALAR float +#define DIMS 1 + +#include "testArrayBase.cpp" +#include "testArray1Base.cpp" + +int main(int argc, char* argv[]) { + ::testing::InitGoogleTest(&argc, argv); + static_cast<void>(::testing::AddGlobalTestEnvironment(testEnv)); + return RUN_ALL_TESTS(); +} diff --git a/tests/array/array/testArray1fIters.cpp b/tests/array/array/testArray1fIters.cpp new file mode 100644 index 00000000..7a8653bc --- /dev/null +++ b/tests/array/array/testArray1fIters.cpp @@ -0,0 +1,10 @@ +#include "zfp/array1.hpp" +using namespace zfp; + +#define ARRAY_DIMS_SCALAR_TEST Array1fTest +#define ARRAY_DIMS_SCALAR_TEST_ITERS Array1fTestIters + +#include "utils/gtest1fTest.h" + +#include "testArrayItersBase.cpp" +#include "testArray1ItersBase.cpp" diff
--git a/tests/array/array/testArray1fPtrs.cpp b/tests/array/array/testArray1fPtrs.cpp new file mode 100644 index 00000000..cea33849 --- /dev/null +++ b/tests/array/array/testArray1fPtrs.cpp @@ -0,0 +1,9 @@ +#include "zfp/array1.hpp" +using namespace zfp; + +#define ARRAY_DIMS_SCALAR_TEST Array1fTest +#define ARRAY_DIMS_SCALAR_TEST_PTRS Array1fTestPtrs + +#include "utils/gtest1fTest.h" + +#include "testArrayPtrsBase.cpp" diff --git a/tests/array/array/testArray1fRefs.cpp b/tests/array/array/testArray1fRefs.cpp new file mode 100644 index 00000000..26f2f6ed --- /dev/null +++ b/tests/array/array/testArray1fRefs.cpp @@ -0,0 +1,14 @@ +#include "zfp/array1.hpp" +using namespace zfp; + +extern "C" { + #include "utils/rand32.h" +} + +#define ARRAY_DIMS_SCALAR_TEST Array1fTest +#define ARRAY_DIMS_SCALAR_TEST_REFS Array1fTestRefs + +#include "utils/gtest1fTest.h" + +#include "testArrayRefsBase.cpp" +#include "testArray1RefsBase.cpp" diff --git a/tests/array/array/testArray1fViewIters.cpp b/tests/array/array/testArray1fViewIters.cpp new file mode 100644 index 00000000..6a7aee3a --- /dev/null +++ b/tests/array/array/testArray1fViewIters.cpp @@ -0,0 +1,17 @@ +#include "zfp/array1.hpp" +using namespace zfp; + +extern "C" { + #include "utils/rand64.h" +} + +#define ARRAY_DIMS_SCALAR_TEST Array1fTest +#define ARRAY_DIMS_SCALAR_TEST_VIEW_ITERS Array1fTestViewIters + +#include "utils/gtest1fTest.h" + +#define ZFP_ARRAY_TYPE array1f +#define SCALAR float +#define DIMS 1 + +#include "testArrayViewItersBase.cpp" diff --git a/tests/array/array/testArray1fViewPtrs.cpp b/tests/array/array/testArray1fViewPtrs.cpp new file mode 100644 index 00000000..60638ce7 --- /dev/null +++ b/tests/array/array/testArray1fViewPtrs.cpp @@ -0,0 +1,17 @@ +#include "zfp/array1.hpp" +using namespace zfp; + +extern "C" { + #include "utils/rand64.h" +} + +#define ARRAY_DIMS_SCALAR_TEST Array1fTest +#define ARRAY_DIMS_SCALAR_TEST_VIEW_PTRS Array1fTestViewPtrs + +#include "utils/gtest1fTest.h" + +#define 
ZFP_ARRAY_TYPE array1f +#define SCALAR float +#define DIMS 1 + +#include "testArrayViewPtrsBase.cpp" diff --git a/tests/array/array/testArray1fViews.cpp b/tests/array/array/testArray1fViews.cpp new file mode 100644 index 00000000..2bad06c9 --- /dev/null +++ b/tests/array/array/testArray1fViews.cpp @@ -0,0 +1,18 @@ +#include "zfp/array1.hpp" +using namespace zfp; + +extern "C" { + #include "utils/rand32.h" +} + +#define ARRAY_DIMS_SCALAR_TEST Array1fTest +#define ARRAY_DIMS_SCALAR_TEST_VIEWS Array1fTestViews + +#include "utils/gtest1fTest.h" + +#define ZFP_ARRAY_TYPE array1f +#define SCALAR float +#define DIMS 1 + +#include "testArrayViewsBase.cpp" +#include "testArray1ViewsBase.cpp" diff --git a/tests/array/array/testArray2Base.cpp b/tests/array/array/testArray2Base.cpp new file mode 100644 index 00000000..89ff5ce4 --- /dev/null +++ b/tests/array/array/testArray2Base.cpp @@ -0,0 +1,422 @@ +/* TODO: figure out templated tests (TYPED_TEST) */ + +/* const_view */ + +TEST_P(TEST_FIXTURE, when_construct2dCompressedArrayFromConstView_then_rateConserved) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); + ZFP_ARRAY_TYPE::const_view v(&arr, 1, 1, 1, 1); + ZFP_ARRAY_TYPE arr2(v); + + EXPECT_EQ(arr.rate(), arr2.rate()); +} + +TEST_P(TEST_FIXTURE, when_construct2dCompressedArrayFromConstView_then_sizeConserved) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); + + size_t offsetX = 5; + size_t viewLenX = 3; + EXPECT_LT(offsetX + viewLenX, arr.size_x()); + + size_t offsetY = 1; + size_t viewLenY = 3; + EXPECT_LT(offsetY + viewLenY, arr.size_y()); + + /* create view and construct from it */ + ZFP_ARRAY_TYPE::const_view v(&arr, offsetX, offsetY, viewLenX, viewLenY); + ZFP_ARRAY_TYPE arr2(v); + + EXPECT_EQ(v.size_x(), arr2.size_x()); + EXPECT_EQ(v.size_y(), arr2.size_y()); +} + +TEST_P(TEST_FIXTURE, when_construct2dCompressedArrayFromConstView_then_performsDeepCopy) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, 
inputDataSideLen, getRate(), inputDataArr); + + size_t offsetX = 5; + size_t viewLenX = 3; + EXPECT_LT(offsetX + viewLenX, arr.size_x()); + + size_t offsetY = 1; + size_t viewLenY = 3; + EXPECT_LT(offsetY + viewLenY, arr.size_y()); + + /* create view and construct from it */ + ZFP_ARRAY_TYPE::const_view v(&arr, offsetX, offsetY, viewLenX, viewLenY); + ZFP_ARRAY_TYPE arr2(v); + + /* verify array entries */ + for (size_t j = 0; j < viewLenY; j++) { + for (size_t i = 0; i < viewLenX; i++) { + EXPECT_EQ(arr(offsetX + i, offsetY + j), arr2(i, j)); + } + } + + /* verify it's a deep copy */ + arr(offsetX + 0, offsetY + 0) = 999.; + EXPECT_NE(arr(offsetX, offsetY), arr2(0, 0)); +} + +/* view */ + +TEST_P(TEST_FIXTURE, when_construct2dCompressedArrayFromView_then_rateConserved) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); + ZFP_ARRAY_TYPE::view v(&arr, 1, 1, 1, 1); + ZFP_ARRAY_TYPE arr2(v); + + EXPECT_EQ(arr.rate(), arr2.rate()); +} + +TEST_P(TEST_FIXTURE, when_construct2dCompressedArrayFromView_then_sizeConserved) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); + + size_t offsetX = 5; + size_t viewLenX = 3; + EXPECT_LT(offsetX + viewLenX, arr.size_x()); + + size_t offsetY = 1; + size_t viewLenY = 3; + EXPECT_LT(offsetY + viewLenY, arr.size_y()); + + /* create view and construct from it */ + ZFP_ARRAY_TYPE::view v(&arr, offsetX, offsetY, viewLenX, viewLenY); + ZFP_ARRAY_TYPE arr2(v); + + EXPECT_EQ(v.size_x(), arr2.size_x()); + EXPECT_EQ(v.size_y(), arr2.size_y()); +} + +TEST_P(TEST_FIXTURE, when_construct2dCompressedArrayFromView_then_performsDeepCopy) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); + + size_t offsetX = 5; + size_t viewLenX = 3; + EXPECT_LT(offsetX + viewLenX, arr.size_x()); + + size_t offsetY = 1; + size_t viewLenY = 3; + EXPECT_LT(offsetY + viewLenY, arr.size_y()); + + /* create view and construct from it */ + ZFP_ARRAY_TYPE::view v(&arr, 
offsetX, offsetY, viewLenX, viewLenY); + ZFP_ARRAY_TYPE arr2(v); + + /* verify array entries */ + for (size_t j = 0; j < viewLenY; j++) { + for (size_t i = 0; i < viewLenX; i++) { + EXPECT_EQ(arr(offsetX + i, offsetY + j), arr2(i, j)); + } + } + + /* verify it's a deep copy */ + arr(offsetX + 0, offsetY + 0) = 999.; + EXPECT_NE(arr(offsetX, offsetY), arr2(0, 0)); +} + +/* flat_view */ + +TEST_P(TEST_FIXTURE, when_construct2dCompressedArrayFromFlatView_then_rateConserved) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); + ZFP_ARRAY_TYPE::flat_view v(&arr, 1, 1, 1, 1); + ZFP_ARRAY_TYPE arr2(v); + + EXPECT_EQ(arr.rate(), arr2.rate()); +} + +TEST_P(TEST_FIXTURE, when_construct2dCompressedArrayFromFlatView_then_sizeConserved) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); + + size_t offsetX = 5; + size_t viewLenX = 3; + EXPECT_LT(offsetX + viewLenX, arr.size_x()); + + size_t offsetY = 1; + size_t viewLenY = 3; + EXPECT_LT(offsetY + viewLenY, arr.size_y()); + + /* create view and construct from it */ + ZFP_ARRAY_TYPE::flat_view v(&arr, offsetX, offsetY, viewLenX, viewLenY); + ZFP_ARRAY_TYPE arr2(v); + + EXPECT_EQ(v.size_x(), arr2.size_x()); + EXPECT_EQ(v.size_y(), arr2.size_y()); +} + +TEST_P(TEST_FIXTURE, when_construct2dCompressedArrayFromFlatView_then_performsDeepCopy) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); + + size_t offsetX = 5; + size_t viewLenX = 3; + EXPECT_LT(offsetX + viewLenX, arr.size_x()); + + size_t offsetY = 1; + size_t viewLenY = 3; + EXPECT_LT(offsetY + viewLenY, arr.size_y()); + + /* create view and construct from it */ + ZFP_ARRAY_TYPE::flat_view v(&arr, offsetX, offsetY, viewLenX, viewLenY); + ZFP_ARRAY_TYPE arr2(v); + + /* verify array entries */ + for (size_t j = 0; j < viewLenY; j++) { + for (size_t i = 0; i < viewLenX; i++) { + EXPECT_EQ(arr(offsetX + i, offsetY + j), arr2(i, j)); + } + } + + /* verify it's a deep copy */ + 
arr(offsetX + 0, offsetY + 0) = 999.; + EXPECT_NE(arr(offsetX, offsetY), arr2(0, 0)); +} + +/* nested_view */ + +TEST_P(TEST_FIXTURE, when_construct2dCompressedArrayFromNestedView_then_rateConserved) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); + ZFP_ARRAY_TYPE::nested_view v(&arr, 1, 1, 1, 1); + ZFP_ARRAY_TYPE arr2(v); + + /* rate may be increased when moving to lower dimension compressed array */ + EXPECT_LE(arr.rate(), arr2.rate()); +} + +TEST_P(TEST_FIXTURE, when_construct2dCompressedArrayFromNestedView_then_sizeConserved) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); + + size_t offsetX = 5; + size_t viewLenX = 3; + EXPECT_LT(offsetX + viewLenX, arr.size_x()); + + size_t offsetY = 1; + size_t viewLenY = 3; + EXPECT_LT(offsetY + viewLenY, arr.size_y()); + + /* create view and construct from it */ + ZFP_ARRAY_TYPE::nested_view v(&arr, offsetX, offsetY, viewLenX, viewLenY); + ZFP_ARRAY_TYPE arr2(v); + + EXPECT_EQ(v.size_x(), arr2.size_x()); + EXPECT_EQ(v.size_y(), arr2.size_y()); +} + +TEST_P(TEST_FIXTURE, when_construct2dCompressedArrayFromNestedView_then_performsDeepCopy) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); + + size_t offsetX = 5; + size_t viewLenX = 3; + EXPECT_LT(offsetX + viewLenX, arr.size_x()); + + size_t offsetY = 1; + size_t viewLenY = 3; + EXPECT_LT(offsetY + viewLenY, arr.size_y()); + + /* create view and construct from it */ + ZFP_ARRAY_TYPE::nested_view v(&arr, offsetX, offsetY, viewLenX, viewLenY); + ZFP_ARRAY_TYPE arr2(v); + + /* verify array entries */ + for (size_t j = 0; j < viewLenY; j++) { + for (size_t i = 0; i < viewLenX; i++) { + EXPECT_EQ(arr(offsetX + i, offsetY + j), arr2(i, j)); + } + } + + /* verify it's a deep copy */ + arr(offsetX + 0, offsetY + 0) = 999.; + EXPECT_NE(arr(offsetX, offsetY), arr2(0, 0)); +} + +/* nested_view1 (unique) */ + +TEST_P(TEST_FIXTURE, 
when_construct1dCompressedArrayFromNestedView1_then_rateConserved) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); + ZFP_ARRAY_TYPE::nested_view v(&arr, 1, 1, 1, 1); + ZFP_ARRAY_TYPE::nested_view1 v2 = v[0]; + + array1<SCALAR> arr2(v2); + + /* rate may be increased when moving to lower dimension compressed array */ + EXPECT_LE(arr.rate(), arr2.rate()); +} + +TEST_P(TEST_FIXTURE, when_construct1dCompressedArrayFromNestedView1_then_sizeConserved) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); + + size_t offsetX = 5; + size_t viewLenX = 3; + EXPECT_LT(offsetX + viewLenX, arr.size_x()); + + size_t offsetY = 1; + size_t viewLenY = 3; + EXPECT_LT(offsetY + viewLenY, arr.size_y()); + + /* create view and construct from it */ + ZFP_ARRAY_TYPE::nested_view v(&arr, offsetX, offsetY, viewLenX, viewLenY); + ZFP_ARRAY_TYPE::nested_view1 v2 = v[0]; + + array1<SCALAR> arr2(v2); + + EXPECT_EQ(v.size_x(), arr2.size_x()); +} + +TEST_P(TEST_FIXTURE, when_construct1dCompressedArrayFromNestedView1_then_performsDeepCopy) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); + + size_t offsetX = 5; + size_t viewLenX = 3; + EXPECT_LT(offsetX + viewLenX, arr.size_x()); + + size_t offsetY = 1; + size_t viewLenY = 3; + EXPECT_LT(offsetY + viewLenY, arr.size_y()); + + /* create view and construct from it */ + ZFP_ARRAY_TYPE::nested_view v(&arr, offsetX, offsetY, viewLenX, viewLenY); + size_t y = 1; + ZFP_ARRAY_TYPE::nested_view1 v2 = v[y]; + + array1<SCALAR> arr2(v2); + + /* verify array entries */ + for (size_t i = 0; i < viewLenX; i++) { + EXPECT_EQ(arr(offsetX + i, offsetY + y), arr2(i)); + } + + /* verify it's a deep copy */ + arr(offsetX + 0, offsetY + y) = 999.; + EXPECT_NE(arr(offsetX, offsetY + y), arr2(0)); +} + + +/* private_const_view */ + +TEST_P(TEST_FIXTURE, when_construct2dCompressedArrayFromPrivateConstView_then_rateConserved) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen,
getRate(), inputDataArr); + ZFP_ARRAY_TYPE::private_const_view v(&arr, 1, 1, 1, 1); + ZFP_ARRAY_TYPE arr2(v); + + EXPECT_EQ(arr.rate(), arr2.rate()); +} + +TEST_P(TEST_FIXTURE, when_construct2dCompressedArrayFromPrivateConstView_then_sizeConserved) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); + + size_t offsetX = 5; + size_t viewLenX = 3; + EXPECT_LT(offsetX + viewLenX, arr.size_x()); + + size_t offsetY = 1; + size_t viewLenY = 3; + EXPECT_LT(offsetY + viewLenY, arr.size_y()); + + /* create view and construct from it */ + ZFP_ARRAY_TYPE::private_const_view v(&arr, offsetX, offsetY, viewLenX, viewLenY); + ZFP_ARRAY_TYPE arr2(v); + + EXPECT_EQ(v.size_x(), arr2.size_x()); + EXPECT_EQ(v.size_y(), arr2.size_y()); +} + +TEST_P(TEST_FIXTURE, when_construct2dCompressedArrayFromPrivateConstView_then_performsDeepCopy) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); + + size_t offsetX = 5; + size_t viewLenX = 3; + EXPECT_LT(offsetX + viewLenX, arr.size_x()); + + size_t offsetY = 1; + size_t viewLenY = 3; + EXPECT_LT(offsetY + viewLenY, arr.size_y()); + + /* create view and construct from it */ + ZFP_ARRAY_TYPE::private_const_view v(&arr, offsetX, offsetY, viewLenX, viewLenY); + ZFP_ARRAY_TYPE arr2(v); + + /* verify array entries */ + for (size_t j = 0; j < viewLenY; j++) { + for (size_t i = 0; i < viewLenX; i++) { + EXPECT_EQ(arr(offsetX + i, offsetY + j), arr2(i, j)); + } + } + + /* verify it's a deep copy */ + arr(offsetX + 0, offsetY + 0) = 999.; + EXPECT_NE(arr(offsetX, offsetY), arr2(0, 0)); +} + +/* private_view */ + +TEST_P(TEST_FIXTURE, when_construct2dCompressedArrayFromPrivateView_then_rateConserved) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); + ZFP_ARRAY_TYPE::private_view v(&arr, 1, 1, 1, 1); + ZFP_ARRAY_TYPE arr2(v); + + EXPECT_EQ(arr.rate(), arr2.rate()); +} + +TEST_P(TEST_FIXTURE, 
when_construct2dCompressedArrayFromPrivateView_then_sizeConserved) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); + + size_t offsetX = 5; + size_t viewLenX = 3; + EXPECT_LT(offsetX + viewLenX, arr.size_x()); + + size_t offsetY = 1; + size_t viewLenY = 3; + EXPECT_LT(offsetY + viewLenY, arr.size_y()); + + /* create view and construct from it */ + ZFP_ARRAY_TYPE::private_view v(&arr, offsetX, offsetY, viewLenX, viewLenY); + ZFP_ARRAY_TYPE arr2(v); + + EXPECT_EQ(v.size_x(), arr2.size_x()); + EXPECT_EQ(v.size_y(), arr2.size_y()); +} + +TEST_P(TEST_FIXTURE, when_construct2dCompressedArrayFromPrivateView_then_performsDeepCopy) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); + + size_t offsetX = 5; + size_t viewLenX = 3; + EXPECT_LT(offsetX + viewLenX, arr.size_x()); + + size_t offsetY = 1; + size_t viewLenY = 3; + EXPECT_LT(offsetY + viewLenY, arr.size_y()); + + /* create view and construct from it */ + ZFP_ARRAY_TYPE::private_view v(&arr, offsetX, offsetY, viewLenX, viewLenY); + ZFP_ARRAY_TYPE arr2(v); + + /* verify array entries */ + for (size_t j = 0; j < viewLenY; j++) { + for (size_t i = 0; i < viewLenX; i++) { + EXPECT_EQ(arr(offsetX + i, offsetY + j), arr2(i, j)); + } + } + + /* verify it's a deep copy */ + arr(offsetX + 0, offsetY + 0) = 999.; + EXPECT_NE(arr(offsetX, offsetY), arr2(0, 0)); +} + diff --git a/tests/array/array/testArray2ItersBase.cpp b/tests/array/array/testArray2ItersBase.cpp new file mode 100644 index 00000000..bace3180 --- /dev/null +++ b/tests/array/array/testArray2ItersBase.cpp @@ -0,0 +1,73 @@ +TEST_F(ARRAY_DIMS_SCALAR_TEST_ITERS, given_partialBlocks_when_incrementIterator_then_positionTraversesCorrectly) +{ + // force partial block traversal + EXPECT_NE(0u, arr.size_x() % BLOCK_SIDE_LEN); + EXPECT_NE(0u, arr.size_y() % BLOCK_SIDE_LEN); + + size_t totalBlocksX = (arr.size_x() + 3) / 4; + size_t totalBlocksY = (arr.size_y() + 3) / 4; + size_t totalBlocks = 
totalBlocksX * totalBlocksY; + + iter = arr.begin(); + for (size_t count = 0; count < totalBlocks; count++) { + // determine if block is complete or partial + size_t distanceFromEnd = arr.size_x() - iter.i(); + size_t blockLenX = distanceFromEnd < BLOCK_SIDE_LEN ? distanceFromEnd : BLOCK_SIDE_LEN; + + distanceFromEnd = arr.size_y() - iter.j(); + size_t blockLenY = distanceFromEnd < BLOCK_SIDE_LEN ? distanceFromEnd : BLOCK_SIDE_LEN; + + // ensure entries lie in same block + size_t blockStartIndexI = iter.i(); + size_t blockStartIndexJ = iter.j(); + + for (size_t j = 0; j < blockLenY; j++) { + for (size_t i = 0; i < blockLenX; i++) { + EXPECT_EQ(blockStartIndexI + i, iter.i()); + EXPECT_EQ(blockStartIndexJ + j, iter.j()); + iter++; + } + } + } + +// EXPECT_EQ(arr.end(), iter); // triggers googletest issue #742 + EXPECT_TRUE(arr.end() == iter); +} + +// const iterators + +TEST_F(ARRAY_DIMS_SCALAR_TEST_ITERS, given_partialBlocks_when_incrementConstIterator_then_positionTraversesCorrectly) +{ + // force partial block traversal + EXPECT_NE(0u, arr.size_x() % BLOCK_SIDE_LEN); + EXPECT_NE(0u, arr.size_y() % BLOCK_SIDE_LEN); + + size_t totalBlocksX = (arr.size_x() + 3) / 4; + size_t totalBlocksY = (arr.size_y() + 3) / 4; + size_t totalBlocks = totalBlocksX * totalBlocksY; + + citer = arr.cbegin(); + for (size_t count = 0; count < totalBlocks; count++) { + // determine if block is complete or partial + size_t distanceFromEnd = arr.size_x() - citer.i(); + size_t blockLenX = distanceFromEnd < BLOCK_SIDE_LEN ? distanceFromEnd : BLOCK_SIDE_LEN; + + distanceFromEnd = arr.size_y() - citer.j(); + size_t blockLenY = distanceFromEnd < BLOCK_SIDE_LEN ? 
distanceFromEnd : BLOCK_SIDE_LEN; + + // ensure entries lie in same block + size_t blockStartIndexI = citer.i(); + size_t blockStartIndexJ = citer.j(); + + for (size_t j = 0; j < blockLenY; j++) { + for (size_t i = 0; i < blockLenX; i++) { + EXPECT_EQ(blockStartIndexI + i, citer.i()); + EXPECT_EQ(blockStartIndexJ + j, citer.j()); + citer++; + } + } + } + +// EXPECT_EQ(arr.cend(), citer); // triggers googletest issue #742 + EXPECT_TRUE(arr.cend() == citer); +} diff --git a/tests/array/array/testArray2PtrsBase.cpp b/tests/array/array/testArray2PtrsBase.cpp new file mode 100644 index 00000000..4a76b00b --- /dev/null +++ b/tests/array/array/testArray2PtrsBase.cpp @@ -0,0 +1,23 @@ +TEST_F(ARRAY_DIMS_SCALAR_TEST_PTRS, given_pointerAtXBoundary_when_increment_then_pointerPositionTraversesCorrectly) +{ + size_t i = arr.size_x() - 1; + size_t j = 2; + arr(0, j+1) = VAL; + + ptr = &arr(i, j); + + EXPECT_EQ(VAL, *++ptr); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_PTRS, given_pointerAtXBoundary_when_decrement_then_pointerPositionTraversesCorrectly) +{ + size_t i = 0; + size_t j = 2; + + size_t iNext = arr.size_x() - 1; + arr(iNext, j-1) = VAL; + + ptr = &arr(i, j); + + EXPECT_EQ(VAL, *--ptr); +} diff --git a/tests/array/array/testArray2RefsBase.cpp b/tests/array/array/testArray2RefsBase.cpp new file mode 100644 index 00000000..d09ddde0 --- /dev/null +++ b/tests/array/array/testArray2RefsBase.cpp @@ -0,0 +1,55 @@ +TEST_F(ARRAY_DIMS_SCALAR_TEST_REFS, when_resize_then_sizeChanges) +{ + EXPECT_EQ(ARRAY_SIZE_X, arr.size_x()); + EXPECT_EQ(ARRAY_SIZE_Y, arr.size_y()); + EXPECT_EQ(ARRAY_SIZE_X * ARRAY_SIZE_Y, arr.size()); + + size_t newLenX = ARRAY_SIZE_X + 1; + size_t newLenY = ARRAY_SIZE_Y - 2; + arr.resize(newLenX, newLenY); + + EXPECT_EQ(newLenX, arr.size_x()); + EXPECT_EQ(newLenY, arr.size_y()); + EXPECT_EQ(newLenX * newLenY, arr.size()); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_REFS, when_getIndexWithParentheses_then_refReturned) +{ + size_t i = 1, j = 1; + arr(i, j) = VAL; + + 
EXPECT_EQ(VAL, arr(i, j)); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_REFS, when_indexWithBracketsAlongsideParentheses_then_indexedProperly) +{ + size_t i = 1, j = 1; + size_t absIndex = j * arr.size_x() + i; + + arr[absIndex] = VAL; + EXPECT_EQ(VAL, arr(i, j)); + + arr(i, j) /= VAL; + EXPECT_EQ(1, arr[absIndex]); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_REFS, given_constCompressedArray_when_getIndexWithBrackets_then_valReturned) +{ + size_t i = 1; + arr[i] = VAL; + + const array2<SCALAR> arrConst = arr; + + EXPECT_EQ(VAL, arrConst[i]); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_REFS, given_constCompressedArray_when_getIndexWithParentheses_then_valReturned) +{ + size_t i = 1, j = 1; + size_t absIndex = j * arr.size_x() + i; + arr[absIndex] = VAL; + + const array2<SCALAR> arrConst = arr; + + EXPECT_EQ(VAL, arrConst(i, j)); +} diff --git a/tests/array/array/testArray2ViewsBase.cpp b/tests/array/array/testArray2ViewsBase.cpp new file mode 100644 index 00000000..2cd2230e --- /dev/null +++ b/tests/array/array/testArray2ViewsBase.cpp @@ -0,0 +1,398 @@ +/* preview */ + +/* this also tests const_view */ +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, when_previewFullConstructor2D_then_lengthAndOffsetSet) +{ + ZFP_ARRAY_TYPE::const_view v(&arr, offsetX, offsetY, viewLenX, viewLenY); + + EXPECT_EQ(viewLenX * viewLenY, v.size()); + EXPECT_EQ(viewLenX, v.size_x()); + EXPECT_EQ(viewLenY, v.size_y()); + + EXPECT_EQ(offsetX, v.global_x(0)); + EXPECT_EQ(offsetY, v.global_y(0)); +} + +/* const_view */ + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, given_constView_when_sizeXY_then_viewXYLenReturned) +{ + ZFP_ARRAY_TYPE::const_view v(&arr, offsetX, offsetY, viewLenX, viewLenY); + + EXPECT_EQ(viewLenX, v.size_x()); + EXPECT_EQ(viewLenY, v.size_y()); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, given_constView_when_accessorParens_then_correctEntriesReturned) +{ + ZFP_ARRAY_TYPE::const_view v(&arr, offsetX, offsetY, viewLenX, viewLenY); + + for (size_t j = 0; j < viewLenY; j++) { + for (size_t i = 0; i < viewLenX; i++) { + size_t offset = 
(offsetY + j) * arr.size_x() + offsetX + i; + EXPECT_EQ(arr[offset], v(i, j)); + } + } +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, when_constViewFullConstructor_then_isShallowCopyOfCompressedArray) +{ + ZFP_ARRAY_TYPE::const_view v(&arr, 1, 1, 1, 1); + + /* indices of view and arr */ + size_t vIX = 2; + size_t aIX = v.global_x(vIX); + size_t vIY = 2; + size_t aIY = v.global_y(vIY); + + SCALAR oldVal = arr(aIX, aIY); + EXPECT_EQ(oldVal, v(vIX, vIY)); + + arr(aIX, aIY) += 1; + SCALAR newVal = arr(aIX, aIY); + EXPECT_NE(oldVal, newVal); + + EXPECT_EQ(newVal, v(vIX, vIY)); +} + +/* view */ + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, when_viewFullConstructor_then_lengthAndOffsetSet) +{ + ZFP_ARRAY_TYPE::view v(&arr, offsetX, offsetY, viewLenX, viewLenY); + + EXPECT_EQ(viewLenX * viewLenY, v.size()); + EXPECT_EQ(viewLenX, v.size_x()); + EXPECT_EQ(viewLenY, v.size_y()); + + EXPECT_EQ(offsetX, v.global_x(0)); + EXPECT_EQ(offsetY, v.global_y(0)); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, when_viewFullConstructor_then_isShallowCopyOfCompressedArray) +{ + ZFP_ARRAY_TYPE::view v(&arr, 1, 1, 1, 1); + + /* indices of view and arr */ + size_t vIX = 2; + size_t aIX = v.global_x(vIX); + size_t vIY = 2; + size_t aIY = v.global_y(vIY); + + SCALAR oldVal = arr(aIX, aIY); + EXPECT_EQ(oldVal, v(vIX, vIY)); + + arr(aIX, aIY) += 1; + SCALAR newVal = arr(aIX, aIY); + EXPECT_NE(oldVal, newVal); + + EXPECT_EQ(newVal, v(vIX, vIY)); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, given_view_when_setEntryWithParens_then_originalArrayUpdated) +{ + ZFP_ARRAY_TYPE::view v(&arr, offsetX, offsetY, viewLenX, viewLenY); + size_t i = 1, j = 2; + SCALAR val = 3.14; + + EXPECT_NE(val, arr(offsetX + i, offsetY + j)); + v(i, j) = val; + + EXPECT_EQ(arr(offsetX + i, offsetY + j), v(i, j)); +} + +/* flat_view */ + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, when_flatViewFullConstructor_then_lengthAndOffsetSet) +{ + ZFP_ARRAY_TYPE::flat_view v(&arr, offsetX, offsetY, viewLenX, viewLenY); + + EXPECT_EQ(viewLenX * viewLenY, 
v.size()); + EXPECT_EQ(viewLenX, v.size_x()); + EXPECT_EQ(viewLenY, v.size_y()); + + EXPECT_EQ(offsetX, v.global_x(0)); + EXPECT_EQ(offsetY, v.global_y(0)); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, when_flatViewFullConstructor_then_isShallowCopyOfCompressedArray) +{ + ZFP_ARRAY_TYPE::flat_view v(&arr, 1, 1, 1, 1); + + /* indices of view and arr */ + size_t vIX = 2; + size_t aIX = v.global_x(vIX); + size_t vIY = 2; + size_t aIY = v.global_y(vIY); + + SCALAR oldVal = arr(aIX, aIY); + EXPECT_EQ(oldVal, v(vIX, vIY)); + + arr(aIX, aIY) += 1; + SCALAR newVal = arr(aIX, aIY); + EXPECT_NE(oldVal, newVal); + + EXPECT_EQ(newVal, v(vIX, vIY)); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, given_flatView_when_index_then_returnsFlatIndex) +{ + ZFP_ARRAY_TYPE::flat_view v(&arr, offsetX, offsetY, viewLenX, viewLenY); + + size_t i = 2, j = 1; + EXPECT_EQ(j*viewLenX + i, v.index(i, j)); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, given_flatView_when_ij_then_returnsUnflatIndices) +{ + ZFP_ARRAY_TYPE::flat_view v(&arr, offsetX, offsetY, viewLenX, viewLenY); + + size_t i = 2, j = 1; + size_t flatIndex = v.index(i, j); + + size_t vI, vJ; + v.ij(vI, vJ, flatIndex); + EXPECT_EQ(i, vI); + EXPECT_EQ(j, vJ); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, given_flatView_when_bracketAccessor_then_returnsValAtFlattenedIndex) +{ + ZFP_ARRAY_TYPE::flat_view v(&arr, offsetX, offsetY, viewLenX, viewLenY); + + size_t i = 2, j = 1; + size_t arrOffset = (offsetY + j)*arr.size_x() + (offsetX + i); + EXPECT_EQ(arr[arrOffset], v[v.index(i, j)]); +} + +/* nested_view */ +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, when_nestedView2FullConstructor2D_then_lengthAndOffsetSet) +{ + ZFP_ARRAY_TYPE::nested_view v(&arr, offsetX, offsetY, viewLenX, viewLenY); + + EXPECT_EQ(viewLenX * viewLenY, v.size()); + EXPECT_EQ(viewLenX, v.size_x()); + EXPECT_EQ(viewLenY, v.size_y()); + + EXPECT_EQ(offsetX, v.global_x(0)); + EXPECT_EQ(offsetY, v.global_y(0)); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, 
given_nestedView_when_parensAccessor_then_returnsValue) +{ + ZFP_ARRAY_TYPE::nested_view v(&arr, offsetX, offsetY, viewLenX, viewLenY); + + /* indices for view and array */ + size_t vI = 1; + size_t vJ = 2; + size_t aI = offsetX + vI; + size_t aJ = offsetY + vJ; + + arr(aI, aJ) = 5.5; + EXPECT_EQ(arr(aI, aJ), v(vI, vJ)); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, given_nestedView_when_parensMutator_then_setsValue) +{ + ZFP_ARRAY_TYPE::nested_view v(&arr, offsetX, offsetY, viewLenX, viewLenY); + + /* indices for view and array */ + size_t vI = 1; + size_t vJ = 2; + size_t aI = offsetX + vI; + size_t aJ = offsetY + vJ; + + SCALAR val = 5.5; + v(vI, vJ) = val; + EXPECT_EQ(val, arr(aI, aJ)); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, given_nestedView2_when_bracketIndex_then_returnsSliceFromView) +{ + ZFP_ARRAY_TYPE::nested_view v(&arr, offsetX, offsetY, viewLenX, viewLenY); + + /* test slice length */ + EXPECT_EQ(viewLenX, v[0].size_x()); +} + +/* nested_view1 */ + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, given_nestedView1_when_bracketAccessor_then_returnsVal) +{ + ZFP_ARRAY_TYPE::nested_view v(&arr, offsetX, offsetY, viewLenX, viewLenY); + + /* indices for view and array */ + size_t vJ = 2; + size_t aJ = offsetY + vJ; + + /* initialize values into row that will become slice */ + for (size_t aI = 0; aI < arr.size_x(); aI++) { + arr(aI, aJ) = (SCALAR)aI; + } + + EXPECT_EQ(viewLenX, v[vJ].size_x()); + for (size_t vI = 0; vI < viewLenX; vI++) { + EXPECT_EQ(arr(offsetX + vI, aJ), v[vJ][vI]); + } +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, given_nestedView1_when_parensAccessor_then_returnsVal) +{ + ZFP_ARRAY_TYPE::nested_view v(&arr, offsetX, offsetY, viewLenX, viewLenY); + + /* indices for view and array */ + size_t vJ = 2; + size_t aJ = offsetY + vJ; + + /* initialize values into row that will become slice */ + for (size_t aI = 0; aI < arr.size_x(); aI++) { + arr(aI, aJ) = (SCALAR)aI; + } + + EXPECT_EQ(viewLenX, v[vJ].size_x()); + for (size_t vI = 0; vI < viewLenX; vI++) { + 
EXPECT_EQ(arr(offsetX + vI, aJ), v[vJ](vI)); + } +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, given_nestedView1_when_bracketMutator_then_setsVal) +{ + ZFP_ARRAY_TYPE::nested_view v(&arr, offsetX, offsetY, viewLenX, viewLenY); + + /* indices for view and array */ + size_t vJ = 2; + size_t aJ = offsetY + vJ; + + /* initialize values into slice */ + for (size_t vI = 0; vI < v[vJ].size_x(); vI++) { + v[vJ][vI] = (SCALAR)vI; + } + + for (size_t vI = 0; vI < v[vJ].size_x(); vI++) { + EXPECT_EQ(v[vJ][vI], arr(offsetX + vI, aJ)); + } +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, given_nestedView1_when_parensMutator_then_setsVal) +{ + ZFP_ARRAY_TYPE::nested_view v(&arr, offsetX, offsetY, viewLenX, viewLenY); + + /* indices for view and array */ + size_t vJ = 2; + size_t aJ = offsetY + vJ; + + /* initialize values into slice */ + for (size_t vI = 0; vI < v[vJ].size_x(); vI++) { + v[vJ](vI) = (SCALAR)vI; + } + + for (size_t vI = 0; vI < v[vJ].size_x(); vI++) { + EXPECT_EQ(v[vJ][vI], arr(offsetX + vI, aJ)); + } +} + +/* private_const_view */ + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, when_privateConstViewFullConstructor_then_lengthAndOffsetSet) +{ + ZFP_ARRAY_TYPE::private_const_view v(&arr, offsetX, offsetY, viewLenX, viewLenY); + + EXPECT_EQ(viewLenX * viewLenY, v.size()); + EXPECT_EQ(viewLenX, v.size_x()); + EXPECT_EQ(viewLenY, v.size_y()); + + EXPECT_EQ(offsetX, v.global_x(0)); + EXPECT_EQ(offsetY, v.global_y(0)); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, given_privateConstView_when_sizeXY_then_viewLenReturned) +{ + ZFP_ARRAY_TYPE::private_const_view v(&arr, offsetX, offsetY, viewLenX, viewLenY); + + EXPECT_EQ(viewLenX, v.size_x()); + EXPECT_EQ(viewLenY, v.size_y()); +} + +/* private_view */ + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, when_privateViewFullConstructor_then_lengthAndOffsetSet) +{ + ZFP_ARRAY_TYPE::private_view v(&arr, offsetX, offsetY, viewLenX, viewLenY); + + EXPECT_EQ(viewLenX * viewLenY, v.size()); + EXPECT_EQ(viewLenX, v.size_x()); + EXPECT_EQ(viewLenY, v.size_y()); 
+ + EXPECT_EQ(offsetX, v.global_x(0)); + EXPECT_EQ(offsetY, v.global_y(0)); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, given_privateView_when_partitionWithLimitOnCount_then_setsUniqueBlockBoundsAlongLongerDimension) +{ + const size_t count = 3; + size_t prevOffsetX, prevLenX, offsetX, lenX; + + /* partition such that each gets at least 1 block */ + const size_t blockSideLen = 4; + size_t arrBlockCountX = (arr.size_x() + (blockSideLen - 1)) / blockSideLen; + size_t arrBlockCountY = (arr.size_y() + (blockSideLen - 1)) / blockSideLen; + /* ensure partition will happen along X */ + EXPECT_GT(arrBlockCountX, arrBlockCountY); + EXPECT_LE(count, arrBlockCountX); + + /* construct view */ + ZFP_ARRAY_TYPE::private_view v(&arr); + size_t offsetY = v.global_y(0); + size_t lenY = v.size_y(); + + /* base case */ + v.partition(0, count); + + /* along X, expect to start at first index, zero */ + prevOffsetX = v.global_x(0); + EXPECT_EQ(0, prevOffsetX); + /* expect to have at least 1 block */ + prevLenX = v.size_x(); + EXPECT_LE(blockSideLen, prevLenX); + + /* along Y, expect no changes */ + EXPECT_EQ(offsetY, v.global_y(0)); + EXPECT_EQ(lenY, v.size_y()); + + /* successive cases are compared to previous */ + for (size_t i = 1; i < count - 1; i++) { + ZFP_ARRAY_TYPE::private_view v2(&arr); + v2.partition(i, count); + + /* along X, expect blocks continue where previous left off */ + offsetX = v2.global_x(0); + EXPECT_EQ(prevOffsetX + prevLenX, offsetX); + /* expect to have at least 1 block */ + lenX = v2.size_x(); + EXPECT_LE(blockSideLen, lenX); + + /* along Y, expect no changes */ + EXPECT_EQ(offsetY, v2.global_y(0)); + EXPECT_EQ(lenY, v2.size_y()); + + prevOffsetX = offsetX; + prevLenX = lenX; + } + + /* last partition case */ + ZFP_ARRAY_TYPE::private_view v3(&arr); + v3.partition(count - 1, count); + + /* along X, expect blocks continue where previous left off */ + offsetX = v3.global_x(0); + EXPECT_EQ(prevOffsetX + prevLenX, offsetX); + /* last partition could hold a partial 
block */ + lenX = v3.size_x(); + EXPECT_LT(0u, lenX); + /* expect to end on final index */ + EXPECT_EQ(arr.size_x(), offsetX + lenX); + + /* along Y, expect no changes */ + EXPECT_EQ(offsetY, v3.global_y(0)); + EXPECT_EQ(lenY, v3.size_y()); +} diff --git a/tests/array/array/testArray2d.cpp b/tests/array/array/testArray2d.cpp new file mode 100644 index 00000000..2c1fa1c0 --- /dev/null +++ b/tests/array/array/testArray2d.cpp @@ -0,0 +1,45 @@ +#include "zfp/array1.hpp" +#include "zfp/array2.hpp" +#include "zfp/array4.hpp" +#include "zfp/factory.hpp" +#include "zfp/array3.hpp" +using namespace zfp; + +extern "C" { + #include "constants/2dDouble.h" +} + +#include "gtest/gtest.h" +#include "utils/gtestDoubleEnv.h" +#include "utils/gtestBaseFixture.h" +#include "utils/predicates.h" + +class Array2dTestEnv : public ArrayDoubleTestEnv { +public: + virtual int getDims() { return 2; } +}; + +Array2dTestEnv* const testEnv = new Array2dTestEnv; + +class Array2dTest : public ArrayNdTestFixture {}; + +#define TEST_FIXTURE Array2dTest + +#define ZFP_ARRAY_TYPE array2d +#define ZFP_ARRAY_TYPE_WRONG_SCALAR array2f +#define ZFP_ARRAY_TYPE_WRONG_DIM array3d +#define ZFP_ARRAY_TYPE_WRONG_SCALAR_DIM array3f +#define ZFP_ARRAY_NOT_INCLUDED_TYPE array1d + +#define UINT uint64 +#define SCALAR double +#define DIMS 2 + +#include "testArrayBase.cpp" +#include "testArray2Base.cpp" + +int main(int argc, char* argv[]) { + ::testing::InitGoogleTest(&argc, argv); + static_cast<void>(::testing::AddGlobalTestEnvironment(testEnv)); + return RUN_ALL_TESTS(); +} diff --git a/tests/array/array/testArray2dIters.cpp b/tests/array/array/testArray2dIters.cpp new file mode 100644 index 00000000..66bb1c0e --- /dev/null +++ b/tests/array/array/testArray2dIters.cpp @@ -0,0 +1,10 @@ +#include "zfp/array2.hpp" +using namespace zfp; + +#define ARRAY_DIMS_SCALAR_TEST Array2dTest +#define ARRAY_DIMS_SCALAR_TEST_ITERS Array2dTestIters + +#include "utils/gtest2dTest.h" + +#include "testArrayItersBase.cpp" +#include 
"testArray2ItersBase.cpp" diff --git a/tests/array/array/testArray2dPtrs.cpp b/tests/array/array/testArray2dPtrs.cpp new file mode 100644 index 00000000..0f021678 --- /dev/null +++ b/tests/array/array/testArray2dPtrs.cpp @@ -0,0 +1,10 @@ +#include "zfp/array2.hpp" +using namespace zfp; + +#define ARRAY_DIMS_SCALAR_TEST Array2dTest +#define ARRAY_DIMS_SCALAR_TEST_PTRS Array2dTestPtrs + +#include "utils/gtest2dTest.h" + +#include "testArrayPtrsBase.cpp" +#include "testArray2PtrsBase.cpp" diff --git a/tests/array/array/testArray2dRefs.cpp b/tests/array/array/testArray2dRefs.cpp new file mode 100644 index 00000000..ff85149c --- /dev/null +++ b/tests/array/array/testArray2dRefs.cpp @@ -0,0 +1,14 @@ +#include "zfp/array2.hpp" +using namespace zfp; + +extern "C" { + #include "utils/rand64.h" +} + +#define ARRAY_DIMS_SCALAR_TEST Array2dTest +#define ARRAY_DIMS_SCALAR_TEST_REFS Array2dTestRefs + +#include "utils/gtest2dTest.h" + +#include "testArrayRefsBase.cpp" +#include "testArray2RefsBase.cpp" diff --git a/tests/array/array/testArray2dViewIters.cpp b/tests/array/array/testArray2dViewIters.cpp new file mode 100644 index 00000000..9e1ca9ee --- /dev/null +++ b/tests/array/array/testArray2dViewIters.cpp @@ -0,0 +1,17 @@ +#include "zfp/array2.hpp" +using namespace zfp; + +extern "C" { + #include "utils/rand64.h" +} + +#define ARRAY_DIMS_SCALAR_TEST Array2dTest +#define ARRAY_DIMS_SCALAR_TEST_VIEW_ITERS Array2dTestViewIters + +#include "utils/gtest2dTest.h" + +#define ZFP_ARRAY_TYPE array2d +#define SCALAR double +#define DIMS 2 + +#include "testArrayViewItersBase.cpp" diff --git a/tests/array/array/testArray2dViewPtrs.cpp b/tests/array/array/testArray2dViewPtrs.cpp new file mode 100644 index 00000000..008e9eb3 --- /dev/null +++ b/tests/array/array/testArray2dViewPtrs.cpp @@ -0,0 +1,17 @@ +#include "zfp/array2.hpp" +using namespace zfp; + +extern "C" { + #include "utils/rand64.h" +} + +#define ARRAY_DIMS_SCALAR_TEST Array2dTest +#define ARRAY_DIMS_SCALAR_TEST_VIEW_PTRS 
Array2dTestViewPtrs + +#include "utils/gtest2dTest.h" + +#define ZFP_ARRAY_TYPE array2d +#define SCALAR double +#define DIMS 2 + +#include "testArrayViewPtrsBase.cpp" diff --git a/tests/array/array/testArray2dViews.cpp b/tests/array/array/testArray2dViews.cpp new file mode 100644 index 00000000..ced06cf7 --- /dev/null +++ b/tests/array/array/testArray2dViews.cpp @@ -0,0 +1,18 @@ +#include "zfp/array2.hpp" +using namespace zfp; + +extern "C" { + #include "utils/rand64.h" +} + +#define ARRAY_DIMS_SCALAR_TEST Array2dTest +#define ARRAY_DIMS_SCALAR_TEST_VIEWS Array2dTestViews + +#include "utils/gtest2dTest.h" + +#define ZFP_ARRAY_TYPE array2d +#define SCALAR double +#define DIMS 2 + +#include "testArrayViewsBase.cpp" +#include "testArray2ViewsBase.cpp" diff --git a/tests/array/array/testArray2f.cpp b/tests/array/array/testArray2f.cpp new file mode 100644 index 00000000..6acb95c6 --- /dev/null +++ b/tests/array/array/testArray2f.cpp @@ -0,0 +1,45 @@ +#include "zfp/array1.hpp" +#include "zfp/array2.hpp" +#include "zfp/array4.hpp" +#include "zfp/factory.hpp" +#include "zfp/array3.hpp" +using namespace zfp; + +extern "C" { + #include "constants/2dFloat.h" +} + +#include "gtest/gtest.h" +#include "utils/gtestFloatEnv.h" +#include "utils/gtestBaseFixture.h" +#include "utils/predicates.h" + +class Array2fTestEnv : public ArrayFloatTestEnv { +public: + virtual int getDims() { return 2; } +}; + +Array2fTestEnv* const testEnv = new Array2fTestEnv; + +class Array2fTest : public ArrayNdTestFixture {}; + +#define TEST_FIXTURE Array2fTest + +#define ZFP_ARRAY_TYPE array2f +#define ZFP_ARRAY_TYPE_WRONG_SCALAR array2d +#define ZFP_ARRAY_TYPE_WRONG_DIM array3f +#define ZFP_ARRAY_TYPE_WRONG_SCALAR_DIM array3d +#define ZFP_ARRAY_NOT_INCLUDED_TYPE array1f + +#define UINT uint32 +#define SCALAR float +#define DIMS 2 + +#include "testArrayBase.cpp" +#include "testArray2Base.cpp" + +int main(int argc, char* argv[]) { + ::testing::InitGoogleTest(&argc, argv); + 
static_cast<void>(::testing::AddGlobalTestEnvironment(testEnv)); + return RUN_ALL_TESTS(); +} diff --git a/tests/array/array/testArray2fIters.cpp b/tests/array/array/testArray2fIters.cpp new file mode 100644 index 00000000..dc5b42f3 --- /dev/null +++ b/tests/array/array/testArray2fIters.cpp @@ -0,0 +1,10 @@ +#include "zfp/array2.hpp" +using namespace zfp; + +#define ARRAY_DIMS_SCALAR_TEST Array2fTest +#define ARRAY_DIMS_SCALAR_TEST_ITERS Array2fTestIters + +#include "utils/gtest2fTest.h" + +#include "testArrayItersBase.cpp" +#include "testArray2ItersBase.cpp" diff --git a/tests/array/array/testArray2fPtrs.cpp b/tests/array/array/testArray2fPtrs.cpp new file mode 100644 index 00000000..bdf956cd --- /dev/null +++ b/tests/array/array/testArray2fPtrs.cpp @@ -0,0 +1,10 @@ +#include "zfp/array2.hpp" +using namespace zfp; + +#define ARRAY_DIMS_SCALAR_TEST Array2fTest +#define ARRAY_DIMS_SCALAR_TEST_PTRS Array2fTestPtrs + +#include "utils/gtest2fTest.h" + +#include "testArrayPtrsBase.cpp" +#include "testArray2PtrsBase.cpp" diff --git a/tests/array/array/testArray2fRefs.cpp b/tests/array/array/testArray2fRefs.cpp new file mode 100644 index 00000000..b3844abc --- /dev/null +++ b/tests/array/array/testArray2fRefs.cpp @@ -0,0 +1,14 @@ +#include "zfp/array2.hpp" +using namespace zfp; + +extern "C" { + #include "utils/rand32.h" +} + +#define ARRAY_DIMS_SCALAR_TEST Array2fTest +#define ARRAY_DIMS_SCALAR_TEST_REFS Array2fTestRefs + +#include "utils/gtest2fTest.h" + +#include "testArrayRefsBase.cpp" +#include "testArray2RefsBase.cpp" diff --git a/tests/array/array/testArray2fViewIters.cpp b/tests/array/array/testArray2fViewIters.cpp new file mode 100644 index 00000000..83e64552 --- /dev/null +++ b/tests/array/array/testArray2fViewIters.cpp @@ -0,0 +1,17 @@ +#include "zfp/array2.hpp" +using namespace zfp; + +extern "C" { + #include "utils/rand64.h" +} + +#define ARRAY_DIMS_SCALAR_TEST Array2fTest +#define ARRAY_DIMS_SCALAR_TEST_VIEW_ITERS Array2fTestViewIters + +#include 
"utils/gtest2fTest.h" + +#define ZFP_ARRAY_TYPE array2f +#define SCALAR float +#define DIMS 2 + +#include "testArrayViewItersBase.cpp" diff --git a/tests/array/array/testArray2fViewPtrs.cpp b/tests/array/array/testArray2fViewPtrs.cpp new file mode 100644 index 00000000..b0e9f2d3 --- /dev/null +++ b/tests/array/array/testArray2fViewPtrs.cpp @@ -0,0 +1,17 @@ +#include "zfp/array2.hpp" +using namespace zfp; + +extern "C" { + #include "utils/rand64.h" +} + +#define ARRAY_DIMS_SCALAR_TEST Array2fTest +#define ARRAY_DIMS_SCALAR_TEST_VIEW_PTRS Array2fTestViewPtrs + +#include "utils/gtest2fTest.h" + +#define ZFP_ARRAY_TYPE array2f +#define SCALAR float +#define DIMS 2 + +#include "testArrayViewPtrsBase.cpp" diff --git a/tests/array/array/testArray2fViews.cpp b/tests/array/array/testArray2fViews.cpp new file mode 100644 index 00000000..891eb1f8 --- /dev/null +++ b/tests/array/array/testArray2fViews.cpp @@ -0,0 +1,18 @@ +#include "zfp/array2.hpp" +using namespace zfp; + +extern "C" { + #include "utils/rand32.h" +} + +#define ARRAY_DIMS_SCALAR_TEST Array2fTest +#define ARRAY_DIMS_SCALAR_TEST_VIEWS Array2fTestViews + +#include "utils/gtest2fTest.h" + +#define ZFP_ARRAY_TYPE array2f +#define SCALAR float +#define DIMS 2 + +#include "testArrayViewsBase.cpp" +#include "testArray2ViewsBase.cpp" diff --git a/tests/array/array/testArray3Base.cpp b/tests/array/array/testArray3Base.cpp new file mode 100644 index 00000000..81215213 --- /dev/null +++ b/tests/array/array/testArray3Base.cpp @@ -0,0 +1,571 @@ +/* TODO: figure out templated tests (TYPED_TEST) */ + +/* const_view */ + +TEST_P(TEST_FIXTURE, when_construct3dCompressedArrayFromConstView_then_rateConserved) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); + ZFP_ARRAY_TYPE::const_view v(&arr, 1, 1, 1, 1, 1, 1); + ZFP_ARRAY_TYPE arr2(v); + + EXPECT_EQ(arr.rate(), arr2.rate()); +} + +TEST_P(TEST_FIXTURE, when_construct3dCompressedArrayFromConstView_then_sizeConserved) +{ + 
ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); + + size_t offsetX = 5; + size_t viewLenX = 3; + EXPECT_LT(offsetX + viewLenX, arr.size_x()); + + size_t offsetY = 1; + size_t viewLenY = 3; + EXPECT_LT(offsetY + viewLenY, arr.size_y()); + + size_t offsetZ = 0; + size_t viewLenZ = 2; + EXPECT_LT(offsetZ + viewLenZ, arr.size_z()); + + /* create view and construct from it */ + ZFP_ARRAY_TYPE::const_view v(&arr, offsetX, offsetY, offsetZ, viewLenX, viewLenY, viewLenZ); + ZFP_ARRAY_TYPE arr2(v); + + EXPECT_EQ(v.size_x(), arr2.size_x()); + EXPECT_EQ(v.size_y(), arr2.size_y()); + EXPECT_EQ(v.size_z(), arr2.size_z()); +} + +TEST_P(TEST_FIXTURE, when_construct3dCompressedArrayFromConstView_then_performsDeepCopy) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); + + size_t offsetX = 5; + size_t viewLenX = 3; + EXPECT_LT(offsetX + viewLenX, arr.size_x()); + + size_t offsetY = 1; + size_t viewLenY = 3; + EXPECT_LT(offsetY + viewLenY, arr.size_y()); + + size_t offsetZ = 0; + size_t viewLenZ = 2; + EXPECT_LT(offsetZ + viewLenZ, arr.size_z()); + + /* create view and construct from it */ + ZFP_ARRAY_TYPE::const_view v(&arr, offsetX, offsetY, offsetZ, viewLenX, viewLenY, viewLenZ); + ZFP_ARRAY_TYPE arr2(v); + + /* verify array entries */ + for (size_t k = 0; k < viewLenZ; k++) { + for (size_t j = 0; j < viewLenY; j++) { + for (size_t i = 0; i < viewLenX; i++) { + EXPECT_EQ(arr(offsetX + i, offsetY + j, offsetZ + k), arr2(i, j, k)); + } + } + } + + /* verify it's a deep copy */ + arr(offsetX + 0, offsetY + 0, offsetZ + 0) = 999.; + EXPECT_NE(arr(offsetX, offsetY, offsetZ), arr2(0, 0, 0)); +} + +/* view */ + +TEST_P(TEST_FIXTURE, when_construct3dCompressedArrayFromView_then_rateConserved) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); + ZFP_ARRAY_TYPE::view v(&arr, 1, 1, 1, 1, 1, 1); + ZFP_ARRAY_TYPE arr2(v); + + 
EXPECT_EQ(arr.rate(), arr2.rate()); +} + +TEST_P(TEST_FIXTURE, when_construct3dCompressedArrayFromView_then_sizeConserved) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); + + size_t offsetX = 5; + size_t viewLenX = 3; + EXPECT_LT(offsetX + viewLenX, arr.size_x()); + + size_t offsetY = 1; + size_t viewLenY = 3; + EXPECT_LT(offsetY + viewLenY, arr.size_y()); + + size_t offsetZ = 0; + size_t viewLenZ = 2; + EXPECT_LT(offsetZ + viewLenZ, arr.size_z()); + + /* create view and construct from it */ + ZFP_ARRAY_TYPE::view v(&arr, offsetX, offsetY, offsetZ, viewLenX, viewLenY, viewLenZ); + ZFP_ARRAY_TYPE arr2(v); + + EXPECT_EQ(v.size_x(), arr2.size_x()); + EXPECT_EQ(v.size_y(), arr2.size_y()); + EXPECT_EQ(v.size_z(), arr2.size_z()); +} + +TEST_P(TEST_FIXTURE, when_construct3dCompressedArrayFromView_then_performsDeepCopy) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); + + size_t offsetX = 5; + size_t viewLenX = 3; + EXPECT_LT(offsetX + viewLenX, arr.size_x()); + + size_t offsetY = 1; + size_t viewLenY = 3; + EXPECT_LT(offsetY + viewLenY, arr.size_y()); + + size_t offsetZ = 0; + size_t viewLenZ = 2; + EXPECT_LT(offsetZ + viewLenZ, arr.size_z()); + + /* create view and construct from it */ + ZFP_ARRAY_TYPE::view v(&arr, offsetX, offsetY, offsetZ, viewLenX, viewLenY, viewLenZ); + ZFP_ARRAY_TYPE arr2(v); + + /* verify array entries */ + for (size_t k = 0; k < viewLenZ; k++) { + for (size_t j = 0; j < viewLenY; j++) { + for (size_t i = 0; i < viewLenX; i++) { + EXPECT_EQ(arr(offsetX + i, offsetY + j, offsetZ + k), arr2(i, j, k)); + } + } + } + + /* verify it's a deep copy */ + arr(offsetX + 0, offsetY + 0, offsetZ + 0) = 999.; + EXPECT_NE(arr(offsetX, offsetY, offsetZ), arr2(0, 0, 0)); +} + +/* flat_view */ + +TEST_P(TEST_FIXTURE, when_construct3dCompressedArrayFromFlatView_then_rateConserved) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, 
inputDataSideLen, getRate(), inputDataArr); + ZFP_ARRAY_TYPE::flat_view v(&arr, 1, 1, 1, 1, 1, 1); + ZFP_ARRAY_TYPE arr2(v); + + EXPECT_EQ(arr.rate(), arr2.rate()); +} + +TEST_P(TEST_FIXTURE, when_construct3dCompressedArrayFromFlatView_then_sizeConserved) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); + + size_t offsetX = 5; + size_t viewLenX = 3; + EXPECT_LT(offsetX + viewLenX, arr.size_x()); + + size_t offsetY = 1; + size_t viewLenY = 3; + EXPECT_LT(offsetY + viewLenY, arr.size_y()); + + size_t offsetZ = 0; + size_t viewLenZ = 2; + EXPECT_LT(offsetZ + viewLenZ, arr.size_z()); + + /* create view and construct from it */ + ZFP_ARRAY_TYPE::flat_view v(&arr, offsetX, offsetY, offsetZ, viewLenX, viewLenY, viewLenZ); + ZFP_ARRAY_TYPE arr2(v); + + EXPECT_EQ(v.size_x(), arr2.size_x()); + EXPECT_EQ(v.size_y(), arr2.size_y()); + EXPECT_EQ(v.size_z(), arr2.size_z()); +} + +TEST_P(TEST_FIXTURE, when_construct3dCompressedArrayFromFlatView_then_performsDeepCopy) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); + + size_t offsetX = 5; + size_t viewLenX = 3; + EXPECT_LT(offsetX + viewLenX, arr.size_x()); + + size_t offsetY = 1; + size_t viewLenY = 3; + EXPECT_LT(offsetY + viewLenY, arr.size_y()); + + size_t offsetZ = 0; + size_t viewLenZ = 2; + EXPECT_LT(offsetZ + viewLenZ, arr.size_z()); + + /* create view and construct from it */ + ZFP_ARRAY_TYPE::flat_view v(&arr, offsetX, offsetY, offsetZ, viewLenX, viewLenY, viewLenZ); + ZFP_ARRAY_TYPE arr2(v); + + /* verify array entries */ + for (size_t k = 0; k < viewLenZ; k++) { + for (size_t j = 0; j < viewLenY; j++) { + for (size_t i = 0; i < viewLenX; i++) { + EXPECT_EQ(arr(offsetX + i, offsetY + j, offsetZ + k), arr2(i, j, k)); + } + } + } + + /* verify it's a deep copy */ + arr(offsetX + 0, offsetY + 0, offsetZ + 0) = 999.; + EXPECT_NE(arr(offsetX, offsetY, offsetZ), arr2(0, 0, 0)); +} + +/* nested_view */ + 
+TEST_P(TEST_FIXTURE, when_construct3dCompressedArrayFromNestedView_then_rateConserved) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); + ZFP_ARRAY_TYPE::nested_view v(&arr, 1, 1, 1, 1, 1, 1); + ZFP_ARRAY_TYPE arr2(v); + + /* rate may be increased when moving to lower dimension compressed array */ + EXPECT_LE(arr.rate(), arr2.rate()); +} + +TEST_P(TEST_FIXTURE, when_construct3dCompressedArrayFromNestedView_then_sizeConserved) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); + + size_t offsetX = 5; + size_t viewLenX = 3; + EXPECT_LT(offsetX + viewLenX, arr.size_x()); + + size_t offsetY = 1; + size_t viewLenY = 3; + EXPECT_LT(offsetY + viewLenY, arr.size_y()); + + size_t offsetZ = 0; + size_t viewLenZ = 2; + EXPECT_LT(offsetZ + viewLenZ, arr.size_z()); + + /* create view and construct from it */ + ZFP_ARRAY_TYPE::nested_view v(&arr, offsetX, offsetY, offsetZ, viewLenX, viewLenY, viewLenZ); + ZFP_ARRAY_TYPE arr2(v); + + EXPECT_EQ(v.size_x(), arr2.size_x()); + EXPECT_EQ(v.size_y(), arr2.size_y()); + EXPECT_EQ(v.size_z(), arr2.size_z()); +} + +TEST_P(TEST_FIXTURE, when_construct3dCompressedArrayFromNestedView_then_performsDeepCopy) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); + + size_t offsetX = 5; + size_t viewLenX = 3; + EXPECT_LT(offsetX + viewLenX, arr.size_x()); + + size_t offsetY = 1; + size_t viewLenY = 3; + EXPECT_LT(offsetY + viewLenY, arr.size_y()); + + size_t offsetZ = 0; + size_t viewLenZ = 2; + EXPECT_LT(offsetZ + viewLenZ, arr.size_z()); + + /* create view and construct from it */ + ZFP_ARRAY_TYPE::nested_view v(&arr, offsetX, offsetY, offsetZ, viewLenX, viewLenY, viewLenZ); + ZFP_ARRAY_TYPE arr2(v); + + /* verify array entries */ + for (size_t k = 0; k < viewLenZ; k++) { + for (size_t j = 0; j < viewLenY; j++) { + for (size_t i = 0; i < viewLenX; i++) { + EXPECT_EQ(arr(offsetX + i, 
offsetY + j, offsetZ + k), arr2(i, j, k)); + } + } + } + + /* verify it's a deep copy */ + arr(offsetX + 0, offsetY + 0, offsetZ + 0) = 999.; + EXPECT_NE(arr(offsetX, offsetY, offsetZ), arr2(0, 0, 0)); +} + +/* nested_view2 (unique) */ + +TEST_P(TEST_FIXTURE, when_construct2dCompressedArrayFromNestedView2_then_rateConserved) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); + ZFP_ARRAY_TYPE::nested_view v(&arr, 1, 1, 1, 1, 1, 1); + ZFP_ARRAY_TYPE::nested_view2 v2 = v[0]; + + array2 arr2(v2); + + /* rate may be increased when moving to lower dimension compressed array */ + EXPECT_LE(arr.rate(), arr2.rate()); +} + +TEST_P(TEST_FIXTURE, when_construct2dCompressedArrayFromNestedView2_then_sizeConserved) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); + + size_t offsetX = 5; + size_t viewLenX = 3; + EXPECT_LT(offsetX + viewLenX, arr.size_x()); + + size_t offsetY = 1; + size_t viewLenY = 3; + EXPECT_LT(offsetY + viewLenY, arr.size_y()); + + size_t offsetZ = 0; + size_t viewLenZ = 2; + EXPECT_LT(offsetZ + viewLenZ, arr.size_z()); + + /* create view and construct from it */ + ZFP_ARRAY_TYPE::nested_view v(&arr, offsetX, offsetY, offsetZ, viewLenX, viewLenY, viewLenZ); + ZFP_ARRAY_TYPE::nested_view2 v2 = v[0]; + + array2 arr2(v2); + + EXPECT_EQ(v.size_x(), arr2.size_x()); + EXPECT_EQ(v.size_y(), arr2.size_y()); +} + +TEST_P(TEST_FIXTURE, when_construct2dCompressedArrayFromNestedView2_then_performsDeepCopy) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); + + size_t offsetX = 5; + size_t viewLenX = 3; + EXPECT_LT(offsetX + viewLenX, arr.size_x()); + + size_t offsetY = 1; + size_t viewLenY = 3; + EXPECT_LT(offsetY + viewLenY, arr.size_y()); + + size_t offsetZ = 0; + size_t viewLenZ = 2; + EXPECT_LT(offsetZ + viewLenZ, arr.size_z()); + + /* create view and construct from it */ + ZFP_ARRAY_TYPE::nested_view v(&arr, 
offsetX, offsetY, offsetZ, viewLenX, viewLenY, viewLenZ); + size_t z = 1; + ZFP_ARRAY_TYPE::nested_view2 v2 = v[z]; + + array2 arr2(v2); + + /* verify array entries */ + for (size_t j = 0; j < viewLenY; j++) { + for (size_t i = 0; i < viewLenX; i++) { + EXPECT_EQ(arr(offsetX + i, offsetY + j, offsetZ + z), arr2(i, j)); + } + } + + /* verify it's a deep copy */ + arr(offsetX + 0, offsetY + 0, offsetZ + z) = 999.; + EXPECT_NE(arr(offsetX, offsetY, offsetZ + z), arr2(0, 0)); +} + +/* nested_view1 (unique) */ + +TEST_P(TEST_FIXTURE, when_construct1dCompressedArrayFromNestedView1_then_rateConserved) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); + ZFP_ARRAY_TYPE::nested_view v(&arr, 1, 1, 1, 1, 1, 1); + ZFP_ARRAY_TYPE::nested_view1 v2 = v[0][0]; + + array1 arr2(v2); + + /* rate may be increased when moving to lower dimension compressed array */ + EXPECT_LE(arr.rate(), arr2.rate()); +} + +TEST_P(TEST_FIXTURE, when_construct1dCompressedArrayFromNestedView1_then_sizeConserved) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); + + size_t offsetX = 5; + size_t viewLenX = 3; + EXPECT_LT(offsetX + viewLenX, arr.size_x()); + + size_t offsetY = 1; + size_t viewLenY = 3; + EXPECT_LT(offsetY + viewLenY, arr.size_y()); + + size_t offsetZ = 0; + size_t viewLenZ = 2; + EXPECT_LT(offsetZ + viewLenZ, arr.size_z()); + + /* create view and construct from it */ + ZFP_ARRAY_TYPE::nested_view v(&arr, offsetX, offsetY, offsetZ, viewLenX, viewLenY, viewLenZ); + ZFP_ARRAY_TYPE::nested_view1 v2 = v[0][0]; + + array1 arr2(v2); + + EXPECT_EQ(v.size_x(), arr2.size_x()); +} + +TEST_P(TEST_FIXTURE, when_construct1dCompressedArrayFromNestedView1_then_performsDeepCopy) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); + + size_t offsetX = 5; + size_t viewLenX = 3; + EXPECT_LT(offsetX + viewLenX, arr.size_x()); + + size_t offsetY = 1; + 
size_t viewLenY = 3; + EXPECT_LT(offsetY + viewLenY, arr.size_y()); + + size_t offsetZ = 0; + size_t viewLenZ = 2; + EXPECT_LT(offsetZ + viewLenZ, arr.size_z()); + + /* create view and construct from it */ + ZFP_ARRAY_TYPE::nested_view v(&arr, offsetX, offsetY, offsetZ, viewLenX, viewLenY, viewLenZ); + size_t y = 1; + size_t z = 0; + ZFP_ARRAY_TYPE::nested_view1 v2 = v[z][y]; + + array1 arr2(v2); + + /* verify array entries */ + for (size_t i = 0; i < viewLenX; i++) { + EXPECT_EQ(arr(offsetX + i, offsetY + y, offsetZ + z), arr2(i)); + } + + /* verify it's a deep copy */ + arr(offsetX + 0, offsetY + y, offsetZ + z) = 999.; + EXPECT_NE(arr(offsetX, offsetY + y, offsetZ + z), arr2(0)); +} + +/* private_const_view */ + +TEST_P(TEST_FIXTURE, when_construct3dCompressedArrayFromPrivateConstView_then_rateConserved) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); + ZFP_ARRAY_TYPE::private_const_view v(&arr, 1, 1, 1, 1, 1, 1); + ZFP_ARRAY_TYPE arr2(v); + + EXPECT_EQ(arr.rate(), arr2.rate()); +} + +TEST_P(TEST_FIXTURE, when_construct3dCompressedArrayFromPrivateConstView_then_sizeConserved) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); + + size_t offsetX = 5; + size_t viewLenX = 3; + EXPECT_LT(offsetX + viewLenX, arr.size_x()); + + size_t offsetY = 1; + size_t viewLenY = 3; + EXPECT_LT(offsetY + viewLenY, arr.size_y()); + + size_t offsetZ = 0; + size_t viewLenZ = 2; + EXPECT_LT(offsetZ + viewLenZ, arr.size_z()); + + /* create view and construct from it */ + ZFP_ARRAY_TYPE::private_const_view v(&arr, offsetX, offsetY, offsetZ, viewLenX, viewLenY, viewLenZ); + ZFP_ARRAY_TYPE arr2(v); + + EXPECT_EQ(v.size_x(), arr2.size_x()); + EXPECT_EQ(v.size_y(), arr2.size_y()); + EXPECT_EQ(v.size_z(), arr2.size_z()); +} + +TEST_P(TEST_FIXTURE, when_construct3dCompressedArrayFromPrivateConstView_then_performsDeepCopy) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, 
inputDataSideLen, getRate(), inputDataArr); + + size_t offsetX = 5; + size_t viewLenX = 3; + EXPECT_LT(offsetX + viewLenX, arr.size_x()); + + size_t offsetY = 1; + size_t viewLenY = 3; + EXPECT_LT(offsetY + viewLenY, arr.size_y()); + + size_t offsetZ = 0; + size_t viewLenZ = 2; + EXPECT_LT(offsetZ + viewLenZ, arr.size_z()); + + /* create view and construct from it */ + ZFP_ARRAY_TYPE::private_const_view v(&arr, offsetX, offsetY, offsetZ, viewLenX, viewLenY, viewLenZ); + ZFP_ARRAY_TYPE arr2(v); + + /* verify array entries */ + for (size_t k = 0; k < viewLenZ; k++) { + for (size_t j = 0; j < viewLenY; j++) { + for (size_t i = 0; i < viewLenX; i++) { + EXPECT_EQ(arr(offsetX + i, offsetY + j, offsetZ + k), arr2(i, j, k)); + } + } + } + + /* verify it's a deep copy */ + arr(offsetX + 0, offsetY + 0, offsetZ + 0) = 999.; + EXPECT_NE(arr(offsetX, offsetY, offsetZ), arr2(0, 0, 0)); +} + +/* private_view */ + +TEST_P(TEST_FIXTURE, when_construct3dCompressedArrayFromPrivateView_then_rateConserved) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); + ZFP_ARRAY_TYPE::private_view v(&arr, 1, 1, 1, 1, 1, 1); + ZFP_ARRAY_TYPE arr2(v); + + EXPECT_EQ(arr.rate(), arr2.rate()); +} + +TEST_P(TEST_FIXTURE, when_construct3dCompressedArrayFromPrivateView_then_sizeConserved) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); + + size_t offsetX = 5; + size_t viewLenX = 3; + EXPECT_LT(offsetX + viewLenX, arr.size_x()); + + size_t offsetY = 1; + size_t viewLenY = 3; + EXPECT_LT(offsetY + viewLenY, arr.size_y()); + + size_t offsetZ = 0; + size_t viewLenZ = 2; + EXPECT_LT(offsetZ + viewLenZ, arr.size_z()); + + /* create view and construct from it */ + ZFP_ARRAY_TYPE::private_view v(&arr, offsetX, offsetY, offsetZ, viewLenX, viewLenY, viewLenZ); + ZFP_ARRAY_TYPE arr2(v); + + EXPECT_EQ(v.size_x(), arr2.size_x()); + EXPECT_EQ(v.size_y(), arr2.size_y()); + EXPECT_EQ(v.size_z(), arr2.size_z()); 
+} + +TEST_P(TEST_FIXTURE, when_construct3dCompressedArrayFromPrivateView_then_performsDeepCopy) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); + + size_t offsetX = 5; + size_t viewLenX = 3; + EXPECT_LT(offsetX + viewLenX, arr.size_x()); + + size_t offsetY = 1; + size_t viewLenY = 3; + EXPECT_LT(offsetY + viewLenY, arr.size_y()); + + size_t offsetZ = 0; + size_t viewLenZ = 2; + EXPECT_LT(offsetZ + viewLenZ, arr.size_z()); + + /* create view and construct from it */ + ZFP_ARRAY_TYPE::private_view v(&arr, offsetX, offsetY, offsetZ, viewLenX, viewLenY, viewLenZ); + ZFP_ARRAY_TYPE arr2(v); + + /* verify array entries */ + for (size_t k = 0; k < viewLenZ; k++) { + for (size_t j = 0; j < viewLenY; j++) { + for (size_t i = 0; i < viewLenX; i++) { + EXPECT_EQ(arr(offsetX + i, offsetY + j, offsetZ + k), arr2(i, j, k)); + } + } + } + + /* verify it's a deep copy */ + arr(offsetX + 0, offsetY + 0, offsetZ + 0) = 999.; + EXPECT_NE(arr(offsetX, offsetY, offsetZ), arr2(0, 0, 0)); +} + diff --git a/tests/array/array/testArray3ItersBase.cpp b/tests/array/array/testArray3ItersBase.cpp new file mode 100644 index 00000000..3d0ce440 --- /dev/null +++ b/tests/array/array/testArray3ItersBase.cpp @@ -0,0 +1,91 @@ +TEST_F(ARRAY_DIMS_SCALAR_TEST_ITERS, given_partialBlocks_when_incrementIterator_then_positionTraversesCorrectly) +{ + // force partial block traversal + EXPECT_NE(0u, arr.size_x() % BLOCK_SIDE_LEN); + EXPECT_NE(0u, arr.size_y() % BLOCK_SIDE_LEN); + EXPECT_NE(0u, arr.size_z() % BLOCK_SIDE_LEN); + + size_t totalBlocksX = (arr.size_x() + 3) / 4; + size_t totalBlocksY = (arr.size_y() + 3) / 4; + size_t totalBlocksZ = (arr.size_z() + 3) / 4; + size_t totalBlocks = totalBlocksX * totalBlocksY * totalBlocksZ; + + iter = arr.begin(); + for (size_t count = 0; count < totalBlocks; count++) { + // determine if block is complete or partial + size_t distanceFromEnd = arr.size_x() - iter.i(); + size_t blockLenX = distanceFromEnd < 
BLOCK_SIDE_LEN ? distanceFromEnd : BLOCK_SIDE_LEN; + + distanceFromEnd = arr.size_y() - iter.j(); + size_t blockLenY = distanceFromEnd < BLOCK_SIDE_LEN ? distanceFromEnd : BLOCK_SIDE_LEN; + + distanceFromEnd = arr.size_z() - iter.k(); + size_t blockLenZ = distanceFromEnd < BLOCK_SIDE_LEN ? distanceFromEnd : BLOCK_SIDE_LEN; + + // ensure entries lie in same block + size_t blockStartIndexI = iter.i(); + size_t blockStartIndexJ = iter.j(); + size_t blockStartIndexK = iter.k(); + + for (size_t k = 0; k < blockLenZ; k++) { + for (size_t j = 0; j < blockLenY; j++) { + for (size_t i = 0; i < blockLenX; i++) { + EXPECT_EQ(blockStartIndexI + i, iter.i()); + EXPECT_EQ(blockStartIndexJ + j, iter.j()); + EXPECT_EQ(blockStartIndexK + k, iter.k()); + iter++; + } + } + } + } + +// EXPECT_EQ(arr.end(), iter); // triggers googletest issue #742 + EXPECT_TRUE(arr.end() == iter); +} + +// const iterators + +TEST_F(ARRAY_DIMS_SCALAR_TEST_ITERS, given_partialBlocks_when_incrementConstIterator_then_positionTraversesCorrectly) +{ + // force partial block traversal + EXPECT_NE(0u, arr.size_x() % BLOCK_SIDE_LEN); + EXPECT_NE(0u, arr.size_y() % BLOCK_SIDE_LEN); + EXPECT_NE(0u, arr.size_z() % BLOCK_SIDE_LEN); + + size_t totalBlocksX = (arr.size_x() + 3) / 4; + size_t totalBlocksY = (arr.size_y() + 3) / 4; + size_t totalBlocksZ = (arr.size_z() + 3) / 4; + size_t totalBlocks = totalBlocksX * totalBlocksY * totalBlocksZ; + + citer = arr.cbegin(); + for (size_t count = 0; count < totalBlocks; count++) { + // determine if block is complete or partial + size_t distanceFromEnd = arr.size_x() - citer.i(); + size_t blockLenX = distanceFromEnd < BLOCK_SIDE_LEN ? distanceFromEnd : BLOCK_SIDE_LEN; + + distanceFromEnd = arr.size_y() - citer.j(); + size_t blockLenY = distanceFromEnd < BLOCK_SIDE_LEN ? distanceFromEnd : BLOCK_SIDE_LEN; + + distanceFromEnd = arr.size_z() - citer.k(); + size_t blockLenZ = distanceFromEnd < BLOCK_SIDE_LEN ? 
distanceFromEnd : BLOCK_SIDE_LEN; + + // ensure entries lie in same block + size_t blockStartIndexI = citer.i(); + size_t blockStartIndexJ = citer.j(); + size_t blockStartIndexK = citer.k(); + + for (size_t k = 0; k < blockLenZ; k++) { + for (size_t j = 0; j < blockLenY; j++) { + for (size_t i = 0; i < blockLenX; i++) { + EXPECT_EQ(blockStartIndexI + i, citer.i()); + EXPECT_EQ(blockStartIndexJ + j, citer.j()); + EXPECT_EQ(blockStartIndexK + k, citer.k()); + citer++; + } + } + } + } + +// EXPECT_EQ(arr.cend(), citer); // triggers googletest issue #742 + EXPECT_TRUE(arr.cend() == citer); +} diff --git a/tests/array/array/testArray3PtrsBase.cpp b/tests/array/array/testArray3PtrsBase.cpp new file mode 100644 index 00000000..86449548 --- /dev/null +++ b/tests/array/array/testArray3PtrsBase.cpp @@ -0,0 +1,52 @@ +TEST_F(ARRAY_DIMS_SCALAR_TEST_PTRS, given_pointerAtXBoundary_when_increment_then_pointerPositionTraversesCorrectly) +{ + size_t i = arr.size_x() - 1; + size_t j = 2; + size_t k = 4; + arr(0, j+1, k) = VAL; + + ptr = &arr(i, j, k); + + EXPECT_EQ(VAL, *++ptr); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_PTRS, given_pointerAtXBoundary_when_decrement_then_pointerPositionTraversesCorrectly) +{ + size_t i = 0; + size_t j = 2; + size_t k = 3; + + size_t iNext = arr.size_x() - 1; + arr(iNext, j-1, k) = VAL; + + ptr = &arr(i, j, k); + + EXPECT_EQ(VAL, *--ptr); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_PTRS, given_pointerAtXYBoundary_when_increment_then_pointerPositionTraversesCorrectly) +{ + size_t i = arr.size_x() - 1; + size_t j = arr.size_y() - 1; + size_t k = 4; + arr(0, 0, k+1) = VAL; + + ptr = &arr(i, j, k); + + EXPECT_EQ(VAL, *++ptr); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_PTRS, given_pointerAtXYBoundary_when_decrement_then_pointerPositionTraversesCorrectly) +{ + size_t i = 0; + size_t j = 0; + size_t k = 3; + + size_t iNext = arr.size_x() - 1; + size_t jNext = arr.size_y() - 1; + arr(iNext, jNext, k-1) = VAL; + + ptr = &arr(i, j, k); + + EXPECT_EQ(VAL, *--ptr); +} diff --git 
a/tests/array/array/testArray3RefsBase.cpp b/tests/array/array/testArray3RefsBase.cpp new file mode 100644 index 00000000..af8f0608 --- /dev/null +++ b/tests/array/array/testArray3RefsBase.cpp @@ -0,0 +1,58 @@ +TEST_F(ARRAY_DIMS_SCALAR_TEST_REFS, when_resize_then_sizeChanges) +{ + EXPECT_EQ(ARRAY_SIZE_X, arr.size_x()); + EXPECT_EQ(ARRAY_SIZE_Y, arr.size_y()); + EXPECT_EQ(ARRAY_SIZE_Z, arr.size_z()); + EXPECT_EQ(ARRAY_SIZE_X * ARRAY_SIZE_Y * ARRAY_SIZE_Z, arr.size()); + + size_t newLenX = ARRAY_SIZE_X + 1; + size_t newLenY = ARRAY_SIZE_Y - 2; + size_t newLenZ = ARRAY_SIZE_Z + 5; + arr.resize(newLenX, newLenY, newLenZ); + + EXPECT_EQ(newLenX, arr.size_x()); + EXPECT_EQ(newLenY, arr.size_y()); + EXPECT_EQ(newLenZ, arr.size_z()); + EXPECT_EQ(newLenX * newLenY * newLenZ, arr.size()); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_REFS, when_getIndexWithParentheses_then_refReturned) +{ + size_t i = 1, j = 1, k = 1; + arr(i, j, k) = VAL; + + EXPECT_EQ(VAL, arr(i, j, k)); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_REFS, when_indexWithBracketsAlongsideParentheses_then_indexedProperly) +{ + size_t i = 1, j = 1, k = 1; + size_t absIndex = k * arr.size_x() * arr.size_y() + j * arr.size_x() + i; + + arr[absIndex] = VAL; + EXPECT_EQ(VAL, arr(i, j, k)); + + arr(i, j, k) /= VAL; + EXPECT_EQ(1, arr[absIndex]); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_REFS, given_constCompressedArray_when_getIndexWithBrackets_then_valReturned) +{ + size_t i = 1; + arr[i] = VAL; + + const array3 arrConst = arr; + + EXPECT_EQ(VAL, arrConst[i]); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_REFS, given_constCompressedArray_when_getIndexWithParentheses_then_valReturned) +{ + size_t i = 1, j = 1, k = 1; + size_t absIndex = k * arr.size_x() * arr.size_y() + j * arr.size_x() + i; + arr[absIndex] = VAL; + + const array3 arrConst = arr; + + EXPECT_EQ(VAL, arrConst(i, j, k)); +} diff --git a/tests/array/array/testArray3ViewsBase.cpp b/tests/array/array/testArray3ViewsBase.cpp new file mode 100644 index 00000000..d927f8f9 --- /dev/null +++ 
b/tests/array/array/testArray3ViewsBase.cpp @@ -0,0 +1,489 @@ +/* preview */ + +/* this also tests const_view */ +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, when_previewFullConstructor3D_then_lengthAndOffsetSet) +{ + ZFP_ARRAY_TYPE::const_view v(&arr, offsetX, offsetY, offsetZ, viewLenX, viewLenY, viewLenZ); + + EXPECT_EQ(viewLenX * viewLenY * viewLenZ, v.size()); + EXPECT_EQ(viewLenX, v.size_x()); + EXPECT_EQ(viewLenY, v.size_y()); + EXPECT_EQ(viewLenZ, v.size_z()); + + EXPECT_EQ(offsetX, v.global_x(0)); + EXPECT_EQ(offsetY, v.global_y(0)); + EXPECT_EQ(offsetZ, v.global_z(0)); +} + +/* const_view */ + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, given_constView_when_sizeXYZ_then_viewXYZLenReturned) +{ + ZFP_ARRAY_TYPE::const_view v(&arr, offsetX, offsetY, offsetZ, viewLenX, viewLenY, viewLenZ); + + EXPECT_EQ(viewLenX, v.size_x()); + EXPECT_EQ(viewLenY, v.size_y()); + EXPECT_EQ(viewLenZ, v.size_z()); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, given_constView_when_accessorParens_then_correctEntriesReturned) +{ + ZFP_ARRAY_TYPE::const_view v(&arr, offsetX, offsetY, offsetZ, viewLenX, viewLenY, viewLenZ); + + for (size_t k = 0; k < viewLenZ; k++) { + for (size_t j = 0; j < viewLenY; j++) { + for (size_t i = 0; i < viewLenX; i++) { + size_t offset = (offsetZ + k)*arr.size_x()*arr.size_y() + (offsetY + j)*arr.size_x() + offsetX + i; + EXPECT_EQ(arr[offset], v(i, j, k)); + } + } + } +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, when_constViewFullConstructor_then_isShallowCopyOfCompressedArray) +{ + ZFP_ARRAY_TYPE::const_view v(&arr, offsetX, offsetY, offsetZ, viewLenX, viewLenY, viewLenZ); + + /* indices of view and arr */ + size_t vIX = 2; + size_t aIX = v.global_x(vIX); + size_t vIY = 2; + size_t aIY = v.global_y(vIY); + size_t vIZ = 1; + size_t aIZ = v.global_z(vIZ); + + SCALAR oldVal = arr(aIX, aIY, aIZ); + EXPECT_EQ(oldVal, v(vIX, vIY, vIZ)); + + arr(aIX, aIY, aIZ) += 1; + SCALAR newVal = arr(aIX, aIY, aIZ); + EXPECT_NE(oldVal, newVal); + + EXPECT_EQ(newVal, v(vIX, vIY, vIZ)); +} + 
+/* view */ + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, when_viewFullConstructor_then_lengthAndOffsetSet) +{ + ZFP_ARRAY_TYPE::view v(&arr, offsetX, offsetY, offsetZ, viewLenX, viewLenY, viewLenZ); + + EXPECT_EQ(viewLenX * viewLenY * viewLenZ, v.size()); + EXPECT_EQ(viewLenX, v.size_x()); + EXPECT_EQ(viewLenY, v.size_y()); + EXPECT_EQ(viewLenZ, v.size_z()); + + EXPECT_EQ(offsetX, v.global_x(0)); + EXPECT_EQ(offsetY, v.global_y(0)); + EXPECT_EQ(offsetZ, v.global_z(0)); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, when_viewFullConstructor_then_isShallowCopyOfCompressedArray) +{ + ZFP_ARRAY_TYPE::const_view v(&arr, offsetX, offsetY, offsetZ, viewLenX, viewLenY, viewLenZ); + + /* indices of view and arr */ + size_t vIX = 2; + size_t aIX = v.global_x(vIX); + size_t vIY = 2; + size_t aIY = v.global_y(vIY); + size_t vIZ = 1; + size_t aIZ = v.global_z(vIZ); + + SCALAR oldVal = arr(aIX, aIY, aIZ); + EXPECT_EQ(oldVal, v(vIX, vIY, vIZ)); + + arr(aIX, aIY, aIZ) += 1; + SCALAR newVal = arr(aIX, aIY, aIZ); + EXPECT_NE(oldVal, newVal); + + EXPECT_EQ(newVal, v(vIX, vIY, vIZ)); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, given_view_when_setEntryWithParens_then_originalArrayUpdated) +{ + ZFP_ARRAY_TYPE::view v(&arr, offsetX, offsetY, offsetZ, viewLenX, viewLenY, viewLenZ); + size_t i = 1, j = 2, k = 1; + SCALAR val = 3.14; + + EXPECT_NE(val, arr(offsetX + i, offsetY + j, offsetZ + k)); + v(i, j, k) = val; + + EXPECT_EQ(arr(offsetX + i, offsetY + j, offsetZ + k), v(i, j, k)); +} + +/* flat_view */ + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, when_flatViewFullConstructor_then_lengthAndOffsetSet) +{ + ZFP_ARRAY_TYPE::flat_view v(&arr, offsetX, offsetY, offsetZ, viewLenX, viewLenY, viewLenZ); + + EXPECT_EQ(viewLenX * viewLenY * viewLenZ, v.size()); + EXPECT_EQ(viewLenX, v.size_x()); + EXPECT_EQ(viewLenY, v.size_y()); + EXPECT_EQ(viewLenZ, v.size_z()); + + EXPECT_EQ(offsetX, v.global_x(0)); + EXPECT_EQ(offsetY, v.global_y(0)); + EXPECT_EQ(offsetZ, v.global_z(0)); +} + 
+TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, when_flatViewFullConstructor_then_isShallowCopyOfCompressedArray) +{ + ZFP_ARRAY_TYPE::flat_view v(&arr, 1, 1, 1, 1, 1, 1); + + /* indices of view and arr */ + size_t vIX = 2; + size_t aIX = v.global_x(vIX); + size_t vIY = 2; + size_t aIY = v.global_y(vIY); + size_t vIZ = 1; + size_t aIZ = v.global_z(vIZ); + + SCALAR oldVal = arr(aIX, aIY, aIZ); + EXPECT_EQ(oldVal, v(vIX, vIY, vIZ)); + + arr(aIX, aIY, aIZ) += 1; + SCALAR newVal = arr(aIX, aIY, aIZ); + EXPECT_NE(oldVal, newVal); + + EXPECT_EQ(newVal, v(vIX, vIY, vIZ)); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, given_flatView_when_index_then_returnsFlatIndex) +{ + ZFP_ARRAY_TYPE::flat_view v(&arr, offsetX, offsetY, offsetZ, viewLenX, viewLenY, viewLenZ); + + size_t i = 2, j = 1, k = 2; + EXPECT_EQ(k*viewLenX*viewLenY + j*viewLenX + i, v.index(i, j, k)); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, given_flatView_when_ijk_then_returnsUnflatIndices) +{ + ZFP_ARRAY_TYPE::flat_view v(&arr, offsetX, offsetY, offsetZ, viewLenX, viewLenY, viewLenZ); + + size_t i = 2, j = 1, k = 2; + size_t flatIndex = v.index(i, j, k); + + size_t vI, vJ, vK; + v.ijk(vI, vJ, vK, flatIndex); + EXPECT_EQ(i, vI); + EXPECT_EQ(j, vJ); + EXPECT_EQ(k, vK); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, given_flatView_when_bracketAccessor_then_returnsValAtFlattenedIndex) +{ + ZFP_ARRAY_TYPE::flat_view v(&arr, offsetX, offsetY, offsetZ, viewLenX, viewLenY, viewLenZ); + + size_t i = 2, j = 1, k = 2; + size_t arrOffset = (offsetZ + k)*arr.size_x()*arr.size_y() + (offsetY + j)*arr.size_x() + (offsetX + i); + EXPECT_EQ(arr[arrOffset], v[v.index(i, j, k)]); +} + +/* nested_view */ + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, when_nestedViewFullConstructor3D_then_lengthAndOffsetSet) +{ + ZFP_ARRAY_TYPE::nested_view v(&arr, offsetX, offsetY, offsetZ, viewLenX, viewLenY, viewLenZ); + + EXPECT_EQ(viewLenX * viewLenY * viewLenZ, v.size()); + EXPECT_EQ(viewLenX, v.size_x()); + EXPECT_EQ(viewLenY, v.size_y()); + EXPECT_EQ(viewLenZ, 
v.size_z()); + + EXPECT_EQ(offsetX, v.global_x(0)); + EXPECT_EQ(offsetY, v.global_y(0)); + EXPECT_EQ(offsetZ, v.global_z(0)); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, given_nestedView_when_parensAccessor_then_returnsValue) +{ + ZFP_ARRAY_TYPE::nested_view v(&arr, offsetX, offsetY, offsetZ, viewLenX, viewLenY, viewLenZ); + + /* indices for view and array */ + size_t vI = 1; + size_t vJ = 2; + size_t vK = 1; + size_t aI = offsetX + vI; + size_t aJ = offsetY + vJ; + size_t aK = offsetZ + vK; + + arr(aI, aJ, aK) = 5.5; + EXPECT_EQ(arr(aI, aJ, aK), v(vI, vJ, vK)); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, given_nestedView_when_parensMutator_then_setsValue) +{ + ZFP_ARRAY_TYPE::nested_view v(&arr, offsetX, offsetY, offsetZ, viewLenX, viewLenY, viewLenZ); + + /* indices for view and array */ + size_t vI = 1; + size_t vJ = 2; + size_t vK = 1; + size_t aI = offsetX + vI; + size_t aJ = offsetY + vJ; + size_t aK = offsetZ + vK; + + SCALAR val = 5.5; + v(vI, vJ, vK) = val; + EXPECT_EQ(val, arr(aI, aJ, aK)); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, given_nestedView_when_bracketIndex_then_returnsSliceFromView) +{ + ZFP_ARRAY_TYPE::nested_view v(&arr, offsetX, offsetY, offsetZ, viewLenX, viewLenY, viewLenZ); + + /* test slice length */ + EXPECT_EQ(viewLenX, v[0].size_x()); + EXPECT_EQ(viewLenY, v[0].size_y()); +} + +/* nested_view2 */ + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, given_nestedView2_when_parensAccessor_then_returnsValue) +{ + ZFP_ARRAY_TYPE::nested_view v(&arr, offsetX, offsetY, offsetZ, viewLenX, viewLenY, viewLenZ); + + /* indices for view and array */ + size_t vI = 1; + size_t vJ = 2; + size_t vK = 1; + size_t aI = offsetX + vI; + size_t aJ = offsetY + vJ; + size_t aK = offsetZ + vK; + + arr(aI, aJ, aK) = 5.5; + EXPECT_EQ(arr(aI, aJ, aK), v[vK](vI, vJ)); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, given_nestedView2_when_parensMutator_then_setsValue) +{ + ZFP_ARRAY_TYPE::nested_view v(&arr, offsetX, offsetY, offsetZ, viewLenX, viewLenY, viewLenZ); + + /* indices 
for view and array */ + size_t vI = 1; + size_t vJ = 2; + size_t vK = 1; + size_t aI = offsetX + vI; + size_t aJ = offsetY + vJ; + size_t aK = offsetZ + vK; + + SCALAR val = 5.5; + v[vK](vI, vJ) = val; + EXPECT_EQ(val, arr(aI, aJ, aK)); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, given_nestedView2_when_bracketIndex_then_returnsSliceFromView) +{ + ZFP_ARRAY_TYPE::nested_view v(&arr, offsetX, offsetY, offsetZ, viewLenX, viewLenY, viewLenZ); + + /* test slice length */ + EXPECT_EQ(viewLenX, v[0][0].size_x()); +} + +/* nested_view1 */ + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, given_nestedView1_when_bracketAccessor_then_returnsVal) +{ + ZFP_ARRAY_TYPE::nested_view v(&arr, offsetX, offsetY, offsetZ, viewLenX, viewLenY, viewLenZ); + + /* indices for view and array */ + size_t vJ = 2; + size_t vK = 1; + size_t aJ = offsetY + vJ; + size_t aK = offsetZ + vK; + + /* initialize values into row that will become slice */ + for (size_t aI = 0; aI < arr.size_x(); aI++) { + arr(aI, aJ, aK) = (SCALAR)aI; + } + + EXPECT_EQ(viewLenX, v[vK][vJ].size_x()); + for (size_t vI = 0; vI < viewLenX; vI++) { + EXPECT_EQ(arr(offsetX + vI, aJ, aK), v[vK][vJ][vI]); + } +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, given_nestedView1_when_parensAccessor_then_returnsVal) +{ + ZFP_ARRAY_TYPE::nested_view v(&arr, offsetX, offsetY, offsetZ, viewLenX, viewLenY, viewLenZ); + + /* indices for view and array */ + size_t vJ = 2; + size_t vK = 1; + size_t aJ = offsetY + vJ; + size_t aK = offsetZ + vK; + + /* initialize values into row that will become slice */ + for (size_t aI = 0; aI < arr.size_x(); aI++) { + arr(aI, aJ, aK) = (SCALAR)aI; + } + + EXPECT_EQ(viewLenX, v[vK][vJ].size_x()); + for (size_t vI = 0; vI < viewLenX; vI++) { + EXPECT_EQ(arr(offsetX + vI, aJ, aK), v[vK][vJ](vI)); + } +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, given_nestedView1_when_bracketMutator_then_setsVal) +{ + ZFP_ARRAY_TYPE::nested_view v(&arr, offsetX, offsetY, offsetZ, viewLenX, viewLenY, viewLenZ); + + /* indices for view and array 
*/ + size_t vJ = 2; + size_t vK = 1; + size_t aJ = offsetY + vJ; + size_t aK = offsetZ + vK; + + /* initialize values into slice */ + for (size_t vI = 0; vI < v[vK][vJ].size_x(); vI++) { + v[vK][vJ][vI] = (SCALAR)vI; + } + + for (size_t vI = 0; vI < v[vK][vJ].size_x(); vI++) { + EXPECT_EQ(v[vK][vJ][vI], arr(offsetX + vI, aJ, aK)); + } +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, given_nestedView1_when_parensMutator_then_setsVal) +{ + ZFP_ARRAY_TYPE::nested_view v(&arr, offsetX, offsetY, offsetZ, viewLenX, viewLenY, viewLenZ); + + /* indices for view and array */ + size_t vJ = 2; + size_t vK = 1; + size_t aJ = offsetY + vJ; + size_t aK = offsetZ + vK; + + /* initialize values into slice */ + for (size_t vI = 0; vI < v[vK][vJ].size_x(); vI++) { + v[vK][vJ](vI) = (SCALAR)vI; + } + + for (size_t vI = 0; vI < v[vK][vJ].size_x(); vI++) { + EXPECT_EQ(v[vK][vJ][vI], arr(offsetX + vI, aJ, aK)); + } +} + +/* private_const_view */ + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, when_privateConstViewFullConstructor_then_lengthAndOffsetSet) +{ + ZFP_ARRAY_TYPE::private_const_view v(&arr, offsetX, offsetY, offsetZ, viewLenX, viewLenY, viewLenZ); + + EXPECT_EQ(viewLenX * viewLenY * viewLenZ, v.size()); + EXPECT_EQ(viewLenX, v.size_x()); + EXPECT_EQ(viewLenY, v.size_y()); + EXPECT_EQ(viewLenZ, v.size_z()); + + EXPECT_EQ(offsetX, v.global_x(0)); + EXPECT_EQ(offsetY, v.global_y(0)); + EXPECT_EQ(offsetZ, v.global_z(0)); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, given_privateConstView_when_sizeXYZ_then_viewLenReturned) +{ + ZFP_ARRAY_TYPE::private_const_view v(&arr, offsetX, offsetY, offsetZ, viewLenX, viewLenY, viewLenZ); + + EXPECT_EQ(viewLenX, v.size_x()); + EXPECT_EQ(viewLenY, v.size_y()); + EXPECT_EQ(viewLenZ, v.size_z()); +} + +/* private_view */ + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, when_privateViewFullConstructor_then_lengthAndOffsetSet) +{ + ZFP_ARRAY_TYPE::private_view v(&arr, offsetX, offsetY, offsetZ, viewLenX, viewLenY, viewLenZ); + + EXPECT_EQ(viewLenX * viewLenY * viewLenZ, 
v.size()); + EXPECT_EQ(viewLenX, v.size_x()); + EXPECT_EQ(viewLenY, v.size_y()); + EXPECT_EQ(viewLenZ, v.size_z()); + + EXPECT_EQ(offsetX, v.global_x(0)); + EXPECT_EQ(offsetY, v.global_y(0)); + EXPECT_EQ(offsetZ, v.global_z(0)); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, given_privateView_when_partitionWithLimitOnCount_then_setsUniqueBlockBoundsAlongLongestDimension) +{ + const size_t count = 3; + size_t prevOffsetY, prevLenY, offsetY, lenY; + + /* partition such that each gets at least 1 block */ + const size_t blockSideLen = 4; + size_t arrBlockCountX = (arr.size_x() + (blockSideLen - 1)) / blockSideLen; + size_t arrBlockCountY = (arr.size_y() + (blockSideLen - 1)) / blockSideLen; + size_t arrBlockCountZ = (arr.size_z() + (blockSideLen - 1)) / blockSideLen; + /* ensure partition will happen along Y */ + EXPECT_GT(arrBlockCountY, std::max(arrBlockCountX, arrBlockCountZ)); + EXPECT_LE(count, arrBlockCountY); + + /* construct view */ + ZFP_ARRAY_TYPE::private_view v(&arr); + + /* get original dimensions that should stay constant */ + size_t offsetX = v.global_x(0); + size_t offsetZ = v.global_z(0); + size_t lenX = v.size_x(); + size_t lenZ = v.size_z(); + + /* base case */ + v.partition(0, count); + + /* along Y, expect to start at first index, zero */ + prevOffsetY = v.global_y(0); + EXPECT_EQ(0, prevOffsetY); + /* expect to have at least 1 block */ + prevLenY = v.size_y(); + EXPECT_LE(blockSideLen, prevLenY); + + /* along X and Z, expect no changes */ + EXPECT_EQ(offsetX, v.global_x(0)); + EXPECT_EQ(offsetZ, v.global_z(0)); + EXPECT_EQ(lenX, v.size_x()); + EXPECT_EQ(lenZ, v.size_z()); + + /* successive cases are compared to previous */ + for (size_t i = 1; i < count - 1; i++) { + ZFP_ARRAY_TYPE::private_view v2(&arr); + v2.partition(i, count); + + /* along Y, expect blocks continue where previous left off */ + offsetY = v2.global_y(0); + EXPECT_EQ(prevOffsetY + prevLenY, offsetY); + /* expect to have at least 1 block */ + lenY = v2.size_y(); + 
EXPECT_LE(blockSideLen, lenY); + + /* along X and Z, expect no changes */ + EXPECT_EQ(offsetX, v2.global_x(0)); + EXPECT_EQ(offsetZ, v2.global_z(0)); + EXPECT_EQ(lenX, v2.size_x()); + EXPECT_EQ(lenZ, v2.size_z()); + + prevOffsetY = offsetY; + prevLenY = lenY; + } + + /* last partition case */ + ZFP_ARRAY_TYPE::private_view v3(&arr); + v3.partition(count - 1, count); + + /* along Y, expect blocks continue where previous left off */ + offsetY = v3.global_y(0); + EXPECT_EQ(prevOffsetY + prevLenY, offsetY); + /* last partition could hold a partial block */ + lenY = v3.size_y(); + EXPECT_LT(0u, lenY); + /* expect to end on final index */ + EXPECT_EQ(arr.size_y(), offsetY + lenY); + + /* along X and Z, expect no changes */ + EXPECT_EQ(offsetX, v3.global_x(0)); + EXPECT_EQ(offsetZ, v3.global_z(0)); + EXPECT_EQ(lenX, v3.size_x()); + EXPECT_EQ(lenZ, v3.size_z()); +} diff --git a/tests/array/array/testArray3d.cpp b/tests/array/array/testArray3d.cpp new file mode 100644 index 00000000..0287f4ad --- /dev/null +++ b/tests/array/array/testArray3d.cpp @@ -0,0 +1,45 @@ +#include "zfp/array1.hpp" +#include "zfp/array2.hpp" +#include "zfp/array3.hpp" +#include "zfp/factory.hpp" +#include "zfp/array4.hpp" +using namespace zfp; + +extern "C" { + #include "constants/3dDouble.h" +} + +#include "gtest/gtest.h" +#include "utils/gtestDoubleEnv.h" +#include "utils/gtestBaseFixture.h" +#include "utils/predicates.h" + +class Array3dTestEnv : public ArrayDoubleTestEnv { +public: + virtual int getDims() { return 3; } +}; + +Array3dTestEnv* const testEnv = new Array3dTestEnv; + +class Array3dTest : public ArrayNdTestFixture {}; + +#define TEST_FIXTURE Array3dTest + +#define ZFP_ARRAY_TYPE array3d +#define ZFP_ARRAY_TYPE_WRONG_SCALAR array3f +#define ZFP_ARRAY_TYPE_WRONG_DIM array4d +#define ZFP_ARRAY_TYPE_WRONG_SCALAR_DIM array4f +#define ZFP_ARRAY_NOT_INCLUDED_TYPE array2d + +#define UINT uint64 +#define SCALAR double +#define DIMS 3 + +#include "testArrayBase.cpp" +#include 
"testArray3Base.cpp" + +int main(int argc, char* argv[]) { + ::testing::InitGoogleTest(&argc, argv); + static_cast<void>(::testing::AddGlobalTestEnvironment(testEnv)); + return RUN_ALL_TESTS(); +} diff --git a/tests/array/array/testArray3dIters.cpp b/tests/array/array/testArray3dIters.cpp new file mode 100644 index 00000000..3c096c08 --- /dev/null +++ b/tests/array/array/testArray3dIters.cpp @@ -0,0 +1,10 @@ +#include "zfp/array3.hpp" +using namespace zfp; + +#define ARRAY_DIMS_SCALAR_TEST Array3dTest +#define ARRAY_DIMS_SCALAR_TEST_ITERS Array3dTestIters + +#include "utils/gtest3dTest.h" + +#include "testArrayItersBase.cpp" +#include "testArray3ItersBase.cpp" diff --git a/tests/array/array/testArray3dPtrs.cpp b/tests/array/array/testArray3dPtrs.cpp new file mode 100644 index 00000000..ebf9ac6a --- /dev/null +++ b/tests/array/array/testArray3dPtrs.cpp @@ -0,0 +1,10 @@ +#include "zfp/array3.hpp" +using namespace zfp; + +#define ARRAY_DIMS_SCALAR_TEST Array3dTest +#define ARRAY_DIMS_SCALAR_TEST_PTRS Array3dTestPtrs + +#include "utils/gtest3dTest.h" + +#include "testArrayPtrsBase.cpp" +#include "testArray3PtrsBase.cpp" diff --git a/tests/array/array/testArray3dRefs.cpp b/tests/array/array/testArray3dRefs.cpp new file mode 100644 index 00000000..9e47931b --- /dev/null +++ b/tests/array/array/testArray3dRefs.cpp @@ -0,0 +1,14 @@ +#include "zfp/array3.hpp" +using namespace zfp; + +extern "C" { + #include "utils/rand64.h" +} + +#define ARRAY_DIMS_SCALAR_TEST Array3dTest +#define ARRAY_DIMS_SCALAR_TEST_REFS Array3dTestRefs + +#include "utils/gtest3dTest.h" + +#include "testArrayRefsBase.cpp" +#include "testArray3RefsBase.cpp" diff --git a/tests/array/array/testArray3dViewIters.cpp b/tests/array/array/testArray3dViewIters.cpp new file mode 100644 index 00000000..fcdd65a9 --- /dev/null +++ b/tests/array/array/testArray3dViewIters.cpp @@ -0,0 +1,17 @@ +#include "zfp/array3.hpp" +using namespace zfp; + +extern "C" { + #include "utils/rand64.h" +} + +#define ARRAY_DIMS_SCALAR_TEST 
Array3dTest +#define ARRAY_DIMS_SCALAR_TEST_VIEW_ITERS Array3dTestViewIters + +#include "utils/gtest3dTest.h" + +#define ZFP_ARRAY_TYPE array3d +#define SCALAR double +#define DIMS 3 + +#include "testArrayViewItersBase.cpp" diff --git a/tests/array/array/testArray3dViewPtrs.cpp b/tests/array/array/testArray3dViewPtrs.cpp new file mode 100644 index 00000000..5eafb769 --- /dev/null +++ b/tests/array/array/testArray3dViewPtrs.cpp @@ -0,0 +1,17 @@ +#include "zfp/array3.hpp" +using namespace zfp; + +extern "C" { + #include "utils/rand64.h" +} + +#define ARRAY_DIMS_SCALAR_TEST Array3dTest +#define ARRAY_DIMS_SCALAR_TEST_VIEW_PTRS Array3dTestViewPtrs + +#include "utils/gtest3dTest.h" + +#define ZFP_ARRAY_TYPE array3d +#define SCALAR double +#define DIMS 3 + +#include "testArrayViewPtrsBase.cpp" diff --git a/tests/array/array/testArray3dViews.cpp b/tests/array/array/testArray3dViews.cpp new file mode 100644 index 00000000..117a49c4 --- /dev/null +++ b/tests/array/array/testArray3dViews.cpp @@ -0,0 +1,18 @@ +#include "zfp/array3.hpp" +using namespace zfp; + +extern "C" { + #include "utils/rand64.h" +} + +#define ARRAY_DIMS_SCALAR_TEST Array3dTest +#define ARRAY_DIMS_SCALAR_TEST_VIEWS Array3dTestViews + +#include "utils/gtest3dTest.h" + +#define ZFP_ARRAY_TYPE array3d +#define SCALAR double +#define DIMS 3 + +#include "testArrayViewsBase.cpp" +#include "testArray3ViewsBase.cpp" diff --git a/tests/array/array/testArray3f.cpp b/tests/array/array/testArray3f.cpp new file mode 100644 index 00000000..22bb3d45 --- /dev/null +++ b/tests/array/array/testArray3f.cpp @@ -0,0 +1,45 @@ +#include "zfp/array1.hpp" +#include "zfp/array2.hpp" +#include "zfp/array3.hpp" +#include "zfp/factory.hpp" +#include "zfp/array4.hpp" +using namespace zfp; + +extern "C" { + #include "constants/3dFloat.h" +} + +#include "gtest/gtest.h" +#include "utils/gtestFloatEnv.h" +#include "utils/gtestBaseFixture.h" +#include "utils/predicates.h" + +class Array3fTestEnv : public ArrayFloatTestEnv { +public: + 
virtual int getDims() { return 3; } +}; + +Array3fTestEnv* const testEnv = new Array3fTestEnv; + +class Array3fTest : public ArrayNdTestFixture {}; + +#define TEST_FIXTURE Array3fTest + +#define ZFP_ARRAY_TYPE array3f +#define ZFP_ARRAY_TYPE_WRONG_SCALAR array3d +#define ZFP_ARRAY_TYPE_WRONG_DIM array4f +#define ZFP_ARRAY_TYPE_WRONG_SCALAR_DIM array4d +#define ZFP_ARRAY_NOT_INCLUDED_TYPE array2f + +#define UINT uint32 +#define SCALAR float +#define DIMS 3 + +#include "testArrayBase.cpp" +#include "testArray3Base.cpp" + +int main(int argc, char* argv[]) { + ::testing::InitGoogleTest(&argc, argv); + static_cast<void>(::testing::AddGlobalTestEnvironment(testEnv)); + return RUN_ALL_TESTS(); +} diff --git a/tests/array/array/testArray3fIters.cpp b/tests/array/array/testArray3fIters.cpp new file mode 100644 index 00000000..4eaf6376 --- /dev/null +++ b/tests/array/array/testArray3fIters.cpp @@ -0,0 +1,10 @@ +#include "zfp/array3.hpp" +using namespace zfp; + +#define ARRAY_DIMS_SCALAR_TEST Array3fTest +#define ARRAY_DIMS_SCALAR_TEST_ITERS Array3fTestIters + +#include "utils/gtest3fTest.h" + +#include "testArrayItersBase.cpp" +#include "testArray3ItersBase.cpp" diff --git a/tests/array/array/testArray3fPtrs.cpp b/tests/array/array/testArray3fPtrs.cpp new file mode 100644 index 00000000..d1a7801c --- /dev/null +++ b/tests/array/array/testArray3fPtrs.cpp @@ -0,0 +1,10 @@ +#include "zfp/array3.hpp" +using namespace zfp; + +#define ARRAY_DIMS_SCALAR_TEST Array3fTest +#define ARRAY_DIMS_SCALAR_TEST_PTRS Array3fTestPtrs + +#include "utils/gtest3fTest.h" + +#include "testArrayPtrsBase.cpp" +#include "testArray3PtrsBase.cpp" diff --git a/tests/array/array/testArray3fRefs.cpp b/tests/array/array/testArray3fRefs.cpp new file mode 100644 index 00000000..1f27c178 --- /dev/null +++ b/tests/array/array/testArray3fRefs.cpp @@ -0,0 +1,14 @@ +#include "zfp/array3.hpp" +using namespace zfp; + +extern "C" { + #include "utils/rand32.h" +} + +#define ARRAY_DIMS_SCALAR_TEST Array3fTest +#define
ARRAY_DIMS_SCALAR_TEST_REFS Array3fTestRefs + +#include "utils/gtest3fTest.h" + +#include "testArrayRefsBase.cpp" +#include "testArray3RefsBase.cpp" diff --git a/tests/array/array/testArray3fViewIters.cpp b/tests/array/array/testArray3fViewIters.cpp new file mode 100644 index 00000000..74a56fb3 --- /dev/null +++ b/tests/array/array/testArray3fViewIters.cpp @@ -0,0 +1,17 @@ +#include "zfp/array3.hpp" +using namespace zfp; + +extern "C" { + #include "utils/rand64.h" +} + +#define ARRAY_DIMS_SCALAR_TEST Array3fTest +#define ARRAY_DIMS_SCALAR_TEST_VIEW_ITERS Array3fTestViewIters + +#include "utils/gtest3fTest.h" + +#define ZFP_ARRAY_TYPE array3f +#define SCALAR float +#define DIMS 3 + +#include "testArrayViewItersBase.cpp" diff --git a/tests/array/array/testArray3fViewPtrs.cpp b/tests/array/array/testArray3fViewPtrs.cpp new file mode 100644 index 00000000..fa14e681 --- /dev/null +++ b/tests/array/array/testArray3fViewPtrs.cpp @@ -0,0 +1,17 @@ +#include "zfp/array3.hpp" +using namespace zfp; + +extern "C" { + #include "utils/rand64.h" +} + +#define ARRAY_DIMS_SCALAR_TEST Array3fTest +#define ARRAY_DIMS_SCALAR_TEST_VIEW_PTRS Array3fTestViewPtrs + +#include "utils/gtest3fTest.h" + +#define ZFP_ARRAY_TYPE array3f +#define SCALAR float +#define DIMS 3 + +#include "testArrayViewPtrsBase.cpp" diff --git a/tests/array/array/testArray3fViews.cpp b/tests/array/array/testArray3fViews.cpp new file mode 100644 index 00000000..7ddd0874 --- /dev/null +++ b/tests/array/array/testArray3fViews.cpp @@ -0,0 +1,18 @@ +#include "zfp/array3.hpp" +using namespace zfp; + +extern "C" { + #include "utils/rand32.h" +} + +#define ARRAY_DIMS_SCALAR_TEST Array3fTest +#define ARRAY_DIMS_SCALAR_TEST_VIEWS Array3fTestViews + +#include "utils/gtest3fTest.h" + +#define ZFP_ARRAY_TYPE array3f +#define SCALAR float +#define DIMS 3 + +#include "testArrayViewsBase.cpp" +#include "testArray3ViewsBase.cpp" diff --git a/tests/array/array/testArray4Base.cpp b/tests/array/array/testArray4Base.cpp new file mode 
100644 index 00000000..3978d2ad --- /dev/null +++ b/tests/array/array/testArray4Base.cpp @@ -0,0 +1,741 @@ +/* TODO: figure out templated tests (TYPED_TEST) */ + +/* const_view */ + +TEST_P(TEST_FIXTURE, when_construct4dCompressedArrayFromConstView_then_rateConserved) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); + ZFP_ARRAY_TYPE::const_view v(&arr, 1, 1, 1, 1, 1, 1, 1, 1); + ZFP_ARRAY_TYPE arr2(v); + + EXPECT_EQ(arr.rate(), arr2.rate()); +} + +TEST_P(TEST_FIXTURE, when_construct4dCompressedArrayFromConstView_then_sizeConserved) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); + + size_t offsetX = 5; + size_t viewLenX = 3; + EXPECT_LT(offsetX + viewLenX, arr.size_x()); + + size_t offsetY = 1; + size_t viewLenY = 3; + EXPECT_LT(offsetY + viewLenY, arr.size_y()); + + size_t offsetZ = 0; + size_t viewLenZ = 2; + EXPECT_LT(offsetZ + viewLenZ, arr.size_z()); + + size_t offsetW = 1; + size_t viewLenW = 4; + EXPECT_LT(offsetW + viewLenW, arr.size_w()); + + /* create view and construct from it */ + ZFP_ARRAY_TYPE::const_view v(&arr, offsetX, offsetY, offsetZ, offsetW, viewLenX, viewLenY, viewLenZ, viewLenW); + ZFP_ARRAY_TYPE arr2(v); + + EXPECT_EQ(v.size_x(), arr2.size_x()); + EXPECT_EQ(v.size_y(), arr2.size_y()); + EXPECT_EQ(v.size_z(), arr2.size_z()); + EXPECT_EQ(v.size_w(), arr2.size_w()); +} + +TEST_P(TEST_FIXTURE, when_construct4dCompressedArrayFromConstView_then_performsDeepCopy) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); + + size_t offsetX = 5; + size_t viewLenX = 3; + EXPECT_LT(offsetX + viewLenX, arr.size_x()); + + size_t offsetY = 1; + size_t viewLenY = 3; + EXPECT_LT(offsetY + viewLenY, arr.size_y()); + + size_t offsetZ = 0; + size_t viewLenZ = 2; + EXPECT_LT(offsetZ + viewLenZ, arr.size_z()); + + size_t offsetW = 1; + size_t viewLenW = 
4; + EXPECT_LT(offsetW + viewLenW, arr.size_w()); + + /* create view and construct from it */ + ZFP_ARRAY_TYPE::const_view v(&arr, offsetX, offsetY, offsetZ, offsetW, viewLenX, viewLenY, viewLenZ, viewLenW); + ZFP_ARRAY_TYPE arr2(v); + + /* verify array entries */ + for (size_t l = 0; l < viewLenW; l++) { + for (size_t k = 0; k < viewLenZ; k++) { + for (size_t j = 0; j < viewLenY; j++) { + for (size_t i = 0; i < viewLenX; i++) { + EXPECT_EQ(arr(offsetX + i, offsetY + j, offsetZ + k, offsetW + l), arr2(i, j, k, l)); + } + } + } + } + + /* verify it's a deep copy */ + arr(offsetX + 0, offsetY + 0, offsetZ + 0, offsetW + 0) = 999.; + EXPECT_NE(arr(offsetX, offsetY, offsetZ, offsetW), arr2(0, 0, 0, 0)); +} + +/* view */ + +TEST_P(TEST_FIXTURE, when_construct4dCompressedArrayFromView_then_rateConserved) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); + ZFP_ARRAY_TYPE::view v(&arr, 1, 1, 1, 1, 1, 1, 1, 1); + ZFP_ARRAY_TYPE arr2(v); + + EXPECT_EQ(arr.rate(), arr2.rate()); +} + +TEST_P(TEST_FIXTURE, when_construct4dCompressedArrayFromView_then_sizeConserved) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); + + size_t offsetX = 5; + size_t viewLenX = 3; + EXPECT_LT(offsetX + viewLenX, arr.size_x()); + + size_t offsetY = 1; + size_t viewLenY = 3; + EXPECT_LT(offsetY + viewLenY, arr.size_y()); + + size_t offsetZ = 0; + size_t viewLenZ = 2; + EXPECT_LT(offsetZ + viewLenZ, arr.size_z()); + + size_t offsetW = 1; + size_t viewLenW = 4; + EXPECT_LT(offsetW + viewLenW, arr.size_w()); + + /* create view and construct from it */ + ZFP_ARRAY_TYPE::view v(&arr, offsetX, offsetY, offsetZ, offsetW, viewLenX, viewLenY, viewLenZ, viewLenW); + ZFP_ARRAY_TYPE arr2(v); + + EXPECT_EQ(v.size_x(), arr2.size_x()); + EXPECT_EQ(v.size_y(), arr2.size_y()); + EXPECT_EQ(v.size_z(), arr2.size_z()); + EXPECT_EQ(v.size_w(), arr2.size_w()); +} + 
+TEST_P(TEST_FIXTURE, when_construct4dCompressedArrayFromView_then_performsDeepCopy) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); + + size_t offsetX = 5; + size_t viewLenX = 3; + EXPECT_LT(offsetX + viewLenX, arr.size_x()); + + size_t offsetY = 1; + size_t viewLenY = 3; + EXPECT_LT(offsetY + viewLenY, arr.size_y()); + + size_t offsetZ = 0; + size_t viewLenZ = 2; + EXPECT_LT(offsetZ + viewLenZ, arr.size_z()); + + size_t offsetW = 1; + size_t viewLenW = 4; + EXPECT_LT(offsetW + viewLenW, arr.size_w()); + + /* create view and construct from it */ + ZFP_ARRAY_TYPE::view v(&arr, offsetX, offsetY, offsetZ, offsetW, viewLenX, viewLenY, viewLenZ, viewLenW); + ZFP_ARRAY_TYPE arr2(v); + + /* verify array entries */ + for (size_t l = 0; l < viewLenW; l++) { + for (size_t k = 0; k < viewLenZ; k++) { + for (size_t j = 0; j < viewLenY; j++) { + for (size_t i = 0; i < viewLenX; i++) { + EXPECT_EQ(arr(offsetX + i, offsetY + j, offsetZ + k, offsetW + l), arr2(i, j, k, l)); + } + } + } + } + + /* verify it's a deep copy */ + arr(offsetX + 0, offsetY + 0, offsetZ + 0, offsetW + 0) = 999.; + EXPECT_NE(arr(offsetX, offsetY, offsetZ, offsetW), arr2(0, 0, 0, 0)); +} + +/* flat_view */ + +TEST_P(TEST_FIXTURE, when_construct4dCompressedArrayFromFlatView_then_rateConserved) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); + ZFP_ARRAY_TYPE::flat_view v(&arr, 1, 1, 1, 1, 1, 1, 1, 1); + ZFP_ARRAY_TYPE arr2(v); + + EXPECT_EQ(arr.rate(), arr2.rate()); +} + +TEST_P(TEST_FIXTURE, when_construct4dCompressedArrayFromFlatView_then_sizeConserved) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); + + size_t offsetX = 5; + size_t viewLenX = 3; + EXPECT_LT(offsetX + viewLenX, arr.size_x()); + + size_t offsetY = 1; + size_t viewLenY = 3; + EXPECT_LT(offsetY + viewLenY, arr.size_y()); + + 
size_t offsetZ = 0; + size_t viewLenZ = 2; + EXPECT_LT(offsetZ + viewLenZ, arr.size_z()); + + size_t offsetW = 1; + size_t viewLenW = 4; + EXPECT_LT(offsetW + viewLenW, arr.size_w()); + + /* create view and construct from it */ + ZFP_ARRAY_TYPE::flat_view v(&arr, offsetX, offsetY, offsetZ, offsetW, viewLenX, viewLenY, viewLenZ, viewLenW); + ZFP_ARRAY_TYPE arr2(v); + + EXPECT_EQ(v.size_x(), arr2.size_x()); + EXPECT_EQ(v.size_y(), arr2.size_y()); + EXPECT_EQ(v.size_z(), arr2.size_z()); + EXPECT_EQ(v.size_w(), arr2.size_w()); +} + +TEST_P(TEST_FIXTURE, when_construct4dCompressedArrayFromFlatView_then_performsDeepCopy) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); + + size_t offsetX = 5; + size_t viewLenX = 3; + EXPECT_LT(offsetX + viewLenX, arr.size_x()); + + size_t offsetY = 1; + size_t viewLenY = 3; + EXPECT_LT(offsetY + viewLenY, arr.size_y()); + + size_t offsetZ = 0; + size_t viewLenZ = 2; + EXPECT_LT(offsetZ + viewLenZ, arr.size_z()); + + size_t offsetW = 1; + size_t viewLenW = 4; + EXPECT_LT(offsetW + viewLenW, arr.size_w()); + + /* create view and construct from it */ + ZFP_ARRAY_TYPE::flat_view v(&arr, offsetX, offsetY, offsetZ, offsetW, viewLenX, viewLenY, viewLenZ, viewLenW); + ZFP_ARRAY_TYPE arr2(v); + + /* verify array entries */ + for (size_t l = 0; l < viewLenW; l++) { + for (size_t k = 0; k < viewLenZ; k++) { + for (size_t j = 0; j < viewLenY; j++) { + for (size_t i = 0; i < viewLenX; i++) { + EXPECT_EQ(arr(offsetX + i, offsetY + j, offsetZ + k, offsetW + l), arr2(i, j, k, l)); + } + } + } + } + + /* verify it's a deep copy */ + arr(offsetX + 0, offsetY + 0, offsetZ + 0, offsetW + 0) = 999.; + EXPECT_NE(arr(offsetX, offsetY, offsetZ, offsetW), arr2(0, 0, 0, 0)); +} + +/* nested_view */ + +TEST_P(TEST_FIXTURE, when_construct4dCompressedArrayFromNestedView_then_rateConserved) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, inputDataSideLen, getRate(), 
inputDataArr); + ZFP_ARRAY_TYPE::nested_view v(&arr, 1, 1, 1, 1, 1, 1, 1, 1); + ZFP_ARRAY_TYPE arr2(v); + + /* rate may be increased when moving to lower dimension compressed array */ + EXPECT_LE(arr.rate(), arr2.rate()); +} + +TEST_P(TEST_FIXTURE, when_construct4dCompressedArrayFromNestedView_then_sizeConserved) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); + + size_t offsetX = 5; + size_t viewLenX = 3; + EXPECT_LT(offsetX + viewLenX, arr.size_x()); + + size_t offsetY = 1; + size_t viewLenY = 3; + EXPECT_LT(offsetY + viewLenY, arr.size_y()); + + size_t offsetZ = 0; + size_t viewLenZ = 2; + EXPECT_LT(offsetZ + viewLenZ, arr.size_z()); + + size_t offsetW = 1; + size_t viewLenW = 4; + EXPECT_LT(offsetW + viewLenW, arr.size_w()); + + /* create view and construct from it */ + ZFP_ARRAY_TYPE::nested_view v(&arr, offsetX, offsetY, offsetZ, offsetW, viewLenX, viewLenY, viewLenZ, viewLenW); + ZFP_ARRAY_TYPE arr2(v); + + EXPECT_EQ(v.size_x(), arr2.size_x()); + EXPECT_EQ(v.size_y(), arr2.size_y()); + EXPECT_EQ(v.size_z(), arr2.size_z()); + EXPECT_EQ(v.size_w(), arr2.size_w()); +} + +TEST_P(TEST_FIXTURE, when_construct4dCompressedArrayFromNestedView_then_performsDeepCopy) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); + + size_t offsetX = 5; + size_t viewLenX = 3; + EXPECT_LT(offsetX + viewLenX, arr.size_x()); + + size_t offsetY = 1; + size_t viewLenY = 3; + EXPECT_LT(offsetY + viewLenY, arr.size_y()); + + size_t offsetZ = 0; + size_t viewLenZ = 2; + EXPECT_LT(offsetZ + viewLenZ, arr.size_z()); + + size_t offsetW = 1; + size_t viewLenW = 4; + EXPECT_LT(offsetW + viewLenW, arr.size_w()); + + /* create view and construct from it */ + ZFP_ARRAY_TYPE::nested_view v(&arr, offsetX, offsetY, offsetZ, offsetW, viewLenX, viewLenY, viewLenZ, viewLenW); + ZFP_ARRAY_TYPE arr2(v); + + /* verify array entries */ + for (size_t l = 0; l < 
viewLenW; l++) { + for (size_t k = 0; k < viewLenZ; k++) { + for (size_t j = 0; j < viewLenY; j++) { + for (size_t i = 0; i < viewLenX; i++) { + EXPECT_EQ(arr(offsetX + i, offsetY + j, offsetZ + k, offsetW + l), arr2(i, j, k, l)); + } + } + } + } + + /* verify it's a deep copy */ + arr(offsetX + 0, offsetY + 0, offsetZ + 0, offsetW + 0) = 999.; + EXPECT_NE(arr(offsetX, offsetY, offsetZ, offsetW), arr2(0, 0, 0, 0)); +} + +/* nested_view3 (unique) */ + +TEST_P(TEST_FIXTURE, when_construct3dCompressedArrayFromNestedView3_then_rateConserved) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); + ZFP_ARRAY_TYPE::nested_view v(&arr, 1, 1, 1, 1, 1, 1, 1, 1); + ZFP_ARRAY_TYPE::nested_view3 v2 = v[0]; + + array3<SCALAR> arr2(v2); + + /* rate may be increased when moving to lower dimension compressed array */ + EXPECT_LE(arr.rate(), arr2.rate()); +} + +TEST_P(TEST_FIXTURE, when_construct3dCompressedArrayFromNestedView3_then_sizeConserved) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); + + size_t offsetX = 5; + size_t viewLenX = 3; + EXPECT_LT(offsetX + viewLenX, arr.size_x()); + + size_t offsetY = 1; + size_t viewLenY = 3; + EXPECT_LT(offsetY + viewLenY, arr.size_y()); + + size_t offsetZ = 0; + size_t viewLenZ = 2; + EXPECT_LT(offsetZ + viewLenZ, arr.size_z()); + + size_t offsetW = 1; + size_t viewLenW = 4; + EXPECT_LT(offsetW + viewLenW, arr.size_w()); + + /* create view and construct from it */ + ZFP_ARRAY_TYPE::nested_view v(&arr, offsetX, offsetY, offsetZ, offsetW, viewLenX, viewLenY, viewLenZ, viewLenW); + ZFP_ARRAY_TYPE::nested_view3 v2 = v[0]; + + array3<SCALAR> arr2(v2); + + EXPECT_EQ(v.size_x(), arr2.size_x()); + EXPECT_EQ(v.size_y(), arr2.size_y()); + EXPECT_EQ(v.size_z(), arr2.size_z()); +} + +TEST_P(TEST_FIXTURE, when_construct3dCompressedArrayFromNestedView3_then_performsDeepCopy) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen,
inputDataSideLen, inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); + + size_t offsetX = 5; + size_t viewLenX = 3; + EXPECT_LT(offsetX + viewLenX, arr.size_x()); + + size_t offsetY = 1; + size_t viewLenY = 3; + EXPECT_LT(offsetY + viewLenY, arr.size_y()); + + size_t offsetZ = 0; + size_t viewLenZ = 2; + EXPECT_LT(offsetZ + viewLenZ, arr.size_z()); + + size_t offsetW = 1; + size_t viewLenW = 4; + EXPECT_LT(offsetW + viewLenW, arr.size_w()); + + /* create view and construct from it */ + ZFP_ARRAY_TYPE::nested_view v(&arr, offsetX, offsetY, offsetZ, offsetW, viewLenX, viewLenY, viewLenZ, viewLenW); + size_t w = 1; + ZFP_ARRAY_TYPE::nested_view3 v2 = v[w]; + + array3<SCALAR> arr2(v2); + + /* verify array entries */ + for (size_t k = 0; k < viewLenZ; k++) { + for (size_t j = 0; j < viewLenY; j++) { + for (size_t i = 0; i < viewLenX; i++) { + EXPECT_EQ(arr(offsetX + i, offsetY + j, offsetZ + k, offsetW + w), arr2(i, j, k)); + } + } + } + + /* verify it's a deep copy */ + arr(offsetX + 0, offsetY + 0, offsetZ + 0, offsetW + w) = 999.; + EXPECT_NE(arr(offsetX, offsetY, offsetZ, offsetW + w), arr2(0, 0, 0)); +} + +/* nested_view2 (unique) */ + +TEST_P(TEST_FIXTURE, when_construct2dCompressedArrayFromNestedView2_then_rateConserved) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); + ZFP_ARRAY_TYPE::nested_view v(&arr, 1, 1, 1, 1, 1, 1, 1, 1); + ZFP_ARRAY_TYPE::nested_view2 v2 = v[0][0]; + + array2<SCALAR> arr2(v2); + + /* rate may be increased when moving to lower dimension compressed array */ + EXPECT_LE(arr.rate(), arr2.rate()); +} + +TEST_P(TEST_FIXTURE, when_construct2dCompressedArrayFromNestedView2_then_sizeConserved) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); + + size_t offsetX = 5; + size_t viewLenX = 3; + EXPECT_LT(offsetX + viewLenX, arr.size_x()); + + size_t offsetY = 1; + size_t viewLenY = 3; + EXPECT_LT(offsetY +
viewLenY, arr.size_y()); + + size_t offsetZ = 0; + size_t viewLenZ = 2; + EXPECT_LT(offsetZ + viewLenZ, arr.size_z()); + + size_t offsetW = 1; + size_t viewLenW = 4; + EXPECT_LT(offsetW + viewLenW, arr.size_w()); + + /* create view and construct from it */ + ZFP_ARRAY_TYPE::nested_view v(&arr, offsetX, offsetY, offsetZ, offsetW, viewLenX, viewLenY, viewLenZ, viewLenW); + ZFP_ARRAY_TYPE::nested_view2 v2 = v[0][0]; + + array2<SCALAR> arr2(v2); + + EXPECT_EQ(v.size_x(), arr2.size_x()); + EXPECT_EQ(v.size_y(), arr2.size_y()); +} + +TEST_P(TEST_FIXTURE, when_construct2dCompressedArrayFromNestedView2_then_performsDeepCopy) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); + + size_t offsetX = 5; + size_t viewLenX = 3; + EXPECT_LT(offsetX + viewLenX, arr.size_x()); + + size_t offsetY = 1; + size_t viewLenY = 3; + EXPECT_LT(offsetY + viewLenY, arr.size_y()); + + size_t offsetZ = 0; + size_t viewLenZ = 2; + EXPECT_LT(offsetZ + viewLenZ, arr.size_z()); + + size_t offsetW = 1; + size_t viewLenW = 4; + EXPECT_LT(offsetW + viewLenW, arr.size_w()); + + /* create view and construct from it */ + ZFP_ARRAY_TYPE::nested_view v(&arr, offsetX, offsetY, offsetZ, offsetW, viewLenX, viewLenY, viewLenZ, viewLenW); + size_t z = 1; + size_t w = 0; + ZFP_ARRAY_TYPE::nested_view2 v2 = v[w][z]; + + array2<SCALAR> arr2(v2); + + /* verify array entries */ + for (size_t j = 0; j < viewLenY; j++) { + for (size_t i = 0; i < viewLenX; i++) { + EXPECT_EQ(arr(offsetX + i, offsetY + j, offsetZ + z, offsetW + w), arr2(i, j)); + } + } + + /* verify it's a deep copy */ + arr(offsetX + 0, offsetY + 0, offsetZ + z, offsetW + w) = 999.; + EXPECT_NE(arr(offsetX, offsetY, offsetZ + z, offsetW + w), arr2(0, 0)); +} + +/* nested_view1 (unique) */ + +TEST_P(TEST_FIXTURE, when_construct1dCompressedArrayFromNestedView1_then_rateConserved) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); +
ZFP_ARRAY_TYPE::nested_view v(&arr, 1, 1, 1, 1, 1, 1, 1, 1); + ZFP_ARRAY_TYPE::nested_view1 v2 = v[0][0][0]; + + array1<SCALAR> arr2(v2); + + /* rate may be increased when moving to lower dimension compressed array */ + EXPECT_LE(arr.rate(), arr2.rate()); +} + +TEST_P(TEST_FIXTURE, when_construct1dCompressedArrayFromNestedView1_then_sizeConserved) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); + + size_t offsetX = 5; + size_t viewLenX = 3; + EXPECT_LT(offsetX + viewLenX, arr.size_x()); + + size_t offsetY = 1; + size_t viewLenY = 3; + EXPECT_LT(offsetY + viewLenY, arr.size_y()); + + size_t offsetZ = 0; + size_t viewLenZ = 2; + EXPECT_LT(offsetZ + viewLenZ, arr.size_z()); + + size_t offsetW = 1; + size_t viewLenW = 4; + EXPECT_LT(offsetW + viewLenW, arr.size_w()); + + /* create view and construct from it */ + ZFP_ARRAY_TYPE::nested_view v(&arr, offsetX, offsetY, offsetZ, offsetW, viewLenX, viewLenY, viewLenZ, viewLenW); + ZFP_ARRAY_TYPE::nested_view1 v2 = v[0][0][0]; + + array1<SCALAR> arr2(v2); + + EXPECT_EQ(v.size_x(), arr2.size_x()); +} + +TEST_P(TEST_FIXTURE, when_construct1dCompressedArrayFromNestedView1_then_performsDeepCopy) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); + + size_t offsetX = 5; + size_t viewLenX = 3; + EXPECT_LT(offsetX + viewLenX, arr.size_x()); + + size_t offsetY = 1; + size_t viewLenY = 3; + EXPECT_LT(offsetY + viewLenY, arr.size_y()); + + size_t offsetZ = 0; + size_t viewLenZ = 2; + EXPECT_LT(offsetZ + viewLenZ, arr.size_z()); + + size_t offsetW = 1; + size_t viewLenW = 4; + EXPECT_LT(offsetW + viewLenW, arr.size_w()); + + /* create view and construct from it */ + ZFP_ARRAY_TYPE::nested_view v(&arr, offsetX, offsetY, offsetZ, offsetW, viewLenX, viewLenY, viewLenZ, viewLenW); + size_t y = 2; + size_t z = 1; + size_t w = 0; + ZFP_ARRAY_TYPE::nested_view1 v2 = v[w][z][y]; + + array1<SCALAR> arr2(v2); + + /* verify
array entries */ + for (size_t i = 0; i < viewLenX; i++) { + EXPECT_EQ(arr(offsetX + i, offsetY + y, offsetZ + z, offsetW + w), arr2(i)); + } + + /* verify it's a deep copy */ + arr(offsetX + 0, offsetY + y, offsetZ + z, offsetW + w) = 999.; + EXPECT_NE(arr(offsetX, offsetY + y, offsetZ + z, offsetW + w), arr2(0)); +} + +/* private_const_view */ + +TEST_P(TEST_FIXTURE, when_construct4dCompressedArrayFromPrivateConstView_then_rateConserved) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); + ZFP_ARRAY_TYPE::private_const_view v(&arr, 1, 1, 1, 1, 1, 1, 1, 1); + ZFP_ARRAY_TYPE arr2(v); + + EXPECT_EQ(arr.rate(), arr2.rate()); +} + +TEST_P(TEST_FIXTURE, when_construct4dCompressedArrayFromPrivateConstView_then_sizeConserved) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); + + size_t offsetX = 5; + size_t viewLenX = 3; + EXPECT_LT(offsetX + viewLenX, arr.size_x()); + + size_t offsetY = 1; + size_t viewLenY = 3; + EXPECT_LT(offsetY + viewLenY, arr.size_y()); + + size_t offsetZ = 0; + size_t viewLenZ = 2; + EXPECT_LT(offsetZ + viewLenZ, arr.size_z()); + + size_t offsetW = 1; + size_t viewLenW = 4; + EXPECT_LT(offsetW + viewLenW, arr.size_w()); + + /* create view and construct from it */ + ZFP_ARRAY_TYPE::private_const_view v(&arr, offsetX, offsetY, offsetZ, offsetW, viewLenX, viewLenY, viewLenZ, viewLenW); + ZFP_ARRAY_TYPE arr2(v); + + EXPECT_EQ(v.size_x(), arr2.size_x()); + EXPECT_EQ(v.size_y(), arr2.size_y()); + EXPECT_EQ(v.size_z(), arr2.size_z()); + EXPECT_EQ(v.size_w(), arr2.size_w()); +} + +TEST_P(TEST_FIXTURE, when_construct4dCompressedArrayFromPrivateConstView_then_performsDeepCopy) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); + + size_t offsetX = 5; + size_t viewLenX = 3; + EXPECT_LT(offsetX + viewLenX, arr.size_x()); + + size_t offsetY = 1; + 
size_t viewLenY = 3; + EXPECT_LT(offsetY + viewLenY, arr.size_y()); + + size_t offsetZ = 0; + size_t viewLenZ = 2; + EXPECT_LT(offsetZ + viewLenZ, arr.size_z()); + + size_t offsetW = 1; + size_t viewLenW = 4; + EXPECT_LT(offsetW + viewLenW, arr.size_w()); + + /* create view and construct from it */ + ZFP_ARRAY_TYPE::private_const_view v(&arr, offsetX, offsetY, offsetZ, offsetW, viewLenX, viewLenY, viewLenZ, viewLenW); + ZFP_ARRAY_TYPE arr2(v); + + /* verify array entries */ + for (size_t l = 0; l < viewLenW; l++) { + for (size_t k = 0; k < viewLenZ; k++) { + for (size_t j = 0; j < viewLenY; j++) { + for (size_t i = 0; i < viewLenX; i++) { + EXPECT_EQ(arr(offsetX + i, offsetY + j, offsetZ + k, offsetW + l), arr2(i, j, k, l)); + } + } + } + } + + /* verify it's a deep copy */ + arr(offsetX + 0, offsetY + 0, offsetZ + 0, offsetW + 0) = 999.; + EXPECT_NE(arr(offsetX, offsetY, offsetZ, offsetW), arr2(0, 0, 0, 0)); +} + +/* private_view */ + +TEST_P(TEST_FIXTURE, when_construct4dCompressedArrayFromPrivateView_then_rateConserved) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); + ZFP_ARRAY_TYPE::private_view v(&arr, 1, 1, 1, 1, 1, 1, 1, 1); + ZFP_ARRAY_TYPE arr2(v); + + EXPECT_EQ(arr.rate(), arr2.rate()); +} + +TEST_P(TEST_FIXTURE, when_construct4dCompressedArrayFromPrivateView_then_sizeConserved) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); + + size_t offsetX = 5; + size_t viewLenX = 3; + EXPECT_LT(offsetX + viewLenX, arr.size_x()); + + size_t offsetY = 1; + size_t viewLenY = 3; + EXPECT_LT(offsetY + viewLenY, arr.size_y()); + + size_t offsetZ = 0; + size_t viewLenZ = 2; + EXPECT_LT(offsetZ + viewLenZ, arr.size_z()); + + size_t offsetW = 1; + size_t viewLenW = 4; + EXPECT_LT(offsetW + viewLenW, arr.size_w()); + + /* create view and construct from it */ + ZFP_ARRAY_TYPE::private_view v(&arr, offsetX, offsetY, offsetZ, 
offsetW, viewLenX, viewLenY, viewLenZ, viewLenW); + ZFP_ARRAY_TYPE arr2(v); + + EXPECT_EQ(v.size_x(), arr2.size_x()); + EXPECT_EQ(v.size_y(), arr2.size_y()); + EXPECT_EQ(v.size_z(), arr2.size_z()); + EXPECT_EQ(v.size_w(), arr2.size_w()); +} + +TEST_P(TEST_FIXTURE, when_construct4dCompressedArrayFromPrivateView_then_performsDeepCopy) +{ + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); + + size_t offsetX = 5; + size_t viewLenX = 3; + EXPECT_LT(offsetX + viewLenX, arr.size_x()); + + size_t offsetY = 1; + size_t viewLenY = 3; + EXPECT_LT(offsetY + viewLenY, arr.size_y()); + + size_t offsetZ = 0; + size_t viewLenZ = 2; + EXPECT_LT(offsetZ + viewLenZ, arr.size_z()); + + size_t offsetW = 1; + size_t viewLenW = 4; + EXPECT_LT(offsetW + viewLenW, arr.size_w()); + + /* create view and construct from it */ + ZFP_ARRAY_TYPE::private_view v(&arr, offsetX, offsetY, offsetZ, offsetW, viewLenX, viewLenY, viewLenZ, viewLenW); + ZFP_ARRAY_TYPE arr2(v); + + /* verify array entries */ + for (size_t l = 0; l < viewLenW; l++) { + for (size_t k = 0; k < viewLenZ; k++) { + for (size_t j = 0; j < viewLenY; j++) { + for (size_t i = 0; i < viewLenX; i++) { + EXPECT_EQ(arr(offsetX + i, offsetY + j, offsetZ + k, offsetW + l), arr2(i, j, k, l)); + } + } + } + } + + /* verify it's a deep copy */ + arr(offsetX + 0, offsetY + 0, offsetZ + 0, offsetW + 0) = 999.; + EXPECT_NE(arr(offsetX, offsetY, offsetZ, offsetW), arr2(0, 0, 0, 0)); +} + diff --git a/tests/array/array/testArray4ItersBase.cpp b/tests/array/array/testArray4ItersBase.cpp new file mode 100644 index 00000000..6b10d6ad --- /dev/null +++ b/tests/array/array/testArray4ItersBase.cpp @@ -0,0 +1,109 @@ +TEST_F(ARRAY_DIMS_SCALAR_TEST_ITERS, given_partialBlocks_when_incrementIterator_then_positionTraversesCorrectly) +{ + // force partial block traversal + EXPECT_NE(0u, arr.size_x() % BLOCK_SIDE_LEN); + EXPECT_NE(0u, arr.size_y() % BLOCK_SIDE_LEN); + EXPECT_NE(0u, 
arr.size_z() % BLOCK_SIDE_LEN); + EXPECT_NE(0u, arr.size_w() % BLOCK_SIDE_LEN); + + size_t totalBlocksX = (arr.size_x() + 3) / 4; + size_t totalBlocksY = (arr.size_y() + 3) / 4; + size_t totalBlocksZ = (arr.size_z() + 3) / 4; + size_t totalBlocksW = (arr.size_w() + 3) / 4; + size_t totalBlocks = totalBlocksX * totalBlocksY * totalBlocksZ * totalBlocksW; + + iter = arr.begin(); + for (size_t count = 0; count < totalBlocks; count++) { + // determine if block is complete or partial + size_t distanceFromEnd = arr.size_x() - iter.i(); + size_t blockLenX = distanceFromEnd < BLOCK_SIDE_LEN ? distanceFromEnd : BLOCK_SIDE_LEN; + + distanceFromEnd = arr.size_y() - iter.j(); + size_t blockLenY = distanceFromEnd < BLOCK_SIDE_LEN ? distanceFromEnd : BLOCK_SIDE_LEN; + + distanceFromEnd = arr.size_z() - iter.k(); + size_t blockLenZ = distanceFromEnd < BLOCK_SIDE_LEN ? distanceFromEnd : BLOCK_SIDE_LEN; + + distanceFromEnd = arr.size_w() - iter.l(); + size_t blockLenW = distanceFromEnd < BLOCK_SIDE_LEN ? 
distanceFromEnd : BLOCK_SIDE_LEN; + + // ensure entries lie in same block + size_t blockStartIndexI = iter.i(); + size_t blockStartIndexJ = iter.j(); + size_t blockStartIndexK = iter.k(); + size_t blockStartIndexL = iter.l(); + + for (size_t l = 0; l < blockLenW; l++) { + for (size_t k = 0; k < blockLenZ; k++) { + for (size_t j = 0; j < blockLenY; j++) { + for (size_t i = 0; i < blockLenX; i++) { + EXPECT_EQ(blockStartIndexI + i, iter.i()); + EXPECT_EQ(blockStartIndexJ + j, iter.j()); + EXPECT_EQ(blockStartIndexK + k, iter.k()); + EXPECT_EQ(blockStartIndexL + l, iter.l()); + iter++; + } + } + } + } + } + +// EXPECT_EQ(arr.end(), iter); // triggers googletest issue #742 + EXPECT_TRUE(arr.end() == iter); +} + +// const iterators + +TEST_F(ARRAY_DIMS_SCALAR_TEST_ITERS, given_partialBlocks_when_incrementConstIterator_then_positionTraversesCorrectly) +{ + // force partial block traversal + EXPECT_NE(0u, arr.size_x() % BLOCK_SIDE_LEN); + EXPECT_NE(0u, arr.size_y() % BLOCK_SIDE_LEN); + EXPECT_NE(0u, arr.size_z() % BLOCK_SIDE_LEN); + EXPECT_NE(0u, arr.size_w() % BLOCK_SIDE_LEN); + + size_t totalBlocksX = (arr.size_x() + 3) / 4; + size_t totalBlocksY = (arr.size_y() + 3) / 4; + size_t totalBlocksZ = (arr.size_z() + 3) / 4; + size_t totalBlocksW = (arr.size_w() + 3) / 4; + size_t totalBlocks = totalBlocksX * totalBlocksY * totalBlocksZ * totalBlocksW; + + citer = arr.cbegin(); + for (size_t count = 0; count < totalBlocks; count++) { + // determine if block is complete or partial + size_t distanceFromEnd = arr.size_x() - citer.i(); + size_t blockLenX = distanceFromEnd < BLOCK_SIDE_LEN ? distanceFromEnd : BLOCK_SIDE_LEN; + + distanceFromEnd = arr.size_y() - citer.j(); + size_t blockLenY = distanceFromEnd < BLOCK_SIDE_LEN ? distanceFromEnd : BLOCK_SIDE_LEN; + + distanceFromEnd = arr.size_z() - citer.k(); + size_t blockLenZ = distanceFromEnd < BLOCK_SIDE_LEN ? 
distanceFromEnd : BLOCK_SIDE_LEN; + + distanceFromEnd = arr.size_w() - citer.l(); + size_t blockLenW = distanceFromEnd < BLOCK_SIDE_LEN ? distanceFromEnd : BLOCK_SIDE_LEN; + + // ensure entries lie in same block + size_t blockStartIndexI = citer.i(); + size_t blockStartIndexJ = citer.j(); + size_t blockStartIndexK = citer.k(); + size_t blockStartIndexL = citer.l(); + + for (size_t l = 0; l < blockLenW; l++) { + for (size_t k = 0; k < blockLenZ; k++) { + for (size_t j = 0; j < blockLenY; j++) { + for (size_t i = 0; i < blockLenX; i++) { + EXPECT_EQ(blockStartIndexI + i, citer.i()); + EXPECT_EQ(blockStartIndexJ + j, citer.j()); + EXPECT_EQ(blockStartIndexK + k, citer.k()); + EXPECT_EQ(blockStartIndexL + l, citer.l()); + citer++; + } + } + } + } + } + +// EXPECT_EQ(arr.cend(), citer); // triggers googletest issue #742 + EXPECT_TRUE(arr.cend() == citer); +} diff --git a/tests/array/array/testArray4PtrsBase.cpp b/tests/array/array/testArray4PtrsBase.cpp new file mode 100644 index 00000000..357f1320 --- /dev/null +++ b/tests/array/array/testArray4PtrsBase.cpp @@ -0,0 +1,86 @@ +TEST_F(ARRAY_DIMS_SCALAR_TEST_PTRS, given_pointerAtXBoundary_when_increment_then_pointerPositionTraversesCorrectly) +{ + size_t i = arr.size_x() - 1; + size_t j = 2; + size_t k = 4; + size_t l = 3; + arr(0, j+1, k, l) = VAL; + + ptr = &arr(i, j, k, l); + + EXPECT_EQ(VAL, *++ptr); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_PTRS, given_pointerAtXBoundary_when_decrement_then_pointerPositionTraversesCorrectly) +{ + size_t i = 0; + size_t j = 2; + size_t k = 3; + size_t l = 1; + + size_t iNext = arr.size_x() - 1; + arr(iNext, j-1, k, l) = VAL; + + ptr = &arr(i, j, k, l); + + EXPECT_EQ(VAL, *--ptr); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_PTRS, given_pointerAtXYBoundary_when_increment_then_pointerPositionTraversesCorrectly) +{ + size_t i = arr.size_x() - 1; + size_t j = arr.size_y() - 1; + size_t k = 4; + size_t l = 3; + arr(0, 0, k+1, l) = VAL; + + ptr = &arr(i, j, k, l); + + EXPECT_EQ(VAL, *++ptr); +} + 
+TEST_F(ARRAY_DIMS_SCALAR_TEST_PTRS, given_pointerAtXYBoundary_when_decrement_then_pointerPositionTraversesCorrectly) +{ + size_t i = 0; + size_t j = 0; + size_t k = 3; + size_t l = 1; + + size_t iNext = arr.size_x() - 1; + size_t jNext = arr.size_y() - 1; + arr(iNext, jNext, k-1, l) = VAL; + + ptr = &arr(i, j, k, l); + + EXPECT_EQ(VAL, *--ptr); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_PTRS, given_pointerAtXYZBoundary_when_increment_then_pointerPositionTraversesCorrectly) +{ + size_t i = arr.size_x() - 1; + size_t j = arr.size_y() - 1; + size_t k = arr.size_z() - 1; + size_t l = 3; + arr(0, 0, 0, l+1) = VAL; + + ptr = &arr(i, j, k, l); + + EXPECT_EQ(VAL, *++ptr); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_PTRS, given_pointerAtXYZBoundary_when_decrement_then_pointerPositionTraversesCorrectly) +{ + size_t i = 0; + size_t j = 0; + size_t k = 0; + size_t l = 1; + + size_t iNext = arr.size_x() - 1; + size_t jNext = arr.size_y() - 1; + size_t kNext = arr.size_z() - 1; + arr(iNext, jNext, kNext, l-1) = VAL; + + ptr = &arr(i, j, k, l); + + EXPECT_EQ(VAL, *--ptr); +} diff --git a/tests/array/array/testArray4RefsBase.cpp b/tests/array/array/testArray4RefsBase.cpp new file mode 100644 index 00000000..ace7ae60 --- /dev/null +++ b/tests/array/array/testArray4RefsBase.cpp @@ -0,0 +1,61 @@ +TEST_F(ARRAY_DIMS_SCALAR_TEST_REFS, when_resize_then_sizeChanges) +{ + EXPECT_EQ(ARRAY_SIZE_X, arr.size_x()); + EXPECT_EQ(ARRAY_SIZE_Y, arr.size_y()); + EXPECT_EQ(ARRAY_SIZE_Z, arr.size_z()); + EXPECT_EQ(ARRAY_SIZE_W, arr.size_w()); + EXPECT_EQ(ARRAY_SIZE_X * ARRAY_SIZE_Y * ARRAY_SIZE_Z * ARRAY_SIZE_W, arr.size()); + + size_t newLenX = ARRAY_SIZE_X + 1; + size_t newLenY = ARRAY_SIZE_Y - 2; + size_t newLenZ = ARRAY_SIZE_Z + 5; + size_t newLenW = ARRAY_SIZE_W - 3; + arr.resize(newLenX, newLenY, newLenZ, newLenW); + + EXPECT_EQ(newLenX, arr.size_x()); + EXPECT_EQ(newLenY, arr.size_y()); + EXPECT_EQ(newLenZ, arr.size_z()); + EXPECT_EQ(newLenW, arr.size_w()); + EXPECT_EQ(newLenX * newLenY * newLenZ * newLenW, 
arr.size()); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_REFS, when_getIndexWithParentheses_then_refReturned) +{ + size_t i = 1, j = 1, k = 1, l = 1; + arr(i, j, k, l) = VAL; + + EXPECT_EQ(VAL, arr(i, j, k, l)); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_REFS, when_indexWithBracketsAlongsideParentheses_then_indexedProperly) +{ + size_t i = 1, j = 1, k = 1, l = 1; + size_t absIndex = l * arr.size_x() * arr.size_y() * arr.size_z() + k * arr.size_x() * arr.size_y() + j * arr.size_x() + i; + + arr[absIndex] = VAL; + EXPECT_EQ(VAL, arr(i, j, k, l)); + + arr(i, j, k, l) /= VAL; + EXPECT_EQ(1, arr[absIndex]); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_REFS, given_constCompressedArray_when_getIndexWithBrackets_then_valReturned) +{ + size_t i = 1; + arr[i] = VAL; + + const array4 arrConst = arr; + + EXPECT_EQ(VAL, arrConst[i]); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_REFS, given_constCompressedArray_when_getIndexWithParentheses_then_valReturned) +{ + size_t i = 1, j = 1, k = 1, l = 1; + size_t absIndex = l * arr.size_x() * arr.size_y() * arr.size_z() + k * arr.size_x() * arr.size_y() + j * arr.size_x() + i; + arr[absIndex] = VAL; + + const array4 arrConst = arr; + + EXPECT_EQ(VAL, arrConst(i, j, k, l)); +} diff --git a/tests/array/array/testArray4ViewsBase.cpp b/tests/array/array/testArray4ViewsBase.cpp new file mode 100644 index 00000000..b1ce5c99 --- /dev/null +++ b/tests/array/array/testArray4ViewsBase.cpp @@ -0,0 +1,639 @@ +/* preview */ + +/* this also tests const_view */ +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, when_previewFullConstructor4D_then_lengthAndOffsetSet) +{ + ZFP_ARRAY_TYPE::const_view v(&arr, offsetX, offsetY, offsetZ, offsetW, viewLenX, viewLenY, viewLenZ, viewLenW); + + EXPECT_EQ(viewLenX * viewLenY * viewLenZ * viewLenW, v.size()); + EXPECT_EQ(viewLenX, v.size_x()); + EXPECT_EQ(viewLenY, v.size_y()); + EXPECT_EQ(viewLenZ, v.size_z()); + EXPECT_EQ(viewLenW, v.size_w()); + + EXPECT_EQ(offsetX, v.global_x(0)); + EXPECT_EQ(offsetY, v.global_y(0)); + EXPECT_EQ(offsetZ, v.global_z(0)); + 
EXPECT_EQ(offsetW, v.global_w(0)); +} + +/* const_view */ + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, given_constView_when_sizeXYZ_then_viewXYZLenReturned) +{ + ZFP_ARRAY_TYPE::const_view v(&arr, offsetX, offsetY, offsetZ, offsetW, viewLenX, viewLenY, viewLenZ, viewLenW); + + EXPECT_EQ(viewLenX, v.size_x()); + EXPECT_EQ(viewLenY, v.size_y()); + EXPECT_EQ(viewLenZ, v.size_z()); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, given_constView_when_accessorParens_then_correctEntriesReturned) +{ + ZFP_ARRAY_TYPE::const_view v(&arr, offsetX, offsetY, offsetZ, offsetW, viewLenX, viewLenY, viewLenZ, viewLenW); + + for (size_t l = 0; l < viewLenW; l++) { + for (size_t k = 0; k < viewLenZ; k++) { + for (size_t j = 0; j < viewLenY; j++) { + for (size_t i = 0; i < viewLenX; i++) { + size_t offset = (offsetW + l)*arr.size_x()*arr.size_y()*arr.size_z() + (offsetZ + k)*arr.size_x()*arr.size_y() + (offsetY + j)*arr.size_x() + offsetX + i; + EXPECT_EQ(arr[offset], v(i, j, k, l)); + } + } + } + } +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, when_constViewFullConstructor_then_isShallowCopyOfCompressedArray) +{ + ZFP_ARRAY_TYPE::const_view v(&arr, offsetX, offsetY, offsetZ, offsetW, viewLenX, viewLenY, viewLenZ, viewLenW); + + /* indices of view and arr */ + size_t vIX = 2; + size_t aIX = v.global_x(vIX); + size_t vIY = 2; + size_t aIY = v.global_y(vIY); + size_t vIZ = 1; + size_t aIZ = v.global_z(vIZ); + size_t vIW = 1; + size_t aIW = v.global_w(vIW); + + SCALAR oldVal = arr(aIX, aIY, aIZ, aIW); + EXPECT_EQ(oldVal, v(vIX, vIY, vIZ, vIW)); + + arr(aIX, aIY, aIZ, aIW) += 1; + SCALAR newVal = arr(aIX, aIY, aIZ, aIW); + EXPECT_NE(oldVal, newVal); + + EXPECT_EQ(newVal, v(vIX, vIY, vIZ, vIW)); +} + +/* view */ + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, when_viewFullConstructor_then_lengthAndOffsetSet) +{ + ZFP_ARRAY_TYPE::view v(&arr, offsetX, offsetY, offsetZ, offsetW, viewLenX, viewLenY, viewLenZ, viewLenW); + + EXPECT_EQ(viewLenX * viewLenY * viewLenZ * viewLenW, v.size()); + EXPECT_EQ(viewLenX, 
v.size_x()); + EXPECT_EQ(viewLenY, v.size_y()); + EXPECT_EQ(viewLenZ, v.size_z()); + EXPECT_EQ(viewLenW, v.size_w()); + + EXPECT_EQ(offsetX, v.global_x(0)); + EXPECT_EQ(offsetY, v.global_y(0)); + EXPECT_EQ(offsetZ, v.global_z(0)); + EXPECT_EQ(offsetW, v.global_w(0)); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, when_viewFullConstructor_then_isShallowCopyOfCompressedArray) +{ + ZFP_ARRAY_TYPE::const_view v(&arr, offsetX, offsetY, offsetZ, offsetW, viewLenX, viewLenY, viewLenZ, viewLenW); + + /* indices of view and arr */ + size_t vIX = 2; + size_t aIX = v.global_x(vIX); + size_t vIY = 2; + size_t aIY = v.global_y(vIY); + size_t vIZ = 1; + size_t aIZ = v.global_z(vIZ); + size_t vIW = 1; + size_t aIW = v.global_w(vIW); + + SCALAR oldVal = arr(aIX, aIY, aIZ, aIW); + EXPECT_EQ(oldVal, v(vIX, vIY, vIZ, vIW)); + + arr(aIX, aIY, aIZ, aIW) += 1; + SCALAR newVal = arr(aIX, aIY, aIZ, aIW); + EXPECT_NE(oldVal, newVal); + + EXPECT_EQ(newVal, v(vIX, vIY, vIZ, vIW)); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, given_view_when_setEntryWithParens_then_originalArrayUpdated) +{ + ZFP_ARRAY_TYPE::view v(&arr, offsetX, offsetY, offsetZ, offsetW, viewLenX, viewLenY, viewLenZ, viewLenW); + size_t i = 1, j = 2, k = 1, l = 2; + SCALAR val = 3.14; + + EXPECT_NE(val, arr(offsetX + i, offsetY + j, offsetZ + k, offsetW + l)); + v(i, j, k, l) = val; + + EXPECT_EQ(arr(offsetX + i, offsetY + j, offsetZ + k, offsetW + l), v(i, j, k, l)); +} + +/* flat_view */ + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, when_flatViewFullConstructor_then_lengthAndOffsetSet) +{ + ZFP_ARRAY_TYPE::flat_view v(&arr, offsetX, offsetY, offsetZ, offsetW, viewLenX, viewLenY, viewLenZ, viewLenW); + + EXPECT_EQ(viewLenX * viewLenY * viewLenZ * viewLenW, v.size()); + EXPECT_EQ(viewLenX, v.size_x()); + EXPECT_EQ(viewLenY, v.size_y()); + EXPECT_EQ(viewLenZ, v.size_z()); + EXPECT_EQ(viewLenW, v.size_w()); + + EXPECT_EQ(offsetX, v.global_x(0)); + EXPECT_EQ(offsetY, v.global_y(0)); + EXPECT_EQ(offsetZ, v.global_z(0)); + EXPECT_EQ(offsetW, 
v.global_w(0)); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, when_flatViewFullConstructor_then_isShallowCopyOfCompressedArray) +{ + ZFP_ARRAY_TYPE::flat_view v(&arr, 1, 1, 1, 1, 1, 1, 1, 1); + + /* indices of view and arr */ + size_t vIX = 2; + size_t aIX = v.global_x(vIX); + size_t vIY = 2; + size_t aIY = v.global_y(vIY); + size_t vIZ = 1; + size_t aIZ = v.global_z(vIZ); + size_t vIW = 1; + size_t aIW = v.global_w(vIW); + + SCALAR oldVal = arr(aIX, aIY, aIZ, aIW); + EXPECT_EQ(oldVal, v(vIX, vIY, vIZ, vIW)); + + arr(aIX, aIY, aIZ, aIW) += 1; + SCALAR newVal = arr(aIX, aIY, aIZ, aIW); + EXPECT_NE(oldVal, newVal); + + EXPECT_EQ(newVal, v(vIX, vIY, vIZ, vIW)); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, given_flatView_when_index_then_returnsFlatIndex) +{ + ZFP_ARRAY_TYPE::flat_view v(&arr, offsetX, offsetY, offsetZ, offsetW, viewLenX, viewLenY, viewLenZ, viewLenW); + + size_t i = 2, j = 1, k = 1, l = 2; + EXPECT_EQ(l*viewLenX*viewLenY*viewLenZ + k*viewLenX*viewLenY + j*viewLenX + i, v.index(i, j, k, l)); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, given_flatView_when_ijkl_then_returnsUnflatIndices) +{ + ZFP_ARRAY_TYPE::flat_view v(&arr, offsetX, offsetY, offsetZ, offsetW, viewLenX, viewLenY, viewLenZ, viewLenW); + + size_t i = 2, j = 1, k = 1, l = 2; + size_t flatIndex = v.index(i, j, k, l); + + size_t vI, vJ, vK, vL; + v.ijkl(vI, vJ, vK, vL, flatIndex); + EXPECT_EQ(i, vI); + EXPECT_EQ(j, vJ); + EXPECT_EQ(k, vK); + EXPECT_EQ(l, vL); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, given_flatView_when_bracketAccessor_then_returnsValAtFlattenedIndex) +{ + ZFP_ARRAY_TYPE::flat_view v(&arr, offsetX, offsetY, offsetZ, offsetW, viewLenX, viewLenY, viewLenZ, viewLenW); + + size_t i = 2, j = 1, k = 1, l = 2; + size_t arrOffset = (offsetW + l)*arr.size_x()*arr.size_y()*arr.size_z() + (offsetZ + k)*arr.size_x()*arr.size_y() + (offsetY + j)*arr.size_x() + (offsetX + i); + EXPECT_EQ(arr[arrOffset], v[v.index(i, j, k, l)]); +} + +/* nested_view */ + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, 
when_nestedViewFullConstructor4D_then_lengthAndOffsetSet) +{ + ZFP_ARRAY_TYPE::nested_view v(&arr, offsetX, offsetY, offsetZ, offsetW, viewLenX, viewLenY, viewLenZ, viewLenW); + + EXPECT_EQ(viewLenX * viewLenY * viewLenZ * viewLenW, v.size()); + EXPECT_EQ(viewLenX, v.size_x()); + EXPECT_EQ(viewLenY, v.size_y()); + EXPECT_EQ(viewLenZ, v.size_z()); + EXPECT_EQ(viewLenW, v.size_w()); + + EXPECT_EQ(offsetX, v.global_x(0)); + EXPECT_EQ(offsetY, v.global_y(0)); + EXPECT_EQ(offsetZ, v.global_z(0)); + EXPECT_EQ(offsetW, v.global_w(0)); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, given_nestedView_when_parensAccessor_then_returnsValue) +{ + ZFP_ARRAY_TYPE::nested_view v(&arr, offsetX, offsetY, offsetZ, offsetW, viewLenX, viewLenY, viewLenZ, viewLenW); + + /* indices for view and array */ + size_t vI = 1; + size_t vJ = 2; + size_t vK = 1; + size_t vL = 2; + size_t aI = offsetX + vI; + size_t aJ = offsetY + vJ; + size_t aK = offsetZ + vK; + size_t aL = offsetW + vL; + + arr(aI, aJ, aK, aL) = 5.5; + EXPECT_EQ(arr(aI, aJ, aK, aL), v(vI, vJ, vK, vL)); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, given_nestedView_when_parensMutator_then_setsValue) +{ + ZFP_ARRAY_TYPE::nested_view v(&arr, offsetX, offsetY, offsetZ, offsetW, viewLenX, viewLenY, viewLenZ, viewLenW); + + /* indices for view and array */ + size_t vI = 1; + size_t vJ = 2; + size_t vK = 1; + size_t vL = 2; + size_t aI = offsetX + vI; + size_t aJ = offsetY + vJ; + size_t aK = offsetZ + vK; + size_t aL = offsetW + vL; + + SCALAR val = 5.5; + v(vI, vJ, vK, vL) = val; + EXPECT_EQ(val, arr(aI, aJ, aK, aL)); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, given_nestedView_when_bracketIndex_then_returnsSliceFromView) +{ + ZFP_ARRAY_TYPE::nested_view v(&arr, offsetX, offsetY, offsetZ, offsetW, viewLenX, viewLenY, viewLenZ, viewLenW); + + /* test slice length */ + EXPECT_EQ(viewLenX, v[0].size_x()); + EXPECT_EQ(viewLenY, v[0].size_y()); + EXPECT_EQ(viewLenZ, v[0].size_z()); +} + +/* nested_view3 */ + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, 
given_nestedView3_when_parensAccessor_then_returnsValue) +{ + ZFP_ARRAY_TYPE::nested_view v(&arr, offsetX, offsetY, offsetZ, offsetW, viewLenX, viewLenY, viewLenZ, viewLenW); + + /* indices for view and array */ + size_t vI = 1; + size_t vJ = 2; + size_t vK = 1; + size_t vL = 2; + size_t aI = offsetX + vI; + size_t aJ = offsetY + vJ; + size_t aK = offsetZ + vK; + size_t aL = offsetW + vL; + + arr(aI, aJ, aK, aL) = 5.5; + EXPECT_EQ(arr(aI, aJ, aK, aL), v[vL](vI, vJ, vK)); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, given_nestedView3_when_parensMutator_then_setsValue) +{ + ZFP_ARRAY_TYPE::nested_view v(&arr, offsetX, offsetY, offsetZ, offsetW, viewLenX, viewLenY, viewLenZ, viewLenW); + + /* indices for view and array */ + size_t vI = 1; + size_t vJ = 2; + size_t vK = 1; + size_t vL = 2; + size_t aI = offsetX + vI; + size_t aJ = offsetY + vJ; + size_t aK = offsetZ + vK; + size_t aL = offsetW + vL; + + SCALAR val = 5.5; + v[vL](vI, vJ, vK) = val; + EXPECT_EQ(val, arr(aI, aJ, aK, aL)); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, given_nestedView3_when_bracketIndex_then_returnsSliceFromView) +{ + ZFP_ARRAY_TYPE::nested_view v(&arr, offsetX, offsetY, offsetZ, offsetW, viewLenX, viewLenY, viewLenZ, viewLenW); + + /* test slice length */ + EXPECT_EQ(viewLenX, v[0][0][0].size_x()); +} + +/* nested_view2 */ + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, given_nestedView2_when_bracketAccessor_then_returnsVal) +{ + ZFP_ARRAY_TYPE::nested_view v(&arr, offsetX, offsetY, offsetZ, offsetW, viewLenX, viewLenY, viewLenZ, viewLenW); + + /* indices for view and array */ + size_t vK = 1; + size_t vL = 2; + size_t aK = offsetZ + vK; + size_t aL = offsetW + vL; + + /* initialize values into row that will become slice */ + for (size_t aJ = 0; aJ < arr.size_y(); aJ++) { + for (size_t aI = 0; aI < arr.size_x(); aI++) { + arr(aI, aJ, aK, aL) = (SCALAR)(aI + aJ); + } + } + + EXPECT_EQ(viewLenX, v[vL][vK].size_x()); + EXPECT_EQ(viewLenY, v[vL][vK].size_y()); + for (size_t vJ = 0; vJ < viewLenY; vJ++) { 
+ for (size_t vI = 0; vI < viewLenX; vI++) { + EXPECT_EQ(arr(offsetX + vI, offsetY + vJ, aK, aL), v[vL][vK][vJ][vI]); + } + } +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, given_nestedView2_when_parensAccessor_then_returnsVal) +{ + ZFP_ARRAY_TYPE::nested_view v(&arr, offsetX, offsetY, offsetZ, offsetW, viewLenX, viewLenY, viewLenZ, viewLenW); + + /* indices for view and array */ + size_t vK = 1; + size_t vL = 2; + size_t aK = offsetZ + vK; + size_t aL = offsetW + vL; + + /* initialize values into row that will become slice */ + for (size_t aJ = 0; aJ < arr.size_y(); aJ++) { + for (size_t aI = 0; aI < arr.size_x(); aI++) { + arr(aI, aJ, aK, aL) = (SCALAR)(aI + aJ); + } + } + + EXPECT_EQ(viewLenX, v[vL][vK].size_x()); + EXPECT_EQ(viewLenY, v[vL][vK].size_y()); + for (size_t vJ = 0; vJ < viewLenY; vJ++) { + for (size_t vI = 0; vI < viewLenX; vI++) { + EXPECT_EQ(arr(offsetX + vI, offsetY + vJ, aK, aL), v[vL][vK](vI, vJ)); + } + } +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, given_nestedView2_when_bracketMutator_then_setsVal) +{ + ZFP_ARRAY_TYPE::nested_view v(&arr, offsetX, offsetY, offsetZ, offsetW, viewLenX, viewLenY, viewLenZ, viewLenW); + + /* indices for view and array */ + size_t vK = 1; + size_t vL = 2; + size_t aK = offsetZ + vK; + size_t aL = offsetW + vL; + + /* initialize values into slice */ + for (size_t vJ = 0; vJ < v[vL][vK].size_y(); vJ++) { + for (size_t vI = 0; vI < v[vL][vK].size_x(); vI++) { + v[vL][vK][vJ][vI] = (SCALAR)(vI + vJ); + } + } + + for (size_t vJ = 0; vJ < v[vL][vK].size_y(); vJ++) { + for (size_t vI = 0; vI < v[vL][vK].size_x(); vI++) { + EXPECT_EQ(v[vL][vK][vJ][vI], arr(offsetX + vI, offsetY + vJ, aK, aL)); + } + } +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, given_nestedView2_when_parensMutator_then_setsVal) +{ + ZFP_ARRAY_TYPE::nested_view v(&arr, offsetX, offsetY, offsetZ, offsetW, viewLenX, viewLenY, viewLenZ, viewLenW); + + /* indices for view and array */ + size_t vK = 1; + size_t vL = 2; + size_t aK = offsetZ + vK; + size_t aL = 
offsetW + vL; + + /* initialize values into slice */ + for (size_t vJ = 0; vJ < v[vL][vK].size_y(); vJ++) { + for (size_t vI = 0; vI < v[vL][vK].size_x(); vI++) { + v[vL][vK][vJ](vI) = (SCALAR)(vI + vJ); + } + } + + for (size_t vJ = 0; vJ < v[vL][vK].size_y(); vJ++) { + for (size_t vI = 0; vI < v[vL][vK].size_x(); vI++) { + EXPECT_EQ(v[vL][vK][vJ][vI], arr(offsetX + vI, offsetY + vJ, aK, aL)); + } + } +} + +/* nested_view1 */ + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, given_nestedView1_when_bracketAccessor_then_returnsVal) +{ + ZFP_ARRAY_TYPE::nested_view v(&arr, offsetX, offsetY, offsetZ, offsetW, viewLenX, viewLenY, viewLenZ, viewLenW); + + /* indices for view and array */ + size_t vJ = 2; + size_t vK = 1; + size_t vL = 1; + size_t aJ = offsetY + vJ; + size_t aK = offsetZ + vK; + size_t aL = offsetW + vL; + + /* initialize values into row that will become slice */ + for (size_t aI = 0; aI < arr.size_x(); aI++) { + arr(aI, aJ, aK, aL) = (SCALAR)aI; + } + + EXPECT_EQ(viewLenX, v[vL][vK][vJ].size_x()); + for (size_t vI = 0; vI < viewLenX; vI++) { + EXPECT_EQ(arr(offsetX + vI, aJ, aK, aL), v[vL][vK][vJ][vI]); + } +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, given_nestedView1_when_parensAccessor_then_returnsVal) +{ + ZFP_ARRAY_TYPE::nested_view v(&arr, offsetX, offsetY, offsetZ, offsetW, viewLenX, viewLenY, viewLenZ, viewLenW); + + /* indices for view and array */ + size_t vJ = 2; + size_t vK = 1; + size_t vL = 2; + size_t aJ = offsetY + vJ; + size_t aK = offsetZ + vK; + size_t aL = offsetW + vL; + + /* initialize values into row that will become slice */ + for (size_t aI = 0; aI < arr.size_x(); aI++) { + arr(aI, aJ, aK, aL) = (SCALAR)aI; + } + + EXPECT_EQ(viewLenX, v[vL][vK][vJ].size_x()); + for (size_t vI = 0; vI < viewLenX; vI++) { + EXPECT_EQ(arr(offsetX + vI, aJ, aK, aL), v[vL][vK][vJ](vI)); + } +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, given_nestedView1_when_bracketMutator_then_setsVal) +{ + ZFP_ARRAY_TYPE::nested_view v(&arr, offsetX, offsetY, offsetZ, offsetW, 
viewLenX, viewLenY, viewLenZ, viewLenW); + + /* indices for view and array */ + size_t vJ = 2; + size_t vK = 1; + size_t vL = 2; + size_t aJ = offsetY + vJ; + size_t aK = offsetZ + vK; + size_t aL = offsetW + vL; + + /* initialize values into slice */ + for (size_t vI = 0; vI < v[vL][vK][vJ].size_x(); vI++) { + v[vL][vK][vJ][vI] = (SCALAR)vI; + } + + for (size_t vI = 0; vI < v[vL][vK][vJ].size_x(); vI++) { + EXPECT_EQ(v[vL][vK][vJ][vI], arr(offsetX + vI, aJ, aK, aL)); + } +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, given_nestedView1_when_parensMutator_then_setsVal) +{ + ZFP_ARRAY_TYPE::nested_view v(&arr, offsetX, offsetY, offsetZ, offsetW, viewLenX, viewLenY, viewLenZ, viewLenW); + + /* indices for view and array */ + size_t vJ = 2; + size_t vK = 1; + size_t vL = 2; + size_t aJ = offsetY + vJ; + size_t aK = offsetZ + vK; + size_t aL = offsetW + vL; + + /* initialize values into slice */ + for (size_t vI = 0; vI < v[vL][vK][vJ].size_x(); vI++) { + v[vL][vK][vJ](vI) = (SCALAR)vI; + } + + for (size_t vI = 0; vI < v[vL][vK][vJ].size_x(); vI++) { + EXPECT_EQ(v[vL][vK][vJ][vI], arr(offsetX + vI, aJ, aK, aL)); + } +} + +/* private_const_view */ + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, when_privateConstViewFullConstructor_then_lengthAndOffsetSet) +{ + ZFP_ARRAY_TYPE::private_const_view v(&arr, offsetX, offsetY, offsetZ, offsetW, viewLenX, viewLenY, viewLenZ, viewLenW); + + EXPECT_EQ(viewLenX * viewLenY * viewLenZ * viewLenW, v.size()); + EXPECT_EQ(viewLenX, v.size_x()); + EXPECT_EQ(viewLenY, v.size_y()); + EXPECT_EQ(viewLenZ, v.size_z()); + EXPECT_EQ(viewLenW, v.size_w()); + + EXPECT_EQ(offsetX, v.global_x(0)); + EXPECT_EQ(offsetY, v.global_y(0)); + EXPECT_EQ(offsetZ, v.global_z(0)); + EXPECT_EQ(offsetW, v.global_w(0)); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, given_privateConstView_when_sizeXYZ_then_viewLenReturned) +{ + ZFP_ARRAY_TYPE::private_const_view v(&arr, offsetX, offsetY, offsetZ, offsetW, viewLenX, viewLenY, viewLenZ, viewLenW); + + EXPECT_EQ(viewLenX, 
v.size_x()); + EXPECT_EQ(viewLenY, v.size_y()); + EXPECT_EQ(viewLenZ, v.size_z()); + EXPECT_EQ(viewLenW, v.size_w()); +} + +/* private_view */ + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, when_privateViewFullConstructor_then_lengthAndOffsetSet) +{ + ZFP_ARRAY_TYPE::private_view v(&arr, offsetX, offsetY, offsetZ, offsetW, viewLenX, viewLenY, viewLenZ, viewLenW); + + EXPECT_EQ(viewLenX * viewLenY * viewLenZ * viewLenW, v.size()); + EXPECT_EQ(viewLenX, v.size_x()); + EXPECT_EQ(viewLenY, v.size_y()); + EXPECT_EQ(viewLenZ, v.size_z()); + EXPECT_EQ(viewLenW, v.size_w()); + + EXPECT_EQ(offsetX, v.global_x(0)); + EXPECT_EQ(offsetY, v.global_y(0)); + EXPECT_EQ(offsetZ, v.global_z(0)); + EXPECT_EQ(offsetW, v.global_w(0)); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, given_privateView_when_partitionWithLimitOnCount_then_setsUniqueBlockBoundsAlongLongestDimension) +{ + const size_t count = 3; + size_t prevOffsetX, prevLenX, offsetX, lenX; + + /* partition such that each gets at least 1 block */ + const size_t blockSideLen = 4; + size_t arrBlockCountX = (arr.size_x() + (blockSideLen - 1)) / blockSideLen; + size_t arrBlockCountY = (arr.size_y() + (blockSideLen - 1)) / blockSideLen; + size_t arrBlockCountZ = (arr.size_z() + (blockSideLen - 1)) / blockSideLen; + size_t arrBlockCountW = (arr.size_w() + (blockSideLen - 1)) / blockSideLen; + /* ensure partition will happen along X */ + EXPECT_GT(arrBlockCountX, std::max(std::max(arrBlockCountY, arrBlockCountZ), arrBlockCountW)); + EXPECT_LE(count, arrBlockCountY); + + /* construct view */ + ZFP_ARRAY_TYPE::private_view v(&arr); + + /* get original dimensions that should stay constant */ + size_t offsetY = v.global_y(0); + size_t offsetZ = v.global_z(0); + size_t offsetW = v.global_w(0); + size_t lenY = v.size_y(); + size_t lenZ = v.size_z(); + size_t lenW = v.size_w(); + + /* base case */ + v.partition(0, count); + + /* along X, expect to start at first index, zero */ + prevOffsetX = v.global_x(0); + EXPECT_EQ(0, prevOffsetX); + /* expect 
to have at least 1 block */ + prevLenX = v.size_x(); + EXPECT_LE(blockSideLen, prevLenX); + + /* along Y, Z, and W, expect no changes */ + EXPECT_EQ(offsetY, v.global_y(0)); + EXPECT_EQ(offsetZ, v.global_z(0)); + EXPECT_EQ(offsetW, v.global_w(0)); + EXPECT_EQ(lenY, v.size_y()); + EXPECT_EQ(lenZ, v.size_z()); + EXPECT_EQ(lenW, v.size_w()); + + /* successive cases are compared to previous */ + for (size_t i = 1; i < count - 1; i++) { + ZFP_ARRAY_TYPE::private_view v2(&arr); + v2.partition(i, count); + + /* along X, expect blocks continue where previous left off */ + offsetX = v2.global_x(0); + EXPECT_EQ(prevOffsetX + prevLenX, offsetX); + /* expect to have at least 1 block */ + lenX = v2.size_x(); + EXPECT_LE(blockSideLen, lenX); + + /* along Y, Z, and W, expect no changes */ + EXPECT_EQ(offsetY, v2.global_y(0)); + EXPECT_EQ(offsetZ, v2.global_z(0)); + EXPECT_EQ(offsetW, v2.global_w(0)); + EXPECT_EQ(lenY, v2.size_y()); + EXPECT_EQ(lenZ, v2.size_z()); + EXPECT_EQ(lenW, v2.size_w()); + + prevOffsetX = offsetX; + prevLenX = lenX; + } + + /* last partition case */ + ZFP_ARRAY_TYPE::private_view v3(&arr); + v3.partition(count - 1, count); + + /* along X, expect blocks continue where previous left off */ + offsetX = v3.global_x(0); + EXPECT_EQ(prevOffsetX + prevLenX, offsetX); + /* last partition could hold a partial block */ + lenX = v3.size_x(); + EXPECT_LT(0u, lenX); + /* expect to end on final index */ + EXPECT_EQ(arr.size_x(), offsetX + lenX); + + /* along Y, Z, and W, expect no changes */ + EXPECT_EQ(offsetY, v3.global_y(0)); + EXPECT_EQ(offsetZ, v3.global_z(0)); + EXPECT_EQ(offsetW, v3.global_w(0)); + EXPECT_EQ(lenY, v3.size_y()); + EXPECT_EQ(lenZ, v3.size_z()); + EXPECT_EQ(lenW, v3.size_w()); +} diff --git a/tests/array/array/testArray4d.cpp b/tests/array/array/testArray4d.cpp new file mode 100644 index 00000000..5fb688f1 --- /dev/null +++ b/tests/array/array/testArray4d.cpp @@ -0,0 +1,45 @@ +#include "zfp/array2.hpp" +#include "zfp/array3.hpp" +#include 
"zfp/array4.hpp" +#include "zfp/factory.hpp" +#include "zfp/array1.hpp" +using namespace zfp; + +extern "C" { + #include "constants/4dDouble.h" +} + +#include "gtest/gtest.h" +#include "utils/gtestDoubleEnv.h" +#include "utils/gtestBaseFixture.h" +#include "utils/predicates.h" + +class Array4dTestEnv : public ArrayDoubleTestEnv { +public: + virtual int getDims() { return 4; } +}; + +Array4dTestEnv* const testEnv = new Array4dTestEnv; + +class Array4dTest : public ArrayNdTestFixture {}; + +#define TEST_FIXTURE Array4dTest + +#define ZFP_ARRAY_TYPE array4d +#define ZFP_ARRAY_TYPE_WRONG_SCALAR array4f +#define ZFP_ARRAY_TYPE_WRONG_DIM array1d +#define ZFP_ARRAY_TYPE_WRONG_SCALAR_DIM array1f +#define ZFP_ARRAY_NOT_INCLUDED_TYPE array2d + +#define UINT uint64 +#define SCALAR double +#define DIMS 4 + +#include "testArrayBase.cpp" +#include "testArray4Base.cpp" + +int main(int argc, char* argv[]) { + ::testing::InitGoogleTest(&argc, argv); + static_cast<void>(::testing::AddGlobalTestEnvironment(testEnv)); + return RUN_ALL_TESTS(); +} diff --git a/tests/array/array/testArray4dIters.cpp b/tests/array/array/testArray4dIters.cpp new file mode 100644 index 00000000..9be8d7c8 --- /dev/null +++ b/tests/array/array/testArray4dIters.cpp @@ -0,0 +1,10 @@ +#include "zfp/array4.hpp" +using namespace zfp; + +#define ARRAY_DIMS_SCALAR_TEST Array4dTest +#define ARRAY_DIMS_SCALAR_TEST_ITERS Array4dTestIters + +#include "utils/gtest4dTest.h" + +#include "testArrayItersBase.cpp" +#include "testArray4ItersBase.cpp" diff --git a/tests/array/array/testArray4dPtrs.cpp b/tests/array/array/testArray4dPtrs.cpp new file mode 100644 index 00000000..5eae996a --- /dev/null +++ b/tests/array/array/testArray4dPtrs.cpp @@ -0,0 +1,10 @@ +#include "zfp/array4.hpp" +using namespace zfp; + +#define ARRAY_DIMS_SCALAR_TEST Array4dTest +#define ARRAY_DIMS_SCALAR_TEST_PTRS Array4dTestPtrs + +#include "utils/gtest4dTest.h" + +#include "testArrayPtrsBase.cpp" +#include "testArray4PtrsBase.cpp" diff --git 
a/tests/array/array/testArray4dRefs.cpp b/tests/array/array/testArray4dRefs.cpp new file mode 100644 index 00000000..4560c83b --- /dev/null +++ b/tests/array/array/testArray4dRefs.cpp @@ -0,0 +1,14 @@ +#include "zfp/array4.hpp" +using namespace zfp; + +extern "C" { + #include "utils/rand64.h" +} + +#define ARRAY_DIMS_SCALAR_TEST Array4dTest +#define ARRAY_DIMS_SCALAR_TEST_REFS Array4dTestRefs + +#include "utils/gtest4dTest.h" + +#include "testArrayRefsBase.cpp" +#include "testArray4RefsBase.cpp" diff --git a/tests/array/array/testArray4dViewIters.cpp b/tests/array/array/testArray4dViewIters.cpp new file mode 100644 index 00000000..d0be737c --- /dev/null +++ b/tests/array/array/testArray4dViewIters.cpp @@ -0,0 +1,17 @@ +#include "zfp/array4.hpp" +using namespace zfp; + +extern "C" { + #include "utils/rand64.h" +} + +#define ARRAY_DIMS_SCALAR_TEST Array4dTest +#define ARRAY_DIMS_SCALAR_TEST_VIEW_ITERS Array4dTestViewIters + +#include "utils/gtest4dTest.h" + +#define ZFP_ARRAY_TYPE array4d +#define SCALAR double +#define DIMS 4 + +#include "testArrayViewItersBase.cpp" diff --git a/tests/array/array/testArray4dViewPtrs.cpp b/tests/array/array/testArray4dViewPtrs.cpp new file mode 100644 index 00000000..0eee3c46 --- /dev/null +++ b/tests/array/array/testArray4dViewPtrs.cpp @@ -0,0 +1,17 @@ +#include "zfp/array4.hpp" +using namespace zfp; + +extern "C" { + #include "utils/rand64.h" +} + +#define ARRAY_DIMS_SCALAR_TEST Array4dTest +#define ARRAY_DIMS_SCALAR_TEST_VIEW_PTRS Array4dTestViewPtrs + +#include "utils/gtest4dTest.h" + +#define ZFP_ARRAY_TYPE array4d +#define SCALAR double +#define DIMS 4 + +#include "testArrayViewPtrsBase.cpp" diff --git a/tests/array/array/testArray4dViews.cpp b/tests/array/array/testArray4dViews.cpp new file mode 100644 index 00000000..d53c3cd1 --- /dev/null +++ b/tests/array/array/testArray4dViews.cpp @@ -0,0 +1,18 @@ +#include "zfp/array4.hpp" +using namespace zfp; + +extern "C" { + #include "utils/rand64.h" +} + +#define 
ARRAY_DIMS_SCALAR_TEST Array4dTest +#define ARRAY_DIMS_SCALAR_TEST_VIEWS Array4dTestViews + +#include "utils/gtest4dTest.h" + +#define ZFP_ARRAY_TYPE array4d +#define SCALAR double +#define DIMS 4 + +#include "testArrayViewsBase.cpp" +#include "testArray4ViewsBase.cpp" diff --git a/tests/array/array/testArray4f.cpp b/tests/array/array/testArray4f.cpp new file mode 100644 index 00000000..dbd8c070 --- /dev/null +++ b/tests/array/array/testArray4f.cpp @@ -0,0 +1,45 @@ +#include "zfp/array2.hpp" +#include "zfp/array3.hpp" +#include "zfp/array4.hpp" +#include "zfp/factory.hpp" +#include "zfp/array1.hpp" +using namespace zfp; + +extern "C" { + #include "constants/4dFloat.h" +} + +#include "gtest/gtest.h" +#include "utils/gtestFloatEnv.h" +#include "utils/gtestBaseFixture.h" +#include "utils/predicates.h" + +class Array4fTestEnv : public ArrayFloatTestEnv { +public: + virtual int getDims() { return 4; } +}; + +Array4fTestEnv* const testEnv = new Array4fTestEnv; + +class Array4fTest : public ArrayNdTestFixture {}; + +#define TEST_FIXTURE Array4fTest + +#define ZFP_ARRAY_TYPE array4f +#define ZFP_ARRAY_TYPE_WRONG_SCALAR array4d +#define ZFP_ARRAY_TYPE_WRONG_DIM array1f +#define ZFP_ARRAY_TYPE_WRONG_SCALAR_DIM array1d +#define ZFP_ARRAY_NOT_INCLUDED_TYPE array2f + +#define UINT uint32 +#define SCALAR float +#define DIMS 4 + +#include "testArrayBase.cpp" +#include "testArray4Base.cpp" + +int main(int argc, char* argv[]) { + ::testing::InitGoogleTest(&argc, argv); + static_cast<void>(::testing::AddGlobalTestEnvironment(testEnv)); + return RUN_ALL_TESTS(); +} diff --git a/tests/array/array/testArray4fIters.cpp b/tests/array/array/testArray4fIters.cpp new file mode 100644 index 00000000..ddcdb42b --- /dev/null +++ b/tests/array/array/testArray4fIters.cpp @@ -0,0 +1,10 @@ +#include "zfp/array4.hpp" +using namespace zfp; + +#define ARRAY_DIMS_SCALAR_TEST Array4fTest +#define ARRAY_DIMS_SCALAR_TEST_ITERS Array4fTestIters + +#include "utils/gtest4fTest.h" + +#include 
"testArrayItersBase.cpp" +#include "testArray4ItersBase.cpp" diff --git a/tests/array/array/testArray4fPtrs.cpp b/tests/array/array/testArray4fPtrs.cpp new file mode 100644 index 00000000..7a726b84 --- /dev/null +++ b/tests/array/array/testArray4fPtrs.cpp @@ -0,0 +1,10 @@ +#include "zfp/array4.hpp" +using namespace zfp; + +#define ARRAY_DIMS_SCALAR_TEST Array4fTest +#define ARRAY_DIMS_SCALAR_TEST_PTRS Array4fTestPtrs + +#include "utils/gtest4fTest.h" + +#include "testArrayPtrsBase.cpp" +#include "testArray4PtrsBase.cpp" diff --git a/tests/array/array/testArray4fRefs.cpp b/tests/array/array/testArray4fRefs.cpp new file mode 100644 index 00000000..c20c305b --- /dev/null +++ b/tests/array/array/testArray4fRefs.cpp @@ -0,0 +1,14 @@ +#include "zfp/array4.hpp" +using namespace zfp; + +extern "C" { + #include "utils/rand32.h" +} + +#define ARRAY_DIMS_SCALAR_TEST Array4fTest +#define ARRAY_DIMS_SCALAR_TEST_REFS Array4fTestRefs + +#include "utils/gtest4fTest.h" + +#include "testArrayRefsBase.cpp" +#include "testArray4RefsBase.cpp" diff --git a/tests/array/array/testArray4fViewIters.cpp b/tests/array/array/testArray4fViewIters.cpp new file mode 100644 index 00000000..28f6fb7e --- /dev/null +++ b/tests/array/array/testArray4fViewIters.cpp @@ -0,0 +1,17 @@ +#include "zfp/array4.hpp" +using namespace zfp; + +extern "C" { + #include "utils/rand64.h" +} + +#define ARRAY_DIMS_SCALAR_TEST Array4fTest +#define ARRAY_DIMS_SCALAR_TEST_VIEW_ITERS Array4fTestViewIters + +#include "utils/gtest4fTest.h" + +#define ZFP_ARRAY_TYPE array4f +#define SCALAR float +#define DIMS 4 + +#include "testArrayViewItersBase.cpp" diff --git a/tests/array/array/testArray4fViewPtrs.cpp b/tests/array/array/testArray4fViewPtrs.cpp new file mode 100644 index 00000000..b4a475a9 --- /dev/null +++ b/tests/array/array/testArray4fViewPtrs.cpp @@ -0,0 +1,17 @@ +#include "zfp/array4.hpp" +using namespace zfp; + +extern "C" { + #include "utils/rand64.h" +} + +#define ARRAY_DIMS_SCALAR_TEST Array4fTest +#define 
ARRAY_DIMS_SCALAR_TEST_VIEW_PTRS Array4fTestViewPtrs + +#include "utils/gtest4fTest.h" + +#define ZFP_ARRAY_TYPE array4f +#define SCALAR float +#define DIMS 4 + +#include "testArrayViewPtrsBase.cpp" diff --git a/tests/array/array/testArray4fViews.cpp b/tests/array/array/testArray4fViews.cpp new file mode 100644 index 00000000..c0e44137 --- /dev/null +++ b/tests/array/array/testArray4fViews.cpp @@ -0,0 +1,18 @@ +#include "zfp/array4.hpp" +using namespace zfp; + +extern "C" { + #include "utils/rand32.h" +} + +#define ARRAY_DIMS_SCALAR_TEST Array4fTest +#define ARRAY_DIMS_SCALAR_TEST_VIEWS Array4fTestViews + +#include "utils/gtest4fTest.h" + +#define ZFP_ARRAY_TYPE array4f +#define SCALAR float +#define DIMS 4 + +#include "testArrayViewsBase.cpp" +#include "testArray4ViewsBase.cpp" diff --git a/tests/array/array/testArrayBase.cpp b/tests/array/array/testArrayBase.cpp new file mode 100644 index 00000000..d0ca5afe --- /dev/null +++ b/tests/array/array/testArrayBase.cpp @@ -0,0 +1,911 @@ +extern "C" { + #include "utils/testMacros.h" + #include "utils/zfpChecksums.h" + #include "utils/zfpHash.h" +} + +#include <cstring> +#include <sstream> + +TEST_F(TEST_FIXTURE, when_constructorCalled_then_rateSetWithWriteRandomAccess) +{ + double rate = ZFP_RATE_PARAM_BITS; + +#if DIMS == 1 + ZFP_ARRAY_TYPE arr(inputDataSideLen, rate); + EXPECT_LT(rate, arr.rate()); +#elif DIMS == 2 + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, rate); + EXPECT_LT(rate, arr.rate()); +#elif DIMS == 3 + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, rate); + // alignment in 3D supports integer fixed-rates [1, 64] (use <=) + EXPECT_LE(rate, arr.rate()); +#elif DIMS == 4 + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, inputDataSideLen, rate); + // alignment in 4D supports integer fixed-rates [1, 64] (use <=) + EXPECT_LE(rate, arr.rate()); +#endif +} + +TEST_F(TEST_FIXTURE, when_constructorCalledWithCacheSize_then_minCacheSizeEnforced) +{ + size_t cacheSize = 300; +
+#if DIMS == 1 + ZFP_ARRAY_TYPE arr(inputDataSideLen, ZFP_RATE_PARAM_BITS, 0, cacheSize); +#elif DIMS == 2 + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, ZFP_RATE_PARAM_BITS, 0, cacheSize); +#elif DIMS == 3 + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, ZFP_RATE_PARAM_BITS, 0, cacheSize); +#elif DIMS == 4 + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, inputDataSideLen, ZFP_RATE_PARAM_BITS, 0, cacheSize); +#endif + + EXPECT_LE(cacheSize, arr.cache_size()); +} + +TEST_F(TEST_FIXTURE, when_setRate_then_compressionRateChanged) +{ + double oldRate = ZFP_RATE_PARAM_BITS; + +#if DIMS == 1 + ZFP_ARRAY_TYPE arr(inputDataSideLen, oldRate, inputDataArr); +#elif DIMS == 2 + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, oldRate, inputDataArr); +#elif DIMS == 3 + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, oldRate, inputDataArr); +#elif DIMS == 4 + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, inputDataSideLen, oldRate, inputDataArr); +#endif + + double actualOldRate = arr.rate(); + size_t oldCompressedSize = arr.compressed_size(); + uint64 oldChecksum = hashBitstream((uint64*)arr.compressed_data(), oldCompressedSize); + + double newRate = oldRate - 10; + EXPECT_LT(1, newRate); + arr.set_rate(newRate); + EXPECT_GT(actualOldRate, arr.rate()); + + arr.set(inputDataArr); + size_t newCompressedSize = arr.compressed_size(); + uint64 checksum = hashBitstream((uint64*)arr.compressed_data(), newCompressedSize); + + EXPECT_PRED_FORMAT2(ExpectNeqPrintHexPred, oldChecksum, checksum); + + EXPECT_GT(oldCompressedSize, newCompressedSize); +} + +void VerifyProperHeaderWritten(const zfp::array::header& h, size_t chosenSizeX, size_t chosenSizeY, size_t chosenSizeZ, size_t chosenSizeW, double chosenRate) +{ + // copy header into aligned memory suitable for bitstream r/w + size_t byte_size = h.size_bytes(); + size_t num_64bit_entries = (byte_size + sizeof(uint64) - 1) 
/ sizeof(uint64); + uint64* buffer = new uint64[num_64bit_entries]; + + memcpy(buffer, h.data(), h.size_bytes()); + + // verify valid header (manually through C API) + bitstream* stream = stream_open(buffer, byte_size); + zfp_stream* zfp = zfp_stream_open(stream); + zfp_field* field = zfp_field_alloc(); + EXPECT_EQ(ZFP_HEADER_SIZE_BITS, zfp_read_header(zfp, field, ZFP_HEADER_FULL)); + + // verify header contents + EXPECT_EQ(chosenSizeX, field->nx); + EXPECT_EQ(chosenSizeY, field->ny); + EXPECT_EQ(chosenSizeZ, field->nz); + EXPECT_EQ(chosenSizeW, field->nw); + + EXPECT_EQ(ZFP_TYPE, field->type); + + // to verify rate, we can only compare the 4 compression-param basis + zfp_stream* expectedZfpStream = zfp_stream_open(0); + zfp_stream_set_rate(expectedZfpStream, chosenRate, ZFP_TYPE, testEnv->getDims(), zfp_true); + EXPECT_EQ(expectedZfpStream->minbits, zfp->minbits); + EXPECT_EQ(expectedZfpStream->maxbits, zfp->maxbits); + EXPECT_EQ(expectedZfpStream->maxprec, zfp->maxprec); + EXPECT_EQ(expectedZfpStream->minexp, zfp->minexp); + + zfp_stream_close(expectedZfpStream); + zfp_stream_close(zfp); + zfp_field_free(field); + stream_close(stream); + + delete[] buffer; +} + +TEST_F(TEST_FIXTURE, when_writeHeader_then_cCompatibleHeaderWritten) +{ + double chosenRate = ZFP_RATE_PARAM_BITS; + + size_t chosenSizeX, chosenSizeY, chosenSizeZ, chosenSizeW; +#if DIMS == 1 + chosenSizeX = 55; + chosenSizeY = 0; + chosenSizeZ = 0; + chosenSizeW = 0; + ZFP_ARRAY_TYPE arr(chosenSizeX, chosenRate); +#elif DIMS == 2 + chosenSizeX = 55; + chosenSizeY = 23; + chosenSizeZ = 0; + chosenSizeW = 0; + ZFP_ARRAY_TYPE arr(chosenSizeX, chosenSizeY, chosenRate); +#elif DIMS == 3 + chosenSizeX = 55; + chosenSizeY = 23; + chosenSizeZ = 31; + chosenSizeW = 0; + ZFP_ARRAY_TYPE arr(chosenSizeX, chosenSizeY, chosenSizeZ, chosenRate); +#elif DIMS == 4 + // max rate for short headers for 4D arrays + chosenRate = std::min(chosenRate, 8.0); + chosenSizeX = 55; + chosenSizeY = 23; + chosenSizeZ = 31; + 
chosenSizeW = 10; + ZFP_ARRAY_TYPE arr(chosenSizeX, chosenSizeY, chosenSizeZ, chosenSizeW, chosenRate); +#endif + + ZFP_ARRAY_TYPE::header header(arr); + chosenRate = arr.rate(); + + VerifyProperHeaderWritten(header, chosenSizeX, chosenSizeY, chosenSizeZ, chosenSizeW, chosenRate); +} + +TEST_F(TEST_FIXTURE, when_generateRandomData_then_checksumMatches) +{ + uint64 key1, key2; + computeKeyOriginalInput(ARRAY_TEST, dimLens, &key1, &key2); + + EXPECT_PRED_FORMAT2(ExpectEqPrintHexPred, getChecksumByKey(DIMS, ZFP_TYPE, key1, key2), _catFunc2(hashArray, SCALAR_BITS)((UINT*)inputDataArr, inputDataTotalLen, 1)); +} + +void FailWhenNoExceptionThrown() +{ + FAIL() << "No exception was thrown when one was expected"; +} + +void FailAndPrintException(std::exception const & e) +{ + FAIL() << "Unexpected exception thrown: " << typeid(e).name() << std::endl << "With message: " << e.what(); +} + +TEST_F(TEST_FIXTURE, when_constructorFromSerializedWithInvalidHeader_then_exceptionThrown) +{ + uchar buffer[0x100] = {}; + try { + ZFP_ARRAY_TYPE::header h(buffer); + ZFP_ARRAY_TYPE arr(h, NULL); + FailWhenNoExceptionThrown(); + } catch (zfp::exception const & e) { + EXPECT_EQ(e.what(), std::string("zfp header is corrupt")); + } catch (std::exception const & e) { + FailAndPrintException(e); + } +} + +TEST_F(TEST_FIXTURE, given_zfpHeaderForCertainDimensionalityButHeaderMissing_when_construct_expect_zfpArrayHeaderExceptionThrown) +{ + uint missingDim = (DIMS % 4) + 1; + zfp_stream_set_rate(stream, 8, ZFP_TYPE, missingDim, zfp_true); + + zfp_field_set_type(field, ZFP_TYPE); + switch (missingDim) { + case 1: + zfp_field_set_size_1d(field, 12); + break; + case 2: + zfp_field_set_size_2d(field, 12, 12); + break; + case 3: + zfp_field_set_size_3d(field, 12, 12, 12); + break; + case 4: + zfp_field_set_size_4d(field, 12, 12, 12, 12); + break; + } + + // write header to buffer with C API + zfp_stream_rewind(stream); + EXPECT_EQ(ZFP_HEADER_SIZE_BITS, zfp_write_header(stream, field, 
ZFP_HEADER_FULL)); + zfp_stream_flush(stream); + + ZFP_ARRAY_TYPE::header h(buffer); + + try { + zfp::array* arr = zfp::array::construct(h); + FailWhenNoExceptionThrown(); + + } catch (zfp::exception const & e) { + std::stringstream ss; + ss << "array" << missingDim << " not supported; include zfp/array" << missingDim << ".hpp before zfp/factory.hpp"; + EXPECT_EQ(e.what(), ss.str()); + + } catch (std::exception const & e) { + FailAndPrintException(e); + } +} + +TEST_F(TEST_FIXTURE, given_serializedCompressedArrayFromWrongScalarType_when_constructorFromSerialized_then_exceptionThrown) +{ +#if DIMS == 1 + ZFP_ARRAY_TYPE_WRONG_SCALAR arr(inputDataSideLen, ZFP_RATE_PARAM_BITS); +#elif DIMS == 2 + ZFP_ARRAY_TYPE_WRONG_SCALAR arr(inputDataSideLen, inputDataSideLen, ZFP_RATE_PARAM_BITS); +#elif DIMS == 3 + ZFP_ARRAY_TYPE_WRONG_SCALAR arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, ZFP_RATE_PARAM_BITS); +#elif DIMS == 4 + // max rate for short headers for 4D arrays + ZFP_ARRAY_TYPE_WRONG_SCALAR arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, inputDataSideLen, std::min(ZFP_RATE_PARAM_BITS, 8)); +#endif + + ZFP_ARRAY_TYPE_WRONG_SCALAR::header h(arr); + + try { + ZFP_ARRAY_TYPE arr2(h, arr.compressed_data()); + FailWhenNoExceptionThrown(); + } catch (zfp::exception const & e) { + EXPECT_EQ(e.what(), std::string("zfp array scalar type does not match header")); + } catch (std::exception const & e) { + FailAndPrintException(e); + } +} + +TEST_F(TEST_FIXTURE, given_serializedCompressedArrayFromWrongDimensionality_when_constructorFromSerialized_then_exceptionThrown) +{ +#if DIMS == 1 + ZFP_ARRAY_TYPE_WRONG_DIM arr(100, 100, ZFP_RATE_PARAM_BITS); +#elif DIMS == 2 + ZFP_ARRAY_TYPE_WRONG_DIM arr(100, 100, 100, ZFP_RATE_PARAM_BITS); +#elif DIMS == 3 + ZFP_ARRAY_TYPE_WRONG_DIM arr(100, 100, 100, 100, ZFP_RATE_PARAM_BITS); +#elif DIMS == 4 + ZFP_ARRAY_TYPE_WRONG_DIM arr(100, ZFP_RATE_PARAM_BITS); +#endif + + try { + ZFP_ARRAY_TYPE_WRONG_DIM::header h(arr); + try { + 
ZFP_ARRAY_TYPE arr2(h, arr.compressed_data()); + FailWhenNoExceptionThrown(); + } catch (zfp::exception const & e) { + // short headers are available in (1D, 2D, and) 3D when ZFP_RATE_PARAM_BITS <= 32 + EXPECT_LT(arr.dimensionality(), 4u); + EXPECT_EQ(e.what(), std::string("zfp array dimensionality does not match header")); + } catch (std::exception const & e) { + FailAndPrintException(e); + } + } catch (zfp::exception const & e) { + // short headers for 4D arrays requires ZFP_RATE_PARAM_BITS <= 8, which is violated + EXPECT_EQ(arr.dimensionality(), 4); + EXPECT_EQ(e.what(), std::string("zfp serialization supports only short headers")); + } catch (std::exception const & e) { + FailAndPrintException(e); + } +} + +TEST_F(TEST_FIXTURE, given_serializedNonFixedRateHeader_when_constructorFromSerialized_then_exceptionThrown) +{ + // create a compressed stream through C API + // (one that is not supported with compressed arrays) + zfp_field* field; +#if DIMS == 1 + field = zfp_field_1d(inputDataArr, ZFP_TYPE, inputDataSideLen); +#elif DIMS == 2 + field = zfp_field_2d(inputDataArr, ZFP_TYPE, inputDataSideLen, inputDataSideLen); +#elif DIMS == 3 + field = zfp_field_3d(inputDataArr, ZFP_TYPE, inputDataSideLen, inputDataSideLen, inputDataSideLen); +#elif DIMS == 4 + field = zfp_field_4d(inputDataArr, ZFP_TYPE, inputDataSideLen, inputDataSideLen, inputDataSideLen, inputDataSideLen); +#endif + + zfp_stream* stream = zfp_stream_open(NULL); + + size_t bufsizeBytes = zfp_stream_maximum_size(stream, field); + uchar* buffer = new uchar[bufsizeBytes]; + memset(buffer, 0, bufsizeBytes); + + bitstream* bs = stream_open(buffer, bufsizeBytes); + zfp_stream_set_bit_stream(stream, bs); + zfp_stream_rewind(stream); + + zfp_stream_set_precision(stream, 10); + EXPECT_NE(zfp_mode_fixed_rate, zfp_stream_compression_mode(stream)); + + // write header + size_t writtenBits = zfp_write_header(stream, field, ZFP_HEADER_FULL); + EXPECT_EQ(ZFP_HEADER_SIZE_BITS, writtenBits); + 
zfp_stream_flush(stream); + + // compute header size in bytes + size_t headerSizeBytes = DIV_ROUND_UP(writtenBits, CHAR_BIT); + + // compress data + uchar* compressedDataPtr = (uchar*)stream_data(bs) + headerSizeBytes; + zfp_compress(stream, field); + + // close/free C API things (keep buffer) + zfp_field_free(field); + zfp_stream_close(stream); + stream_close(bs); + + try { + ZFP_ARRAY_TYPE::header h(buffer, headerSizeBytes); + ZFP_ARRAY_TYPE arr2(h, compressedDataPtr, bufsizeBytes - headerSizeBytes); + FailWhenNoExceptionThrown(); + } catch (zfp::exception const & e) { + EXPECT_EQ(e.what(), std::string("zfp deserialization supports only fixed-rate mode")); + } catch (std::exception const & e) { + FailAndPrintException(e); + } + + delete[] buffer; +} + +TEST_F(TEST_FIXTURE, given_serializedNonFixedRateWrongScalarTypeWrongDimensionalityHeader_when_constructorFromSerialized_then_exceptionsThrown) +{ + // create a compressed stream through C API + // (one that is not supported with compressed arrays) + zfp_field* field; + // (inputDataSideLen specific to that dimensionality, can request too much memory if fitted to higher dimensionality) +#if DIMS == 1 + field = zfp_field_1d(inputDataArr, zfp_type_int32, 100); +#elif DIMS == 2 + field = zfp_field_2d(inputDataArr, zfp_type_int32, 100, 100); +#elif DIMS == 3 + field = zfp_field_3d(inputDataArr, zfp_type_int32, 100, 100, 100); +#elif DIMS == 4 + field = zfp_field_4d(inputDataArr, zfp_type_int32, 30, 30, 30, 30); +#endif + + zfp_stream* stream = zfp_stream_open(NULL); + + size_t bufsizeBytes = zfp_stream_maximum_size(stream, field); + uchar* buffer = new uchar[bufsizeBytes]; + memset(buffer, 0, bufsizeBytes); + + bitstream* bs = stream_open(buffer, bufsizeBytes); + zfp_stream_set_bit_stream(stream, bs); + zfp_stream_rewind(stream); + + zfp_stream_set_precision(stream, 10); + EXPECT_NE(zfp_mode_fixed_rate, zfp_stream_compression_mode(stream)); + + // write header + size_t writtenBits = zfp_write_header(stream, field, 
ZFP_HEADER_FULL); + EXPECT_EQ(ZFP_HEADER_SIZE_BITS, writtenBits); + zfp_stream_flush(stream); + + // compute header size in bytes + size_t headerSizeBytes = (writtenBits + CHAR_BIT - 1) / CHAR_BIT; + + // compress data + uchar* compressedDataPtr = (uchar*)stream_data(bs) + headerSizeBytes; + zfp_compress(stream, field); + + // close/free C API things (keep buffer) + zfp_field_free(field); + zfp_stream_close(stream); + stream_close(bs); + + try { + ZFP_ARRAY_TYPE::header h(buffer, headerSizeBytes); + ZFP_ARRAY_TYPE_WRONG_SCALAR_DIM arr2(h, compressedDataPtr, bufsizeBytes - headerSizeBytes); + FailWhenNoExceptionThrown(); + + } catch (zfp::exception const & e) { + // exception must match one of these + EXPECT_TRUE( + e.what() == std::string("zfp array scalar type does not match header") || + e.what() == std::string("zfp array dimensionality does not match header") || + e.what() == std::string("zfp serialization supports only float and double") || + e.what() == std::string("zfp deserialization supports only fixed-rate mode") + ); + + // print exception if any of above were not met + if (HasFailure()) { + FailAndPrintException(e); + } + + } catch (std::exception const & e) { + FailAndPrintException(e); + } + + delete[] buffer; +} + +TEST_F(TEST_FIXTURE, given_compatibleHeaderWrittenViaCApi_when_constructorFromSerialized_then_successWithParamsSet) +{ + // create a compressed stream through C API + // (one that is supported with compressed arrays) + zfp_field* field; +#if DIMS == 1 + field = zfp_field_1d(inputDataArr, ZFP_TYPE, inputDataSideLen); +#elif DIMS == 2 + field = zfp_field_2d(inputDataArr, ZFP_TYPE, inputDataSideLen, inputDataSideLen); +#elif DIMS == 3 + field = zfp_field_3d(inputDataArr, ZFP_TYPE, inputDataSideLen, inputDataSideLen, inputDataSideLen); +#elif DIMS == 4 + field = zfp_field_4d(inputDataArr, ZFP_TYPE, inputDataSideLen, inputDataSideLen, inputDataSideLen, inputDataSideLen); +#endif + + zfp_stream* stream = zfp_stream_open(NULL); + double rate = 
zfp_stream_set_rate(stream, 8, ZFP_TYPE, DIMS, zfp_true); + EXPECT_EQ(zfp_mode_fixed_rate, zfp_stream_compression_mode(stream)); + + size_t bufsizeBytes = zfp_stream_maximum_size(stream, field); + uchar* buffer = new uchar[bufsizeBytes]; + memset(buffer, 0, bufsizeBytes); + + bitstream* bs = stream_open(buffer, bufsizeBytes); + zfp_stream_set_bit_stream(stream, bs); + zfp_stream_rewind(stream); + + // write header + size_t writtenBits = zfp_write_header(stream, field, ZFP_HEADER_FULL); + EXPECT_EQ(ZFP_HEADER_SIZE_BITS, writtenBits); + zfp_stream_flush(stream); + + // compute header size in bytes + size_t headerSizeBytes = (writtenBits + CHAR_BIT - 1) / CHAR_BIT; + + // compress data + uchar* compressedDataPtr = (uchar*)stream_data(bs) + headerSizeBytes; + zfp_compress(stream, field); + + try { + ZFP_ARRAY_TYPE::header h(buffer, headerSizeBytes); + ZFP_ARRAY_TYPE arr2(h, compressedDataPtr, bufsizeBytes - headerSizeBytes); + + EXPECT_EQ(arr2.dimensionality(), zfp_field_dimensionality(field)); + EXPECT_EQ(arr2.scalar_type(), zfp_field_type(field)); + + size_t n[4]; + EXPECT_EQ(arr2.size(), zfp_field_size(field, n)); + +#if DIMS == 1 + EXPECT_EQ(arr2.size_x(), n[0]); +#elif DIMS == 2 + EXPECT_EQ(arr2.size_x(), n[0]); + EXPECT_EQ(arr2.size_y(), n[1]); +#elif DIMS == 3 + EXPECT_EQ(arr2.size_x(), n[0]); + EXPECT_EQ(arr2.size_y(), n[1]); + EXPECT_EQ(arr2.size_z(), n[2]); +#elif DIMS == 4 + EXPECT_EQ(arr2.size_x(), n[0]); + EXPECT_EQ(arr2.size_y(), n[1]); + EXPECT_EQ(arr2.size_z(), n[2]); + EXPECT_EQ(arr2.size_w(), n[3]); +#endif + + EXPECT_EQ(arr2.rate(), rate); + + } catch (std::exception const & e) { + FailAndPrintException(e); + } + + zfp_stream_close(stream); + stream_close(bs); + zfp_field_free(field); + delete[] buffer; +} + +TEST_F(TEST_FIXTURE, given_incompleteChunkOfSerializedCompressedArray_when_constructorFromSerialized_then_exceptionThrown) +{ +#if DIMS == 1 + ZFP_ARRAY_TYPE arr(inputDataSideLen, ZFP_RATE_PARAM_BITS); +#elif DIMS == 2 + ZFP_ARRAY_TYPE 
arr(inputDataSideLen, inputDataSideLen, ZFP_RATE_PARAM_BITS); +#elif DIMS == 3 + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, ZFP_RATE_PARAM_BITS); +#elif DIMS == 4 + // max rate for short headers for 4D arrays + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, inputDataSideLen, std::min(ZFP_RATE_PARAM_BITS, 8)); +#endif + + ZFP_ARRAY_TYPE::header h(arr); + + try { + ZFP_ARRAY_TYPE arr2(h, arr.compressed_data(), arr.compressed_size() - 1); + FailWhenNoExceptionThrown(); + } catch (zfp::exception const & e) { + EXPECT_EQ(e.what(), std::string("buffer size is smaller than required")); + } catch (std::exception const & e) { + FailAndPrintException(e); + } +} + +TEST_F(TEST_FIXTURE, given_serializedCompressedArrayHeader_when_factoryFuncConstruct_then_correctTypeConstructed) +{ +#if DIMS == 1 + ZFP_ARRAY_TYPE arr(inputDataSideLen, ZFP_RATE_PARAM_BITS); +#elif DIMS == 2 + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, ZFP_RATE_PARAM_BITS); +#elif DIMS == 3 + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, ZFP_RATE_PARAM_BITS); +#elif DIMS == 4 + // max rate for short headers for 4D arrays + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, inputDataSideLen, std::min(ZFP_RATE_PARAM_BITS, 8)); +#endif + + ZFP_ARRAY_TYPE::header h(arr); + + array* arr2 = zfp::array::construct(h); + + ASSERT_TRUE(arr2 != 0); + ASSERT_TRUE(dynamic_cast<ZFP_ARRAY_TYPE*>(arr2) != NULL); + ASSERT_TRUE(dynamic_cast<ZFP_ARRAY_NOT_INCLUDED_TYPE*>(arr2) == NULL); + + delete arr2; +} + +TEST_F(TEST_FIXTURE, given_serializedCompressedArray_when_factoryFuncConstruct_then_correctTypeConstructedWithPopulatedEntries) +{ +#if DIMS == 1 + ZFP_ARRAY_TYPE arr(inputDataSideLen, ZFP_RATE_PARAM_BITS); +#elif DIMS == 2 + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, ZFP_RATE_PARAM_BITS); +#elif DIMS == 3 + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, ZFP_RATE_PARAM_BITS); +#elif DIMS == 4 + // max rate for short 
headers for 4D arrays + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, inputDataSideLen, std::min(ZFP_RATE_PARAM_BITS, 8)); +#endif + + arr[1] = 999999.; + + ZFP_ARRAY_TYPE::header h(arr); + + array* arr2 = zfp::array::construct(h, arr.compressed_data(), arr.compressed_size()); + + ASSERT_TRUE(arr2 != 0); + EXPECT_EQ(arr.compressed_size(), arr2->compressed_size()); + ASSERT_TRUE(std::memcmp(arr.compressed_data(), arr2->compressed_data(), arr.compressed_size()) == 0); + + delete arr2; +} + +TEST_F(TEST_FIXTURE, given_uncompatibleSerializedMem_when_factoryFuncConstruct_then_throwsZfpHeaderException) +{ + size_t dummyLen = 1024; + uchar* dummyMem = new uchar[dummyLen]; + memset(dummyMem, 0, dummyLen); + + try { + ZFP_ARRAY_TYPE::header h(dummyMem); + array* arr = zfp::array::construct(h, dummyMem, dummyLen); + } catch (zfp::exception const & e) { + EXPECT_EQ(e.what(), std::string("zfp header is corrupt")); + } catch (std::exception const & e) { + FailAndPrintException(e); + } + + delete[] dummyMem; +} + +#if DIMS == 1 +// with write random access in 1D, fixed-rate params rounded up to multiples of 16 +INSTANTIATE_TEST_SUITE_P(TestManyCompressionRates, TEST_FIXTURE, ::testing::Values(1, 2)); +#else +INSTANTIATE_TEST_SUITE_P(TestManyCompressionRates, TEST_FIXTURE, ::testing::Values(0, 1, 2)); +#endif + +TEST_P(TEST_FIXTURE, given_dataset_when_set_then_underlyingBitstreamChecksumMatches) +{ +#if DIMS == 1 + ZFP_ARRAY_TYPE arr(inputDataSideLen, getRate()); +#elif DIMS == 2 + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, getRate()); +#elif DIMS == 3 + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, getRate()); +#elif DIMS == 4 + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, inputDataSideLen, getRate()); +#endif + + uint64 key1, key2; + computeKey(ARRAY_TEST, COMPRESSED_BITSTREAM, dimLens, zfp_mode_fixed_rate, GetParam(), &key1, &key2); + uint64 expectedChecksum = getChecksumByKey(DIMS, 
ZFP_TYPE, key1, key2); + + uint64 checksum = hashBitstream((uint64*)arr.compressed_data(), arr.compressed_size()); + EXPECT_PRED_FORMAT2(ExpectNeqPrintHexPred, expectedChecksum, checksum); + + arr.set(inputDataArr); + + checksum = hashBitstream((uint64*)arr.compressed_data(), arr.compressed_size()); + EXPECT_PRED_FORMAT2(ExpectEqPrintHexPred, expectedChecksum, checksum); +} + +TEST_P(TEST_FIXTURE, given_setArray_when_get_then_decompressedValsReturned) +{ +#if DIMS == 1 + ZFP_ARRAY_TYPE arr(inputDataSideLen, getRate(), inputDataArr); +#elif DIMS == 2 + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); +#elif DIMS == 3 + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); +#elif DIMS == 4 + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); +#endif + + SCALAR* decompressedArr = new SCALAR[inputDataTotalLen]; + arr.get(decompressedArr); + + uint64 key1, key2; + computeKey(ARRAY_TEST, DECOMPRESSED_ARRAY, dimLens, zfp_mode_fixed_rate, GetParam(), &key1, &key2); + uint64 expectedChecksum = getChecksumByKey(DIMS, ZFP_TYPE, key1, key2); + + uint64 checksum = _catFunc2(hashArray, SCALAR_BITS)((UINT*)decompressedArr, inputDataTotalLen, 1); + EXPECT_PRED_FORMAT2(ExpectEqPrintHexPred, expectedChecksum, checksum); + + delete[] decompressedArr; +} + +TEST_P(TEST_FIXTURE, given_populatedCompressedArray_when_resizeWithClear_then_bitstreamZeroed) +{ +#if DIMS == 1 + ZFP_ARRAY_TYPE arr(inputDataSideLen, getRate()); +#elif DIMS == 2 + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, getRate()); +#elif DIMS == 3 + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, getRate()); +#elif DIMS == 4 + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, inputDataSideLen, getRate()); +#endif + + arr.set(inputDataArr); + EXPECT_NE(0u, hashBitstream((uint64*)arr.compressed_data(), arr.compressed_size())); 
+ +#if DIMS == 1 + arr.resize(inputDataSideLen + 1, true); +#elif DIMS == 2 + arr.resize(inputDataSideLen + 1, inputDataSideLen + 1, true); +#elif DIMS == 3 + arr.resize(inputDataSideLen + 1, inputDataSideLen + 1, inputDataSideLen + 1, true); +#elif DIMS == 4 + arr.resize(inputDataSideLen + 1, inputDataSideLen + 1, inputDataSideLen + 1, inputDataSideLen + 1, true); +#endif + + EXPECT_EQ(0u, hashBitstream((uint64*)arr.compressed_data(), arr.compressed_size())); +} + +TEST_P(TEST_FIXTURE, when_configureCompressedArrayFromDefaultConstructor_then_bitstreamChecksumMatches) +{ + ZFP_ARRAY_TYPE arr; + +#if DIMS == 1 + arr.resize(inputDataSideLen, false); +#elif DIMS == 2 + arr.resize(inputDataSideLen, inputDataSideLen, false); +#elif DIMS == 3 + arr.resize(inputDataSideLen, inputDataSideLen, inputDataSideLen, false); +#elif DIMS == 4 + arr.resize(inputDataSideLen, inputDataSideLen, inputDataSideLen, inputDataSideLen, false); +#endif + + arr.set_rate(getRate()); + arr.set(inputDataArr); + + uint64 key1, key2; + computeKey(ARRAY_TEST, COMPRESSED_BITSTREAM, dimLens, zfp_mode_fixed_rate, GetParam(), &key1, &key2); + uint64 expectedChecksum = getChecksumByKey(DIMS, ZFP_TYPE, key1, key2); + + uint64 checksum = hashBitstream((uint64*)arr.compressed_data(), arr.compressed_size()); + EXPECT_PRED_FORMAT2(ExpectEqPrintHexPred, expectedChecksum, checksum); +} + +// assumes arr1 was given a dirty cache +// this irreversibly changes arr1 (clears entries) +void CheckDeepCopyPerformedViaDirtyCache(ZFP_ARRAY_TYPE& arr1, ZFP_ARRAY_TYPE& arr2, void* arr1UnflushedBitstreamPtr) +{ + // flush arr2 first, to ensure arr1 remains unflushed + uint64 checksum = hashBitstream((uint64*)arr2.compressed_data(), arr2.compressed_size()); + uint64 arr1UnflushedChecksum = hashBitstream((uint64*)arr1UnflushedBitstreamPtr, arr1.compressed_size()); + EXPECT_PRED_FORMAT2(ExpectNeqPrintHexPred, arr1UnflushedChecksum, checksum); + + // flush arr1, compute its checksum, clear its bitstream, re-compute arr2's 
checksum + uint64 expectedChecksum = hashBitstream((uint64*)arr1.compressed_data(), arr1.compressed_size()); + +#if DIMS == 1 + arr1.resize(arr1.size(), true); +#elif DIMS == 2 + arr1.resize(arr1.size_x(), arr1.size_y(), true); +#elif DIMS == 3 + arr1.resize(arr1.size_x(), arr1.size_y(), arr1.size_z(), true); +#elif DIMS == 4 + arr1.resize(arr1.size_x(), arr1.size_y(), arr1.size_z(), arr1.size_w(), true); +#endif + + checksum = hashBitstream((uint64*)arr2.compressed_data(), arr2.compressed_size()); + EXPECT_PRED_FORMAT2(ExpectEqPrintHexPred, expectedChecksum, checksum); +} + +// this irreversibly changes arr1 (resize + clears entries) +void CheckMemberVarsCopied(ZFP_ARRAY_TYPE& arr1, const ZFP_ARRAY_TYPE& arr2, bool assertCacheSize) +{ + double oldRate = arr1.rate(); + size_t oldCompressedSize = arr1.compressed_size(); + size_t oldCacheSize = arr1.cache_size(); + +#if DIMS == 1 + size_t oldSizeX = arr1.size(); + + arr1.resize(oldSizeX - 10); +#elif DIMS == 2 + size_t oldSizeX = arr1.size_x(); + size_t oldSizeY = arr1.size_y(); + + arr1.resize(oldSizeX - 10, oldSizeY - 5); +#elif DIMS == 3 + size_t oldSizeX = arr1.size_x(); + size_t oldSizeY = arr1.size_y(); + size_t oldSizeZ = arr1.size_z(); + + arr1.resize(oldSizeX - 10, oldSizeY - 5, oldSizeZ - 8); +#elif DIMS == 4 + size_t oldSizeX = arr1.size_x(); + size_t oldSizeY = arr1.size_y(); + size_t oldSizeZ = arr1.size_z(); + size_t oldSizeW = arr1.size_w(); + + arr1.resize(oldSizeX - 10, oldSizeY - 5, oldSizeZ - 8, oldSizeW - 3); +#endif + + arr1.set_rate(oldRate + 10); + arr1.set(inputDataArr); + arr1.set_cache_size(oldCacheSize + 10); + + EXPECT_EQ(oldRate, arr2.rate()); + EXPECT_EQ(oldCompressedSize, arr2.compressed_size()); + if (assertCacheSize) + EXPECT_EQ(oldCacheSize, arr2.cache_size()); + +#if DIMS == 1 + EXPECT_EQ(oldSizeX, arr2.size()); +#elif DIMS == 2 + EXPECT_EQ(oldSizeX, arr2.size_x()); + EXPECT_EQ(oldSizeY, arr2.size_y()); +#elif DIMS == 3 + EXPECT_EQ(oldSizeX, arr2.size_x()); + EXPECT_EQ(oldSizeY, 
arr2.size_y()); + EXPECT_EQ(oldSizeZ, arr2.size_z()); +#elif DIMS == 4 + EXPECT_EQ(oldSizeX, arr2.size_x()); + EXPECT_EQ(oldSizeY, arr2.size_y()); + EXPECT_EQ(oldSizeZ, arr2.size_z()); + EXPECT_EQ(oldSizeW, arr2.size_w()); +#endif +} + +TEST_P(TEST_FIXTURE, given_compressedArray_when_copyConstructor_then_memberVariablesCopied) +{ +#if DIMS == 1 + ZFP_ARRAY_TYPE arr(inputDataSideLen, getRate(), inputDataArr, 128); +#elif DIMS == 2 + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, getRate(), inputDataArr, 128); +#elif DIMS == 3 + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, getRate(), inputDataArr, 128); +#elif DIMS == 4 + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, inputDataSideLen, getRate(), inputDataArr, 128); +#endif + + ZFP_ARRAY_TYPE arr2(arr); + + CheckMemberVarsCopied(arr, arr2, true); +} + +TEST_P(TEST_FIXTURE, given_compressedArray_when_copyConstructor_then_deepCopyPerformed) +{ +#if DIMS == 1 + ZFP_ARRAY_TYPE arr(inputDataSideLen, getRate(), inputDataArr); +#elif DIMS == 2 + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); +#elif DIMS == 3 + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); +#elif DIMS == 4 + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); +#endif + + // create arr with dirty cache + void* arrUnflushedBitstreamPtr = arr.compressed_data(); + arr[0] = 999999; + + ZFP_ARRAY_TYPE arr2(arr); + + CheckDeepCopyPerformedViaDirtyCache(arr, arr2, arrUnflushedBitstreamPtr); +} + +TEST_P(TEST_FIXTURE, given_compressedArray_when_setSecondArrayEqualToFirst_then_memberVariablesCopied) +{ +#if DIMS == 1 + ZFP_ARRAY_TYPE arr(inputDataSideLen, getRate(), inputDataArr, 128); +#elif DIMS == 2 + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, getRate(), inputDataArr, 128); +#elif DIMS == 3 + ZFP_ARRAY_TYPE arr(inputDataSideLen, 
inputDataSideLen, inputDataSideLen, getRate(), inputDataArr, 128); +#elif DIMS == 4 + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, inputDataSideLen, getRate(), inputDataArr, 128); +#endif + + ZFP_ARRAY_TYPE arr2 = arr; + + CheckMemberVarsCopied(arr, arr2, true); +} + +TEST_P(TEST_FIXTURE, given_compressedArray_when_setSecondArrayEqualToFirst_then_deepCopyPerformed) +{ +#if DIMS == 1 + ZFP_ARRAY_TYPE arr(inputDataSideLen, getRate(), inputDataArr); +#elif DIMS == 2 + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); +#elif DIMS == 3 + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); +#elif DIMS == 4 + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); +#endif + + // create arr with dirty cache + void* arrUnflushedBitstreamPtr = arr.compressed_data(); + arr[0] = 999999; + + ZFP_ARRAY_TYPE arr2 = arr; + + CheckDeepCopyPerformedViaDirtyCache(arr, arr2, arrUnflushedBitstreamPtr); +} + +void CheckHeadersEquivalent(const ZFP_ARRAY_TYPE& arr1, const ZFP_ARRAY_TYPE& arr2) +{ + ZFP_ARRAY_TYPE::header h[2] = { arr1, arr2 }; + + uint64 header1Checksum = hashBitstream((uint64*)(h[0].data()), BITS_TO_BYTES(ZFP_HEADER_SIZE_BITS)); + uint64 header2Checksum = hashBitstream((uint64*)(h[1].data()), BITS_TO_BYTES(ZFP_HEADER_SIZE_BITS)); + EXPECT_PRED_FORMAT2(ExpectEqPrintHexPred, header1Checksum, header2Checksum); +} + +// this clears arr1's entries +void CheckDeepCopyPerformed(ZFP_ARRAY_TYPE& arr1, ZFP_ARRAY_TYPE& arr2) +{ + // flush arr1, compute its checksum, clear its bitstream, re-compute arr2's checksum + uint64 expectedChecksum = hashBitstream((uint64*)arr1.compressed_data(), arr1.compressed_size()); + +#if DIMS == 1 + arr1.resize(arr1.size(), true); +#elif DIMS == 2 + arr1.resize(arr1.size_x(), arr1.size_y(), true); +#elif DIMS == 3 + arr1.resize(arr1.size_x(), arr1.size_y(), arr1.size_z(), true); +#elif 
DIMS == 4 + arr1.resize(arr1.size_x(), arr1.size_y(), arr1.size_z(), arr1.size_w(), true); +#endif + + uint64 checksum = hashBitstream((uint64*)arr2.compressed_data(), arr2.compressed_size()); + EXPECT_PRED_FORMAT2(ExpectEqPrintHexPred, expectedChecksum, checksum); +} + +TEST_P(TEST_FIXTURE, given_serializedCompressedArray_when_constructorFromSerialized_then_constructedArrIsBasicallyADeepCopy) +{ +#if DIMS == 1 + ZFP_ARRAY_TYPE arr(inputDataSideLen, getRate(), inputDataArr); +#elif DIMS == 2 + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); +#elif DIMS == 3 + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); +#elif DIMS == 4 + // max rate for short headers for 4D arrays + if (getRate() > 8) + return; + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, inputDataSideLen, getRate(), inputDataArr); +#endif + + ZFP_ARRAY_TYPE::header h(arr); + + ZFP_ARRAY_TYPE arr2(h, arr.compressed_data(), arr.compressed_size()); + + CheckHeadersEquivalent(arr, arr2); + CheckDeepCopyPerformed(arr, arr2); + // cache size not preserved + CheckMemberVarsCopied(arr, arr2, false); +} diff --git a/tests/array/array/testArrayItersBase.cpp b/tests/array/array/testArrayItersBase.cpp new file mode 100644 index 00000000..8fde7e57 --- /dev/null +++ b/tests/array/array/testArrayItersBase.cpp @@ -0,0 +1,205 @@ +#include "gtest/gtest.h" + +// assumes macros ARRAY_DIMS_SCALAR_TEST, ARRAY_DIMS_SCALAR_TEST_ITERS defined +class ARRAY_DIMS_SCALAR_TEST_ITERS : public ARRAY_DIMS_SCALAR_TEST {}; + +const SCALAR VAL = (SCALAR) 4; + +TEST_F(ARRAY_DIMS_SCALAR_TEST_ITERS, when_constructedIteratorWithBegin_then_initializedToFirstPosition) +{ + iter = arr.begin(); + + EXPECT_EQ(0u, ARRAY_DIMS_SCALAR_TEST::IterAbsOffset(iter)); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_ITERS, when_dereferenceIterator_then_returnsReference) +{ + arr[0] = VAL; + iter = arr.begin(); + + EXPECT_EQ(VAL, *iter); +} + 
+TEST_F(ARRAY_DIMS_SCALAR_TEST_ITERS, when_postIncrementIterator_then_advancedAfterEval) +{ + arr[0] = VAL; + iter = arr.begin(); + + SCALAR d = *iter++; + + EXPECT_EQ(VAL, d); + EXPECT_EQ(1u, ARRAY_DIMS_SCALAR_TEST::IterAbsOffset(iter)); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_ITERS, when_constructedIteratorWithEnd_then_initializedAfterLastEntry) +{ + iter = arr.begin(); + for (size_t i = 0; i < arr.size(); i++, iter++); + + EXPECT_EQ(ARRAY_DIMS_SCALAR_TEST::IterAbsOffset(iter), ARRAY_DIMS_SCALAR_TEST::IterAbsOffset(arr.end())); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_ITERS, when_preIncrementInterator_then_matchIteratorOffsetFromBeginning) +{ + iter = iter2 = arr.begin(); + for (size_t i = 0; iter != arr.end(); ++iter, ++i) + ASSERT_TRUE(iter == iter2 + i); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_ITERS, when_preDecrementInterator_then_matchIteratorOffsetFromEnd) +{ + iter = iter2 = arr.end(); + ptrdiff_t i = 0; + do { + --iter; + --i; + ASSERT_TRUE(iter == iter2 + i); + } while (iter != arr.begin()); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_ITERS, when_preIncrementIterator_then_advancedBeforeEval) +{ + arr[0] = VAL; + iter = arr.begin(); + + EXPECT_EQ(0, *++iter); + EXPECT_EQ(1u, ARRAY_DIMS_SCALAR_TEST::IterAbsOffset(iter)); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_ITERS, given_iterator_when_setAnotherIteratorEqualToThat_then_newIterPointsToSame) +{ + arr[1] = VAL; + iter = arr.begin(); + + iter2 = iter; + + EXPECT_EQ(ARRAY_DIMS_SCALAR_TEST::IterAbsOffset(iter), ARRAY_DIMS_SCALAR_TEST::IterAbsOffset(iter2)); + EXPECT_EQ(VAL, *++iter2); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_ITERS, given_sameArrayAndIndexIterators_when_compareForEquality_then_resultTrue) +{ + iter = arr.begin()++; + iter2 = arr.begin()++; + + EXPECT_TRUE(iter == iter2); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_ITERS, given_differentIndexIterators_when_compareForInequality_then_resultTrue) +{ + iter = arr.begin(); + iter2 = arr.end(); + + EXPECT_TRUE(iter != iter2); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_ITERS, 
given_differentArrayIterators_when_compareForInequality_then_resultTrue) +{ + iter = arr.begin(); + iter2 = arr2.begin(); + + EXPECT_TRUE(iter != iter2); +} + +// const iterators + +TEST_F(ARRAY_DIMS_SCALAR_TEST_ITERS, when_constructedConstIteratorWithBegin_then_initializedToFirstPosition) +{ + citer = arr.cbegin(); + + EXPECT_EQ(0u, ARRAY_DIMS_SCALAR_TEST::IterAbsOffset(citer)); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_ITERS, when_dereferenceConstIterator_then_returnsReference) +{ + arr[0] = VAL; + citer = arr.cbegin(); + + EXPECT_EQ(VAL, *citer); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_ITERS, when_postIncrementConstIterator_then_advancedAfterEval) +{ + arr[0] = VAL; + citer = arr.cbegin(); + + SCALAR d = *citer++; + + EXPECT_EQ(VAL, d); + EXPECT_EQ(1u, ARRAY_DIMS_SCALAR_TEST::IterAbsOffset(citer)); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_ITERS, when_constructedConstIteratorWithEnd_then_initializedAfterLastEntry) +{ + citer = arr.cbegin(); + for (size_t i = 0; i < arr.size(); i++, citer++); + + EXPECT_EQ(ARRAY_DIMS_SCALAR_TEST::IterAbsOffset(citer), ARRAY_DIMS_SCALAR_TEST::IterAbsOffset(arr.cend())); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_ITERS, when_preIncrementConstInterator_then_matchIteratorOffsetFromBeginning) +{ + citer = citer2 = arr.cbegin(); + for (size_t i = 0; citer != arr.cend(); ++citer, ++i) + EXPECT_TRUE(citer == citer2 + i); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_ITERS, when_preDecrementConstInterator_then_matchIteratorOffsetFromEnd) +{ + citer = citer2 = arr.cend(); + ptrdiff_t i = 0; + do { + --citer; + --i; + EXPECT_TRUE(citer == citer2 + i); + } while (citer != arr.cbegin()); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_ITERS, when_preIncrementConstIterator_then_advancedBeforeEval) +{ + arr[0] = VAL; + citer = arr.cbegin(); + + EXPECT_EQ(0, *++citer); + EXPECT_EQ(1u, ARRAY_DIMS_SCALAR_TEST::IterAbsOffset(citer)); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_ITERS, given_const_iterator_when_setAnotherConstIteratorEqualToThat_then_newIterPointsToSame) +{ + arr[1] = VAL; + citer = 
arr.cbegin(); + + citer2 = citer; + + EXPECT_EQ(ARRAY_DIMS_SCALAR_TEST::IterAbsOffset(citer), ARRAY_DIMS_SCALAR_TEST::IterAbsOffset(citer2)); + EXPECT_EQ(VAL, *++citer2); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_ITERS, given_sameArrayAndIndexConstIterators_when_compareForEquality_then_resultTrue) +{ + citer = arr.cbegin()++; + citer2 = arr.cbegin()++; + + EXPECT_TRUE(citer == citer2); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_ITERS, given_differentIndexConstIterators_when_compareForInequality_then_resultTrue) +{ + citer = arr.cbegin(); + citer2 = arr.cend(); + + EXPECT_TRUE(citer != citer2); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_ITERS, given_differentArrayConstIterators_when_compareForInequality_then_resultTrue) +{ + citer = arr.cbegin(); + citer2 = arr2.cbegin(); + + EXPECT_TRUE(citer != citer2); +} + +int main(int argc, char* argv[]) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tests/array/array/testArrayPtrsBase.cpp b/tests/array/array/testArrayPtrsBase.cpp new file mode 100644 index 00000000..e737aa19 --- /dev/null +++ b/tests/array/array/testArrayPtrsBase.cpp @@ -0,0 +1,388 @@ +#include "gtest/gtest.h" + +// assumes macros ARRAY_DIMS_SCALAR_TEST, ARRAY_DIMS_SCALAR_TEST_PTRS defined +class ARRAY_DIMS_SCALAR_TEST_PTRS : public ARRAY_DIMS_SCALAR_TEST {}; + +const SCALAR VAL = (SCALAR) 4; + +TEST_F(ARRAY_DIMS_SCALAR_TEST_PTRS, given_entryPointer_when_dereference_then_originalValueReturned) +{ + arr[0] = VAL; + + SCALAR d = *(&arr[0]); + + EXPECT_EQ(VAL, d); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_PTRS, given_entryPointer_when_setAnotherPtrEqualToThat_then_newPtrPointsToSameVal) +{ + ptr = &arr[0]; + ptr2 = ptr; + + *ptr = VAL; + + EXPECT_EQ(VAL, *ptr2); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_PTRS, given_entryPointer_when_postIncrement_then_ptrAdvancedAfterEval) +{ + arr[1] = VAL; + + ptr = &arr[0]; + SCALAR d = *ptr++; + + EXPECT_EQ(0, d); + EXPECT_EQ(VAL, *ptr); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_PTRS, 
given_entryPointer_when_postDecrement_then_ptrAdvancedAfterEval) +{ + arr[0] = VAL; + + ptr = &arr[1]; + SCALAR d = *ptr--; + + EXPECT_EQ(0, d); + EXPECT_EQ(VAL, *ptr); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_PTRS, when_preIncrementPointer_then_matchPointerOffsetFromBeginning) +{ + ptr = ptr2 = &arr[0]; + for (size_t i = 0; i != arr.size(); ++i, ++ptr) + EXPECT_TRUE(ptr == ptr2 + i); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_PTRS, when_preDecrementPointer_then_matchPointerOffsetFromEnd) +{ + ptr = ptr2 = &arr[arr.size() - 1]; + for (size_t i = 0; i != arr.size(); ++i, --ptr) + EXPECT_TRUE(ptr == ptr2 - i); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_PTRS, given_entryPointer_when_preIncrement_then_ptrAdvancedBeforeEval) +{ + arr[1] = VAL; + + ptr = &arr[0]; + + EXPECT_EQ(VAL, *++ptr); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_PTRS, given_entryPointer_when_preDecrement_then_ptrAdvancedBeforeEval) +{ + arr[0] = VAL; + + ptr = &arr[1]; + + EXPECT_EQ(VAL, *--ptr); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_PTRS, given_entryPointer_when_addToPointer_then_ptrAdvanced) +{ + arr[2] = VAL; + + ptr = &arr[0]; + ptr = ptr + 2; + + EXPECT_EQ(VAL, *ptr); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_PTRS, given_entryPointer_when_advanceUsingBrackets_then_returnsReferenceAtAdvancedLocation) +{ + arr[2] = VAL; + + ptr = &arr[0]; + + EXPECT_EQ(VAL, ptr[2]); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_PTRS, given_entryPointer_when_pointerPlusEquals_then_ptrAdvanced) +{ + arr[2] = VAL; + + ptr = &arr[0]; + ptr += 2; + + EXPECT_EQ(VAL, *ptr); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_PTRS, given_entryPointer_when_subtractFromPointer_then_ptrMovedBack) +{ + arr[0] = VAL; + + ptr = &arr[2]; + ptr = ptr - 2; + + EXPECT_EQ(VAL, *ptr); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_PTRS, given_entryPointer_when_pointerMinusEquals_then_ptrMovedBack) +{ + arr[0] = VAL; + + ptr = &arr[2]; + ptr -= 2; + + EXPECT_EQ(VAL, *ptr); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_PTRS, given_entryPointers_when_subtractPointers_then_resultIsEntryDifference) +{ + int i2 = 2; + 
int i = 0; + ptr2 = &arr[i2]; + ptr = &arr[i]; + + EXPECT_EQ(i2 - i, ptr2 - ptr); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_PTRS, given_sameEntryPointers_when_compareForEquality_then_resultTrue) +{ + int i = 0; + ptr = &arr[i] + 2; + ptr2 = &arr[i + 2]; + + EXPECT_TRUE(ptr == ptr2); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_PTRS, given_differentEntryPointers_when_compareForInequality_then_resultTrue) +{ + int i = 0; + ptr = &arr[i]; + ptr2 = &arr[i + 2]; + + EXPECT_TRUE(ptr != ptr2); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_PTRS, given_differentArrayPointers_when_compareForInequality_then_resultTrue) +{ + int i = 0; + ptr = &arr[i]; + ptr2 = &arr2[i]; + + EXPECT_TRUE(ptr != ptr2); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_PTRS, given_increasingEntryPointers_when_compareForLessThan_then_resultTrue) +{ + int i = 0; + ptr = &arr[i]; + ptr2 = &arr[i + 2]; + + EXPECT_TRUE(ptr < ptr2); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_PTRS, given_increasingEntryPointers_when_compareForLessThanOrEqual_then_resultTrue) +{ + int i = 0; + ptr = &arr[i]; + ptr2 = &arr[i + 2]; + + EXPECT_TRUE(ptr <= ptr); + EXPECT_TRUE(ptr <= ptr2); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_PTRS, given_decreasingEntryPointers_when_compareForGreaterThan_then_resultTrue) +{ + int i = 0; + ptr = &arr[i]; + ptr2 = &arr[i + 2]; + + EXPECT_TRUE(ptr2 > ptr); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_PTRS, given_decreasingEntryPointers_when_compareForGreaterThanOrEqual_then_resultTrue) +{ + int i = 0; + ptr = &arr[i]; + ptr2 = &arr[i + 2]; + + EXPECT_TRUE(ptr >= ptr); + EXPECT_TRUE(ptr2 >= ptr); +} + +// const pointers + +TEST_F(ARRAY_DIMS_SCALAR_TEST_PTRS, given_entryConstPointer_when_postIncrement_then_ptrAdvancedAfterEval) +{ + arr[1] = VAL; + + cptr = &arr[0]; + SCALAR d = *cptr++; + + EXPECT_EQ(0, d); + EXPECT_EQ(VAL, *cptr); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_PTRS, given_entryConstPointer_when_postDecrement_then_ptrAdvancedAfterEval) +{ + arr[0] = VAL; + + cptr = &arr[1]; + SCALAR d = *cptr--; + + EXPECT_EQ(0, d); + EXPECT_EQ(VAL, *cptr); +} 
+ +TEST_F(ARRAY_DIMS_SCALAR_TEST_PTRS, when_preIncrementConstPointer_then_matchPointerOffsetFromBeginning) +{ + cptr = cptr2 = &arr[0]; + for (size_t i = 0; i != arr.size(); ++i, ++cptr) + EXPECT_TRUE(cptr == cptr2 + i); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_PTRS, when_preDecrementConstPointer_then_matchPointerOffsetFromEnd) +{ + cptr = cptr2 = &arr[arr.size() - 1]; + for (size_t i = 0; i != arr.size(); ++i, --cptr) + EXPECT_TRUE(cptr == cptr2 - i); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_PTRS, given_entryConstPointer_when_preIncrement_then_ptrAdvancedBeforeEval) +{ + arr[1] = VAL; + + cptr = &arr[0]; + + EXPECT_EQ(VAL, *++cptr); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_PTRS, given_entryConstPointer_when_preDecrement_then_ptrAdvancedBeforeEval) +{ + arr[0] = VAL; + + cptr = &arr[1]; + + EXPECT_EQ(VAL, *--cptr); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_PTRS, given_entryConstPointer_when_addToPointer_then_ptrAdvanced) +{ + arr[2] = VAL; + + cptr = &arr[0]; + cptr = cptr + 2; + + EXPECT_EQ(VAL, *cptr); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_PTRS, given_entryConstPointer_when_advanceUsingBrackets_then_returnsReferenceAtAdvancedLocation) +{ + arr[2] = VAL; + + cptr = &arr[0]; + + EXPECT_EQ(VAL, cptr[2]); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_PTRS, given_entryConstPointer_when_pointerPlusEquals_then_ptrAdvanced) +{ + arr[2] = VAL; + + cptr = &arr[0]; + cptr += 2; + + EXPECT_EQ(VAL, *cptr); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_PTRS, given_entryConstPointer_when_subtractFromPointer_then_ptrMovedBack) +{ + arr[0] = VAL; + + cptr = &arr[2]; + cptr = cptr - 2; + + EXPECT_EQ(VAL, *cptr); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_PTRS, given_entryConstPointer_when_pointerMinusEquals_then_ptrMovedBack) +{ + arr[0] = VAL; + + cptr = &arr[2]; + cptr -= 2; + + EXPECT_EQ(VAL, *cptr); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_PTRS, given_entryConstPointers_when_subtractPointers_then_resultIsEntryDifference) +{ + int i2 = 2; + int i = 0; + cptr2 = &arr[i2]; + cptr = &arr[i]; + + EXPECT_EQ(i2 - i, cptr2 - cptr); +} 
+TEST_F(ARRAY_DIMS_SCALAR_TEST_PTRS, given_sameEntryConstPointers_when_compareForEquality_then_resultTrue) +{ + int i = 0; + cptr = &arr[i] + 2; + cptr2 = &arr[i + 2]; + + EXPECT_TRUE(cptr == cptr2); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_PTRS, given_differentEntryConstPointers_when_compareForInequality_then_resultTrue) +{ + int i = 0; + cptr = &arr[i]; + cptr2 = &arr[i + 2]; + + EXPECT_TRUE(cptr != cptr2); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_PTRS, given_differentArrayConstPointers_when_compareForInequality_then_resultTrue) +{ + int i = 0; + cptr = &arr[i]; + cptr2 = &arr2[i]; + + EXPECT_TRUE(cptr != cptr2); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_PTRS, given_increasingEntryConstPointers_when_compareForLessThan_then_resultTrue) +{ + int i = 0; + cptr = &arr[i]; + cptr2 = &arr[i + 2]; + + EXPECT_TRUE(cptr < cptr2); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_PTRS, given_increasingEntryConstPointers_when_compareForLessThanOrEqual_then_resultTrue) +{ + int i = 0; + cptr = &arr[i]; + cptr2 = &arr[i + 2]; + + EXPECT_TRUE(cptr <= cptr); + EXPECT_TRUE(cptr <= cptr2); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_PTRS, given_decreasingEntryConstPointers_when_compareForGreaterThan_then_resultTrue) +{ + int i = 0; + cptr = &arr[i]; + cptr2 = &arr[i + 2]; + + EXPECT_TRUE(cptr2 > cptr); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_PTRS, given_decreasingEntryConstPointers_when_compareForGreaterThanOrEqual_then_resultTrue) +{ + int i = 0; + cptr = &arr[i]; + cptr2 = &arr[i + 2]; + + EXPECT_TRUE(cptr >= cptr); + EXPECT_TRUE(cptr2 >= cptr); +} + +int main(int argc, char* argv[]) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tests/array/array/testArrayRefsBase.cpp b/tests/array/array/testArrayRefsBase.cpp new file mode 100644 index 00000000..3d1ae5f5 --- /dev/null +++ b/tests/array/array/testArrayRefsBase.cpp @@ -0,0 +1,133 @@ +#include "gtest/gtest.h" +#include "utils/predicates.h" + +extern "C" { + #include "utils/zfpHash.h" +} + +// assumes macros ARRAY_DIMS_SCALAR_TEST, 
ARRAY_DIMS_SCALAR_TEST_REFS defined +class ARRAY_DIMS_SCALAR_TEST_REFS : public ARRAY_DIMS_SCALAR_TEST {}; + +const SCALAR VAL = (SCALAR) 4; + +TEST_F(ARRAY_DIMS_SCALAR_TEST_REFS, when_setEntryWithEquals_then_entrySetInCacheOnly) +{ + // compressed_data() automatically flushes cache, so call it before setting entries + void* bitstreamPtr = arr.compressed_data(); + size_t bitstreamLen = arr.compressed_size(); + uint64 initializedChecksum = hashBitstream((uint64*)bitstreamPtr, bitstreamLen); + + arr[0] = VAL; + uint64 checksum = hashBitstream((uint64*)bitstreamPtr, bitstreamLen); + + EXPECT_PRED_FORMAT2(ExpectEqPrintHexPred, initializedChecksum, checksum); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_REFS, given_dirtyCacheEntries_when_clearCache_then_cacheCleared) +{ + uint64 initializedChecksum = hashBitstream((uint64*)arr.compressed_data(), arr.compressed_size()); + arr[0] = VAL; + + arr.clear_cache(); + + uint64 checksum = hashBitstream((uint64*)arr.compressed_data(), arr.compressed_size()); + EXPECT_PRED_FORMAT2(ExpectEqPrintHexPred, initializedChecksum, checksum); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_REFS, given_setEntryWithEquals_when_flushCache_then_entryWrittenToBitstream) +{ + uint64 initializedChecksum = hashBitstream((uint64*)arr.compressed_data(), arr.compressed_size()); + + arr[0] = VAL; + uint64 checksum = hashBitstream((uint64*)arr.compressed_data(), arr.compressed_size()); + + EXPECT_PRED_FORMAT2(ExpectNeqPrintHexPred, initializedChecksum, checksum); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_REFS, when_setCacheSize_then_properlySet) +{ + size_t oldSize = arr.cache_size(); + + size_t newSize = oldSize * 2; + arr.set_cache_size(newSize); + + EXPECT_EQ(newSize, arr.cache_size()); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_REFS, when_getIndexWithBrackets_then_refReturned) +{ + size_t i = 0; + arr[i] = VAL; + + EXPECT_EQ(VAL, arr[i]); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_REFS, when_setEntryWithAnotherEntryValue_then_valueSet) +{ + size_t i = 0, i2 = 1; + arr[i] = VAL; + + arr[i2] 
= arr[i]; + + EXPECT_EQ(VAL, arr[i2]); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_REFS, when_plusEqualsOnEntry_then_valueSet) +{ + size_t i = 0, i2 = 1; + arr[i] = VAL; + arr[i2] = VAL; + + arr[i2] += arr[i]; + + EXPECT_EQ(2 * VAL, arr[i2]); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_REFS, when_minusEqualsOnEntry_then_valueSet) +{ + size_t i = 0, i2 = 1; + arr[i] = VAL; + arr[i2] = VAL; + + arr[i2] -= arr[i]; + + EXPECT_EQ(0, arr[i2]); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_REFS, when_timesEqualsOnEntry_then_valueSet) +{ + size_t i = 0, i2 = 1; + arr[i] = VAL; + arr[i2] = VAL; + + arr[i2] *= arr[i]; + + EXPECT_EQ(VAL * VAL, arr[i2]); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_REFS, when_divideEqualsOnEntry_then_valueSet) +{ + size_t i = 0, i2 = 1; + arr[i] = VAL; + arr[i2] = VAL; + + arr[i2] /= arr[i]; + + EXPECT_EQ(1, arr[i2]); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_REFS, when_swapTwoEntries_then_valuesSwapped) +{ + size_t i = 0, i2 = 1; + arr[i] = VAL; + + swap(arr[i], arr[i2]); + + EXPECT_EQ(0, arr[i]); + EXPECT_EQ(VAL, arr[i2]); +} + +int main(int argc, char* argv[]) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tests/array/array/testArrayViewItersBase.cpp b/tests/array/array/testArrayViewItersBase.cpp new file mode 100644 index 00000000..02de1a19 --- /dev/null +++ b/tests/array/array/testArrayViewItersBase.cpp @@ -0,0 +1,98 @@ +#include "gtest/gtest.h" +#include "utils/predicates.h" + +// assumes macros ARRAY_DIMS_SCALAR_TEST, ARRAY_DIMS_SCALAR_TEST_VIEW_ITERS defined +class ARRAY_DIMS_SCALAR_TEST_VIEW_ITERS : public ARRAY_DIMS_SCALAR_TEST {}; + +// views + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEW_ITERS, when_preIncrementInterator_then_matchIteratorOffsetFromBeginning) +{ +#if DIMS == 1 + ZFP_ARRAY_TYPE::view v(&arr, offset, viewLen); +#elif DIMS == 2 + ZFP_ARRAY_TYPE::view v(&arr, offsetX, offsetY, viewLenX, viewLenY); +#elif DIMS == 3 + ZFP_ARRAY_TYPE::view v(&arr, offsetX, offsetY, offsetZ, viewLenX, viewLenY, viewLenZ); +#elif DIMS == 4 + 
ZFP_ARRAY_TYPE::view v(&arr, offsetX, offsetY, offsetZ, offsetW, viewLenX, viewLenY, viewLenZ, viewLenW); +#endif + + ZFP_ARRAY_TYPE::view::iterator iter = v.begin(); + ZFP_ARRAY_TYPE::view::iterator iter2 = iter; + + for (size_t i = 0; iter != v.end(); ++iter, ++i) + EXPECT_TRUE(iter == iter2 + i); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEW_ITERS, when_preDecrementInterator_then_matchIteratorOffsetFromEnd) +{ +#if DIMS == 1 + ZFP_ARRAY_TYPE::view v(&arr, offset, viewLen); +#elif DIMS == 2 + ZFP_ARRAY_TYPE::view v(&arr, offsetX, offsetY, viewLenX, viewLenY); +#elif DIMS == 3 + ZFP_ARRAY_TYPE::view v(&arr, offsetX, offsetY, offsetZ, viewLenX, viewLenY, viewLenZ); +#elif DIMS == 4 + ZFP_ARRAY_TYPE::view v(&arr, offsetX, offsetY, offsetZ, offsetW, viewLenX, viewLenY, viewLenZ, viewLenW); +#endif + + ZFP_ARRAY_TYPE::view::iterator iter = v.end(); + ZFP_ARRAY_TYPE::view::iterator iter2 = iter; + + ptrdiff_t i = 0; + do { + --iter; + --i; + EXPECT_TRUE(iter == iter2 + i); + } while (iter != v.begin()); +} + +// const views + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEW_ITERS, when_preIncrementConstInterator_then_matchIteratorOffsetFromBeginning) +{ +#if DIMS == 1 + ZFP_ARRAY_TYPE::const_view v(&arr, offset, viewLen); +#elif DIMS == 2 + ZFP_ARRAY_TYPE::const_view v(&arr, offsetX, offsetY, viewLenX, viewLenY); +#elif DIMS == 3 + ZFP_ARRAY_TYPE::const_view v(&arr, offsetX, offsetY, offsetZ, viewLenX, viewLenY, viewLenZ); +#elif DIMS == 4 + ZFP_ARRAY_TYPE::const_view v(&arr, offsetX, offsetY, offsetZ, offsetW, viewLenX, viewLenY, viewLenZ, viewLenW); +#endif + + ZFP_ARRAY_TYPE::const_view::const_iterator iter = v.begin(); + ZFP_ARRAY_TYPE::const_view::const_iterator iter2 = iter; + + for (size_t i = 0; iter != v.end(); ++iter, ++i) + EXPECT_TRUE(iter == iter2 + i); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEW_ITERS, when_preDecrementConstInterator_then_matchIteratorOffsetFromEnd) +{ +#if DIMS == 1 + ZFP_ARRAY_TYPE::const_view v(&arr, offset, viewLen); +#elif DIMS == 2 + 
ZFP_ARRAY_TYPE::const_view v(&arr, offsetX, offsetY, viewLenX, viewLenY); +#elif DIMS == 3 + ZFP_ARRAY_TYPE::const_view v(&arr, offsetX, offsetY, offsetZ, viewLenX, viewLenY, viewLenZ); +#elif DIMS == 4 + ZFP_ARRAY_TYPE::const_view v(&arr, offsetX, offsetY, offsetZ, offsetW, viewLenX, viewLenY, viewLenZ, viewLenW); +#endif + + ZFP_ARRAY_TYPE::const_view::const_iterator iter = v.end(); + ZFP_ARRAY_TYPE::const_view::const_iterator iter2 = iter; + + ptrdiff_t i = 0; + do { + --iter; + --i; + EXPECT_TRUE(iter == iter2 + i); + } while (iter != v.begin()); +} + +int main(int argc, char* argv[]) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tests/array/array/testArrayViewPtrsBase.cpp b/tests/array/array/testArrayViewPtrsBase.cpp new file mode 100644 index 00000000..c4d602a2 --- /dev/null +++ b/tests/array/array/testArrayViewPtrsBase.cpp @@ -0,0 +1,102 @@ +#include "gtest/gtest.h" +#include "utils/predicates.h" + +// assumes macros ARRAY_DIMS_SCALAR_TEST, ARRAY_DIMS_SCALAR_TEST_VIEW_PTRS defined +class ARRAY_DIMS_SCALAR_TEST_VIEW_PTRS : public ARRAY_DIMS_SCALAR_TEST {}; + +// views + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEW_PTRS, when_preIncrementInterator_then_matchPointerOffsetFromBeginning) +{ +#if DIMS == 1 + ZFP_ARRAY_TYPE::view v(&arr, offset, viewLen); + ZFP_ARRAY_TYPE::view::pointer ptr = &v(0); +#elif DIMS == 2 + ZFP_ARRAY_TYPE::view v(&arr, offsetX, offsetY, viewLenX, viewLenY); + ZFP_ARRAY_TYPE::view::pointer ptr = &v(0, 0); +#elif DIMS == 3 + ZFP_ARRAY_TYPE::view v(&arr, offsetX, offsetY, offsetZ, viewLenX, viewLenY, viewLenZ); + ZFP_ARRAY_TYPE::view::pointer ptr = &v(0, 0, 0); +#elif DIMS == 4 + ZFP_ARRAY_TYPE::view v(&arr, offsetX, offsetY, offsetZ, offsetW, viewLenX, viewLenY, viewLenZ, viewLenW); + ZFP_ARRAY_TYPE::view::pointer ptr = &v(0, 0, 0, 0); +#endif + + ZFP_ARRAY_TYPE::view::pointer ptr2 = ptr; + + for (size_t i = 0; i != v.size(); ++i, ++ptr) + EXPECT_TRUE(ptr == ptr2 + i); +} + 
+TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEW_PTRS, when_preDecrementInterator_then_matchPointerOffsetFromEnd) +{ +#if DIMS == 1 + ZFP_ARRAY_TYPE::view v(&arr, offset, viewLen); + ZFP_ARRAY_TYPE::view::pointer ptr = &v(viewLen - 1); +#elif DIMS == 2 + ZFP_ARRAY_TYPE::view v(&arr, offsetX, offsetY, viewLenX, viewLenY); + ZFP_ARRAY_TYPE::view::pointer ptr = &v(viewLenX - 1, viewLenY - 1); +#elif DIMS == 3 + ZFP_ARRAY_TYPE::view v(&arr, offsetX, offsetY, offsetZ, viewLenX, viewLenY, viewLenZ); + ZFP_ARRAY_TYPE::view::pointer ptr = &v(viewLenX - 1, viewLenY - 1, viewLenZ - 1); +#elif DIMS == 4 + ZFP_ARRAY_TYPE::view v(&arr, offsetX, offsetY, offsetZ, offsetW, viewLenX, viewLenY, viewLenZ, viewLenW); + ZFP_ARRAY_TYPE::view::pointer ptr = &v(viewLenX - 1, viewLenY - 1, viewLenZ - 1, viewLenW - 1); +#endif + + ZFP_ARRAY_TYPE::view::pointer ptr2 = ptr; + + for (size_t i = 0; i != v.size(); ++i, --ptr) + EXPECT_TRUE(ptr == ptr2 - i); +} + +// const views + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEW_PTRS, when_preIncrementConstInterator_then_matchPointerOffsetFromBeginning) +{ +#if DIMS == 1 + ZFP_ARRAY_TYPE::const_view v(&arr, offset, viewLen); + ZFP_ARRAY_TYPE::const_view::const_pointer ptr = &v(0); +#elif DIMS == 2 + ZFP_ARRAY_TYPE::const_view v(&arr, offsetX, offsetY, viewLenX, viewLenY); + ZFP_ARRAY_TYPE::const_view::const_pointer ptr = &v(0, 0); +#elif DIMS == 3 + ZFP_ARRAY_TYPE::const_view v(&arr, offsetX, offsetY, offsetZ, viewLenX, viewLenY, viewLenZ); + ZFP_ARRAY_TYPE::const_view::const_pointer ptr = &v(0, 0, 0); +#elif DIMS == 4 + ZFP_ARRAY_TYPE::const_view v(&arr, offsetX, offsetY, offsetZ, offsetW, viewLenX, viewLenY, viewLenZ, viewLenW); + ZFP_ARRAY_TYPE::const_view::const_pointer ptr = &v(0, 0, 0, 0); +#endif + + ZFP_ARRAY_TYPE::const_view::const_pointer ptr2 = ptr; + + for (size_t i = 0; i != v.size(); ++i, ++ptr) + EXPECT_TRUE(ptr == ptr2 + i); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEW_PTRS, when_preDecrementConstInterator_then_matchPointerOffsetFromEnd) +{ +#if DIMS == 1 + 
ZFP_ARRAY_TYPE::const_view v(&arr, offset, viewLen); + ZFP_ARRAY_TYPE::const_view::const_pointer ptr = &v(viewLen - 1); +#elif DIMS == 2 + ZFP_ARRAY_TYPE::const_view v(&arr, offsetX, offsetY, viewLenX, viewLenY); + ZFP_ARRAY_TYPE::const_view::const_pointer ptr = &v(viewLenX - 1, viewLenY - 1); +#elif DIMS == 3 + ZFP_ARRAY_TYPE::const_view v(&arr, offsetX, offsetY, offsetZ, viewLenX, viewLenY, viewLenZ); + ZFP_ARRAY_TYPE::const_view::const_pointer ptr = &v(viewLenX - 1, viewLenY - 1, viewLenZ - 1); +#elif DIMS == 4 + ZFP_ARRAY_TYPE::const_view v(&arr, offsetX, offsetY, offsetZ, offsetW, viewLenX, viewLenY, viewLenZ, viewLenW); + ZFP_ARRAY_TYPE::const_view::const_pointer ptr = &v(viewLenX - 1, viewLenY - 1, viewLenZ - 1, viewLenW - 1); +#endif + + ZFP_ARRAY_TYPE::const_view::const_pointer ptr2 = ptr; + + for (size_t i = 0; i != v.size(); ++i, --ptr) + EXPECT_TRUE(ptr == ptr2 - i); +} + +int main(int argc, char* argv[]) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tests/array/array/testArrayViewsBase.cpp b/tests/array/array/testArrayViewsBase.cpp new file mode 100644 index 00000000..ef31b7c3 --- /dev/null +++ b/tests/array/array/testArrayViewsBase.cpp @@ -0,0 +1,598 @@ +#include "gtest/gtest.h" +#include "utils/predicates.h" + +// assumes macros ARRAY_DIMS_SCALAR_TEST, ARRAY_DIMS_SCALAR_TEST_VIEWS defined +class ARRAY_DIMS_SCALAR_TEST_VIEWS : public ARRAY_DIMS_SCALAR_TEST {}; + +/* preview, through const_view */ + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, given_constView_when_rate_then_rateReturned) +{ + ZFP_ARRAY_TYPE::const_view v(&arr); + EXPECT_EQ(arr.rate(), v.rate()); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, when_previewMinConstructor_then_spansEntireArray) +{ + ZFP_ARRAY_TYPE::const_view v(&arr); + + EXPECT_EQ(arr.size(), v.size()); + + EXPECT_EQ(arr.size_x(), v.size_x()); + EXPECT_EQ(0, v.global_x(0)); + +#if DIMS >= 2 + EXPECT_EQ(arr.size_y(), v.size_y()); + EXPECT_EQ(0, v.global_y(0)); +#endif + +#if DIMS >= 3 + 
EXPECT_EQ(arr.size_z(), v.size_z()); + EXPECT_EQ(0, v.global_z(0)); +#endif + +#if DIMS >= 4 + EXPECT_EQ(arr.size_w(), v.size_w()); + EXPECT_EQ(0, v.global_w(0)); +#endif +} + +/* const_view */ + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, when_constViewMinConstructor_then_spansEntireArray) +{ + ZFP_ARRAY_TYPE::const_view v(&arr); + + EXPECT_EQ(arr.size(), v.size()); + + EXPECT_EQ(arr.size_x(), v.size_x()); + EXPECT_EQ(0, v.global_x(0)); + +#if DIMS >= 2 + EXPECT_EQ(arr.size_y(), v.size_y()); + EXPECT_EQ(0, v.global_y(0)); +#endif + +#if DIMS >= 3 + EXPECT_EQ(arr.size_z(), v.size_z()); + EXPECT_EQ(0, v.global_z(0)); +#endif + +#if DIMS >= 4 + EXPECT_EQ(arr.size_w(), v.size_w()); + EXPECT_EQ(0, v.global_w(0)); +#endif +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, when_constViewMinConstructor_then_isShallowCopyOfCompressedArray) +{ + ZFP_ARRAY_TYPE::const_view v(&arr); + size_t i = 0; + SCALAR val; + size_t arrOffset = i; +#if DIMS >= 2 + size_t j = 0; + arrOffset += j*arr.size_x(); +#endif +#if DIMS >= 3 + size_t k = 0; + arrOffset += k*arr.size_x()*arr.size_y(); +#endif +#if DIMS >= 4 + size_t l = 0; + arrOffset += l*arr.size_x()*arr.size_y()*arr.size_z(); +#endif + +#if DIMS == 1 + val = v(i); +#elif DIMS == 2 + val = v(i, j); +#elif DIMS == 3 + val = v(i, j, k); +#elif DIMS == 4 + val = v(i, j, k, l); +#endif + + SCALAR oldVal = arr[arrOffset]; + EXPECT_EQ(oldVal, val); + + arr[arrOffset] += 1; + SCALAR newVal = arr[arrOffset]; + EXPECT_NE(oldVal, newVal); + +#if DIMS == 1 + val = v(i); +#elif DIMS == 2 + val = v(i, j); +#elif DIMS == 3 + val = v(i, j, k); +#elif DIMS == 4 + val = v(i, j, k, l); +#endif + + EXPECT_EQ(newVal, val); +} + +/* view */ + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, when_viewMinConstructor_then_spansEntireArray) +{ + ZFP_ARRAY_TYPE::view v(&arr); + + EXPECT_EQ(arr.size(), v.size()); + + EXPECT_EQ(arr.size_x(), v.size_x()); + EXPECT_EQ(0, v.global_x(0)); + +#if DIMS >= 2 + EXPECT_EQ(arr.size_y(), v.size_y()); + EXPECT_EQ(0, v.global_y(0)); +#endif + +#if 
DIMS >= 3 + EXPECT_EQ(arr.size_z(), v.size_z()); + EXPECT_EQ(0, v.global_z(0)); +#endif + +#if DIMS >= 4 + EXPECT_EQ(arr.size_w(), v.size_w()); + EXPECT_EQ(0, v.global_w(0)); +#endif +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, when_viewMinConstructor_then_isShallowCopyOfCompressedArray) +{ + ZFP_ARRAY_TYPE::view v(&arr); + size_t i = 0; + SCALAR val; + size_t arrOffset = i; + +#if DIMS >= 2 + size_t j = 0; + arrOffset += j*arr.size_x(); +#endif +#if DIMS >= 3 + size_t k = 0; + arrOffset += k*arr.size_x()*arr.size_y(); +#endif +#if DIMS >= 4 + size_t l = 0; + arrOffset += l*arr.size_x()*arr.size_y()*arr.size_z(); +#endif + +#if DIMS == 1 + val = v(i); +#elif DIMS == 2 + val = v(i, j); +#elif DIMS == 3 + val = v(i, j, k); +#elif DIMS == 4 + val = v(i, j, k, l); +#endif + + SCALAR oldVal = arr[arrOffset]; + EXPECT_EQ(oldVal, val); + + arr[arrOffset] += 1; + SCALAR newVal = arr[arrOffset]; + EXPECT_NE(oldVal, newVal); + +#if DIMS == 1 + val = v(i); +#elif DIMS == 2 + val = v(i, j); +#elif DIMS == 3 + val = v(i, j, k); +#elif DIMS == 4 + val = v(i, j, k, l); +#endif + EXPECT_EQ(newVal, val); +} + +#if DIMS >= 2 +/* flat_view (only in 2D, 3D, 4D) */ + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, when_flatViewMinConstructor_then_spansEntireArray) +{ + ZFP_ARRAY_TYPE::flat_view v(&arr); + + EXPECT_EQ(arr.size(), v.size()); + + EXPECT_EQ(arr.size_x(), v.size_x()); + EXPECT_EQ(0, v.global_x(0)); + +#if DIMS >= 2 + EXPECT_EQ(arr.size_y(), v.size_y()); + EXPECT_EQ(0, v.global_y(0)); +#endif + +#if DIMS >= 3 + EXPECT_EQ(arr.size_z(), v.size_z()); + EXPECT_EQ(0, v.global_z(0)); +#endif + +#if DIMS >= 4 + EXPECT_EQ(arr.size_w(), v.size_w()); + EXPECT_EQ(0, v.global_w(0)); +#endif +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, when_flatViewMinConstructor_then_isShallowCopyOfCompressedArray) +{ + ZFP_ARRAY_TYPE::flat_view v(&arr); + size_t i = 0; + SCALAR val; + size_t arrOffset = i; + +#if DIMS >= 2 + size_t j = 0; + arrOffset += j*arr.size_x(); +#endif +#if DIMS >= 3 + size_t k = 0; + 
arrOffset += k*arr.size_x()*arr.size_y(); +#endif +#if DIMS >= 4 + size_t l = 0; + arrOffset += l*arr.size_x()*arr.size_y()*arr.size_z(); +#endif + +#if DIMS == 1 + val = v(i); +#elif DIMS == 2 + val = v(i, j); +#elif DIMS == 3 + val = v(i, j, k); +#elif DIMS == 4 + val = v(i, j, k, l); +#endif + + SCALAR oldVal = arr[arrOffset]; + EXPECT_EQ(oldVal, val); + + arr[arrOffset] += 1; + SCALAR newVal = arr[arrOffset]; + EXPECT_NE(oldVal, newVal); + +#if DIMS == 1 + val = v(i); +#elif DIMS == 2 + val = v(i, j); +#elif DIMS == 3 + val = v(i, j, k); +#elif DIMS == 4 + val = v(i, j, k, l); +#endif + EXPECT_EQ(newVal, val); +} + +/* nested_view (only in 2D, 3D, 4D) */ + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, when_nestedViewMinConstructor_then_spansEntireArray) +{ + ZFP_ARRAY_TYPE::nested_view v(&arr); + + EXPECT_EQ(arr.size(), v.size()); + + EXPECT_EQ(arr.size_x(), v.size_x()); + EXPECT_EQ(0, v.global_x(0)); + + EXPECT_EQ(arr.size_y(), v.size_y()); + EXPECT_EQ(0, v.global_y(0)); + +#if DIMS >= 3 + EXPECT_EQ(arr.size_z(), v.size_z()); + EXPECT_EQ(0, v.global_z(0)); +#endif + +#if DIMS >= 4 + EXPECT_EQ(arr.size_w(), v.size_w()); + EXPECT_EQ(0, v.global_w(0)); +#endif +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, when_nestedViewMinConstructor_then_isShallowCopyOfCompressedArray) +{ + ZFP_ARRAY_TYPE::nested_view v(&arr); + size_t i = 0; + SCALAR val; + size_t arrOffset = i; + + size_t j = 0; + arrOffset += j*arr.size_x(); +#if DIMS >= 3 + size_t k = 0; + arrOffset += k*arr.size_x()*arr.size_y(); +#endif +#if DIMS >= 4 + size_t l = 0; + arrOffset += l*arr.size_x()*arr.size_y()*arr.size_z(); +#endif + +#if DIMS == 2 + val = v(i, j); +#elif DIMS == 3 + val = v(i, j, k); +#elif DIMS == 4 + val = v(i, j, k, l); +#endif + + SCALAR oldVal = arr[arrOffset]; + EXPECT_EQ(oldVal, val); + + arr[arrOffset] += 1; + SCALAR newVal = arr[arrOffset]; + EXPECT_NE(oldVal, newVal); + +#if DIMS == 2 + val = v(i, j); +#elif DIMS == 3 + val = v(i, j, k); +#elif DIMS == 4 + val = v(i, j, k, l); +#endif + 
EXPECT_EQ(newVal, val); +} +#endif + +/* private_const_view */ + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, when_privateConstViewMinConstructor_then_spansEntireArray) +{ + ZFP_ARRAY_TYPE::private_const_view v(&arr); + + EXPECT_EQ(v.size(), arr.size()); + + EXPECT_EQ(v.size_x(), arr.size_x()); + EXPECT_EQ(v.global_x(0), 0); + +#if DIMS >= 2 + EXPECT_EQ(v.size_y(), arr.size_y()); + EXPECT_EQ(0, v.global_y(0)); +#endif + +#if DIMS >= 3 + EXPECT_EQ(v.size_z(), arr.size_z()); + EXPECT_EQ(0, v.global_z(0)); +#endif + +#if DIMS >= 4 + EXPECT_EQ(v.size_w(), arr.size_w()); + EXPECT_EQ(0, v.global_w(0)); +#endif +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, when_privateConstViewMinConstructor_then_cacheSizeEqualToArrayCacheSize) +{ + arr.set_cache_size(999); + size_t cacheSize = arr.cache_size(); + + ZFP_ARRAY_TYPE::private_const_view v(&arr); + EXPECT_EQ(cacheSize, v.cache_size()); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, when_privateConstViewFullConstructor_then_cacheSizeEqualToArrayCacheSize) +{ + arr.set_cache_size(999); + size_t cacheSize = arr.cache_size(); + + size_t offsetX = 5, viewLenX = 3; + EXPECT_LT(offsetX + viewLenX, arr.size_x()); +#if DIMS >= 2 + size_t offsetY = 1, viewLenY = 3; + EXPECT_LT(offsetY + viewLenY, arr.size_y()); +#endif +#if DIMS >= 3 + size_t offsetZ = 0, viewLenZ = 2; + EXPECT_LT(offsetZ + viewLenZ, arr.size_z()); +#endif +#if DIMS >= 4 + size_t offsetW = 1, viewLenW = 4; + EXPECT_LT(offsetW + viewLenW, arr.size_w()); +#endif + +#if DIMS == 1 + ZFP_ARRAY_TYPE::private_const_view v(&arr, offsetX, viewLenX); +#elif DIMS == 2 + ZFP_ARRAY_TYPE::private_const_view v(&arr, offsetX, offsetY, viewLenX, viewLenY); +#elif DIMS == 3 + ZFP_ARRAY_TYPE::private_const_view v(&arr, offsetX, offsetY, offsetZ, viewLenX, viewLenY, viewLenZ); +#elif DIMS == 4 + ZFP_ARRAY_TYPE::private_const_view v(&arr, offsetX, offsetY, offsetZ, offsetW, viewLenX, viewLenY, viewLenZ, viewLenW); +#endif + + EXPECT_EQ(cacheSize, v.cache_size()); +} + 
+TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, given_privateConstView_when_setCacheSize_then_isSet) +{ + arr.set_cache_size(4096); + ZFP_ARRAY_TYPE::private_const_view v(&arr); + size_t cacheSize = v.cache_size(); + + v.set_cache_size(cacheSize / 2); + EXPECT_NE(cacheSize, v.cache_size()); +} + +/* this also verifies underlying array is shallow copy */ +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, given_privateConstViewWithDirtyCache_when_clearCache_then_entriesCleared) +{ + SCALAR val = 3.3; + size_t i = 2; + size_t arrOffset = i; + +#if DIMS >= 2 + size_t j = 1; + arrOffset += j*arr.size_x(); +#endif +#if DIMS >= 3 + size_t k = 1; + arrOffset += k*arr.size_x()*arr.size_y(); +#endif +#if DIMS >= 4 + size_t l = 1; + arrOffset += l*arr.size_x()*arr.size_y()*arr.size_z(); +#endif + + arr[arrOffset] = val; + arr.flush_cache(); + + /* has its own cache */ + ZFP_ARRAY_TYPE::private_const_view v(&arr); + +#if DIMS == 1 + val = v(i); +#elif DIMS == 2 + val = v(i, j); +#elif DIMS == 3 + val = v(i, j, k); +#elif DIMS == 4 + val = v(i, j, k, l); +#endif + EXPECT_EQ(arr[arrOffset], val); + + /* accessing v() fetched block into view-cache */ + arr[arrOffset] = 0; + arr.flush_cache(); + /* block already in view-cache, not fetched from mem */ +#if DIMS == 1 + val = v(i); +#elif DIMS == 2 + val = v(i, j); +#elif DIMS == 3 + val = v(i, j, k); +#elif DIMS == 4 + val = v(i, j, k, l); +#endif + EXPECT_NE(arr[arrOffset], val); + + /* re-loading the block has updated value */ + v.clear_cache(); +#if DIMS == 1 + val = v(i); +#elif DIMS == 2 + val = v(i, j); +#elif DIMS == 3 + val = v(i, j, k); +#elif DIMS == 4 + val = v(i, j, k, l); +#endif + EXPECT_EQ(arr[arrOffset], val); +} + +/* private_view */ + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, when_privateViewMinConstructor_then_spansEntireArray) +{ + ZFP_ARRAY_TYPE::private_view v(&arr); + + EXPECT_EQ(v.size(), arr.size()); + + EXPECT_EQ(v.size_x(), arr.size_x()); + EXPECT_EQ(0, v.global_x(0)); + +#if DIMS >= 2 + EXPECT_EQ(v.size_y(), arr.size_y()); + 
EXPECT_EQ(0, v.global_y(0)); +#endif + +#if DIMS >= 3 + EXPECT_EQ(v.size_z(), arr.size_z()); + EXPECT_EQ(0, v.global_z(0)); +#endif + +#if DIMS >= 4 + EXPECT_EQ(v.size_w(), arr.size_w()); + EXPECT_EQ(0, v.global_w(0)); +#endif +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, when_privateViewMinConstructor_then_cacheSizeEqualToArrayCacheSize) +{ + arr.set_cache_size(999); + size_t cacheSize = arr.cache_size(); + + ZFP_ARRAY_TYPE::private_view v(&arr); + EXPECT_EQ(cacheSize, v.cache_size()); +} + +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, when_privateViewFullConstructor_then_cacheSizeEqualToArrayCacheSize) +{ + arr.set_cache_size(999); + size_t cacheSize = arr.cache_size(); + + size_t offsetX = 5, viewLenX = 3; + EXPECT_LT(offsetX + viewLenX, arr.size_x()); +#if DIMS >= 2 + size_t offsetY = 1, viewLenY = 3; + EXPECT_LT(offsetY + viewLenY, arr.size_y()); +#endif +#if DIMS >= 3 + size_t offsetZ = 0, viewLenZ = 2; + EXPECT_LT(offsetZ + viewLenZ, arr.size_z()); +#endif +#if DIMS >= 4 + size_t offsetW = 1, viewLenW = 4; + EXPECT_LT(offsetW + viewLenW, arr.size_w()); +#endif + +#if DIMS == 1 + ZFP_ARRAY_TYPE::private_view v(&arr, offsetX, viewLenX); +#elif DIMS == 2 + ZFP_ARRAY_TYPE::private_view v(&arr, offsetX, offsetY, viewLenX, viewLenY); +#elif DIMS == 3 + ZFP_ARRAY_TYPE::private_view v(&arr, offsetX, offsetY, offsetZ, viewLenX, viewLenY, viewLenZ); +#elif DIMS == 4 + ZFP_ARRAY_TYPE::private_view v(&arr, offsetX, offsetY, offsetZ, offsetW, viewLenX, viewLenY, viewLenZ, viewLenW); +#endif + + EXPECT_EQ(cacheSize, v.cache_size()); +} + +/* this also verifies underlying array is shallow copy */ +TEST_F(ARRAY_DIMS_SCALAR_TEST_VIEWS, given_privateViewWithDirtyCache_when_flushCache_thenValuesPersistedToArray) +{ + SCALAR val = 5.5; + const size_t i = 3; + size_t arrOffset = i; + +#if DIMS >= 2 + size_t j = 1; + arrOffset += j*arr.size_x(); +#endif +#if DIMS >= 3 + size_t k = 1; + arrOffset += k*arr.size_x()*arr.size_y(); +#endif +#if DIMS >= 4 + size_t l = 1; + arrOffset += 
l*arr.size_x()*arr.size_y()*arr.size_z(); +#endif + + /* has its own cache */ + ZFP_ARRAY_TYPE::private_view v(&arr); + +#if DIMS == 1 + v(i) = val; +#elif DIMS == 2 + v(i, j) = val; +#elif DIMS == 3 + v(i, j, k) = val; +#elif DIMS == 4 + v(i, j, k, l) = val; +#endif + EXPECT_NE(val, arr[arrOffset]); + + /* setting and accessing v() and arr[] fetched blocks into both caches */ + v.flush_cache(); + EXPECT_NE(val, arr[arrOffset]); + + /* force arr to re-decode block from mem */ + arr.clear_cache(); + EXPECT_EQ(val, arr[arrOffset]); +} + +int main(int argc, char* argv[]) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tests/array/array/testConstruct.cpp b/tests/array/array/testConstruct.cpp new file mode 100644 index 00000000..acf153f7 --- /dev/null +++ b/tests/array/array/testConstruct.cpp @@ -0,0 +1,140 @@ +#include "zfp/array2.hpp" +#include "zfp/array3.hpp" +#include "zfp/factory.hpp" +using namespace zfp; + +#include "gtest/gtest.h" +#include "utils/gtestTestEnv.h" +#include "utils/gtestCApiTest.h" +#define TEST_FIXTURE ZfpArrayConstructTest + +TestEnv* const testEnv = new TestEnv; + +// this file tests exceptions thrown from zfp::array::construct() that cannot be +// generalized and run on every {1/2/3 x f/d} combination, or need not be run +// multiple times + +void FailWhenNoExceptionThrown() +{ + FAIL() << "No exception was thrown when one was expected"; +} + +void FailAndPrintException(std::exception const & e) +{ + FAIL() << "Unexpected exception thrown: " << typeid(e).name() << std::endl << "With message: " << e.what(); +} + +TEST_F(TEST_FIXTURE, given_zfpHeaderForIntegerData_when_construct_expect_zfpArrayHeaderExceptionThrown) +{ + zfp_type zfpType = zfp_type_int32; + + zfp_stream_set_rate(stream, 16, zfpType, 2, zfp_true); + + zfp_field_set_type(field, zfpType); + zfp_field_set_size_2d(field, 12, 12); + + // write header to buffer with C API + zfp_stream_rewind(stream); + EXPECT_EQ(ZFP_HEADER_SIZE_BITS, 
zfp_write_header(stream, field, ZFP_HEADER_FULL)); + zfp_stream_flush(stream); + + zfp::codec::zfp2<double>::header h(buffer); + + try { + zfp::array* arr = zfp::array::construct(h); + FailWhenNoExceptionThrown(); + } catch (zfp::exception const & e) { + EXPECT_EQ(e.what(), std::string("zfp scalar type not supported")); + } catch (std::exception const & e) { + FailAndPrintException(e); + } +} + +TEST_F(TEST_FIXTURE, given_onlyInclude2D3D_and_zfpHeaderFor1D_when_construct_expect_zfpArrayHeaderExceptionThrown) +{ + zfp_type zfpType = zfp_type_float; + + zfp_stream_set_rate(stream, 12, zfpType, 1, zfp_true); + + zfp_field_set_type(field, zfpType); + zfp_field_set_size_1d(field, 12); + + // write header to buffer with C API + zfp_stream_rewind(stream); + EXPECT_EQ(ZFP_HEADER_SIZE_BITS, zfp_write_header(stream, field, ZFP_HEADER_FULL)); + zfp_stream_flush(stream); + + zfp::codec::zfp1<float>::header h(buffer); + + try { + zfp::array* arr = zfp::array::construct(h); + FailWhenNoExceptionThrown(); + } catch (zfp::exception const & e) { + EXPECT_EQ(e.what(), std::string("array1 not supported; include zfp/array1.hpp before zfp/factory.hpp")); + } catch (std::exception const & e) { + FailAndPrintException(e); + } +} + +TEST_F(TEST_FIXTURE, given_validHeaderBuffer_withBufferSizeTooLow_when_construct_expect_zfpArrayHeaderExceptionThrown) +{ + zfp::array3d arr(12, 12, 12, 32); + + zfp::array3d::header h(arr); + + try { + zfp::array* arr2 = zfp::array::construct(h, arr.compressed_data(), 1); + FailWhenNoExceptionThrown(); + } catch (zfp::exception const & e) { + EXPECT_EQ(e.what(), std::string("zfp buffer size is smaller than required")); + } catch (std::exception const & e) { + FailAndPrintException(e); + } +} + +TEST_F(TEST_FIXTURE, given_compressedArrayWithLongHeader_when_writeHeader_expect_zfpArrayHeaderExceptionThrown) +{ + zfp::array3d arr(12, 12, 12, 33); + + try { + zfp::array3d::header h(arr); + FailWhenNoExceptionThrown(); + } catch (zfp::exception const & e) { + EXPECT_EQ(e.what(), 
std::string("zfp serialization supports only short headers")); + } catch (std::exception const & e) { + FailAndPrintException(e); + } +} + +TEST_F(TEST_FIXTURE, when_headerFrom2DArray_expect_MatchingMetadata) +{ + zfp::array2d arr(7, 8, 32); + + zfp::array2d::header h(arr); + + EXPECT_EQ(h.scalar_type(), arr.scalar_type()); + EXPECT_EQ(h.rate(), arr.rate()); + EXPECT_EQ(h.dimensionality(), arr.dimensionality()); + EXPECT_EQ(h.size_x(), arr.size_x()); + EXPECT_EQ(h.size_y(), arr.size_y()); +} + +TEST_F(TEST_FIXTURE, when_headerFrom3DArray_expect_MatchingMetadata) +{ + zfp::array3d arr(7, 8, 9, 32); + + zfp::array3d::header h(arr); + + EXPECT_EQ(h.scalar_type(), arr.scalar_type()); + EXPECT_EQ(h.rate(), arr.rate()); + EXPECT_EQ(h.dimensionality(), arr.dimensionality()); + EXPECT_EQ(h.size_x(), arr.size_x()); + EXPECT_EQ(h.size_y(), arr.size_y()); + EXPECT_EQ(h.size_z(), arr.size_z()); +} + +int main(int argc, char* argv[]) { + ::testing::InitGoogleTest(&argc, argv); + static_cast<void>(::testing::AddGlobalTestEnvironment(testEnv)); + return RUN_ALL_TESTS(); +} diff --git a/tests/array/constArray/CMakeLists.txt b/tests/array/constArray/CMakeLists.txt new file mode 100644 index 00000000..126cb082 --- /dev/null +++ b/tests/array/constArray/CMakeLists.txt @@ -0,0 +1,20 @@ +function(zfp_add_cpp_tests dims type bits) + + # test compressed const array class + set(test_name testConstArray${dims}${type}) + add_executable(${test_name} ${test_name}.cpp) + target_link_libraries(${test_name} + gtest gtest_main zfp zfpHashLib genSmoothRandNumsLib zfpChecksumsLib) + target_compile_definitions(${test_name} PRIVATE ${zfp_compressed_array_defs}) + add_test(NAME ${test_name} COMMAND ${test_name}) + +endfunction() + +zfp_add_cpp_tests(1 f 32) +zfp_add_cpp_tests(2 f 32) +zfp_add_cpp_tests(3 f 32) +zfp_add_cpp_tests(4 f 32) +zfp_add_cpp_tests(1 d 64) +zfp_add_cpp_tests(2 d 64) +zfp_add_cpp_tests(3 d 64) +zfp_add_cpp_tests(4 d 64) diff --git a/tests/array/constArray/testConstArray1Base.cpp 
b/tests/array/constArray/testConstArray1Base.cpp new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/tests/array/constArray/testConstArray1Base.cpp @@ -0,0 +1 @@ + diff --git a/tests/array/constArray/testConstArray1d.cpp b/tests/array/constArray/testConstArray1d.cpp new file mode 100644 index 00000000..d1e8edd2 --- /dev/null +++ b/tests/array/constArray/testConstArray1d.cpp @@ -0,0 +1,46 @@ +#include "zfp/constarray1.hpp" +#include "zfp/constarray2.hpp" +#include "zfp/constarray3.hpp" +#include "zfp/constarray4.hpp" +#include "zfp/factory.hpp" +using namespace zfp; + +extern "C" { + #include "constants/1dDouble.h" +} + +#include "gtest/gtest.h" +#include "utils/gtestDoubleEnv.h" +#include "utils/gtestBaseFixture.h" +#include "utils/predicates.h" + +class CArray1dTestEnv : public ArrayDoubleTestEnv { +public: + virtual int getDims() { return 1; } +}; + +CArray1dTestEnv* const testEnv = new CArray1dTestEnv; + +class CArray1dTest : public CArrayNdTestFixture {}; + +#define TEST_FIXTURE CArray1dTest + +#define ZFP_ARRAY_TYPE const_array1d +#define ZFP_FULL_ARRAY_TYPE(BLOCK_TYPE) const_array1<double, zfp::codec::zfp1<double>, BLOCK_TYPE> +#define ZFP_ARRAY_TYPE_WRONG_SCALAR const_array1f +#define ZFP_ARRAY_TYPE_WRONG_DIM const_array2d +#define ZFP_ARRAY_TYPE_WRONG_SCALAR_DIM const_array2f +#define ZFP_ARRAY_NOT_INCLUDED_TYPE const_array3d + +#define UINT uint64 +#define SCALAR double +#define DIMS 1 + +#include "testConstArrayBase.cpp" +#include "testConstArray1Base.cpp" + +int main(int argc, char* argv[]) { + ::testing::InitGoogleTest(&argc, argv); + static_cast<void>(::testing::AddGlobalTestEnvironment(testEnv)); + return RUN_ALL_TESTS(); +} diff --git a/tests/array/constArray/testConstArray1f.cpp b/tests/array/constArray/testConstArray1f.cpp new file mode 100644 index 00000000..321d5d20 --- /dev/null +++ b/tests/array/constArray/testConstArray1f.cpp @@ -0,0 +1,46 @@ +#include "zfp/constarray1.hpp" +#include "zfp/constarray2.hpp" +#include "zfp/constarray3.hpp" +#include "zfp/constarray4.hpp" 
+#include "zfp/factory.hpp" +using namespace zfp; + +extern "C" { + #include "constants/1dFloat.h" +} + +#include "gtest/gtest.h" +#include "utils/gtestFloatEnv.h" +#include "utils/gtestBaseFixture.h" +#include "utils/predicates.h" + +class CArray1fTestEnv : public ArrayFloatTestEnv { +public: + virtual int getDims() { return 1; } +}; + +CArray1fTestEnv* const testEnv = new CArray1fTestEnv; + +class CArray1fTest : public CArrayNdTestFixture {}; + +#define TEST_FIXTURE CArray1fTest + +#define ZFP_ARRAY_TYPE const_array1f +#define ZFP_FULL_ARRAY_TYPE(BLOCK_TYPE) const_array1<float, zfp::codec::zfp1<float>, BLOCK_TYPE> +#define ZFP_ARRAY_TYPE_WRONG_SCALAR const_array1d +#define ZFP_ARRAY_TYPE_WRONG_DIM const_array2f +#define ZFP_ARRAY_TYPE_WRONG_SCALAR_DIM const_array2d +#define ZFP_ARRAY_NOT_INCLUDED_TYPE const_array3f + +#define UINT uint32 +#define SCALAR float +#define DIMS 1 + +#include "testConstArrayBase.cpp" +#include "testConstArray1Base.cpp" + +int main(int argc, char* argv[]) { + ::testing::InitGoogleTest(&argc, argv); + static_cast<void>(::testing::AddGlobalTestEnvironment(testEnv)); + return RUN_ALL_TESTS(); +} diff --git a/tests/array/constArray/testConstArray2Base.cpp b/tests/array/constArray/testConstArray2Base.cpp new file mode 100644 index 00000000..e69de29b diff --git a/tests/array/constArray/testConstArray2d.cpp b/tests/array/constArray/testConstArray2d.cpp new file mode 100644 index 00000000..e92360f1 --- /dev/null +++ b/tests/array/constArray/testConstArray2d.cpp @@ -0,0 +1,46 @@ +#include "zfp/constarray1.hpp" +#include "zfp/constarray2.hpp" +#include "zfp/constarray3.hpp" +#include "zfp/constarray4.hpp" +#include "zfp/factory.hpp" +using namespace zfp; + +extern "C" { + #include "constants/2dDouble.h" +} + +#include "gtest/gtest.h" +#include "utils/gtestDoubleEnv.h" +#include "utils/gtestBaseFixture.h" +#include "utils/predicates.h" + +class CArray2dTestEnv : public ArrayDoubleTestEnv { +public: + virtual int getDims() { return 2; } +}; + +CArray2dTestEnv* const testEnv = new 
CArray2dTestEnv; + +class CArray2dTest : public CArrayNdTestFixture {}; + +#define TEST_FIXTURE CArray2dTest + +#define ZFP_ARRAY_TYPE const_array2d +#define ZFP_FULL_ARRAY_TYPE(BLOCK_TYPE) const_array2<double, zfp::codec::zfp2<double>, BLOCK_TYPE> +#define ZFP_ARRAY_TYPE_WRONG_SCALAR const_array2f +#define ZFP_ARRAY_TYPE_WRONG_DIM const_array3d +#define ZFP_ARRAY_TYPE_WRONG_SCALAR_DIM const_array3f +#define ZFP_ARRAY_NOT_INCLUDED_TYPE const_array1d + +#define UINT uint64 +#define SCALAR double +#define DIMS 2 + +#include "testConstArrayBase.cpp" +#include "testConstArray2Base.cpp" + +int main(int argc, char* argv[]) { + ::testing::InitGoogleTest(&argc, argv); + static_cast<void>(::testing::AddGlobalTestEnvironment(testEnv)); + return RUN_ALL_TESTS(); +} diff --git a/tests/array/constArray/testConstArray2f.cpp b/tests/array/constArray/testConstArray2f.cpp new file mode 100644 index 00000000..bde63709 --- /dev/null +++ b/tests/array/constArray/testConstArray2f.cpp @@ -0,0 +1,46 @@ +#include "zfp/constarray1.hpp" +#include "zfp/constarray2.hpp" +#include "zfp/constarray3.hpp" +#include "zfp/constarray4.hpp" +#include "zfp/factory.hpp" +using namespace zfp; + +extern "C" { + #include "constants/2dFloat.h" +} + +#include "gtest/gtest.h" +#include "utils/gtestFloatEnv.h" +#include "utils/gtestBaseFixture.h" +#include "utils/predicates.h" + +class CArray2fTestEnv : public ArrayFloatTestEnv { +public: + virtual int getDims() { return 2; } +}; + +CArray2fTestEnv* const testEnv = new CArray2fTestEnv; + +class CArray2fTest : public CArrayNdTestFixture {}; + +#define TEST_FIXTURE CArray2fTest + +#define ZFP_ARRAY_TYPE const_array2f +#define ZFP_FULL_ARRAY_TYPE(BLOCK_TYPE) const_array2<float, zfp::codec::zfp2<float>, BLOCK_TYPE> +#define ZFP_ARRAY_TYPE_WRONG_SCALAR const_array2d +#define ZFP_ARRAY_TYPE_WRONG_DIM const_array3f +#define ZFP_ARRAY_TYPE_WRONG_SCALAR_DIM const_array3d +#define ZFP_ARRAY_NOT_INCLUDED_TYPE const_array1f + +#define UINT uint32 +#define SCALAR float +#define DIMS 2 + +#include "testConstArrayBase.cpp" +#include 
"testConstArray2Base.cpp" + +int main(int argc, char* argv[]) { + ::testing::InitGoogleTest(&argc, argv); + static_cast<void>(::testing::AddGlobalTestEnvironment(testEnv)); + return RUN_ALL_TESTS(); +} diff --git a/tests/array/constArray/testConstArray3Base.cpp b/tests/array/constArray/testConstArray3Base.cpp new file mode 100644 index 00000000..e69de29b diff --git a/tests/array/constArray/testConstArray3d.cpp b/tests/array/constArray/testConstArray3d.cpp new file mode 100644 index 00000000..59c091ee --- /dev/null +++ b/tests/array/constArray/testConstArray3d.cpp @@ -0,0 +1,46 @@ +#include "zfp/constarray1.hpp" +#include "zfp/constarray2.hpp" +#include "zfp/constarray3.hpp" +#include "zfp/constarray4.hpp" +#include "zfp/factory.hpp" +using namespace zfp; + +extern "C" { + #include "constants/3dDouble.h" +} + +#include "gtest/gtest.h" +#include "utils/gtestDoubleEnv.h" +#include "utils/gtestBaseFixture.h" +#include "utils/predicates.h" + +class CArray3dTestEnv : public ArrayDoubleTestEnv { +public: + virtual int getDims() { return 3; } +}; + +CArray3dTestEnv* const testEnv = new CArray3dTestEnv; + +class CArray3dTest : public CArrayNdTestFixture {}; + +#define TEST_FIXTURE CArray3dTest + +#define ZFP_ARRAY_TYPE const_array3d +#define ZFP_FULL_ARRAY_TYPE(BLOCK_TYPE) const_array3<double, zfp::codec::zfp3<double>, BLOCK_TYPE> +#define ZFP_ARRAY_TYPE_WRONG_SCALAR const_array3f +#define ZFP_ARRAY_TYPE_WRONG_DIM const_array4d +#define ZFP_ARRAY_TYPE_WRONG_SCALAR_DIM const_array4f +#define ZFP_ARRAY_NOT_INCLUDED_TYPE const_array2d + +#define UINT uint64 +#define SCALAR double +#define DIMS 3 + +#include "testConstArrayBase.cpp" +#include "testConstArray3Base.cpp" + +int main(int argc, char* argv[]) { + ::testing::InitGoogleTest(&argc, argv); + static_cast<void>(::testing::AddGlobalTestEnvironment(testEnv)); + return RUN_ALL_TESTS(); +} diff --git a/tests/array/constArray/testConstArray3f.cpp b/tests/array/constArray/testConstArray3f.cpp new file mode 100644 index 00000000..44358955 --- /dev/null +++ 
b/tests/array/constArray/testConstArray3f.cpp @@ -0,0 +1,46 @@ +#include "zfp/constarray1.hpp" +#include "zfp/constarray2.hpp" +#include "zfp/constarray3.hpp" +#include "zfp/constarray4.hpp" +#include "zfp/factory.hpp" +using namespace zfp; + +extern "C" { + #include "constants/3dFloat.h" +} + +#include "gtest/gtest.h" +#include "utils/gtestFloatEnv.h" +#include "utils/gtestBaseFixture.h" +#include "utils/predicates.h" + +class CArray3fTestEnv : public ArrayFloatTestEnv { +public: + virtual int getDims() { return 3; } +}; + +CArray3fTestEnv* const testEnv = new CArray3fTestEnv; + +class CArray3fTest : public CArrayNdTestFixture {}; + +#define TEST_FIXTURE CArray3fTest + +#define ZFP_ARRAY_TYPE const_array3f +#define ZFP_FULL_ARRAY_TYPE(BLOCK_TYPE) const_array3<float, zfp::codec::zfp3<float>, BLOCK_TYPE> +#define ZFP_ARRAY_TYPE_WRONG_SCALAR const_array3d +#define ZFP_ARRAY_TYPE_WRONG_DIM const_array4f +#define ZFP_ARRAY_TYPE_WRONG_SCALAR_DIM const_array4d +#define ZFP_ARRAY_NOT_INCLUDED_TYPE const_array2f + +#define UINT uint32 +#define SCALAR float +#define DIMS 3 + +#include "testConstArrayBase.cpp" +#include "testConstArray3Base.cpp" + +int main(int argc, char* argv[]) { + ::testing::InitGoogleTest(&argc, argv); + static_cast<void>(::testing::AddGlobalTestEnvironment(testEnv)); + return RUN_ALL_TESTS(); +} diff --git a/tests/array/constArray/testConstArray4Base.cpp b/tests/array/constArray/testConstArray4Base.cpp new file mode 100644 index 00000000..e69de29b diff --git a/tests/array/constArray/testConstArray4d.cpp b/tests/array/constArray/testConstArray4d.cpp new file mode 100644 index 00000000..598417e9 --- /dev/null +++ b/tests/array/constArray/testConstArray4d.cpp @@ -0,0 +1,46 @@ +#include "zfp/constarray1.hpp" +#include "zfp/constarray2.hpp" +#include "zfp/constarray3.hpp" +#include "zfp/constarray4.hpp" +#include "zfp/factory.hpp" +using namespace zfp; + +extern "C" { + #include "constants/4dDouble.h" +} + +#include "gtest/gtest.h" +#include "utils/gtestDoubleEnv.h" +#include 
"utils/gtestBaseFixture.h" +#include "utils/predicates.h" + +class CArray4dTestEnv : public ArrayDoubleTestEnv { +public: + virtual int getDims() { return 4; } +}; + +CArray4dTestEnv* const testEnv = new CArray4dTestEnv; + +class CArray4dTest : public CArrayNdTestFixture {}; + +#define TEST_FIXTURE CArray4dTest + +#define ZFP_ARRAY_TYPE const_array4d +#define ZFP_FULL_ARRAY_TYPE(BLOCK_TYPE) const_array4<double, zfp::codec::zfp4<double>, BLOCK_TYPE> +#define ZFP_ARRAY_TYPE_WRONG_SCALAR const_array4f +#define ZFP_ARRAY_TYPE_WRONG_DIM const_array1d +#define ZFP_ARRAY_TYPE_WRONG_SCALAR_DIM const_array1f +#define ZFP_ARRAY_NOT_INCLUDED_TYPE const_array2d + +#define UINT uint64 +#define SCALAR double +#define DIMS 4 + +#include "testConstArrayBase.cpp" +#include "testConstArray4Base.cpp" + +int main(int argc, char* argv[]) { + ::testing::InitGoogleTest(&argc, argv); + static_cast<void>(::testing::AddGlobalTestEnvironment(testEnv)); + return RUN_ALL_TESTS(); +} diff --git a/tests/array/constArray/testConstArray4f.cpp b/tests/array/constArray/testConstArray4f.cpp new file mode 100644 index 00000000..cf5f08d5 --- /dev/null +++ b/tests/array/constArray/testConstArray4f.cpp @@ -0,0 +1,46 @@ +#include "zfp/constarray1.hpp" +#include "zfp/constarray2.hpp" +#include "zfp/constarray3.hpp" +#include "zfp/constarray4.hpp" +#include "zfp/factory.hpp" +using namespace zfp; + +extern "C" { + #include "constants/4dFloat.h" +} + +#include "gtest/gtest.h" +#include "utils/gtestFloatEnv.h" +#include "utils/gtestBaseFixture.h" +#include "utils/predicates.h" + +class CArray4fTestEnv : public ArrayFloatTestEnv { +public: + virtual int getDims() { return 4; } +}; + +CArray4fTestEnv* const testEnv = new CArray4fTestEnv; + +class CArray4fTest : public CArrayNdTestFixture {}; + +#define TEST_FIXTURE CArray4fTest + +#define ZFP_ARRAY_TYPE const_array4f +#define ZFP_FULL_ARRAY_TYPE(BLOCK_TYPE) const_array4<float, zfp::codec::zfp4<float>, BLOCK_TYPE> +#define ZFP_ARRAY_TYPE_WRONG_SCALAR const_array4d +#define ZFP_ARRAY_TYPE_WRONG_DIM const_array1f +#define 
ZFP_ARRAY_TYPE_WRONG_SCALAR_DIM const_array1d +#define ZFP_ARRAY_NOT_INCLUDED_TYPE const_array2f + +#define UINT uint32 +#define SCALAR float +#define DIMS 4 + +#include "testConstArrayBase.cpp" +#include "testConstArray4Base.cpp" + +int main(int argc, char* argv[]) { + ::testing::InitGoogleTest(&argc, argv); + static_cast<void>(::testing::AddGlobalTestEnvironment(testEnv)); + return RUN_ALL_TESTS(); +} diff --git a/tests/array/constArray/testConstArrayBase.cpp b/tests/array/constArray/testConstArrayBase.cpp new file mode 100644 index 00000000..53945744 --- /dev/null +++ b/tests/array/constArray/testConstArrayBase.cpp @@ -0,0 +1,242 @@ +extern "C" { + #include "utils/testMacros.h" + #include "utils/zfpChecksums.h" + #include "utils/zfpHash.h" +} + +void FailWhenNoExceptionThrown() +{ + FAIL() << "No exception was thrown when one was expected"; +} + +void FailAndPrintException(std::exception const & e) +{ + FAIL() << "Unexpected exception thrown: " << typeid(e).name() << std::endl << "With message: " << e.what(); +} + +TEST_F(TEST_FIXTURE, when_constructorCalledForRate_then_rateSet) +{ + double rate = ZFP_RATE_PARAM_BITS; + zfp_config config = zfp_config_rate(rate, true); + +#if DIMS == 1 + ZFP_ARRAY_TYPE arr(inputDataSideLen, config); + EXPECT_LT(rate, arr.rate()); +#elif DIMS == 2 + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, config); + EXPECT_LT(rate, arr.rate()); +#elif DIMS == 3 + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, config); + // alignment in 3D supports integer fixed-rates [1, 64] (use <=) + EXPECT_LE(rate, arr.rate()); +#elif DIMS == 4 + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, inputDataSideLen, config); + // alignment in 4D supports integer fixed-rates [1, 64] (use <=) + EXPECT_LE(rate, arr.rate()); +#endif +} + +TEST_F(TEST_FIXTURE, when_setRate_then_compressionRateChanged) +{ + zfp_config config = zfp_config_rate(ZFP_RATE_PARAM_BITS, true); + +#if DIMS == 1 + ZFP_ARRAY_TYPE 
arr(inputDataSideLen, config, inputDataArr); +#elif DIMS == 2 + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, config, inputDataArr); +#elif DIMS == 3 + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, config, inputDataArr); +#elif DIMS == 4 + ZFP_ARRAY_TYPE arr(inputDataSideLen, inputDataSideLen, inputDataSideLen, inputDataSideLen, config, inputDataArr); +#endif + + double actualOldRate = arr.rate(); + size_t oldCompressedSize = arr.compressed_size(); + uint64 oldChecksum = hashBitstream((uint64*)arr.compressed_data(), oldCompressedSize); + + double newRate = ZFP_RATE_PARAM_BITS - 10; + EXPECT_LT(1, newRate); + arr.set_rate(newRate); + EXPECT_GT(actualOldRate, arr.rate()); + + arr.set(inputDataArr); + size_t newCompressedSize = arr.compressed_size(); + uint64 checksum = hashBitstream((uint64*)arr.compressed_data(), newCompressedSize); + + EXPECT_PRED_FORMAT2(ExpectNeqPrintHexPred, oldChecksum, checksum); + + EXPECT_GT(oldCompressedSize, newCompressedSize); +} + +#if DIMS == 1 +INSTANTIATE_TEST_SUITE_P(TestManyCompressionModes, + TEST_FIXTURE, + ::testing::Values( + testConfig(TEST_RATE,1,TEST_INDEX_IMP), testConfig(TEST_RATE,2,TEST_INDEX_IMP), + testConfig(TEST_PREC,0,TEST_INDEX_IMP), testConfig(TEST_PREC,1,TEST_INDEX_IMP), testConfig(TEST_PREC,2,TEST_INDEX_IMP), + testConfig(TEST_ACCU,0,TEST_INDEX_IMP), testConfig(TEST_ACCU,1,TEST_INDEX_IMP), testConfig(TEST_ACCU,2,TEST_INDEX_IMP), + testConfig(TEST_RVRS,0,TEST_INDEX_IMP), + testConfig(TEST_RATE,1,TEST_INDEX_VRB), testConfig(TEST_RATE,2,TEST_INDEX_VRB), + testConfig(TEST_PREC,0,TEST_INDEX_VRB), testConfig(TEST_PREC,1,TEST_INDEX_VRB), testConfig(TEST_PREC,2,TEST_INDEX_VRB), + testConfig(TEST_ACCU,0,TEST_INDEX_VRB), testConfig(TEST_ACCU,1,TEST_INDEX_VRB), testConfig(TEST_ACCU,2,TEST_INDEX_VRB), + testConfig(TEST_RVRS,0,TEST_INDEX_VRB), + testConfig(TEST_RATE,1,TEST_INDEX_HY4), testConfig(TEST_RATE,2,TEST_INDEX_HY4), + testConfig(TEST_PREC,0,TEST_INDEX_HY4), 
testConfig(TEST_PREC,1,TEST_INDEX_HY4), testConfig(TEST_PREC,2,TEST_INDEX_HY4), + testConfig(TEST_ACCU,0,TEST_INDEX_HY4), testConfig(TEST_ACCU,1,TEST_INDEX_HY4), testConfig(TEST_ACCU,2,TEST_INDEX_HY4), + testConfig(TEST_RVRS,0,TEST_INDEX_HY4), + testConfig(TEST_RATE,1,TEST_INDEX_HY8), testConfig(TEST_RATE,2,TEST_INDEX_HY8), + testConfig(TEST_PREC,0,TEST_INDEX_HY8), testConfig(TEST_PREC,1,TEST_INDEX_HY8), testConfig(TEST_PREC,2,TEST_INDEX_HY8), + testConfig(TEST_ACCU,0,TEST_INDEX_HY8), testConfig(TEST_ACCU,1,TEST_INDEX_HY8), testConfig(TEST_ACCU,2,TEST_INDEX_HY8), + testConfig(TEST_RVRS,0,TEST_INDEX_HY8) + ), + TEST_FIXTURE::PrintToStringParamName() +); +#else +INSTANTIATE_TEST_SUITE_P(TestManyCompressionModes, + TEST_FIXTURE, + ::testing::Values( + testConfig(TEST_RATE,0,TEST_INDEX_IMP), testConfig(TEST_RATE,1,TEST_INDEX_IMP), testConfig(TEST_RATE,2,TEST_INDEX_IMP), + testConfig(TEST_PREC,0,TEST_INDEX_IMP), testConfig(TEST_PREC,1,TEST_INDEX_IMP), testConfig(TEST_PREC,2,TEST_INDEX_IMP), + testConfig(TEST_ACCU,0,TEST_INDEX_IMP), testConfig(TEST_ACCU,1,TEST_INDEX_IMP), testConfig(TEST_ACCU,2,TEST_INDEX_IMP), + testConfig(TEST_RVRS,0,TEST_INDEX_IMP), + testConfig(TEST_RATE,1,TEST_INDEX_VRB), testConfig(TEST_RATE,2,TEST_INDEX_VRB), + testConfig(TEST_PREC,0,TEST_INDEX_VRB), testConfig(TEST_PREC,1,TEST_INDEX_VRB), testConfig(TEST_PREC,2,TEST_INDEX_VRB), + testConfig(TEST_ACCU,0,TEST_INDEX_VRB), testConfig(TEST_ACCU,1,TEST_INDEX_VRB), testConfig(TEST_ACCU,2,TEST_INDEX_VRB), + testConfig(TEST_RVRS,0,TEST_INDEX_VRB), + testConfig(TEST_RATE,1,TEST_INDEX_HY4), testConfig(TEST_RATE,2,TEST_INDEX_HY4), + testConfig(TEST_PREC,0,TEST_INDEX_HY4), testConfig(TEST_PREC,1,TEST_INDEX_HY4), testConfig(TEST_PREC,2,TEST_INDEX_HY4), + testConfig(TEST_ACCU,0,TEST_INDEX_HY4), testConfig(TEST_ACCU,1,TEST_INDEX_HY4), testConfig(TEST_ACCU,2,TEST_INDEX_HY4), + testConfig(TEST_RVRS,0,TEST_INDEX_HY4), + testConfig(TEST_RATE,1,TEST_INDEX_HY8), testConfig(TEST_RATE,2,TEST_INDEX_HY8), + 
testConfig(TEST_PREC,0,TEST_INDEX_HY8), testConfig(TEST_PREC,1,TEST_INDEX_HY8), testConfig(TEST_PREC,2,TEST_INDEX_HY8), + testConfig(TEST_ACCU,0,TEST_INDEX_HY8), testConfig(TEST_ACCU,1,TEST_INDEX_HY8), testConfig(TEST_ACCU,2,TEST_INDEX_HY8), + testConfig(TEST_RVRS,0,TEST_INDEX_HY8) + ), + TEST_FIXTURE::PrintToStringParamName() +); +#endif + +TEST_P(TEST_FIXTURE, when_constructorCalledWithCacheSize_then_minCacheSizeEnforced) +{ + size_t cacheSize = 300; + zfp_config config = getConfig(); + + switch(std::get<2>(GetParam())) + { + case TEST_INDEX_IMP: + { + if (std::get<0>(GetParam()) == TEST_RATE) + { + ZFP_FULL_ARRAY_TYPE(TEST_INDEX_TYPE_IMP) arr(_repeat_arg(inputDataSideLen, DIMS), config); + EXPECT_LE(cacheSize, arr.cache_size()); + } else { + GTEST_SKIP() << "[ SKIPPED ] Implicit index only supported for fixed rate" << std::endl; + } + break; + } + case TEST_INDEX_VRB: + { + ZFP_FULL_ARRAY_TYPE(TEST_INDEX_TYPE_VRB) arr(_repeat_arg(inputDataSideLen, DIMS), config); + EXPECT_LE(cacheSize, arr.cache_size()); + break; + } + case TEST_INDEX_HY4: + { + ZFP_FULL_ARRAY_TYPE(TEST_INDEX_TYPE_HY4) arr(_repeat_arg(inputDataSideLen, DIMS), config); + EXPECT_LE(cacheSize, arr.cache_size()); + break; + } + case TEST_INDEX_HY8: + { + ZFP_FULL_ARRAY_TYPE(TEST_INDEX_TYPE_HY8) arr(_repeat_arg(inputDataSideLen, DIMS), config); + EXPECT_LE(cacheSize, arr.cache_size()); + break; + } + } +} + +TEST_P(TEST_FIXTURE, given_dataset_when_set_then_underlyingBitstreamChecksumMatches) +{ + zfp_config config = getConfig(); + uint64 key1, key2; + + switch(std::get<2>(GetParam())) + { + case TEST_INDEX_IMP: + { + if (std::get<0>(GetParam()) == TEST_RATE) + { + ZFP_FULL_ARRAY_TYPE(TEST_INDEX_TYPE_IMP) arr(_repeat_arg(inputDataSideLen, DIMS), config); + + computeKey(ARRAY_TEST, COMPRESSED_BITSTREAM, dimLens, config.mode, std::get<1>(GetParam()), &key1, &key2); + uint64 expectedChecksum = getChecksumByKey(DIMS, ZFP_TYPE, key1, key2); + uint64 checksum = hashBitstream((uint64*)arr.compressed_data(), 
arr.compressed_size()); + EXPECT_PRED_FORMAT2(ExpectNeqPrintHexPred, expectedChecksum, checksum); + + arr.set(inputDataArr); + checksum = hashBitstream((uint64*)arr.compressed_data(), arr.compressed_size()); + EXPECT_PRED_FORMAT2(ExpectEqPrintHexPred, expectedChecksum, checksum); + } else { + GTEST_SKIP() << "[ SKIPPED ] Implicit index only supported for fixed rate" << std::endl; + } + break; + } + case TEST_INDEX_VRB: + { + ZFP_FULL_ARRAY_TYPE(TEST_INDEX_TYPE_VRB) arr(_repeat_arg(inputDataSideLen, DIMS), config); + + computeKey(ARRAY_TEST, COMPRESSED_BITSTREAM, dimLens, config.mode, std::get<1>(GetParam()), &key1, &key2); + uint64 expectedChecksum = getChecksumByKey(DIMS, ZFP_TYPE, key1, key2); + uint64 checksum = hashBitstream((uint64*)arr.compressed_data(), arr.compressed_size()); + EXPECT_PRED_FORMAT2(ExpectNeqPrintHexPred, expectedChecksum, checksum); + + arr.set(inputDataArr); + checksum = hashBitstream((uint64*)arr.compressed_data(), arr.compressed_size()); + EXPECT_PRED_FORMAT2(ExpectEqPrintHexPred, expectedChecksum, checksum); + break; + } + case TEST_INDEX_HY4: + { + ZFP_FULL_ARRAY_TYPE(TEST_INDEX_TYPE_HY4) arr(_repeat_arg(inputDataSideLen, DIMS), config); + + computeKey(ARRAY_TEST, COMPRESSED_BITSTREAM, dimLens, config.mode, std::get<1>(GetParam()), &key1, &key2); + uint64 expectedChecksum = getChecksumByKey(DIMS, ZFP_TYPE, key1, key2); + uint64 checksum = hashBitstream((uint64*)arr.compressed_data(), arr.compressed_size()); + EXPECT_PRED_FORMAT2(ExpectNeqPrintHexPred, expectedChecksum, checksum); + + arr.set(inputDataArr); + checksum = hashBitstream((uint64*)arr.compressed_data(), arr.compressed_size()); + EXPECT_PRED_FORMAT2(ExpectEqPrintHexPred, expectedChecksum, checksum); + break; + } + case TEST_INDEX_HY8: + { + ZFP_FULL_ARRAY_TYPE(TEST_INDEX_TYPE_HY8) arr(_repeat_arg(inputDataSideLen, DIMS), config); + + computeKey(ARRAY_TEST, COMPRESSED_BITSTREAM, dimLens, config.mode, std::get<1>(GetParam()), &key1, &key2); + uint64 expectedChecksum = 
getChecksumByKey(DIMS, ZFP_TYPE, key1, key2); + uint64 checksum = hashBitstream((uint64*)arr.compressed_data(), arr.compressed_size()); + EXPECT_PRED_FORMAT2(ExpectNeqPrintHexPred, expectedChecksum, checksum); + + arr.set(inputDataArr); + checksum = hashBitstream((uint64*)arr.compressed_data(), arr.compressed_size()); + EXPECT_PRED_FORMAT2(ExpectEqPrintHexPred, expectedChecksum, checksum); + break; + } + } +} + +TEST_P(TEST_FIXTURE, when_implicitIndexForNonRate_then_exceptionThrown) +{ + zfp_config config = getConfig(); + + if (std::get<2>(GetParam()) != TEST_INDEX_IMP || std::get<0>(GetParam()) == TEST_RATE) + GTEST_SKIP(); + + try { + ZFP_FULL_ARRAY_TYPE(TEST_INDEX_TYPE_IMP) arr(_repeat_arg(inputDataSideLen, DIMS), config); + FailWhenNoExceptionThrown(); + } + catch (zfp::exception const&) { /* hitting this block is test success so do nothing */ } + catch (std::exception const& e) { FailAndPrintException(e); } +} diff --git a/tests/array/decode/CMakeLists.txt b/tests/array/decode/CMakeLists.txt new file mode 100644 index 00000000..2767f88a --- /dev/null +++ b/tests/array/decode/CMakeLists.txt @@ -0,0 +1,28 @@ +function(zfp_add_cpp_tests dims type bits) + # test templated block encoding + set(test_name testTemplatedDecode${dims}${type}) + add_executable(${test_name} ${test_name}.cpp) + target_link_libraries(${test_name} + gtest gtest_main zfp rand${bits}Lib) + target_compile_definitions(${test_name} PRIVATE ${zfp_compressed_array_defs}) + add_test(NAME ${test_name} COMMAND ${test_name}) + +endfunction() + +zfp_add_cpp_tests(1 f 32) +zfp_add_cpp_tests(2 f 32) +zfp_add_cpp_tests(3 f 32) +zfp_add_cpp_tests(4 f 32) +zfp_add_cpp_tests(1 d 64) +zfp_add_cpp_tests(2 d 64) +zfp_add_cpp_tests(3 d 64) +#zfp_add_cpp_tests(1 Int32 32) +#zfp_add_cpp_tests(2 Int32 32) +#zfp_add_cpp_tests(3 Int32 32) +#zfp_add_cpp_tests(4 Int32 32) +#zfp_add_cpp_tests(1 Int64 64) +#zfp_add_cpp_tests(2 Int64 64) +#zfp_add_cpp_tests(3 Int64 64) 
+#zfp_add_cpp_tests(4 Int64 64) diff --git a/tests/array/decode/testTemplatedDecode1d.cpp b/tests/array/decode/testTemplatedDecode1d.cpp new file mode 100644 index 00000000..ca70fd89 --- /dev/null +++ b/tests/array/decode/testTemplatedDecode1d.cpp @@ -0,0 +1,20 @@ +#include "zfp.hpp" +using namespace zfp; + +extern "C" { + #include "constants/1dDouble.h" + #include "utils/rand64.h" +} + +#define ZFP_FIELD_FUNC zfp_field_1d +#define ZFP_ENCODE_BLOCK_FUNC zfp_encode_block_double_1 +#define ZFP_DECODE_BLOCK_FUNC zfp_decode_block_double_1 +#define ZFP_ENCODE_BLOCK_STRIDED_FUNC zfp_encode_block_strided_double_1 +#define ZFP_DECODE_BLOCK_STRIDED_FUNC zfp_decode_block_strided_double_1 +#define ZFP_ENCODE_PARTIAL_BLOCK_STRIDED_FUNC zfp_encode_partial_block_strided_double_1 +#define ZFP_DECODE_PARTIAL_BLOCK_STRIDED_FUNC zfp_decode_partial_block_strided_double_1 + +#define SCALAR double +#define DIMS 1 + +#include "testTemplatedDecodeBase.cpp" diff --git a/tests/array/decode/testTemplatedDecode1f.cpp b/tests/array/decode/testTemplatedDecode1f.cpp new file mode 100644 index 00000000..914b3c95 --- /dev/null +++ b/tests/array/decode/testTemplatedDecode1f.cpp @@ -0,0 +1,20 @@ +#include "zfp.hpp" +using namespace zfp; + +extern "C" { + #include "constants/1dFloat.h" + #include "utils/rand32.h" +} + +#define ZFP_FIELD_FUNC zfp_field_1d +#define ZFP_ENCODE_BLOCK_FUNC zfp_encode_block_float_1 +#define ZFP_DECODE_BLOCK_FUNC zfp_decode_block_float_1 +#define ZFP_ENCODE_BLOCK_STRIDED_FUNC zfp_encode_block_strided_float_1 +#define ZFP_DECODE_BLOCK_STRIDED_FUNC zfp_decode_block_strided_float_1 +#define ZFP_ENCODE_PARTIAL_BLOCK_STRIDED_FUNC zfp_encode_partial_block_strided_float_1 +#define ZFP_DECODE_PARTIAL_BLOCK_STRIDED_FUNC zfp_decode_partial_block_strided_float_1 + +#define SCALAR float +#define DIMS 1 + +#include "testTemplatedDecodeBase.cpp" diff --git a/tests/array/decode/testTemplatedDecode2d.cpp b/tests/array/decode/testTemplatedDecode2d.cpp new file mode 100644 index 
00000000..5915f10f --- /dev/null +++ b/tests/array/decode/testTemplatedDecode2d.cpp @@ -0,0 +1,20 @@ +#include "zfp.hpp" +using namespace zfp; + +extern "C" { + #include "constants/2dDouble.h" + #include "utils/rand64.h" +} + +#define ZFP_FIELD_FUNC zfp_field_2d +#define ZFP_ENCODE_BLOCK_FUNC zfp_encode_block_double_2 +#define ZFP_DECODE_BLOCK_FUNC zfp_decode_block_double_2 +#define ZFP_ENCODE_BLOCK_STRIDED_FUNC zfp_encode_block_strided_double_2 +#define ZFP_DECODE_BLOCK_STRIDED_FUNC zfp_decode_block_strided_double_2 +#define ZFP_ENCODE_PARTIAL_BLOCK_STRIDED_FUNC zfp_encode_partial_block_strided_double_2 +#define ZFP_DECODE_PARTIAL_BLOCK_STRIDED_FUNC zfp_decode_partial_block_strided_double_2 + +#define SCALAR double +#define DIMS 2 + +#include "testTemplatedDecodeBase.cpp" diff --git a/tests/array/decode/testTemplatedDecode2f.cpp b/tests/array/decode/testTemplatedDecode2f.cpp new file mode 100644 index 00000000..526b2bd0 --- /dev/null +++ b/tests/array/decode/testTemplatedDecode2f.cpp @@ -0,0 +1,20 @@ +#include "zfp.hpp" +using namespace zfp; + +extern "C" { + #include "constants/2dFloat.h" + #include "utils/rand32.h" +} + +#define ZFP_FIELD_FUNC zfp_field_2d +#define ZFP_ENCODE_BLOCK_FUNC zfp_encode_block_float_2 +#define ZFP_DECODE_BLOCK_FUNC zfp_decode_block_float_2 +#define ZFP_ENCODE_BLOCK_STRIDED_FUNC zfp_encode_block_strided_float_2 +#define ZFP_DECODE_BLOCK_STRIDED_FUNC zfp_decode_block_strided_float_2 +#define ZFP_ENCODE_PARTIAL_BLOCK_STRIDED_FUNC zfp_encode_partial_block_strided_float_2 +#define ZFP_DECODE_PARTIAL_BLOCK_STRIDED_FUNC zfp_decode_partial_block_strided_float_2 + +#define SCALAR float +#define DIMS 2 + +#include "testTemplatedDecodeBase.cpp" diff --git a/tests/array/decode/testTemplatedDecode3d.cpp b/tests/array/decode/testTemplatedDecode3d.cpp new file mode 100644 index 00000000..4c567e90 --- /dev/null +++ b/tests/array/decode/testTemplatedDecode3d.cpp @@ -0,0 +1,20 @@ +#include "zfp.hpp" +using namespace zfp; + +extern "C" { + #include 
"constants/3dDouble.h" + #include "utils/rand64.h" +} + +#define ZFP_FIELD_FUNC zfp_field_3d +#define ZFP_ENCODE_BLOCK_FUNC zfp_encode_block_double_3 +#define ZFP_DECODE_BLOCK_FUNC zfp_decode_block_double_3 +#define ZFP_ENCODE_BLOCK_STRIDED_FUNC zfp_encode_block_strided_double_3 +#define ZFP_DECODE_BLOCK_STRIDED_FUNC zfp_decode_block_strided_double_3 +#define ZFP_ENCODE_PARTIAL_BLOCK_STRIDED_FUNC zfp_encode_partial_block_strided_double_3 +#define ZFP_DECODE_PARTIAL_BLOCK_STRIDED_FUNC zfp_decode_partial_block_strided_double_3 + +#define SCALAR double +#define DIMS 3 + +#include "testTemplatedDecodeBase.cpp" diff --git a/tests/array/decode/testTemplatedDecode3f.cpp b/tests/array/decode/testTemplatedDecode3f.cpp new file mode 100644 index 00000000..4fc28e94 --- /dev/null +++ b/tests/array/decode/testTemplatedDecode3f.cpp @@ -0,0 +1,20 @@ +#include "zfp.hpp" +using namespace zfp; + +extern "C" { + #include "constants/3dFloat.h" + #include "utils/rand32.h" +} + +#define ZFP_FIELD_FUNC zfp_field_3d +#define ZFP_ENCODE_BLOCK_FUNC zfp_encode_block_float_3 +#define ZFP_DECODE_BLOCK_FUNC zfp_decode_block_float_3 +#define ZFP_ENCODE_BLOCK_STRIDED_FUNC zfp_encode_block_strided_float_3 +#define ZFP_DECODE_BLOCK_STRIDED_FUNC zfp_decode_block_strided_float_3 +#define ZFP_ENCODE_PARTIAL_BLOCK_STRIDED_FUNC zfp_encode_partial_block_strided_float_3 +#define ZFP_DECODE_PARTIAL_BLOCK_STRIDED_FUNC zfp_decode_partial_block_strided_float_3 + +#define SCALAR float +#define DIMS 3 + +#include "testTemplatedDecodeBase.cpp" diff --git a/tests/array/decode/testTemplatedDecode4d.cpp b/tests/array/decode/testTemplatedDecode4d.cpp new file mode 100644 index 00000000..c159c49c --- /dev/null +++ b/tests/array/decode/testTemplatedDecode4d.cpp @@ -0,0 +1,20 @@ +#include "zfp.hpp" +using namespace zfp; + +extern "C" { + #include "constants/4dDouble.h" + #include "utils/rand64.h" +} + +#define ZFP_FIELD_FUNC zfp_field_4d +#define ZFP_ENCODE_BLOCK_FUNC zfp_encode_block_double_4 +#define 
ZFP_DECODE_BLOCK_FUNC zfp_decode_block_double_4 +#define ZFP_ENCODE_BLOCK_STRIDED_FUNC zfp_encode_block_strided_double_4 +#define ZFP_DECODE_BLOCK_STRIDED_FUNC zfp_decode_block_strided_double_4 +#define ZFP_ENCODE_PARTIAL_BLOCK_STRIDED_FUNC zfp_encode_partial_block_strided_double_4 +#define ZFP_DECODE_PARTIAL_BLOCK_STRIDED_FUNC zfp_decode_partial_block_strided_double_4 + +#define SCALAR double +#define DIMS 4 + +#include "testTemplatedDecodeBase.cpp" diff --git a/tests/array/decode/testTemplatedDecode4f.cpp b/tests/array/decode/testTemplatedDecode4f.cpp new file mode 100644 index 00000000..b94b32a0 --- /dev/null +++ b/tests/array/decode/testTemplatedDecode4f.cpp @@ -0,0 +1,20 @@ +#include "zfp.hpp" +using namespace zfp; + +extern "C" { + #include "constants/4dFloat.h" + #include "utils/rand32.h" +} + +#define ZFP_FIELD_FUNC zfp_field_4d +#define ZFP_ENCODE_BLOCK_FUNC zfp_encode_block_float_4 +#define ZFP_DECODE_BLOCK_FUNC zfp_decode_block_float_4 +#define ZFP_ENCODE_BLOCK_STRIDED_FUNC zfp_encode_block_strided_float_4 +#define ZFP_DECODE_BLOCK_STRIDED_FUNC zfp_decode_block_strided_float_4 +#define ZFP_ENCODE_PARTIAL_BLOCK_STRIDED_FUNC zfp_encode_partial_block_strided_float_4 +#define ZFP_DECODE_PARTIAL_BLOCK_STRIDED_FUNC zfp_decode_partial_block_strided_float_4 + +#define SCALAR float +#define DIMS 4 + +#include "testTemplatedDecodeBase.cpp" diff --git a/tests/array/decode/testTemplatedDecodeBase.cpp b/tests/array/decode/testTemplatedDecodeBase.cpp new file mode 100644 index 00000000..471c75d3 --- /dev/null +++ b/tests/array/decode/testTemplatedDecodeBase.cpp @@ -0,0 +1,558 @@ +extern "C" { + #include "utils/testMacros.h" + #include "utils/zfpChecksums.h" + #include "utils/zfpHash.h" +} + +#include "src/template/codec.h" +#include "gtest/gtest.h" + +#define SX 2 +#define SY (3 * BLOCK_SIDE_LEN*SX) +#define SZ (2 * BLOCK_SIDE_LEN*SY) +#define SW (3 * BLOCK_SIDE_LEN*SZ) +#define PX 1 +#define PY 2 +#define PZ 3 +#define PW 4 +#define DUMMY_VAL 99 +#define 
ASSERT_SCALAR_EQ(x, y) ASSERT_NEAR(x, y, 1e-32) + +void populateArray(SCALAR** dataArr) +{ + *dataArr = new SCALAR[BLOCK_SIZE]; + ASSERT_TRUE(*dataArr != nullptr); + + for (int i = 0; i < BLOCK_SIZE; i++) + { +#ifdef FL_PT_DATA + (*dataArr)[i] = nextSignedRandFlPt(); +#else + (*dataArr)[i] = nextSignedRandInt(); +#endif + } +} + +void populateStridedArray(SCALAR** dataArr, SCALAR dummyVal) +{ +#if DIMS == 1 + size_t countX = BLOCK_SIDE_LEN * SX; + *dataArr = (SCALAR*)malloc(sizeof(SCALAR) * countX); + ASSERT_TRUE(*dataArr != nullptr); + + for (size_t i = 0; i < countX; i++) { + if (i % SX) { + (*dataArr)[i] = dummyVal; + } else { +#ifdef FL_PT_DATA + (*dataArr)[i] = nextSignedRandFlPt(); +#else + (*dataArr)[i] = nextSignedRandInt(); +#endif + } + } + +#elif DIMS == 2 + size_t countX = BLOCK_SIDE_LEN * SX; + size_t countY = SY / SX; + *dataArr = (SCALAR*)malloc(sizeof(SCALAR) * countX * countY); + ASSERT_TRUE(*dataArr != nullptr); + + for (size_t j = 0; j < countY; j++) { + for (size_t i = 0; i < countX; i++) { + size_t index = countX*j + i; + if (i % (countX/BLOCK_SIDE_LEN) + || j % (countY/BLOCK_SIDE_LEN)) { + (*dataArr)[index] = dummyVal; + } else { +#ifdef FL_PT_DATA + (*dataArr)[index] = nextSignedRandFlPt(); +#else + (*dataArr)[index] = nextSignedRandInt(); +#endif + } + } + } + +#elif DIMS == 3 + size_t countX = BLOCK_SIDE_LEN * SX; + size_t countY = SY / SX; + size_t countZ = SZ / SY; + *dataArr = (SCALAR*)malloc(sizeof(SCALAR) * countX * countY * countZ); + ASSERT_TRUE(*dataArr != nullptr); + + for (size_t k = 0; k < countZ; k++) { + for (size_t j = 0; j < countY; j++) { + for (size_t i = 0; i < countX; i++) { + size_t index = countX*countY*k + countX*j + i; + if (i % (countX/BLOCK_SIDE_LEN) + || j % (countY/BLOCK_SIDE_LEN) + || k % (countZ/BLOCK_SIDE_LEN)) { + (*dataArr)[index] = dummyVal; + } else { +#ifdef FL_PT_DATA + (*dataArr)[index] = nextSignedRandFlPt(); +#else + (*dataArr)[index] = nextSignedRandInt(); +#endif + } + } + } + } + +#elif DIMS == 4 + 
size_t countX = BLOCK_SIDE_LEN * SX; + size_t countY = SY / SX; + size_t countZ = SZ / SY; + size_t countW = SW / SZ; + *dataArr = (SCALAR*)malloc(sizeof(SCALAR) * countX * countY * countZ * countW); + ASSERT_TRUE(*dataArr != nullptr); + + for (size_t l = 0; l < countW; l++) { + for (size_t k = 0; k < countZ; k++) { + for (size_t j = 0; j < countY; j++) { + for (size_t i = 0; i < countX; i++) { + size_t index = countX*countY*countZ*l + countX*countY*k + countX*j + i; + if (i % (countX/BLOCK_SIDE_LEN) + || j % (countY/BLOCK_SIDE_LEN) + || k % (countZ/BLOCK_SIDE_LEN) + || l % (countW/BLOCK_SIDE_LEN)) { + (*dataArr)[index] = dummyVal; + } else { +#ifdef FL_PT_DATA + (*dataArr)[index] = nextSignedRandFlPt(); +#else + (*dataArr)[index] = nextSignedRandInt(); +#endif + } + } + } + } + } +#endif +} + +void assertStridedBlockEntriesEqual(SCALAR* data1, SCALAR* data2) +{ +#if DIMS == 1 + size_t countX = BLOCK_SIDE_LEN * SX; + + for (size_t i = 0; i < countX; i++) { + if (!(i % (countX/BLOCK_SIDE_LEN))) { + ASSERT_SCALAR_EQ(data1[i], data2[i]) << + "index " << i << " mismatch: " << data1[i] << " != " << data2[i]; + } + } + +#elif DIMS == 2 + size_t countX = BLOCK_SIDE_LEN * SX; + size_t countY = SY / SX; + + for (size_t j = 0; j < countY; j++) { + for (size_t i = 0; i < countX; i++) { + if (!(i % (countX/BLOCK_SIDE_LEN)) + && !(j % (countY/BLOCK_SIDE_LEN))) { + ASSERT_SCALAR_EQ(data1[countX*j + i], data2[countX*j + i]) << + "index " << (countX*j + i) << " mismatch: " << data1[countX*j + i] << " != " << data2[countX*j + i]; + } + } + } + +#elif DIMS == 3 + size_t countX = BLOCK_SIDE_LEN * SX; + size_t countY = SY / SX; + size_t countZ = SZ / SY; + + for (size_t k = 0; k < countZ; k++) { + for (size_t j = 0; j < countY; j++) { + for (size_t i = 0; i < countX; i++) { + if (!(i % (countX/BLOCK_SIDE_LEN)) + && !(j % (countY/BLOCK_SIDE_LEN)) + && !(k % (countZ/BLOCK_SIDE_LEN))) { + ASSERT_SCALAR_EQ(data1[countX*countY*k + countX*j + i], data2[countX*countY*k + countX*j + i]) << + 
"index " << (countX*countY*k + countX*j + i) << " mismatch: " << + data1[countX*countY*k + countX*j + i] << " != " << + data2[countX*countY*k + countX*j + i]; + } + } + } + } + +#elif DIMS == 4 + size_t countX = BLOCK_SIDE_LEN * SX; + size_t countY = SY / SX; + size_t countZ = SZ / SY; + size_t countW = SW / SZ; + + for (size_t l = 0; l < countW; l++) { + for (size_t k = 0; k < countZ; k++) { + for (size_t j = 0; j < countY; j++) { + for (size_t i = 0; i < countX; i++) { + if (!(i % (countX/BLOCK_SIDE_LEN)) + && !(j % (countY/BLOCK_SIDE_LEN)) + && !(k % (countZ/BLOCK_SIDE_LEN)) + && !(l % (countW/BLOCK_SIDE_LEN))) { + ASSERT_SCALAR_EQ(data1[countX*countY*countZ*l + countX*countY*k + countX*j + i], data2[countX*countY*countZ*l + countX*countY*k + countX*j + i]) << + "index " << (countX*countY*countZ*l + countX*countY*k + countX*j + i) << " mismatch: " << + data1[countX*countY*countZ*l + countX*countY*k + countX*j + i] << " != " << + data2[countX*countY*countZ*l + countX*countY*k + countX*j + i]; + } + } + } + } + } +#endif +} + +void assertPartialBlockEntriesEqual(SCALAR* data1, SCALAR* data2) +{ +#if DIMS == 1 + size_t countX = BLOCK_SIDE_LEN * SX; + + for (size_t i = 0; i < countX; i++) { + if (i/(countX/BLOCK_SIDE_LEN) < PX + && !(i % (countX/BLOCK_SIDE_LEN))) { + ASSERT_SCALAR_EQ(data1[i], data2[i]) << + "index " << i << " mismatch: " << data1[i] << " != " << data2[i]; + } + } + +#elif DIMS == 2 + size_t countX = BLOCK_SIDE_LEN * SX; + size_t countY = SY / SX; + + for (size_t j = 0; j < countY; j++) { + for (size_t i = 0; i < countX; i++) { + if (i/(countX/BLOCK_SIDE_LEN) < PX + && j/(countY/BLOCK_SIDE_LEN) < PY + && !(i % (countX/BLOCK_SIDE_LEN)) + && !(j % (countY/BLOCK_SIDE_LEN))) { + ASSERT_SCALAR_EQ(data1[countX*j + i], data2[countX*j + i]) << + "index " << (countX*j + i) << " mismatch: " << data1[countX*j + i] << " != " << data2[countX*j + i]; + } + } + } + +#elif DIMS == 3 + size_t countX = BLOCK_SIDE_LEN * SX; + size_t countY = SY / SX; + size_t countZ = 
SZ / SY; + + for (size_t k = 0; k < countZ; k++) { + for (size_t j = 0; j < countY; j++) { + for (size_t i = 0; i < countX; i++) { + if (i/(countX/BLOCK_SIDE_LEN) < PX + && j/(countY/BLOCK_SIDE_LEN) < PY + && k/(countZ/BLOCK_SIDE_LEN) < PZ + && !(i % (countX/BLOCK_SIDE_LEN)) + && !(j % (countY/BLOCK_SIDE_LEN)) + && !(k % (countZ/BLOCK_SIDE_LEN))) { + ASSERT_SCALAR_EQ(data1[countX*countY*k + countX*j + i], data2[countX*countY*k + countX*j + i]) << + "index " << (countX*countY*k + countX*j + i) << " mismatch: " << + data1[countX*countY*k + countX*j + i] << " != " << + data2[countX*countY*k + countX*j + i]; + } + } + } + } + +#elif DIMS == 4 + size_t countX = BLOCK_SIDE_LEN * SX; + size_t countY = SY / SX; + size_t countZ = SZ / SY; + size_t countW = SW / SZ; + + for (size_t l = 0; l < countW; l++) { + for (size_t k = 0; k < countZ; k++) { + for (size_t j = 0; j < countY; j++) { + for (size_t i = 0; i < countX; i++) { + if (i/(countX/BLOCK_SIDE_LEN) < PX + && j/(countY/BLOCK_SIDE_LEN) < PY + && k/(countZ/BLOCK_SIDE_LEN) < PZ + && l/(countW/BLOCK_SIDE_LEN) < PW + && !(i % (countX/BLOCK_SIDE_LEN)) + && !(j % (countY/BLOCK_SIDE_LEN)) + && !(k % (countZ/BLOCK_SIDE_LEN)) + && !(l % (countW/BLOCK_SIDE_LEN))) { + ASSERT_SCALAR_EQ(data1[countX*countY*countZ*l + countX*countY*k + countX*j + i], data2[countX*countY*countZ*l + countX*countY*k + countX*j + i]) << + "index " << (countX*countY*countZ*l + countX*countY*k + countX*j + i) << " mismatch: " << + data1[countX*countY*countZ*l + countX*countY*k + countX*j + i] << " != " << + data2[countX*countY*countZ*l + countX*countY*k + countX*j + i]; + } + } + } + } + } +#endif +} + +void setupStream(zfp_field** field, zfp_stream** stream, bool isStrided = false) +{ + *stream = zfp_stream_open(NULL); + //zfp_stream_set_rate(*stream, ZFP_RATE_PARAM_BITS, ZFP_TYPE, DIMS, zfp_false); + zfp_stream_set_accuracy(*stream, 0); + + size_t bufsizeBytes = zfp_stream_maximum_size(*stream, *field); + char* buffer = (char*)calloc(bufsizeBytes, 
sizeof(char)); + ASSERT_TRUE(buffer != nullptr); + + bitstream* s = stream_open(buffer, bufsizeBytes); + ASSERT_TRUE(s != nullptr); + + if (isStrided) + { + switch (DIMS) + { + case 1: + { + zfp_field_set_stride_1d(*field, SX); + break; + } + case 2: + { + zfp_field_set_stride_2d(*field, SX, SY); + break; + } + case 3: + { + zfp_field_set_stride_3d(*field, SX, SY, SZ); + break; + } + case 4: + { + zfp_field_set_stride_4d(*field, SX, SY, SZ, SW); + break; + } + } + } + + zfp_stream_set_bit_stream(*stream, s); +} + +TEST(TemplatedDecodeTests, given_TemplatedDecodeBlock_resultsMatchNonTemplated) +{ + SCALAR* dataArr; + populateArray(&dataArr); + + zfp_field* field = ZFP_FIELD_FUNC(dataArr, ZFP_TYPE, _repeat_arg(BLOCK_SIDE_LEN, DIMS)); + + zfp_stream* stream; + setupStream(&field, &stream); + ZFP_ENCODE_BLOCK_FUNC(stream, dataArr); + zfp_stream_flush(stream); + zfp_stream_rewind(stream); + + zfp_stream* tstream; + setupStream(&field, &tstream); + encode_block(tstream, dataArr); + zfp_stream_flush(tstream); + zfp_stream_rewind(tstream); + + SCALAR* data1 = new SCALAR[BLOCK_SIZE]; + size_t sz = ZFP_DECODE_BLOCK_FUNC(stream, data1); + + SCALAR* data2 = new SCALAR[BLOCK_SIZE]; + size_t tsz = decode_block(tstream, data2); + + ASSERT_TRUE(sz == tsz); + for (int i = 0; i < BLOCK_SIZE; i++) + ASSERT_SCALAR_EQ(data1[i], data2[i]); + + zfp_field_free(field); + stream_close(zfp_stream_bit_stream(stream)); + stream_close(zfp_stream_bit_stream(tstream)); + zfp_stream_close(stream); + zfp_stream_close(tstream); + + delete[] dataArr; + delete[] data1; + delete[] data2; +} + +TEST(TemplatedDecodeTests, given_TemplatedDecodeBlockStrided_resultsMatchNonTemplated) +{ + size_t countX = 4 * SX; +#if DIMS > 1 + size_t countY = SY / SX; +#endif +#if DIMS > 2 + size_t countZ = SZ / SY; +#endif +#if DIMS == 4 + size_t countW = SW / SZ; +#endif + + SCALAR* dataArr; + populateStridedArray(&dataArr, DUMMY_VAL); + ASSERT_TRUE(dataArr != nullptr); + + zfp_field* field = ZFP_FIELD_FUNC(dataArr, 
ZFP_TYPE, _repeat_arg(BLOCK_SIDE_LEN, DIMS)); + + zfp_stream* stream; + zfp_stream* tstream; + setupStream(&field, &stream, true); + setupStream(&field, &tstream, true); +#if DIMS == 1 + ZFP_ENCODE_BLOCK_STRIDED_FUNC(stream, dataArr, SX); + encode_block_strided(tstream, dataArr, SX); +#elif DIMS == 2 + ZFP_ENCODE_BLOCK_STRIDED_FUNC(stream, dataArr, SX, SY); + encode_block_strided(tstream, dataArr, SX, SY); +#elif DIMS == 3 + ZFP_ENCODE_BLOCK_STRIDED_FUNC(stream, dataArr, SX, SY, SZ); + encode_block_strided(tstream, dataArr, SX, SY, SZ); +#elif DIMS == 4 + ZFP_ENCODE_BLOCK_STRIDED_FUNC(stream, dataArr, SX, SY, SZ, SW); + encode_block_strided(tstream, dataArr, SX, SY, SZ, SW); +#endif + zfp_stream_flush(stream); + zfp_stream_rewind(stream); + + zfp_stream_flush(tstream); + zfp_stream_rewind(tstream); + +#if DIMS == 1 + SCALAR *data1 = (SCALAR*)malloc(sizeof(SCALAR) * countX); + ASSERT_TRUE(data1 != nullptr); + + SCALAR *data2 = (SCALAR*)malloc(sizeof(SCALAR) * countX); + ASSERT_TRUE(data2 != nullptr); + + size_t sz = ZFP_DECODE_BLOCK_STRIDED_FUNC(stream, data1, SX); + size_t tsz = decode_block_strided(tstream, data2, SX); +#elif DIMS == 2 + SCALAR *data1 = (SCALAR*)malloc(sizeof(SCALAR) * countX * countY); + ASSERT_TRUE(data1 != nullptr); + + SCALAR *data2 = (SCALAR*)malloc(sizeof(SCALAR) * countX * countY); + ASSERT_TRUE(data2 != nullptr); + + size_t sz = ZFP_DECODE_BLOCK_STRIDED_FUNC(stream, data1, SX, SY); + size_t tsz = decode_block_strided(tstream, data2, SX, SY); +#elif DIMS == 3 + SCALAR *data1 = (SCALAR*)malloc(sizeof(SCALAR) * countX * countY * countZ); + ASSERT_TRUE(data1 != nullptr); + + SCALAR *data2 = (SCALAR*)malloc(sizeof(SCALAR) * countX * countY * countZ); + ASSERT_TRUE(data2 != nullptr); + + size_t sz = ZFP_DECODE_BLOCK_STRIDED_FUNC(stream, data1, SX, SY, SZ); + size_t tsz = decode_block_strided(tstream, data2, SX, SY, SZ); +#elif DIMS == 4 + SCALAR *data1 = (SCALAR*)malloc(sizeof(SCALAR) * countX * countY * countZ * countW); + ASSERT_TRUE(data1 != 
nullptr); + + SCALAR *data2 = (SCALAR*)malloc(sizeof(SCALAR) * countX * countY * countZ * countW); + ASSERT_TRUE(data2 != nullptr); + + size_t sz = ZFP_DECODE_BLOCK_STRIDED_FUNC(stream, data1, SX, SY, SZ, SW); + size_t tsz = decode_block_strided(tstream, data2, SX, SY, SZ, SW); +#endif + + ASSERT_TRUE(sz == tsz); + assertStridedBlockEntriesEqual(data1, data2); + + zfp_field_free(field); + stream_close(zfp_stream_bit_stream(stream)); + stream_close(zfp_stream_bit_stream(tstream)); + zfp_stream_close(stream); + zfp_stream_close(tstream); + + free(dataArr); + free(data1); + free(data2); +} + +TEST(TemplatedDecodeTests, given_TemplatedDecodePartialBlockStrided_resultsMatchNonTemplated) +{ + size_t countX = 4 * SX; +#if DIMS > 1 + size_t countY = SY / SX; +#endif +#if DIMS > 2 + size_t countZ = SZ / SY; +#endif +#if DIMS == 4 + size_t countW = SW / SZ; +#endif + + SCALAR* dataArr; + populateStridedArray(&dataArr, DUMMY_VAL); + ASSERT_TRUE(dataArr != nullptr); + + zfp_field* field = ZFP_FIELD_FUNC(dataArr, ZFP_TYPE, _repeat_arg(BLOCK_SIDE_LEN, DIMS)); + + zfp_stream* stream; + zfp_stream* tstream; + setupStream(&field, &stream, true); + setupStream(&field, &tstream, true); +#if DIMS == 1 + size_t sz = ZFP_ENCODE_PARTIAL_BLOCK_STRIDED_FUNC(stream, dataArr, PX, SX); + size_t tsz = encode_partial_block_strided(tstream, dataArr, PX, SX); +#elif DIMS == 2 + size_t sz = ZFP_ENCODE_PARTIAL_BLOCK_STRIDED_FUNC(stream, dataArr, PX, PY, SX, SY); + size_t tsz = encode_partial_block_strided(tstream, dataArr, PX, PY, SX, SY); +#elif DIMS == 3 + size_t sz = ZFP_ENCODE_PARTIAL_BLOCK_STRIDED_FUNC(stream, dataArr, PX, PY, PZ, SX, SY, SZ); + size_t tsz = encode_partial_block_strided(tstream, dataArr, PX, PY, PZ, SX, SY, SZ); +#elif DIMS == 4 + size_t sz = ZFP_ENCODE_PARTIAL_BLOCK_STRIDED_FUNC(stream, dataArr, PX, PY, PZ, PW, SX, SY, SZ, SW); + size_t tsz = encode_partial_block_strided(tstream, dataArr, PX, PY, PZ, PW, SX, SY, SZ, SW); +#endif + zfp_stream_flush(stream); + 
zfp_stream_rewind(stream); + + zfp_stream_flush(tstream); + zfp_stream_rewind(tstream); + +#if DIMS == 1 + SCALAR *data1 = (SCALAR*)malloc(sizeof(SCALAR) * countX); + ASSERT_TRUE(data1 != nullptr); + + SCALAR *data2 = (SCALAR*)malloc(sizeof(SCALAR) * countX); + ASSERT_TRUE(data2 != nullptr); + + size_t d_sz = ZFP_DECODE_PARTIAL_BLOCK_STRIDED_FUNC(stream, data1, PX, SX); + size_t d_tsz = decode_partial_block_strided(tstream, data2, PX, SX); +#elif DIMS == 2 + SCALAR *data1 = (SCALAR*)malloc(sizeof(SCALAR) * countX * countY); + ASSERT_TRUE(data1 != nullptr); + + SCALAR *data2 = (SCALAR*)malloc(sizeof(SCALAR) * countX * countY); + ASSERT_TRUE(data2 != nullptr); + + size_t d_sz = ZFP_DECODE_PARTIAL_BLOCK_STRIDED_FUNC(stream, data1, PX, PY, SX, SY); + size_t d_tsz = decode_partial_block_strided(tstream, data2, PX, PY, SX, SY); +#elif DIMS == 3 + SCALAR *data1 = (SCALAR*)malloc(sizeof(SCALAR) * countX * countY * countZ); + ASSERT_TRUE(data1 != nullptr); + + SCALAR *data2 = (SCALAR*)malloc(sizeof(SCALAR) * countX * countY * countZ); + ASSERT_TRUE(data2 != nullptr); + + size_t d_sz = ZFP_DECODE_PARTIAL_BLOCK_STRIDED_FUNC(stream, data1, PX, PY, PZ, SX, SY, SZ); + size_t d_tsz = decode_partial_block_strided(tstream, data2, PX, PY, PZ, SX, SY, SZ); +#elif DIMS == 4 + SCALAR *data1 = (SCALAR*)malloc(sizeof(SCALAR) * countX * countY * countZ * countW); + ASSERT_TRUE(data1 != nullptr); + + SCALAR *data2 = (SCALAR*)malloc(sizeof(SCALAR) * countX * countY * countZ * countW); + ASSERT_TRUE(data2 != nullptr); + + size_t d_sz = ZFP_DECODE_PARTIAL_BLOCK_STRIDED_FUNC(stream, data1, PX, PY, PZ, PW, SX, SY, SZ, SW); + size_t d_tsz = decode_partial_block_strided(tstream, data2, PX, PY, PZ, PW, SX, SY, SZ, SW); +#endif + + ASSERT_TRUE(d_sz == d_tsz); + assertPartialBlockEntriesEqual(data1, data2); + + zfp_field_free(field); + stream_close(zfp_stream_bit_stream(stream)); + stream_close(zfp_stream_bit_stream(tstream)); + zfp_stream_close(stream); + zfp_stream_close(tstream); + + 
free(dataArr); + free(data1); + free(data2); +} diff --git a/tests/array/encode/CMakeLists.txt b/tests/array/encode/CMakeLists.txt new file mode 100644 index 00000000..a4cda488 --- /dev/null +++ b/tests/array/encode/CMakeLists.txt @@ -0,0 +1,29 @@ +function(zfp_add_cpp_tests dims type bits) + + # test templated block encoding + set(test_name testTemplatedEncode${dims}${type}) + add_executable(${test_name} ${test_name}.cpp) + target_link_libraries(${test_name} + gtest gtest_main zfp rand${bits}Lib) + target_compile_definitions(${test_name} PRIVATE ${zfp_compressed_array_defs}) + add_test(NAME ${test_name} COMMAND ${test_name}) + +endfunction() + +zfp_add_cpp_tests(1 f 32) +zfp_add_cpp_tests(2 f 32) +zfp_add_cpp_tests(3 f 32) +zfp_add_cpp_tests(4 f 32) +zfp_add_cpp_tests(1 d 64) +zfp_add_cpp_tests(2 d 64) +zfp_add_cpp_tests(3 d 64) +zfp_add_cpp_tests(4 d 64) + +#zfp_add_cpp_tests(1 Int32 32) +#zfp_add_cpp_tests(2 Int32 32) +#zfp_add_cpp_tests(3 Int32 32) +#zfp_add_cpp_tests(4 Int32 32) +#zfp_add_cpp_tests(1 Int64 64) +#zfp_add_cpp_tests(2 Int64 64) +#zfp_add_cpp_tests(3 Int64 64) +#zfp_add_cpp_tests(4 Int64 64) diff --git a/tests/array/encode/testTemplatedEncode1d.cpp b/tests/array/encode/testTemplatedEncode1d.cpp new file mode 100644 index 00000000..e8e1a040 --- /dev/null +++ b/tests/array/encode/testTemplatedEncode1d.cpp @@ -0,0 +1,17 @@ +#include "zfp.hpp" +using namespace zfp; + +extern "C" { + #include "constants/1dDouble.h" + #include "utils/rand64.h" +} + +#define ZFP_FIELD_FUNC zfp_field_1d +#define ZFP_ENCODE_BLOCK_FUNC zfp_encode_block_double_1 +#define ZFP_ENCODE_BLOCK_STRIDED_FUNC zfp_encode_block_strided_double_1 +#define ZFP_ENCODE_PARTIAL_BLOCK_STRIDED_FUNC zfp_encode_partial_block_strided_double_1 + +#define SCALAR double +#define DIMS 1 + +#include "testTemplatedEncodeBase.cpp" diff --git a/tests/array/encode/testTemplatedEncode1f.cpp b/tests/array/encode/testTemplatedEncode1f.cpp new file mode 100644 index 00000000..f30dcdba --- /dev/null +++ 
b/tests/array/encode/testTemplatedEncode1f.cpp @@ -0,0 +1,17 @@ +#include "zfp.hpp" +using namespace zfp; + +extern "C" { + #include "constants/1dFloat.h" + #include "utils/rand32.h" +} + +#define ZFP_FIELD_FUNC zfp_field_1d +#define ZFP_ENCODE_BLOCK_FUNC zfp_encode_block_float_1 +#define ZFP_ENCODE_BLOCK_STRIDED_FUNC zfp_encode_block_strided_float_1 +#define ZFP_ENCODE_PARTIAL_BLOCK_STRIDED_FUNC zfp_encode_partial_block_strided_float_1 + +#define SCALAR float +#define DIMS 1 + +#include "testTemplatedEncodeBase.cpp" diff --git a/tests/array/encode/testTemplatedEncode2d.cpp b/tests/array/encode/testTemplatedEncode2d.cpp new file mode 100644 index 00000000..c853ea3d --- /dev/null +++ b/tests/array/encode/testTemplatedEncode2d.cpp @@ -0,0 +1,17 @@ +#include "zfp.hpp" +using namespace zfp; + +extern "C" { + #include "constants/2dDouble.h" + #include "utils/rand64.h" +} + +#define ZFP_FIELD_FUNC zfp_field_2d +#define ZFP_ENCODE_BLOCK_FUNC zfp_encode_block_double_2 +#define ZFP_ENCODE_BLOCK_STRIDED_FUNC zfp_encode_block_strided_double_2 +#define ZFP_ENCODE_PARTIAL_BLOCK_STRIDED_FUNC zfp_encode_partial_block_strided_double_2 + +#define SCALAR double +#define DIMS 2 + +#include "testTemplatedEncodeBase.cpp" diff --git a/tests/array/encode/testTemplatedEncode2f.cpp b/tests/array/encode/testTemplatedEncode2f.cpp new file mode 100644 index 00000000..6e4ea5ba --- /dev/null +++ b/tests/array/encode/testTemplatedEncode2f.cpp @@ -0,0 +1,17 @@ +#include "zfp.hpp" +using namespace zfp; + +extern "C" { + #include "constants/2dFloat.h" + #include "utils/rand32.h" +} + +#define ZFP_FIELD_FUNC zfp_field_2d +#define ZFP_ENCODE_BLOCK_FUNC zfp_encode_block_float_2 +#define ZFP_ENCODE_BLOCK_STRIDED_FUNC zfp_encode_block_strided_float_2 +#define ZFP_ENCODE_PARTIAL_BLOCK_STRIDED_FUNC zfp_encode_partial_block_strided_float_2 + +#define SCALAR float +#define DIMS 2 + +#include "testTemplatedEncodeBase.cpp" diff --git a/tests/array/encode/testTemplatedEncode3d.cpp 
b/tests/array/encode/testTemplatedEncode3d.cpp new file mode 100644 index 00000000..6260e659 --- /dev/null +++ b/tests/array/encode/testTemplatedEncode3d.cpp @@ -0,0 +1,17 @@ +#include "zfp.hpp" +using namespace zfp; + +extern "C" { + #include "constants/3dDouble.h" + #include "utils/rand64.h" +} + +#define ZFP_FIELD_FUNC zfp_field_3d +#define ZFP_ENCODE_BLOCK_FUNC zfp_encode_block_double_3 +#define ZFP_ENCODE_BLOCK_STRIDED_FUNC zfp_encode_block_strided_double_3 +#define ZFP_ENCODE_PARTIAL_BLOCK_STRIDED_FUNC zfp_encode_partial_block_strided_double_3 + +#define SCALAR double +#define DIMS 3 + +#include "testTemplatedEncodeBase.cpp" diff --git a/tests/array/encode/testTemplatedEncode3f.cpp b/tests/array/encode/testTemplatedEncode3f.cpp new file mode 100644 index 00000000..af2079fa --- /dev/null +++ b/tests/array/encode/testTemplatedEncode3f.cpp @@ -0,0 +1,17 @@ +#include "zfp.hpp" +using namespace zfp; + +extern "C" { + #include "constants/3dFloat.h" + #include "utils/rand32.h" +} + +#define ZFP_FIELD_FUNC zfp_field_3d +#define ZFP_ENCODE_BLOCK_FUNC zfp_encode_block_float_3 +#define ZFP_ENCODE_BLOCK_STRIDED_FUNC zfp_encode_block_strided_float_3 +#define ZFP_ENCODE_PARTIAL_BLOCK_STRIDED_FUNC zfp_encode_partial_block_strided_float_3 + +#define SCALAR float +#define DIMS 3 + +#include "testTemplatedEncodeBase.cpp" diff --git a/tests/array/encode/testTemplatedEncode4d.cpp b/tests/array/encode/testTemplatedEncode4d.cpp new file mode 100644 index 00000000..c71a27e4 --- /dev/null +++ b/tests/array/encode/testTemplatedEncode4d.cpp @@ -0,0 +1,17 @@ +#include "zfp.hpp" +using namespace zfp; + +extern "C" { + #include "constants/4dDouble.h" + #include "utils/rand64.h" +} + +#define ZFP_FIELD_FUNC zfp_field_4d +#define ZFP_ENCODE_BLOCK_FUNC zfp_encode_block_double_4 +#define ZFP_ENCODE_BLOCK_STRIDED_FUNC zfp_encode_block_strided_double_4 +#define ZFP_ENCODE_PARTIAL_BLOCK_STRIDED_FUNC zfp_encode_partial_block_strided_double_4 + +#define SCALAR double +#define DIMS 4 + +#include 
"testTemplatedEncodeBase.cpp" diff --git a/tests/array/encode/testTemplatedEncode4f.cpp b/tests/array/encode/testTemplatedEncode4f.cpp new file mode 100644 index 00000000..91202a74 --- /dev/null +++ b/tests/array/encode/testTemplatedEncode4f.cpp @@ -0,0 +1,17 @@ +#include "zfp.hpp" +using namespace zfp; + +extern "C" { + #include "constants/4dFloat.h" + #include "utils/rand32.h" +} + +#define ZFP_FIELD_FUNC zfp_field_4d +#define ZFP_ENCODE_BLOCK_FUNC zfp_encode_block_float_4 +#define ZFP_ENCODE_BLOCK_STRIDED_FUNC zfp_encode_block_strided_float_4 +#define ZFP_ENCODE_PARTIAL_BLOCK_STRIDED_FUNC zfp_encode_partial_block_strided_float_4 + +#define SCALAR float +#define DIMS 4 + +#include "testTemplatedEncodeBase.cpp" diff --git a/tests/array/encode/testTemplatedEncodeBase.cpp b/tests/array/encode/testTemplatedEncodeBase.cpp new file mode 100644 index 00000000..06538cb7 --- /dev/null +++ b/tests/array/encode/testTemplatedEncodeBase.cpp @@ -0,0 +1,302 @@ +extern "C" { + #include "utils/testMacros.h" + #include "utils/zfpChecksums.h" + #include "utils/zfpHash.h" +} + +#include "src/template/codec.h" +#include "gtest/gtest.h" + +#define SX 2 +#define SY (3 * BLOCK_SIDE_LEN*SX) +#define SZ (2 * BLOCK_SIDE_LEN*SY) +#define SW (3 * BLOCK_SIDE_LEN*SZ) +#define PX 1 +#define PY 2 +#define PZ 3 +#define PW 4 +#define DUMMY_VAL 99 + +void populateArray(SCALAR** dataArr) +{ + *dataArr = new SCALAR[BLOCK_SIZE]; + ASSERT_TRUE(*dataArr != nullptr); + + for (int i = 0; i < BLOCK_SIZE; i++) + { +#ifdef FL_PT_DATA + (*dataArr)[i] = nextSignedRandFlPt(); +#else + (*dataArr)[i] = nextSignedRandInt(); +#endif + } +} + +void populateStridedArray(SCALAR** dataArr, SCALAR dummyVal) +{ + size_t i, j, k, l, countX, countY, countZ, countW; + + switch(DIMS) { + case 1: + countX = BLOCK_SIDE_LEN * SX; + *dataArr = new SCALAR[countX]; + ASSERT_TRUE(*dataArr != nullptr); + + for (i = 0; i < countX; i++) { + if (i % SX) { + (*dataArr)[i] = dummyVal; + } else { +#ifdef FL_PT_DATA + (*dataArr)[i] = 
nextSignedRandFlPt(); +#else + (*dataArr)[i] = nextSignedRandInt(); +#endif + } + } + break; + + case 2: + countX = BLOCK_SIDE_LEN * SX; + countY = SY / SX; + *dataArr = new SCALAR[countX * countY]; + ASSERT_TRUE(*dataArr != nullptr); + + for (j = 0; j < countY; j++) { + for (i = 0; i < countX; i++) { + size_t index = countX*j + i; + if (i % (countX/BLOCK_SIDE_LEN) + || j % (countY/BLOCK_SIDE_LEN)) { + (*dataArr)[index] = dummyVal; + } else { +#ifdef FL_PT_DATA + (*dataArr)[index] = nextSignedRandFlPt(); +#else + (*dataArr)[index] = nextSignedRandInt(); +#endif + } + } + } + break; + + case 3: + countX = BLOCK_SIDE_LEN * SX; + countY = SY / SX; + countZ = SZ / SY; + *dataArr = new SCALAR[countX * countY * countZ]; + ASSERT_TRUE(*dataArr != nullptr); + + for (k = 0; k < countZ; k++) { + for (j = 0; j < countY; j++) { + for (i = 0; i < countX; i++) { + size_t index = countX*countY*k + countX*j + i; + if (i % (countX/BLOCK_SIDE_LEN) + || j % (countY/BLOCK_SIDE_LEN) + || k % (countZ/BLOCK_SIDE_LEN)) { + (*dataArr)[index] = dummyVal; + } else { +#ifdef FL_PT_DATA + (*dataArr)[index] = nextSignedRandFlPt(); +#else + (*dataArr)[index] = nextSignedRandInt(); +#endif + } + } + } + } + break; + + case 4: + countX = BLOCK_SIDE_LEN * SX; + countY = SY / SX; + countZ = SZ / SY; + countW = SW / SZ; + *dataArr = new SCALAR[countX * countY * countZ * countW]; + ASSERT_TRUE(*dataArr != nullptr); + + for (l = 0; l < countW; l++) { + for (k = 0; k < countZ; k++) { + for (j = 0; j < countY; j++) { + for (i = 0; i < countX; i++) { + size_t index = countX*countY*countZ*l + countX*countY*k + countX*j + i; + if (i % (countX/BLOCK_SIDE_LEN) + || j % (countY/BLOCK_SIDE_LEN) + || k % (countZ/BLOCK_SIDE_LEN) + || l % (countW/BLOCK_SIDE_LEN)) { + (*dataArr)[index] = dummyVal; + } else { +#ifdef FL_PT_DATA + (*dataArr)[index] = nextSignedRandFlPt(); +#else + (*dataArr)[index] = nextSignedRandInt(); +#endif + } + } + } + } + } + break; + } +} + +void setupStream(zfp_field** field, zfp_stream** 
stream, bool isStrided = false) +{ + *stream = zfp_stream_open(NULL); + zfp_stream_set_rate(*stream, ZFP_RATE_PARAM_BITS, ZFP_TYPE, DIMS, zfp_false); + + size_t bufsizeBytes = zfp_stream_maximum_size(*stream, *field); + char* buffer = (char*)calloc(bufsizeBytes, sizeof(char)); + ASSERT_TRUE(buffer != nullptr); + + bitstream* s = stream_open(buffer, bufsizeBytes); + ASSERT_TRUE(s != nullptr); + + if (isStrided) + { + switch (DIMS) + { + case 1: + { + zfp_field_set_stride_1d(*field, SX); + break; + } + case 2: + { + zfp_field_set_stride_2d(*field, SX, SY); + break; + } + case 3: + { + zfp_field_set_stride_3d(*field, SX, SY, SZ); + break; + } + case 4: + { + zfp_field_set_stride_4d(*field, SX, SY, SZ, SW); + break; + } + } + } + + zfp_stream_set_bit_stream(*stream, s); +} + +bool streamsEqual(zfp_stream** stream1, zfp_stream** stream2) +{ + bitstream* s1 = zfp_stream_bit_stream(*stream1); + size_t sz1 = stream_size(s1); + char* data1 = (char*)stream_data(s1); + zfp_stream_flush(*stream1); + + bitstream* s2 = zfp_stream_bit_stream(*stream2); + size_t sz2 = stream_size(s2); + char* data2 = (char*)stream_data(s2); + zfp_stream_flush(*stream2); + + for (size_t i = 0; i < sz1; i++) + if (data1[i] != data2[i]) + return false; + return true; +} + +TEST(TemplatedEncodeTests, given_TemplatedEncodeBlock_resultsMatchNonTemplated) +{ + SCALAR* dataArr; + populateArray(&dataArr); + + zfp_field* field = ZFP_FIELD_FUNC(dataArr, ZFP_TYPE, _repeat_arg(BLOCK_SIDE_LEN, DIMS)); + + zfp_stream* stream = zfp_stream_open(NULL); + setupStream(&field, &stream); + size_t sz = ZFP_ENCODE_BLOCK_FUNC(stream, dataArr); + + zfp_stream* tstream = zfp_stream_open(NULL); + setupStream(&field, &tstream); + size_t tsz = encode_block(tstream, dataArr); + + ASSERT_TRUE(sz == tsz); + ASSERT_TRUE(streamsEqual(&stream, &tstream)); + + zfp_field_free(field); + stream_close(zfp_stream_bit_stream(stream)); + stream_close(zfp_stream_bit_stream(tstream)); + zfp_stream_close(stream); + zfp_stream_close(tstream); + 
delete[] dataArr; +} + +TEST(TemplatedEncodeTests, given_TemplatedEncodeBlockStrided_resultsMatchNonTemplated) +{ + SCALAR* dataArr; + populateStridedArray(&dataArr, DUMMY_VAL); + + zfp_field* field = ZFP_FIELD_FUNC(dataArr, ZFP_TYPE, _repeat_arg(BLOCK_SIDE_LEN, DIMS)); + + zfp_stream* stream = zfp_stream_open(NULL); + setupStream(&field, &stream, true); + + zfp_stream* tstream = zfp_stream_open(NULL); + setupStream(&field, &tstream, true); + +#if DIMS == 1 + size_t sz = ZFP_ENCODE_BLOCK_STRIDED_FUNC(stream, dataArr, SX); + size_t tsz = encode_block_strided(tstream, dataArr, SX); +#elif DIMS == 2 + size_t sz = ZFP_ENCODE_BLOCK_STRIDED_FUNC(stream, dataArr, SX, SY); + size_t tsz = encode_block_strided(tstream, dataArr, SX, SY); +#elif DIMS == 3 + size_t sz = ZFP_ENCODE_BLOCK_STRIDED_FUNC(stream, dataArr, SX, SY, SZ); + size_t tsz = encode_block_strided(tstream, dataArr, SX, SY, SZ); +#elif DIMS == 4 + size_t sz = ZFP_ENCODE_BLOCK_STRIDED_FUNC(stream, dataArr, SX, SY, SZ, SW); + size_t tsz = encode_block_strided(tstream, dataArr, SX, SY, SZ, SW); +#endif + + ASSERT_TRUE(sz == tsz); + ASSERT_TRUE(streamsEqual(&stream, &tstream)); + + zfp_field_free(field); + stream_close(zfp_stream_bit_stream(stream)); + stream_close(zfp_stream_bit_stream(tstream)); + zfp_stream_close(stream); + zfp_stream_close(tstream); + delete[] dataArr; +} + +TEST(TemplatedEncodeTests, given_TemplatedEncodePartialBlockStrided_resultsMatchNonTemplated) +{ + SCALAR* dataArr; + populateStridedArray(&dataArr, DUMMY_VAL); + + zfp_field* field = ZFP_FIELD_FUNC(dataArr, ZFP_TYPE, _repeat_arg(BLOCK_SIDE_LEN, DIMS)); + + zfp_stream* stream = zfp_stream_open(NULL); + setupStream(&field, &stream, true); + + zfp_stream* tstream = zfp_stream_open(NULL); + setupStream(&field, &tstream, true); + +#if DIMS == 1 + size_t sz = ZFP_ENCODE_PARTIAL_BLOCK_STRIDED_FUNC(stream, dataArr, PX, SX); + size_t tsz = encode_partial_block_strided(tstream, dataArr, PX, SX); +#elif DIMS == 2 + size_t sz = 
ZFP_ENCODE_PARTIAL_BLOCK_STRIDED_FUNC(stream, dataArr, PX, PY, SX, SY); + size_t tsz = encode_partial_block_strided(tstream, dataArr, PX, PY, SX, SY); +#elif DIMS == 3 + size_t sz = ZFP_ENCODE_PARTIAL_BLOCK_STRIDED_FUNC(stream, dataArr, PX, PY, PZ, SX, SY, SZ); + size_t tsz = encode_partial_block_strided(tstream, dataArr, PX, PY, PZ, SX, SY, SZ); +#elif DIMS == 4 + size_t sz = ZFP_ENCODE_PARTIAL_BLOCK_STRIDED_FUNC(stream, dataArr, PX, PY, PZ, PW, SX, SY, SZ, SW); + size_t tsz = encode_partial_block_strided(tstream, dataArr, PX, PY, PZ, PW, SX, SY, SZ, SW); +#endif + + ASSERT_TRUE(sz == tsz); + ASSERT_TRUE(streamsEqual(&stream, &tstream)); + + zfp_field_free(field); + stream_close(zfp_stream_bit_stream(stream)); + stream_close(zfp_stream_bit_stream(tstream)); + zfp_stream_close(stream); + zfp_stream_close(tstream); + delete[] dataArr; + +} diff --git a/tests/array/utils/commonMacros.h b/tests/array/utils/commonMacros.h new file mode 100644 index 00000000..9d2627bd --- /dev/null +++ b/tests/array/utils/commonMacros.h @@ -0,0 +1,6 @@ +#include "zfp.h" + +#define DIV_ROUND_UP(x, y) (((x) + (y) - 1) / (y)) +#define BITS_TO_BYTES(x) DIV_ROUND_UP(x, CHAR_BIT) + +#define ZFP_HEADER_SIZE_BITS (ZFP_MAGIC_BITS + ZFP_META_BITS + ZFP_MODE_SHORT_BITS) diff --git a/tests/array/utils/gtest1dTest.h b/tests/array/utils/gtest1dTest.h new file mode 100644 index 00000000..36f2ae1f --- /dev/null +++ b/tests/array/utils/gtest1dTest.h @@ -0,0 +1,47 @@ +#include "gtest/gtest.h" + +extern "C" { + #include "constants/universalConsts.h" +} + +#define SCALAR double + +const size_t ARRAY_SIZE = 11; + +class Array1dTest : public ::testing::Test { +public: + size_t IterAbsOffset(array1d::iterator iter) { + return iter.i(); + } + size_t IterAbsOffset(array1d::const_iterator citer) { + return citer.i(); + } + +protected: + virtual void SetUp() { + arr.resize(ARRAY_SIZE, true); + arr2.resize(ARRAY_SIZE, true); + + arr.set_rate(ZFP_RATE_PARAM_BITS); + arr2.set_rate(ZFP_RATE_PARAM_BITS); + + offset = 
5; + viewLen = 3; + EXPECT_LT(offset + viewLen, arr.size_x()); + } + + static array1d arr, arr2; + static array1d::pointer ptr, ptr2; + static array1d::const_pointer cptr, cptr2; + static array1d::iterator iter, iter2; + static array1d::const_iterator citer, citer2; + static size_t offset, viewLen; +}; + +array1d Array1dTest::arr(ARRAY_SIZE, ZFP_RATE_PARAM_BITS); +array1d Array1dTest::arr2(ARRAY_SIZE, ZFP_RATE_PARAM_BITS); +array1d::pointer Array1dTest::ptr, Array1dTest::ptr2; +array1d::const_pointer Array1dTest::cptr, Array1dTest::cptr2; +array1d::iterator Array1dTest::iter, Array1dTest::iter2; +array1d::const_iterator Array1dTest::citer, Array1dTest::citer2; +size_t Array1dTest::offset, Array1dTest::viewLen; diff --git a/tests/array/utils/gtest1fTest.h b/tests/array/utils/gtest1fTest.h new file mode 100644 index 00000000..f4e117aa --- /dev/null +++ b/tests/array/utils/gtest1fTest.h @@ -0,0 +1,47 @@ +#include "gtest/gtest.h" + +extern "C" { + #include "constants/universalConsts.h" +} + +#define SCALAR float + +const size_t ARRAY_SIZE = 11; + +class Array1fTest : public ::testing::Test { +public: + size_t IterAbsOffset(array1f::iterator iter) { + return iter.i(); + } + size_t IterAbsOffset(array1f::const_iterator citer) { + return citer.i(); + } + +protected: + virtual void SetUp() { + arr.resize(ARRAY_SIZE, true); + arr2.resize(ARRAY_SIZE, true); + + arr.set_rate(ZFP_RATE_PARAM_BITS); + arr2.set_rate(ZFP_RATE_PARAM_BITS); + + offset = 5; + viewLen = 3; + EXPECT_LT(offset + viewLen, arr.size_x()); + } + + static array1f arr, arr2; + static array1f::pointer ptr, ptr2; + static array1f::const_pointer cptr, cptr2; + static array1f::iterator iter, iter2; + static array1f::const_iterator citer, citer2; + static size_t offset, viewLen; +}; + +array1f Array1fTest::arr(ARRAY_SIZE, ZFP_RATE_PARAM_BITS); +array1f Array1fTest::arr2(ARRAY_SIZE, ZFP_RATE_PARAM_BITS); +array1f::pointer Array1fTest::ptr, Array1fTest::ptr2; +array1f::const_pointer Array1fTest::cptr, 
Array1fTest::cptr2; +array1f::iterator Array1fTest::iter, Array1fTest::iter2; +array1f::const_iterator Array1fTest::citer, Array1fTest::citer2; +size_t Array1fTest::offset, Array1fTest::viewLen; diff --git a/tests/array/utils/gtest2dTest.h b/tests/array/utils/gtest2dTest.h new file mode 100644 index 00000000..15e1cf2e --- /dev/null +++ b/tests/array/utils/gtest2dTest.h @@ -0,0 +1,52 @@ +#include "gtest/gtest.h" + +extern "C" { + #include "constants/universalConsts.h" +} + +#define SCALAR double + +const size_t ARRAY_SIZE_X = 11; +const size_t ARRAY_SIZE_Y = 5; + +class Array2dTest : public ::testing::Test { +public: + size_t IterAbsOffset(array2d::iterator iter) { + return iter.i() + ARRAY_SIZE_X * iter.j(); + } + size_t IterAbsOffset(array2d::const_iterator citer) { + return citer.i() + ARRAY_SIZE_X * citer.j(); + } + +protected: + virtual void SetUp() { + arr.resize(ARRAY_SIZE_X, ARRAY_SIZE_Y, true); + arr2.resize(ARRAY_SIZE_X, ARRAY_SIZE_Y, true); + + arr.set_rate(ZFP_RATE_PARAM_BITS); + arr2.set_rate(ZFP_RATE_PARAM_BITS); + + offsetX = 5; + viewLenX = 3; + EXPECT_LT(offsetX + viewLenX, arr.size_x()); + + offsetY = 1; + viewLenY = 3; + EXPECT_LT(offsetY + viewLenY, arr.size_y()); + } + + static array2d arr, arr2; + static array2d::pointer ptr, ptr2; + static array2d::const_pointer cptr, cptr2; + static array2d::iterator iter, iter2; + static array2d::const_iterator citer, citer2; + static size_t offsetX, offsetY, viewLenX, viewLenY; +}; + +array2d Array2dTest::arr(ARRAY_SIZE_X, ARRAY_SIZE_Y, ZFP_RATE_PARAM_BITS); +array2d Array2dTest::arr2(ARRAY_SIZE_X, ARRAY_SIZE_Y, ZFP_RATE_PARAM_BITS); +array2d::pointer Array2dTest::ptr, Array2dTest::ptr2; +array2d::const_pointer Array2dTest::cptr, Array2dTest::cptr2; +array2d::iterator Array2dTest::iter, Array2dTest::iter2; +array2d::const_iterator Array2dTest::citer, Array2dTest::citer2; +size_t Array2dTest::offsetX, Array2dTest::offsetY, Array2dTest::viewLenX, Array2dTest::viewLenY; diff --git 
a/tests/array/utils/gtest2fTest.h b/tests/array/utils/gtest2fTest.h new file mode 100644 index 00000000..bf73e68e --- /dev/null +++ b/tests/array/utils/gtest2fTest.h @@ -0,0 +1,52 @@ +#include "gtest/gtest.h" + +extern "C" { + #include "constants/universalConsts.h" +} + +#define SCALAR float + +const size_t ARRAY_SIZE_X = 11; +const size_t ARRAY_SIZE_Y = 5; + +class Array2fTest : public ::testing::Test { +public: + size_t IterAbsOffset(array2f::iterator iter) { + return iter.i() + ARRAY_SIZE_X * iter.j(); + } + size_t IterAbsOffset(array2f::const_iterator citer) { + return citer.i() + ARRAY_SIZE_X * citer.j(); + } + +protected: + virtual void SetUp() { + arr.resize(ARRAY_SIZE_X, ARRAY_SIZE_Y, true); + arr2.resize(ARRAY_SIZE_X, ARRAY_SIZE_Y, true); + + arr.set_rate(ZFP_RATE_PARAM_BITS); + arr2.set_rate(ZFP_RATE_PARAM_BITS); + + offsetX = 5; + viewLenX = 3; + EXPECT_LT(offsetX + viewLenX, arr.size_x()); + + offsetY = 1; + viewLenY = 3; + EXPECT_LT(offsetY + viewLenY, arr.size_y()); + } + + static array2f arr, arr2; + static array2f::pointer ptr, ptr2; + static array2f::const_pointer cptr, cptr2; + static array2f::iterator iter, iter2; + static array2f::const_iterator citer, citer2; + static size_t offsetX, offsetY, viewLenX, viewLenY; +}; + +array2f Array2fTest::arr(ARRAY_SIZE_X, ARRAY_SIZE_Y, ZFP_RATE_PARAM_BITS); +array2f Array2fTest::arr2(ARRAY_SIZE_X, ARRAY_SIZE_Y, ZFP_RATE_PARAM_BITS); +array2f::pointer Array2fTest::ptr, Array2fTest::ptr2; +array2f::const_pointer Array2fTest::cptr, Array2fTest::cptr2; +array2f::iterator Array2fTest::iter, Array2fTest::iter2; +array2f::const_iterator Array2fTest::citer, Array2fTest::citer2; +size_t Array2fTest::offsetX, Array2fTest::offsetY, Array2fTest::viewLenX, Array2fTest::viewLenY; diff --git a/tests/array/utils/gtest3dTest.h b/tests/array/utils/gtest3dTest.h new file mode 100644 index 00000000..8285f9c9 --- /dev/null +++ b/tests/array/utils/gtest3dTest.h @@ -0,0 +1,59 @@ +#include "gtest/gtest.h" + +extern "C" { + #include 
"constants/universalConsts.h" +} + +#define SCALAR double + +const size_t ARRAY_SIZE_X = 11; +const size_t ARRAY_SIZE_Y = 18; +const size_t ARRAY_SIZE_Z = 5; + +class Array3dTest : public ::testing::Test { +public: + size_t IterAbsOffset(array3d::iterator iter) { + return iter.i() + ARRAY_SIZE_X * iter.j() + ARRAY_SIZE_X * ARRAY_SIZE_Y * iter.k(); + } + size_t IterAbsOffset(array3d::const_iterator citer) { + return citer.i() + ARRAY_SIZE_X * citer.j() + ARRAY_SIZE_X * ARRAY_SIZE_Y * citer.k(); + } + +protected: + virtual void SetUp() { + arr.resize(ARRAY_SIZE_X, ARRAY_SIZE_Y, ARRAY_SIZE_Z, true); + arr2.resize(ARRAY_SIZE_X, ARRAY_SIZE_Y, ARRAY_SIZE_Z, true); + + arr.set_rate(ZFP_RATE_PARAM_BITS); + arr2.set_rate(ZFP_RATE_PARAM_BITS); + + offsetX = 5; + viewLenX = 3; + EXPECT_LT(offsetX + viewLenX, arr.size_x()); + + offsetY = 1; + viewLenY = 3; + EXPECT_LT(offsetY + viewLenY, arr.size_y()); + + offsetZ = 0; + viewLenZ = 2; + EXPECT_LT(offsetZ + viewLenZ, arr.size_z()); + } + + static array3d arr, arr2; + static array3d::pointer ptr, ptr2; + static array3d::const_pointer cptr, cptr2; + static array3d::iterator iter, iter2; + static array3d::const_iterator citer, citer2; + static size_t offsetX, offsetY, offsetZ; + static size_t viewLenX, viewLenY, viewLenZ; +}; + +array3d Array3dTest::arr(ARRAY_SIZE_X, ARRAY_SIZE_Y, ARRAY_SIZE_Z, ZFP_RATE_PARAM_BITS); +array3d Array3dTest::arr2(ARRAY_SIZE_X, ARRAY_SIZE_Y, ARRAY_SIZE_Z, ZFP_RATE_PARAM_BITS); +array3d::pointer Array3dTest::ptr, Array3dTest::ptr2; +array3d::const_pointer Array3dTest::cptr, Array3dTest::cptr2; +array3d::iterator Array3dTest::iter, Array3dTest::iter2; +array3d::const_iterator Array3dTest::citer, Array3dTest::citer2; +size_t Array3dTest::offsetX, Array3dTest::offsetY, Array3dTest::offsetZ; +size_t Array3dTest::viewLenX, Array3dTest::viewLenY, Array3dTest::viewLenZ; diff --git a/tests/array/utils/gtest3fTest.h b/tests/array/utils/gtest3fTest.h new file mode 100644 index 00000000..dbc572d7 --- /dev/null +++ 
b/tests/array/utils/gtest3fTest.h @@ -0,0 +1,59 @@ +#include "gtest/gtest.h" + +extern "C" { + #include "constants/universalConsts.h" +} + +#define SCALAR float + +const size_t ARRAY_SIZE_X = 11; +const size_t ARRAY_SIZE_Y = 18; +const size_t ARRAY_SIZE_Z = 5; + +class Array3fTest : public ::testing::Test { +public: + size_t IterAbsOffset(array3f::iterator iter) { + return iter.i() + ARRAY_SIZE_X * iter.j() + ARRAY_SIZE_X * ARRAY_SIZE_Y * iter.k(); + } + size_t IterAbsOffset(array3f::const_iterator citer) { + return citer.i() + ARRAY_SIZE_X * citer.j() + ARRAY_SIZE_X * ARRAY_SIZE_Y * citer.k(); + } + +protected: + virtual void SetUp() { + arr.resize(ARRAY_SIZE_X, ARRAY_SIZE_Y, ARRAY_SIZE_Z, true); + arr2.resize(ARRAY_SIZE_X, ARRAY_SIZE_Y, ARRAY_SIZE_Z, true); + + arr.set_rate(ZFP_RATE_PARAM_BITS); + arr2.set_rate(ZFP_RATE_PARAM_BITS); + + offsetX = 5; + viewLenX = 3; + EXPECT_LT(offsetX + viewLenX, arr.size_x()); + + offsetY = 1; + viewLenY = 3; + EXPECT_LT(offsetY + viewLenY, arr.size_y()); + + offsetZ = 0; + viewLenZ = 2; + EXPECT_LT(offsetZ + viewLenZ, arr.size_z()); + } + + static array3f arr, arr2; + static array3f::pointer ptr, ptr2; + static array3f::const_pointer cptr, cptr2; + static array3f::iterator iter, iter2; + static array3f::const_iterator citer, citer2; + static size_t offsetX, offsetY, offsetZ; + static size_t viewLenX, viewLenY, viewLenZ; +}; + +array3f Array3fTest::arr(ARRAY_SIZE_X, ARRAY_SIZE_Y, ARRAY_SIZE_Z, ZFP_RATE_PARAM_BITS); +array3f Array3fTest::arr2(ARRAY_SIZE_X, ARRAY_SIZE_Y, ARRAY_SIZE_Z, ZFP_RATE_PARAM_BITS); +array3f::pointer Array3fTest::ptr, Array3fTest::ptr2; +array3f::const_pointer Array3fTest::cptr, Array3fTest::cptr2; +array3f::iterator Array3fTest::iter, Array3fTest::iter2; +array3f::const_iterator Array3fTest::citer, Array3fTest::citer2; +size_t Array3fTest::offsetX, Array3fTest::offsetY, Array3fTest::offsetZ; +size_t Array3fTest::viewLenX, Array3fTest::viewLenY, Array3fTest::viewLenZ; diff --git 
a/tests/array/utils/gtest4dTest.h b/tests/array/utils/gtest4dTest.h new file mode 100644 index 00000000..2d719fca --- /dev/null +++ b/tests/array/utils/gtest4dTest.h @@ -0,0 +1,64 @@ +#include "gtest/gtest.h" + +extern "C" { + #include "constants/universalConsts.h" +} + +#define SCALAR double + +const size_t ARRAY_SIZE_X = 14; +const size_t ARRAY_SIZE_Y = 9; +const size_t ARRAY_SIZE_Z = 7; +const size_t ARRAY_SIZE_W = 6; + +class Array4dTest : public ::testing::Test { +public: + size_t IterAbsOffset(array4d::iterator iter) { + return iter.i() + ARRAY_SIZE_X * iter.j() + ARRAY_SIZE_X * ARRAY_SIZE_Y * iter.k() + ARRAY_SIZE_X * ARRAY_SIZE_Y * ARRAY_SIZE_Z * iter.l(); + } + size_t IterAbsOffset(array4d::const_iterator citer) { + return citer.i() + ARRAY_SIZE_X * citer.j() + ARRAY_SIZE_X * ARRAY_SIZE_Y * citer.k() + ARRAY_SIZE_X * ARRAY_SIZE_Y * ARRAY_SIZE_Z * citer.l(); + } + +protected: + virtual void SetUp() { + arr.resize(ARRAY_SIZE_X, ARRAY_SIZE_Y, ARRAY_SIZE_Z, ARRAY_SIZE_W, true); + arr2.resize(ARRAY_SIZE_X, ARRAY_SIZE_Y, ARRAY_SIZE_Z, ARRAY_SIZE_W, true); + + arr.set_rate(ZFP_RATE_PARAM_BITS); + arr2.set_rate(ZFP_RATE_PARAM_BITS); + + offsetX = 5; + viewLenX = 3; + EXPECT_LT(offsetX + viewLenX, arr.size_x()); + + offsetY = 1; + viewLenY = 3; + EXPECT_LT(offsetY + viewLenY, arr.size_y()); + + offsetZ = 0; + viewLenZ = 2; + EXPECT_LT(offsetZ + viewLenZ, arr.size_z()); + + offsetW = 1; + viewLenW = 4; + EXPECT_LT(offsetW + viewLenW, arr.size_w()); + } + + static array4d arr, arr2; + static array4d::pointer ptr, ptr2; + static array4d::const_pointer cptr, cptr2; + static array4d::iterator iter, iter2; + static array4d::const_iterator citer, citer2; + static size_t offsetX, offsetY, offsetZ, offsetW; + static size_t viewLenX, viewLenY, viewLenZ, viewLenW; +}; + +array4d Array4dTest::arr(ARRAY_SIZE_X, ARRAY_SIZE_Y, ARRAY_SIZE_Z, ARRAY_SIZE_W, ZFP_RATE_PARAM_BITS); +array4d Array4dTest::arr2(ARRAY_SIZE_X, ARRAY_SIZE_Y, ARRAY_SIZE_Z, ARRAY_SIZE_W, ZFP_RATE_PARAM_BITS); 
+array4d::pointer Array4dTest::ptr, Array4dTest::ptr2; +array4d::const_pointer Array4dTest::cptr, Array4dTest::cptr2; +array4d::iterator Array4dTest::iter, Array4dTest::iter2; +array4d::const_iterator Array4dTest::citer, Array4dTest::citer2; +size_t Array4dTest::offsetX, Array4dTest::offsetY, Array4dTest::offsetZ, Array4dTest::offsetW; +size_t Array4dTest::viewLenX, Array4dTest::viewLenY, Array4dTest::viewLenZ, Array4dTest::viewLenW; diff --git a/tests/array/utils/gtest4fTest.h b/tests/array/utils/gtest4fTest.h new file mode 100644 index 00000000..9b0cf581 --- /dev/null +++ b/tests/array/utils/gtest4fTest.h @@ -0,0 +1,64 @@ +#include "gtest/gtest.h" + +extern "C" { + #include "constants/universalConsts.h" +} + +#define SCALAR float + +const size_t ARRAY_SIZE_X = 14; +const size_t ARRAY_SIZE_Y = 9; +const size_t ARRAY_SIZE_Z = 7; +const size_t ARRAY_SIZE_W = 6; + +class Array4fTest : public ::testing::Test { +public: + size_t IterAbsOffset(array4f::iterator iter) { + return iter.i() + ARRAY_SIZE_X * iter.j() + ARRAY_SIZE_X * ARRAY_SIZE_Y * iter.k() + ARRAY_SIZE_X * ARRAY_SIZE_Y * ARRAY_SIZE_Z * iter.l(); + } + size_t IterAbsOffset(array4f::const_iterator citer) { + return citer.i() + ARRAY_SIZE_X * citer.j() + ARRAY_SIZE_X * ARRAY_SIZE_Y * citer.k() + ARRAY_SIZE_X * ARRAY_SIZE_Y * ARRAY_SIZE_Z * citer.l(); + } + +protected: + virtual void SetUp() { + arr.resize(ARRAY_SIZE_X, ARRAY_SIZE_Y, ARRAY_SIZE_Z, ARRAY_SIZE_W, true); + arr2.resize(ARRAY_SIZE_X, ARRAY_SIZE_Y, ARRAY_SIZE_Z, ARRAY_SIZE_W, true); + + arr.set_rate(ZFP_RATE_PARAM_BITS); + arr2.set_rate(ZFP_RATE_PARAM_BITS); + + offsetX = 5; + viewLenX = 3; + EXPECT_LT(offsetX + viewLenX, arr.size_x()); + + offsetY = 1; + viewLenY = 3; + EXPECT_LT(offsetY + viewLenY, arr.size_y()); + + offsetZ = 0; + viewLenZ = 2; + EXPECT_LT(offsetZ + viewLenZ, arr.size_z()); + + offsetW = 1; + viewLenW = 4; + EXPECT_LT(offsetW + viewLenW, arr.size_w()); + } + + static array4f arr, arr2; + static array4f::pointer ptr, ptr2; + static 
array4f::const_pointer cptr, cptr2; + static array4f::iterator iter, iter2; + static array4f::const_iterator citer, citer2; + static size_t offsetX, offsetY, offsetZ, offsetW; + static size_t viewLenX, viewLenY, viewLenZ, viewLenW; +}; + +array4f Array4fTest::arr(ARRAY_SIZE_X, ARRAY_SIZE_Y, ARRAY_SIZE_Z, ARRAY_SIZE_W, ZFP_RATE_PARAM_BITS); +array4f Array4fTest::arr2(ARRAY_SIZE_X, ARRAY_SIZE_Y, ARRAY_SIZE_Z, ARRAY_SIZE_W, ZFP_RATE_PARAM_BITS); +array4f::pointer Array4fTest::ptr, Array4fTest::ptr2; +array4f::const_pointer Array4fTest::cptr, Array4fTest::cptr2; +array4f::iterator Array4fTest::iter, Array4fTest::iter2; +array4f::const_iterator Array4fTest::citer, Array4fTest::citer2; +size_t Array4fTest::offsetX, Array4fTest::offsetY, Array4fTest::offsetZ, Array4fTest::offsetW; +size_t Array4fTest::viewLenX, Array4fTest::viewLenY, Array4fTest::viewLenZ, Array4fTest::viewLenW; diff --git a/tests/array/utils/gtestBaseFixture.h b/tests/array/utils/gtestBaseFixture.h new file mode 100644 index 00000000..82b6be9a --- /dev/null +++ b/tests/array/utils/gtestBaseFixture.h @@ -0,0 +1,149 @@ +#include "gtest/gtest.h" + +#include <cmath> +#include <sstream> +#include <tuple> + +// assumes a constants/.h is already included + +class ArrayNdTestFixture : public ::testing::TestWithParam<int> {protected: + double getRate() { return std::ldexp(1.0, GetParam() + 3); } +}; + + + +typedef std::tuple<zfp_mode, int, int> testConfig; + +#define TEST_RATE zfp_mode_fixed_rate +#define TEST_PREC zfp_mode_fixed_precision +#define TEST_ACCU zfp_mode_fixed_accuracy +#define TEST_RVRS zfp_mode_reversible + +#define TEST_INDEX_IMP 0 +#define TEST_INDEX_VRB 1 +#define TEST_INDEX_HY4 2 +#define TEST_INDEX_HY8 3 + +#define TEST_INDEX_TYPE_IMP zfp::index::implicit +#define TEST_INDEX_TYPE_VRB zfp::index::verbatim +#define TEST_INDEX_TYPE_HY4 zfp::index::hybrid4 +#define TEST_INDEX_TYPE_HY8 zfp::index::hybrid8 + +class CArrayNdTestFixture : public ::testing::TestWithParam<testConfig> {protected: + static double getRate(int param) { return std::ldexp(1.0, param
+ 3); } + static unsigned int getPrecision(int param) { return 1u << (param + 3); } + static double getTolerance(int param) { return std::ldexp(1.0, -(1u << param)); } + + // get(0): config mode selection + // get(1): config mode value selection + // get(2): block index type selection + zfp_config getConfig() + { + zfp_config config; + + switch(std::get<0>(GetParam())) { + case zfp_mode_fixed_rate: + { + //TODO: check with/without align? + config = zfp_config_rate(getRate(std::get<1>(GetParam())), true); + break; + } + case zfp_mode_fixed_precision: + { + config = zfp_config_precision(getPrecision(std::get<1>(GetParam()))); + break; + } + case zfp_mode_fixed_accuracy: + { + config = zfp_config_accuracy(getTolerance(std::get<1>(GetParam()))); + break; + } + case zfp_mode_reversible: + { + config = zfp_config_reversible(); + break; + } + case zfp_mode_expert: + { + //TODO: do we need this one? + //config = zfp_config_expert(uint minbits, uint maxbits, uint maxprec, int minexp); + //break; + } + default: + { + config = zfp_config_none(); + break; + } + } + return config; + } + +public: + struct PrintToStringParamName + { + static std::string IndexToStr(int idx) + { + switch (idx) + { + case TEST_INDEX_IMP: + { + return "Implicit"; + } + case TEST_INDEX_VRB: + { + return "Verbatim"; + } + case TEST_INDEX_HY4: + { + return "Hybrid4"; + } + case TEST_INDEX_HY8: + { + return "Hybrid8"; + } + default: + { + return "BadIdxType"; + } + } + } + + template <class ParamType> + std::string operator()(const testing::TestParamInfo<ParamType>& info) const + { + std::stringstream out; + switch(std::get<0>(info.param)) + { + case zfp_mode_fixed_rate: + { + out << "Fixed_Rate_val" << std::get<1>(info.param) << "_idx" << IndexToStr(std::get<2>(info.param)); + break; + } + case zfp_mode_fixed_precision: + { + out << "Fixed_Precision_val" << std::get<1>(info.param) << "_idx" << IndexToStr(std::get<2>(info.param)); + break; + } + case zfp_mode_fixed_accuracy: + { + out << "Fixed_Accuracy_val" <<
std::get<1>(info.param) << "_idx" << IndexToStr(std::get<2>(info.param)); + break; + } + case zfp_mode_reversible: + { + out << "Reversible_idx" << IndexToStr(std::get<2>(info.param)); + break; + } + case zfp_mode_expert: + { + out << "Expert_val" << std::get<1>(info.param) << "_idx" << IndexToStr(std::get<2>(info.param)); + break; + } + } + return out.str(); + } + }; + +}; diff --git a/tests/array/utils/gtestCApiTest.h b/tests/array/utils/gtestCApiTest.h new file mode 100644 index 00000000..b3caff7b --- /dev/null +++ b/tests/array/utils/gtestCApiTest.h @@ -0,0 +1,31 @@ +#include "gtest/gtest.h" +#include "commonMacros.h" + +class ZfpArrayConstructTest : public ::testing::Test { +protected: + virtual void SetUp() { + size_t num_64bit_entries = DIV_ROUND_UP(ZFP_HEADER_SIZE_BITS, CHAR_BIT * sizeof(uint64)); + buffer = new uint64[num_64bit_entries]; + + bs = stream_open(buffer, num_64bit_entries * sizeof(uint64)); + stream = zfp_stream_open(bs); + field = zfp_field_alloc(); + } + + virtual void TearDown() { + zfp_field_free(field); + zfp_stream_close(stream); + stream_close(bs); + delete[] buffer; + } + + static uint64* buffer; + static bitstream* bs; + static zfp_stream* stream; + static zfp_field* field; +}; + +uint64* ZfpArrayConstructTest::buffer; +bitstream* ZfpArrayConstructTest::bs; +zfp_stream* ZfpArrayConstructTest::stream; +zfp_field* ZfpArrayConstructTest::field; diff --git a/tests/array/utils/gtestDoubleEnv.h b/tests/array/utils/gtestDoubleEnv.h new file mode 100644 index 00000000..a214b25b --- /dev/null +++ b/tests/array/utils/gtestDoubleEnv.h @@ -0,0 +1,50 @@ +#include "gtest/gtest.h" +#include "zfp.h" +#include "commonMacros.h" + +extern "C" { + #include "utils/genSmoothRandNums.h" +} + +#define SCALAR double +#define ZFP_TYPE zfp_type_double + +const size_t MIN_TOTAL_ELEMENTS = 1000000; + +size_t inputDataSideLen, inputDataTotalLen; +size_t dimLens[4]; +double* inputDataArr; + +uint64* buffer; +bitstream* bs; +zfp_stream* stream; +zfp_field* field; + 
+class ArrayDoubleTestEnv : public ::testing::Environment { +public: + virtual int getDims() = 0; + + virtual void SetUp() { + generateSmoothRandDoubles(MIN_TOTAL_ELEMENTS, getDims(), &inputDataArr, &inputDataSideLen, &inputDataTotalLen); + + for (int i = 0; i < 4; i++) { + dimLens[i] = (i < getDims()) ? inputDataSideLen : 0; + } + + size_t num_64bit_entries = DIV_ROUND_UP(ZFP_HEADER_SIZE_BITS, CHAR_BIT * sizeof(uint64)); + buffer = new uint64[num_64bit_entries]; + + bs = stream_open(buffer, num_64bit_entries * sizeof(uint64)); + stream = zfp_stream_open(bs); + field = zfp_field_alloc(); + } + + virtual void TearDown() { + free(inputDataArr); + + zfp_field_free(field); + zfp_stream_close(stream); + stream_close(bs); + delete[] buffer; + } +}; diff --git a/tests/array/utils/gtestFloatEnv.h b/tests/array/utils/gtestFloatEnv.h new file mode 100644 index 00000000..1f31b34f --- /dev/null +++ b/tests/array/utils/gtestFloatEnv.h @@ -0,0 +1,50 @@ +#include "gtest/gtest.h" +#include "zfp.h" +#include "commonMacros.h" + +extern "C" { + #include "utils/genSmoothRandNums.h" +} + +#define SCALAR float +#define ZFP_TYPE zfp_type_float + +const size_t MIN_TOTAL_ELEMENTS = 1000000; + +size_t inputDataSideLen, inputDataTotalLen; +size_t dimLens[4]; +float* inputDataArr; + +uint64* buffer; +bitstream* bs; +zfp_stream* stream; +zfp_field* field; + +class ArrayFloatTestEnv : public ::testing::Environment { +public: + virtual int getDims() = 0; + + virtual void SetUp() { + generateSmoothRandFloats(MIN_TOTAL_ELEMENTS, getDims(), &inputDataArr, &inputDataSideLen, &inputDataTotalLen); + + for (int i = 0; i < 4; i++) { + dimLens[i] = (i < getDims()) ? 
inputDataSideLen : 0; + } + + size_t num_64bit_entries = DIV_ROUND_UP(ZFP_HEADER_SIZE_BITS, CHAR_BIT * sizeof(uint64)); + buffer = new uint64[num_64bit_entries]; + + bs = stream_open(buffer, num_64bit_entries * sizeof(uint64)); + stream = zfp_stream_open(bs); + field = zfp_field_alloc(); + } + + virtual void TearDown() { + free(inputDataArr); + + zfp_field_free(field); + zfp_stream_close(stream); + stream_close(bs); + delete[] buffer; + } +}; diff --git a/tests/array/utils/gtestSingleFixture.h b/tests/array/utils/gtestSingleFixture.h new file mode 100644 index 00000000..b76353b0 --- /dev/null +++ b/tests/array/utils/gtestSingleFixture.h @@ -0,0 +1,6 @@ +#include "gtest/gtest.h" + +class TestFixture : public ::testing::TestWithParam<int> {protected: + virtual void SetUp() {} +}; diff --git a/tests/array/utils/gtestTestEnv.h b/tests/array/utils/gtestTestEnv.h new file mode 100644 index 00000000..3451027f --- /dev/null +++ b/tests/array/utils/gtestTestEnv.h @@ -0,0 +1,8 @@ +#include "gtest/gtest.h" + +class TestEnv : public ::testing::Environment {public: + virtual void SetUp() {} + + virtual void TearDown() {} +}; diff --git a/tests/array/utils/predicates.h b/tests/array/utils/predicates.h new file mode 100644 index 00000000..347143d9 --- /dev/null +++ b/tests/array/utils/predicates.h @@ -0,0 +1,59 @@ +#include "zfp/array1.hpp" + +#include "gtest/gtest.h" + +testing::AssertionResult ExpectEqPrintHexPred(const char* expected_expr, const char* actual_expr, uint64 expected, uint64 actual) +{ + if (actual == expected) + return testing::AssertionSuccess(); + + std::stringstream ss, msg; + std::string expected_str, actual_str; + + ss.str(""); + ss << std::showbase << std::hex << expected; + expected_str = ss.str(); + + ss.str(""); + ss << std::showbase << std::hex << actual; + actual_str = ss.str(); + + msg << "\t Expected: " << expected_expr; + if (expected_str != expected_expr) { + msg << "\n\t Which is: " << std::showbase << std::hex << expected; + } + msg << "\nTo be
equal to: " << actual_expr; + if (actual_str != actual_expr) { + msg << "\n\t Which is: " << std::showbase << std::hex << actual; + } + + return testing::AssertionFailure() << msg.str(); +} + +testing::AssertionResult ExpectNeqPrintHexPred(const char* expected_expr, const char* actual_expr, uint64 expected, uint64 actual) +{ + if (actual != expected) + return testing::AssertionSuccess(); + + std::stringstream ss, msg; + std::string expected_str, actual_str; + + ss.str(""); + ss << std::showbase << std::hex << expected; + expected_str = ss.str(); + + ss.str(""); + ss << std::showbase << std::hex << actual; + actual_str = ss.str(); + + msg << "\t Expected: " << expected_expr; + if (expected_str != expected_expr) { + msg << "\n\t Which is: " << std::showbase << std::hex << expected; + } + msg << "\nNot to be equal to: " << actual_expr; + if (actual_str != actual_expr) { + msg << "\n\t Which is: " << std::showbase << std::hex << actual; + } + + return testing::AssertionFailure() << msg.str(); +} diff --git a/tests/array/zfp/CMakeLists.txt b/tests/array/zfp/CMakeLists.txt new file mode 100644 index 00000000..6d194a78 --- /dev/null +++ b/tests/array/zfp/CMakeLists.txt @@ -0,0 +1,6 @@ +if(ZFP_WITH_ALIGNED_ALLOC) + add_executable(testAlignedMemory testAlignedMemory.cpp) + target_link_libraries(testAlignedMemory gtest gtest_main zfp) + target_compile_definitions(testAlignedMemory PRIVATE ${zfp_compressed_array_defs}) + add_test(NAME testAlignedMemory COMMAND testAlignedMemory) +endif() diff --git a/tests/array/zfp/testAlignedMemory.cpp b/tests/array/zfp/testAlignedMemory.cpp new file mode 100644 index 00000000..0c94a5d2 --- /dev/null +++ b/tests/array/zfp/testAlignedMemory.cpp @@ -0,0 +1,34 @@ +#include "zfp/array3.hpp" +using namespace zfp; + +#include "gtest/gtest.h" +#include "../utils/gtestTestEnv.h" +#include "../utils/gtestSingleFixture.h" +#include "../utils/predicates.h" + +#include + +TestEnv* const testEnv = new TestEnv; + +class AlignedMemoryTest : public 
TestFixture {}; + +#define TEST_FIXTURE AlignedMemoryTest + +INSTANTIATE_TEST_SUITE_P(TestManyMemoryAlignments, TEST_FIXTURE, ::testing::Range(4, 11)); + +TEST_P(TEST_FIXTURE, when_allocateAlignedMem_expect_addressAligned) +{ + size_t alignmentBytes = (size_t)(1u << GetParam()); + void* ptr = allocate_aligned(30, alignmentBytes); + + uintptr_t address = (uintptr_t)ptr; + EXPECT_EQ(address % alignmentBytes, 0); + + deallocate_aligned(ptr); +} + +int main(int argc, char* argv[]) { + ::testing::InitGoogleTest(&argc, argv); + static_cast<void>(::testing::AddGlobalTestEnvironment(testEnv)); + return RUN_ALL_TESTS(); +} diff --git a/tests/cfp/CMakeLists.txt b/tests/cfp/CMakeLists.txt new file mode 100644 index 00000000..405d3ee5 --- /dev/null +++ b/tests/cfp/CMakeLists.txt @@ -0,0 +1,22 @@ +function(cfp_add_test dims type bits) + set(test_name testCfpArray${dims}${type}) + add_executable(${test_name} ${test_name}.c) + target_link_libraries(${test_name} + cmocka cfp zfpHashLib genSmoothRandNumsLib zfpChecksumsLib) + add_test(NAME ${test_name} COMMAND ${test_name}) +endfunction() + +cfp_add_test(1 f 32) +cfp_add_test(2 f 32) +cfp_add_test(3 f 32) +cfp_add_test(4 f 32) +cfp_add_test(1 d 64) +cfp_add_test(2 d 64) +cfp_add_test(3 d 64) +cfp_add_test(4 d 64) + +if(DEFINED CFP_NAMESPACE) + add_executable(testCfpNamespace testCfpNamespace.c) + target_link_libraries(testCfpNamespace cmocka cfp) + add_test(NAME testCfpNamespace COMMAND testCfpNamespace) +endif() diff --git a/tests/cfp/testCfpArray1_source.c b/tests/cfp/testCfpArray1_source.c new file mode 100644 index 00000000..35d47a11 --- /dev/null +++ b/tests/cfp/testCfpArray1_source.c @@ -0,0 +1,580 @@ +// ############### +// cfp_array tests +// ############### + +static void +_catFunc3(given_, CFP_ARRAY_TYPE, _when_ctor_expect_paramsSet)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + + assert_int_equal(CFP_NAMESPACE.SUB_NAMESPACE.size(cfpArr), bundle->totalDataLen); + 
assert_true(CFP_NAMESPACE.SUB_NAMESPACE.rate(cfpArr) >= bundle->rate); + + uchar* compressedPtr = CFP_NAMESPACE.SUB_NAMESPACE.compressed_data(cfpArr); + size_t compressedSize = CFP_NAMESPACE.SUB_NAMESPACE.compressed_size(cfpArr); + assert_int_not_equal(hashBitstream((uint64*)compressedPtr, compressedSize), 0); + + // sets a minimum cache size + assert_true(CFP_NAMESPACE.SUB_NAMESPACE.cache_size(cfpArr) >= bundle->csize); +} + +static void +_catFunc3(given_, CFP_ARRAY_TYPE, _header_expect_matchingMetadata)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE srcCfpArr = bundle->cfpArr; + CFP_HEADER_TYPE srcCfpHdr = CFP_NAMESPACE.SUB_NAMESPACE.header.ctor(srcCfpArr); + + assert_int_equal(CFP_NAMESPACE.SUB_NAMESPACE.header.scalar_type(srcCfpHdr), SCALAR_TYPE); + assert_int_equal(CFP_NAMESPACE.SUB_NAMESPACE.header.dimensionality(srcCfpHdr), 1); + assert_int_equal(CFP_NAMESPACE.SUB_NAMESPACE.header.rate(srcCfpHdr), CFP_NAMESPACE.SUB_NAMESPACE.rate(srcCfpArr)); + assert_int_equal(CFP_NAMESPACE.SUB_NAMESPACE.header.size_x(srcCfpHdr), CFP_NAMESPACE.SUB_NAMESPACE.size(srcCfpArr)); + + // cleanup + CFP_NAMESPACE.SUB_NAMESPACE.header.dtor(srcCfpHdr); +} + +static void +_catFunc3(given_, CFP_ARRAY_TYPE, _when_resize_expect_sizeChanged)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + + size_t newSize = 999; + assert_int_not_equal(CFP_NAMESPACE.SUB_NAMESPACE.size(cfpArr), newSize); + + CFP_NAMESPACE.SUB_NAMESPACE.resize(cfpArr, newSize, 1); + + assert_int_equal(CFP_NAMESPACE.SUB_NAMESPACE.size(cfpArr), newSize); +} + +static void +_catFunc3(given_, CFP_ARRAY_TYPE, _when_set_expect_entryWrittenToCacheOnly)(void **state) +{ + struct setupVars *bundle = *state; + + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + + // getting the ptr automatically flushes cache, so do this before setting an entry + uchar* compressedDataPtr = CFP_NAMESPACE.SUB_NAMESPACE.compressed_data(cfpArr); + size_t compressedSize = 
CFP_NAMESPACE.SUB_NAMESPACE.compressed_size(cfpArr); + + uchar* oldMemory = malloc(compressedSize * sizeof(uchar)); + memcpy(oldMemory, compressedDataPtr, compressedSize); + + CFP_NAMESPACE.SUB_NAMESPACE.set(cfpArr, 1, VAL); + + assert_memory_equal(compressedDataPtr, oldMemory, compressedSize); + free(oldMemory); +} + +static void +_catFunc3(given_, CFP_ARRAY_TYPE, _when_get_expect_entryReturned)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + size_t i = 1; + CFP_NAMESPACE.SUB_NAMESPACE.set(cfpArr, i, VAL); + + // dirty cache doesn't immediately apply compression + assert_true(CFP_NAMESPACE.SUB_NAMESPACE.get(cfpArr, i) == (SCALAR)VAL); +} + +static void +_catFunc3(given_, CFP_ARRAY_TYPE, _when_ref_expect_arrayObjectValid)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + size_t i = 1; + CFP_REF_TYPE cfpArrRef = CFP_NAMESPACE.SUB_NAMESPACE.ref(cfpArr, i); + + assert_ptr_equal(cfpArrRef.array.object, cfpArr.object); +} + +static void +_catFunc3(given_, CFP_ARRAY_TYPE, _when_ptr_expect_arrayObjectValid)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + size_t i = 1; + CFP_PTR_TYPE cfpArrPtr = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i); + + assert_ptr_equal(cfpArrPtr.reference.array.object, cfpArr.object); +} + +static void +_catFunc3(given_, CFP_ARRAY_TYPE, _when_begin_expect_objectValid)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + CFP_ITER_TYPE cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr); + + assert_ptr_equal(cfpArrIter.array.object, cfpArr.object); + assert_int_equal(cfpArrIter.x, 0); +} + +static void +_catFunc3(given_, CFP_ARRAY_TYPE, _when_end_expect_objectValid)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + CFP_ITER_TYPE cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.end(cfpArr); + + 
assert_ptr_equal(cfpArrIter.array.object, cfpArr.object); + assert_int_equal(cfpArrIter.x, SIZE_X); +} + + +// ############# +// cfp_ref tests +// ############# + +static void +_catFunc3(given_, CFP_REF_TYPE, _when_get_expect_entryReturned)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + size_t i = 1; + CFP_REF_TYPE cfpArrRef = CFP_NAMESPACE.SUB_NAMESPACE.ref(cfpArr, i); + CFP_NAMESPACE.SUB_NAMESPACE.set(cfpArr, i, VAL); + + assert_true(CFP_NAMESPACE.SUB_NAMESPACE.reference.get(cfpArrRef) == (SCALAR)VAL); +} + +static void +_catFunc3(given_, CFP_REF_TYPE, _when_set_expect_arrayUpdated)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + size_t i = 1; + CFP_REF_TYPE cfpArrRef = CFP_NAMESPACE.SUB_NAMESPACE.ref(cfpArr, i); + CFP_NAMESPACE.SUB_NAMESPACE.reference.set(cfpArrRef, VAL); + + assert_true(CFP_NAMESPACE.SUB_NAMESPACE.get(cfpArr, i) == (SCALAR)VAL); +} + +static void +_catFunc3(given_, CFP_REF_TYPE, _when_copy_expect_arrayUpdated)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + size_t i1 = 1, i2 = 2; + CFP_NAMESPACE.SUB_NAMESPACE.set(cfpArr, i1, VAL); + CFP_REF_TYPE cfpArrRef_a = CFP_NAMESPACE.SUB_NAMESPACE.ref(cfpArr, i1); + CFP_REF_TYPE cfpArrRef_b = CFP_NAMESPACE.SUB_NAMESPACE.ref(cfpArr, i2); + CFP_NAMESPACE.SUB_NAMESPACE.reference.copy(cfpArrRef_b, cfpArrRef_a); + + assert_true(CFP_NAMESPACE.SUB_NAMESPACE.get(cfpArr, i2) == (SCALAR)VAL); +} + +static void +_catFunc3(given_, CFP_REF_TYPE, _when_ptr_expect_addressMatches)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + size_t i = 1; + CFP_REF_TYPE cfpArrRef = CFP_NAMESPACE.SUB_NAMESPACE.ref(cfpArr, i); + CFP_PTR_TYPE cfpArrPtr = CFP_NAMESPACE.SUB_NAMESPACE.reference.ptr(cfpArrRef); + + assert_ptr_equal(cfpArrRef.array.object, cfpArrPtr.reference.array.object); +} + + +// ############# +// cfp_ptr tests +// 
############# + +static void +_catFunc3(given_, CFP_PTR_TYPE, _when_get_set_expect_correct)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + size_t i = 1; + SCALAR val = 5; + CFP_PTR_TYPE cfpArrPtr = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i); + CFP_NAMESPACE.SUB_NAMESPACE.pointer.set(cfpArrPtr, val); + + assert_true(val - CFP_NAMESPACE.SUB_NAMESPACE.pointer.get(cfpArrPtr) < 1e-12); + assert_true(val - CFP_NAMESPACE.SUB_NAMESPACE.pointer.get(cfpArrPtr) > -1e-12); +} + +static void +_catFunc3(given_, CFP_PTR_TYPE, _when_get_at_set_at_expect_correct)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + size_t i = 1, io = 3; + SCALAR val = 5; + CFP_PTR_TYPE cfpArrPtr = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i); + CFP_NAMESPACE.SUB_NAMESPACE.pointer.set_at(cfpArrPtr, io, val); + + assert_true(val - CFP_NAMESPACE.SUB_NAMESPACE.pointer.get_at(cfpArrPtr, io) < 1e-12); + assert_true(val - CFP_NAMESPACE.SUB_NAMESPACE.pointer.get_at(cfpArrPtr, io) > -1e-12); +} + +static void +_catFunc3(given_, CFP_PTR_TYPE, _when_ref_expect_addressMatches)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + size_t i = 1; + CFP_PTR_TYPE cfpArrPtr = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i); + CFP_REF_TYPE cfpArrRef = CFP_NAMESPACE.SUB_NAMESPACE.pointer.ref(cfpArrPtr); + + assert_ptr_equal(cfpArrPtr.reference.array.object, cfpArrRef.array.object); +} + +static void +_catFunc3(given_, CFP_PTR_TYPE, _when_ref_at_expect_correct)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + size_t i = 1; + size_t oi = 10; + CFP_PTR_TYPE cfpArrPtr = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i); + CFP_REF_TYPE cfpArrRef = CFP_NAMESPACE.SUB_NAMESPACE.pointer.ref_at(cfpArrPtr, oi); + + assert_int_equal(cfpArrPtr.reference.x + oi, cfpArrRef.x); + assert_ptr_equal(cfpArrPtr.reference.array.object, cfpArrRef.array.object); +} + 
+static void +_catFunc3(given_, CFP_PTR_TYPE, _when_lt_expect_less)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + size_t i1 = 1, i2 = 5; + CFP_PTR_TYPE cfpArrPtrA = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i1); + CFP_PTR_TYPE cfpArrPtrB = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i2); + + assert_true(CFP_NAMESPACE.SUB_NAMESPACE.pointer.lt(cfpArrPtrA, cfpArrPtrB)); +} + +static void +_catFunc3(given_, CFP_PTR_TYPE, _when_gt_expect_greater)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + size_t i1 = 1, i2 = 5; + CFP_PTR_TYPE cfpArrPtrA = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i1); + CFP_PTR_TYPE cfpArrPtrB = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i2); + + assert_true(CFP_NAMESPACE.SUB_NAMESPACE.pointer.gt(cfpArrPtrB, cfpArrPtrA)); +} + +static void +_catFunc3(given_, CFP_PTR_TYPE, _when_leq_expect_less_or_equal)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + size_t i1 = 1, i2 = 5; + CFP_PTR_TYPE cfpArrPtrA = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i1); + CFP_PTR_TYPE cfpArrPtrB = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i2); + + assert_true(CFP_NAMESPACE.SUB_NAMESPACE.pointer.leq(cfpArrPtrA, cfpArrPtrA)); + assert_true(CFP_NAMESPACE.SUB_NAMESPACE.pointer.leq(cfpArrPtrA, cfpArrPtrB)); +} + +static void +_catFunc3(given_, CFP_PTR_TYPE, _when_geq_expect_greater_or_equal)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + size_t i1 = 1, i2 = 5; + CFP_PTR_TYPE cfpArrPtrA = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i1); + CFP_PTR_TYPE cfpArrPtrB = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i2); + + assert_true(CFP_NAMESPACE.SUB_NAMESPACE.pointer.geq(cfpArrPtrA, cfpArrPtrA)); + assert_true(CFP_NAMESPACE.SUB_NAMESPACE.pointer.geq(cfpArrPtrB, cfpArrPtrA)); +} + +static void +_catFunc3(given_, CFP_PTR_TYPE, _when_eq_expect_equal)(void **state) +{ + struct setupVars *bundle = *state; + 
CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + size_t i1 = 1; + CFP_PTR_TYPE cfpArrPtrA = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i1); + + assert_true(CFP_NAMESPACE.SUB_NAMESPACE.pointer.eq(cfpArrPtrA, cfpArrPtrA)); +} + +static void +_catFunc3(given_, CFP_PTR_TYPE, _when_neq_expect_not_equal)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + size_t i1 = 1, i2 = 5; + CFP_PTR_TYPE cfpArrPtrA = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i1); + CFP_PTR_TYPE cfpArrPtrB = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i2); + + assert_true(CFP_NAMESPACE.SUB_NAMESPACE.pointer.neq(cfpArrPtrA, cfpArrPtrB)); +} + +static void +_catFunc3(given_, CFP_PTR_TYPE, _when_distance_expect_correct)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + size_t i1 = 1, i2 = 5; + CFP_PTR_TYPE cfpArrPtrA = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i1); + CFP_PTR_TYPE cfpArrPtrB = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i2); + + assert_int_equal(CFP_NAMESPACE.SUB_NAMESPACE.pointer.distance(cfpArrPtrA, cfpArrPtrB), (int)cfpArrPtrB.reference.x - (int)cfpArrPtrA.reference.x); + assert_ptr_equal(cfpArrPtrA.reference.array.object, cfpArrPtrB.reference.array.object); +} + +static void +_catFunc3(given_, CFP_PTR_TYPE, _when_next_expect_correct)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + size_t i = 1, oi = 10; + CFP_PTR_TYPE cfpArrPtr = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i); + cfpArrPtr = CFP_NAMESPACE.SUB_NAMESPACE.pointer.next(cfpArrPtr, oi); + + assert_int_equal(cfpArrPtr.reference.x, i + oi); + assert_ptr_equal(cfpArrPtr.reference.array.object, CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i).reference.array.object); +} + +static void +_catFunc3(given_, CFP_PTR_TYPE, _when_prev_expect_correct)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + size_t i = 15, oi = 10; + CFP_PTR_TYPE cfpArrPtr = 
CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i); + cfpArrPtr = CFP_NAMESPACE.SUB_NAMESPACE.pointer.prev(cfpArrPtr, oi); + + assert_int_equal(cfpArrPtr.reference.x, i - oi); + assert_ptr_equal(cfpArrPtr.reference.array.object, CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i).reference.array.object); +} + +static void +_catFunc3(given_, CFP_PTR_TYPE, _when_inc_expect_correct)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + size_t i = 1; + CFP_PTR_TYPE cfpArrPtr = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i); + cfpArrPtr = CFP_NAMESPACE.SUB_NAMESPACE.pointer.inc(cfpArrPtr); + + assert_int_equal(cfpArrPtr.reference.x, i + 1); + assert_ptr_equal(cfpArrPtr.reference.array.object, CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i).reference.array.object); +} + +static void +_catFunc3(given_, CFP_PTR_TYPE, _when_dec_expect_correct)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + size_t i = 1; + CFP_PTR_TYPE cfpArrPtr = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i); + cfpArrPtr = CFP_NAMESPACE.SUB_NAMESPACE.pointer.dec(cfpArrPtr); + + assert_int_equal(cfpArrPtr.reference.x, i - 1); + assert_ptr_equal(cfpArrPtr.reference.array.object, CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i).reference.array.object); +} + + +// ############## +// cfp_iter tests +// ############## + +static void +_catFunc3(given_, CFP_ITER_TYPE, _when_ref_expect_correct)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + + CFP_ITER_TYPE cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr); + CFP_REF_TYPE cfpArrRef = CFP_NAMESPACE.SUB_NAMESPACE.iterator.ref(cfpArrIter); + + assert_ptr_equal(cfpArrRef.array.object, cfpArr.object); + assert_int_equal(cfpArrRef.x, 0); +} + +static void +_catFunc3(given_, CFP_ITER_TYPE, _when_ref_at_expect_correct)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + + size_t io = 5; + + CFP_ITER_TYPE cfpArrIter 
= CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr); + CFP_REF_TYPE cfpArrRef = CFP_NAMESPACE.SUB_NAMESPACE.iterator.ref_at(cfpArrIter, io); + + assert_ptr_equal(cfpArrRef.array.object, cfpArr.object); + assert_int_equal(cfpArrRef.x, io); +} + +static void +_catFunc3(given_, CFP_ITER_TYPE, _when_ptr_expect_correct)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + + CFP_ITER_TYPE cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr); + CFP_PTR_TYPE cfpArrPtr = CFP_NAMESPACE.SUB_NAMESPACE.iterator.ptr(cfpArrIter); + + assert_ptr_equal(cfpArrPtr.reference.array.object, cfpArr.object); + assert_int_equal(cfpArrPtr.reference.x, 0); +} + +static void +_catFunc3(given_, CFP_ITER_TYPE, _when_ptr_at_expect_correct)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + + size_t io = 5; + + CFP_ITER_TYPE cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr); + CFP_PTR_TYPE cfpArrPtr = CFP_NAMESPACE.SUB_NAMESPACE.iterator.ptr_at(cfpArrIter, io); + + assert_ptr_equal(cfpArrPtr.reference.array.object, cfpArr.object); + assert_int_equal(cfpArrPtr.reference.x, io); +} + +static void +_catFunc3(given_, CFP_ITER_TYPE, _when_inc_expect_correct)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + + CFP_ITER_TYPE cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr); + cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.iterator.inc(cfpArrIter); + + assert_ptr_equal(cfpArrIter.array.object, cfpArr.object); + assert_int_equal(cfpArrIter.x, 1); +} + +static void +_catFunc3(given_, CFP_ITER_TYPE, _when_dec_expect_correct)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + + CFP_ITER_TYPE cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr); + cfpArrIter.x = 4; + cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.iterator.dec(cfpArrIter); + + assert_ptr_equal(cfpArrIter.array.object, cfpArr.object); + 
assert_int_equal(cfpArrIter.x, 3); +} + +static void +_catFunc3(given_, CFP_ITER_TYPE, _when_next_expect_correct)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + + CFP_ITER_TYPE cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr); + cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.iterator.next(cfpArrIter, 4); + cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.iterator.next(cfpArrIter, 3); + + assert_ptr_equal(cfpArrIter.array.object, cfpArr.object); + assert_int_equal(cfpArrIter.x, 7); +} + +static void +_catFunc3(given_, CFP_ITER_TYPE, _when_prev_expect_correct)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + + CFP_ITER_TYPE cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr); + cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.iterator.next(cfpArrIter, 7); + cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.iterator.prev(cfpArrIter, 3); + + assert_ptr_equal(cfpArrIter.array.object, cfpArr.object); + assert_int_equal(cfpArrIter.x, 4); +} + +static void +_catFunc3(given_, CFP_ITER_TYPE, _when_distance_expect_correct)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + + CFP_ITER_TYPE cfpArrIter1 = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr); + CFP_ITER_TYPE cfpArrIter2 = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr); + cfpArrIter1 = CFP_NAMESPACE.SUB_NAMESPACE.iterator.next(cfpArrIter1, 3); + cfpArrIter2 = CFP_NAMESPACE.SUB_NAMESPACE.iterator.next(cfpArrIter2, 7); + + assert_int_equal(CFP_NAMESPACE.SUB_NAMESPACE.iterator.distance(cfpArrIter1, cfpArrIter2), 4); + assert_int_equal(CFP_NAMESPACE.SUB_NAMESPACE.iterator.distance(cfpArrIter2, CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr)), -7); +} + +static void +_catFunc3(given_, CFP_ITER_TYPE, _when_lt_expect_less)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + + CFP_ITER_TYPE cfpArrIter1 = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr); + CFP_ITER_TYPE cfpArrIter2 = 
CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr); + cfpArrIter1 = CFP_NAMESPACE.SUB_NAMESPACE.iterator.next(cfpArrIter1, 3); + cfpArrIter2 = CFP_NAMESPACE.SUB_NAMESPACE.iterator.next(cfpArrIter2, 7); + + assert_true(CFP_NAMESPACE.SUB_NAMESPACE.iterator.lt(cfpArrIter1, cfpArrIter2)); +} + +static void +_catFunc3(given_, CFP_ITER_TYPE, _when_gt_expect_greater)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + + CFP_ITER_TYPE cfpArrIter1 = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr); + CFP_ITER_TYPE cfpArrIter2 = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr); + cfpArrIter1 = CFP_NAMESPACE.SUB_NAMESPACE.iterator.next(cfpArrIter1, 3); + cfpArrIter2 = CFP_NAMESPACE.SUB_NAMESPACE.iterator.next(cfpArrIter2, 7); + + assert_true(CFP_NAMESPACE.SUB_NAMESPACE.iterator.gt(cfpArrIter2, cfpArrIter1)); +} + +static void +_catFunc3(given_, CFP_ITER_TYPE, _when_leq_expect_less_or_equal)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + + CFP_ITER_TYPE cfpArrIter1 = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr); + CFP_ITER_TYPE cfpArrIter2 = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr); + cfpArrIter1 = CFP_NAMESPACE.SUB_NAMESPACE.iterator.next(cfpArrIter1, 3); + cfpArrIter2 = CFP_NAMESPACE.SUB_NAMESPACE.iterator.next(cfpArrIter2, 7); + + assert_true(CFP_NAMESPACE.SUB_NAMESPACE.iterator.leq(cfpArrIter1, cfpArrIter1)); + assert_true(CFP_NAMESPACE.SUB_NAMESPACE.iterator.leq(cfpArrIter1, cfpArrIter2)); +} + +static void +_catFunc3(given_, CFP_ITER_TYPE, _when_geq_expect_greater_or_equal)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + + CFP_ITER_TYPE cfpArrIter1 = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr); + CFP_ITER_TYPE cfpArrIter2 = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr); + cfpArrIter1 = CFP_NAMESPACE.SUB_NAMESPACE.iterator.next(cfpArrIter1, 3); + cfpArrIter2 = CFP_NAMESPACE.SUB_NAMESPACE.iterator.next(cfpArrIter2, 7); + + 
assert_true(CFP_NAMESPACE.SUB_NAMESPACE.iterator.geq(cfpArrIter1, cfpArrIter1)); + assert_true(CFP_NAMESPACE.SUB_NAMESPACE.iterator.geq(cfpArrIter2, cfpArrIter1)); +} + +static void +_catFunc3(given_, CFP_ITER_TYPE, _when_get_index_expect_correct)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + + CFP_ITER_TYPE cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr); + size_t idx = CFP_NAMESPACE.SUB_NAMESPACE.iterator.i(cfpArrIter); + + assert_int_equal(idx, 0u); +} diff --git a/tests/cfp/testCfpArray1d.c b/tests/cfp/testCfpArray1d.c new file mode 100644 index 00000000..0520ae04 --- /dev/null +++ b/tests/cfp/testCfpArray1d.c @@ -0,0 +1,101 @@ +#include "src/traitsd.h" +#include "src/block1.h" + +#include "constants/1dDouble.h" + +#define CFP_ARRAY_TYPE cfp_array1d +#define CFP_REF_TYPE cfp_ref1d +#define CFP_PTR_TYPE cfp_ptr1d +#define CFP_ITER_TYPE cfp_iter1d +#define SUB_NAMESPACE array1d +#define SCALAR double +#define SCALAR_TYPE zfp_type_double +#define DIMENSIONALITY 1 + +#include "testCfpArray_source.c" +#include "testCfpArray1_source.c" + +int main() +{ + const struct CMUnitTest tests[] = { + cmocka_unit_test(when_seededRandomSmoothDataGenerated_expect_ChecksumMatches), + + cmocka_unit_test(given_cfp_array1d_when_defaultCtor_expect_returnsNonNullPtr), + cmocka_unit_test_setup_teardown(given_cfp_array1d_when_ctor_expect_paramsSet, setupCfpArrLargeComplete, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array1d_when_copyCtor_expect_paramsCopied, setupCfpArrLargeComplete, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array1d_when_copyCtor_expect_cacheCopied, setupCfpArrLargeComplete, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array1d_when_headerCtor_expect_copied, setupCfpArrLargeComplete, teardownCfpArr), + + cmocka_unit_test_setup_teardown(given_cfp_array1d_header_expect_matchingMetadata, setupCfpArrLargeComplete, teardownCfpArr), + 
cmocka_unit_test_setup_teardown(given_cfp_array1d_header_when_bufferCtor_expect_copied, setupCfpArrLargeComplete, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array1d_header_when_bufferCtor_expect_paramsCopied, setupCfpArrLargeComplete, teardownCfpArr), + + cmocka_unit_test_setup_teardown(given_cfp_array1d_when_setRate_expect_rateSet, setupCfpArrMinimal, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array1d_when_setCacheSize_expect_cacheSizeSet, setupCfpArrSmall, teardownCfpArr), + + cmocka_unit_test_setup_teardown(given_cfp_array1d_with_dirtyCache_when_flushCache_expect_cacheEntriesPersistedToMemory, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array1d_when_clearCache_expect_cacheCleared, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array1d_when_resize_expect_sizeChanged, setupCfpArrSmall, teardownCfpArr), + + cmocka_unit_test_setup_teardown(given_cfp_array1d_when_setFlat_expect_entryWrittenToCacheOnly, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array1d_when_getFlat_expect_entryReturned, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array1d_when_set_expect_entryWrittenToCacheOnly, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array1d_when_get_expect_entryReturned, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array1d_when_ref_expect_arrayObjectValid, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array1d_when_ptr_expect_arrayObjectValid, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array1d_when_ref_flat_expect_entryReturned, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array1d_when_ptr_flat_expect_entryReturned, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array1d_when_begin_expect_objectValid, 
setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array1d_when_end_expect_objectValid, setupCfpArrSmall, teardownCfpArr), + + cmocka_unit_test_setup_teardown(given_cfp_ref1d_when_get_expect_entryReturned, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ref1d_when_set_expect_arrayUpdated, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ref1d_when_ptr_expect_addressMatches, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ref1d_when_copy_expect_arrayUpdated, setupCfpArrSmall, teardownCfpArr), + + cmocka_unit_test_setup_teardown(given_cfp_ptr1d_when_get_set_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr1d_when_get_at_set_at_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr1d_when_ref_expect_addressMatches, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr1d_when_ref_at_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr1d_when_lt_expect_less, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr1d_when_gt_expect_greater, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr1d_when_leq_expect_less_or_equal, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr1d_when_geq_expect_greater_or_equal, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr1d_when_eq_expect_equal, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr1d_when_neq_expect_not_equal, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr1d_when_distance_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr1d_when_next_expect_correct, setupCfpArrSmall, teardownCfpArr), + 
cmocka_unit_test_setup_teardown(given_cfp_ptr1d_when_prev_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr1d_when_inc_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr1d_when_dec_expect_correct, setupCfpArrSmall, teardownCfpArr), + + cmocka_unit_test_setup_teardown(given_cfp_iter1d_when_get_set_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter1d_when_get_at_set_at_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter1d_when_ref_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter1d_when_ref_at_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter1d_when_ptr_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter1d_when_ptr_at_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter1d_when_lt_expect_less, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter1d_when_gt_expect_greater, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter1d_when_leq_expect_less_or_equal, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter1d_when_geq_expect_greater_or_equal, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter1d_when_eq_expect_equal, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter1d_when_neq_expect_not_equal, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter1d_when_distance_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter1d_when_next_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter1d_when_prev_expect_correct, 
setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter1d_when_inc_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter1d_when_dec_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter1d_when_iterate_touch_all, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter1d_when_get_index_expect_correct, setupCfpArrSmall, teardownCfpArr), + + + // fixed rate rounds up to multiples of 16 (omit fixed rate 8) + cmocka_unit_test_setup_teardown(given_cfp_array1d_when_setArray_expect_compressedStreamChecksumMatches, setupFixedRate1, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array1d_when_setArray_expect_compressedStreamChecksumMatches, setupFixedRate2, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array1d_when_getArray_expect_decompressedArrChecksumMatches, setupFixedRate1, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array1d_when_getArray_expect_decompressedArrChecksumMatches, setupFixedRate2, teardownCfpArr), + }; + + return cmocka_run_group_tests(tests, prepCommonSetupVars, teardownCommonSetupVars); +} diff --git a/tests/cfp/testCfpArray1f.c b/tests/cfp/testCfpArray1f.c new file mode 100644 index 00000000..84576655 --- /dev/null +++ b/tests/cfp/testCfpArray1f.c @@ -0,0 +1,100 @@ +#include "src/traitsf.h" +#include "src/block1.h" + +#include "constants/1dFloat.h" + +#define CFP_ARRAY_TYPE cfp_array1f +#define CFP_REF_TYPE cfp_ref1f +#define CFP_PTR_TYPE cfp_ptr1f +#define CFP_ITER_TYPE cfp_iter1f +#define SUB_NAMESPACE array1f +#define SCALAR float +#define SCALAR_TYPE zfp_type_float +#define DIMENSIONALITY 1 + +#include "testCfpArray_source.c" +#include "testCfpArray1_source.c" + +int main() +{ + const struct CMUnitTest tests[] = { + cmocka_unit_test(when_seededRandomSmoothDataGenerated_expect_ChecksumMatches), + + 
cmocka_unit_test(given_cfp_array1f_when_defaultCtor_expect_returnsNonNullPtr), + cmocka_unit_test_setup_teardown(given_cfp_array1f_when_ctor_expect_paramsSet, setupCfpArrLargeComplete, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array1f_when_copyCtor_expect_paramsCopied, setupCfpArrLargeComplete, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array1f_when_copyCtor_expect_cacheCopied, setupCfpArrLargeComplete, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array1f_when_headerCtor_expect_copied, setupCfpArrLargeComplete, teardownCfpArr), + + cmocka_unit_test_setup_teardown(given_cfp_array1f_header_expect_matchingMetadata, setupCfpArrLargeComplete, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array1f_header_when_bufferCtor_expect_copied, setupCfpArrLargeComplete, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array1f_header_when_bufferCtor_expect_paramsCopied, setupCfpArrLargeComplete, teardownCfpArr), + + cmocka_unit_test_setup_teardown(given_cfp_array1f_when_setRate_expect_rateSet, setupCfpArrMinimal, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array1f_when_setCacheSize_expect_cacheSizeSet, setupCfpArrSmall, teardownCfpArr), + + cmocka_unit_test_setup_teardown(given_cfp_array1f_with_dirtyCache_when_flushCache_expect_cacheEntriesPersistedToMemory, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array1f_when_clearCache_expect_cacheCleared, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array1f_when_resize_expect_sizeChanged, setupCfpArrSmall, teardownCfpArr), + + cmocka_unit_test_setup_teardown(given_cfp_array1f_when_setFlat_expect_entryWrittenToCacheOnly, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array1f_when_getFlat_expect_entryReturned, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array1f_when_set_expect_entryWrittenToCacheOnly, 
setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array1f_when_get_expect_entryReturned, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array1f_when_ref_expect_arrayObjectValid, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array1f_when_ptr_expect_arrayObjectValid, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array1f_when_ref_flat_expect_entryReturned, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array1f_when_ptr_flat_expect_entryReturned, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array1f_when_begin_expect_objectValid, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array1f_when_end_expect_objectValid, setupCfpArrSmall, teardownCfpArr), + + cmocka_unit_test_setup_teardown(given_cfp_ref1f_when_get_expect_entryReturned, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ref1f_when_set_expect_arrayUpdated, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ref1f_when_ptr_expect_addressMatches, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ref1f_when_copy_expect_arrayUpdated, setupCfpArrSmall, teardownCfpArr), + + cmocka_unit_test_setup_teardown(given_cfp_ptr1f_when_get_set_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr1f_when_get_at_set_at_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr1f_when_ref_expect_addressMatches, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr1f_when_ref_at_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr1f_when_lt_expect_less, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr1f_when_gt_expect_greater, 
setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr1f_when_leq_expect_less_or_equal, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr1f_when_geq_expect_greater_or_equal, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr1f_when_eq_expect_equal, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr1f_when_neq_expect_not_equal, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr1f_when_distance_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr1f_when_next_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr1f_when_prev_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr1f_when_inc_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr1f_when_dec_expect_correct, setupCfpArrSmall, teardownCfpArr), + + cmocka_unit_test_setup_teardown(given_cfp_iter1f_when_get_set_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter1f_when_get_at_set_at_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter1f_when_ref_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter1f_when_ref_at_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter1f_when_ptr_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter1f_when_ptr_at_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter1f_when_lt_expect_less, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter1f_when_gt_expect_greater, setupCfpArrSmall, teardownCfpArr), + 
cmocka_unit_test_setup_teardown(given_cfp_iter1f_when_leq_expect_less_or_equal, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter1f_when_geq_expect_greater_or_equal, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter1f_when_eq_expect_equal, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter1f_when_neq_expect_not_equal, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter1f_when_distance_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter1f_when_next_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter1f_when_prev_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter1f_when_inc_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter1f_when_dec_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter1f_when_iterate_touch_all, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter1f_when_get_index_expect_correct, setupCfpArrSmall, teardownCfpArr), + + // fixed rate rounds up to multiples of 16 (omit fixed rate 8) + cmocka_unit_test_setup_teardown(given_cfp_array1f_when_setArray_expect_compressedStreamChecksumMatches, setupFixedRate1, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array1f_when_setArray_expect_compressedStreamChecksumMatches, setupFixedRate2, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array1f_when_getArray_expect_decompressedArrChecksumMatches, setupFixedRate1, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array1f_when_getArray_expect_decompressedArrChecksumMatches, setupFixedRate2, teardownCfpArr), + }; + + return cmocka_run_group_tests(tests, prepCommonSetupVars, teardownCommonSetupVars); +} diff --git 
a/tests/cfp/testCfpArray2_source.c b/tests/cfp/testCfpArray2_source.c new file mode 100644 index 00000000..6eaa3e16 --- /dev/null +++ b/tests/cfp/testCfpArray2_source.c @@ -0,0 +1,634 @@ +// ############### +// cfp_array tests +// ############### + +static void +_catFunc3(given_, CFP_ARRAY_TYPE, _when_ctor_expect_paramsSet)(void **state) +{ + struct setupVars *bundle = *state; + size_t csize = 300; + CFP_ARRAY_TYPE cfpArr = CFP_NAMESPACE.SUB_NAMESPACE.ctor(bundle->dataSideLen, bundle->dataSideLen, bundle->rate, bundle->dataArr, csize); + assert_non_null(cfpArr.object); + + assert_int_equal(CFP_NAMESPACE.SUB_NAMESPACE.size(cfpArr), bundle->totalDataLen); + + assert_true(CFP_NAMESPACE.SUB_NAMESPACE.rate(cfpArr) >= bundle->rate); + + uchar* compressedPtr = CFP_NAMESPACE.SUB_NAMESPACE.compressed_data(cfpArr); + size_t compressedSize = CFP_NAMESPACE.SUB_NAMESPACE.compressed_size(cfpArr); + assert_int_not_equal(hashBitstream((uint64*)compressedPtr, compressedSize), 0); + + // sets a minimum cache size + assert_true(CFP_NAMESPACE.SUB_NAMESPACE.cache_size(cfpArr) >= csize); + + CFP_NAMESPACE.SUB_NAMESPACE.dtor(cfpArr); +} + +static void +_catFunc3(given_, CFP_ARRAY_TYPE, _header_expect_matchingMetadata)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE srcCfpArr = bundle->cfpArr; + CFP_HEADER_TYPE srcCfpHdr = CFP_NAMESPACE.SUB_NAMESPACE.header.ctor(srcCfpArr); + + assert_int_equal(CFP_NAMESPACE.SUB_NAMESPACE.header.scalar_type(srcCfpHdr), SCALAR_TYPE); + assert_int_equal(CFP_NAMESPACE.SUB_NAMESPACE.header.dimensionality(srcCfpHdr), 2); + assert_int_equal(CFP_NAMESPACE.SUB_NAMESPACE.header.rate(srcCfpHdr), CFP_NAMESPACE.SUB_NAMESPACE.rate(srcCfpArr)); + assert_int_equal(CFP_NAMESPACE.SUB_NAMESPACE.header.size_x(srcCfpHdr), CFP_NAMESPACE.SUB_NAMESPACE.size_x(srcCfpArr)); + assert_int_equal(CFP_NAMESPACE.SUB_NAMESPACE.header.size_y(srcCfpHdr), CFP_NAMESPACE.SUB_NAMESPACE.size_y(srcCfpArr)); + + // cleanup + 
CFP_NAMESPACE.SUB_NAMESPACE.header.dtor(srcCfpHdr); +} + +static void +_catFunc3(given_, CFP_ARRAY_TYPE, _when_resize_expect_sizeChanged)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + + size_t newSizeX = 81, newSizeY = 123; + assert_int_not_equal(CFP_NAMESPACE.SUB_NAMESPACE.size(cfpArr), newSizeX * newSizeY); + + CFP_NAMESPACE.SUB_NAMESPACE.resize(cfpArr, newSizeX, newSizeY, 1); + + assert_int_equal(CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr), newSizeX); + assert_int_equal(CFP_NAMESPACE.SUB_NAMESPACE.size_y(cfpArr), newSizeY); + assert_int_equal(CFP_NAMESPACE.SUB_NAMESPACE.size(cfpArr), newSizeX * newSizeY); +} + +static void +_catFunc3(given_, CFP_ARRAY_TYPE, _when_set_expect_entryWrittenToCacheOnly)(void **state) +{ + struct setupVars *bundle = *state; + + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + + // getting the ptr automatically flushes cache, so do this before setting an entry + uchar* compressedDataPtr = CFP_NAMESPACE.SUB_NAMESPACE.compressed_data(cfpArr); + size_t compressedSize = CFP_NAMESPACE.SUB_NAMESPACE.compressed_size(cfpArr); + + uchar* oldMemory = malloc(compressedSize * sizeof(uchar)); + memcpy(oldMemory, compressedDataPtr, compressedSize); + + CFP_NAMESPACE.SUB_NAMESPACE.set(cfpArr, 1, 1, (SCALAR)VAL); + + assert_memory_equal(compressedDataPtr, oldMemory, compressedSize); + free(oldMemory); +} + +static void +_catFunc3(given_, CFP_ARRAY_TYPE, _when_get_expect_entryReturned)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + size_t i = 1, j = 2; + CFP_NAMESPACE.SUB_NAMESPACE.set(cfpArr, i, j, (SCALAR)VAL); + + // dirty cache doesn't immediately apply compression + assert_true(CFP_NAMESPACE.SUB_NAMESPACE.get(cfpArr, i, j) == (SCALAR)VAL); +} + +static void +_catFunc3(given_, CFP_ARRAY_TYPE, _when_ref_expect_arrayObjectValid)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + size_t i = 1, j = 2; + 
CFP_REF_TYPE cfpArrRef = CFP_NAMESPACE.SUB_NAMESPACE.ref(cfpArr, i, j); + + assert_ptr_equal(cfpArrRef.array.object, cfpArr.object); +} + +static void +_catFunc3(given_, CFP_ARRAY_TYPE, _when_ptr_expect_arrayObjectValid)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + size_t i = 1, j = 2; + CFP_PTR_TYPE cfpArrPtr = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i, j); + + assert_ptr_equal(cfpArrPtr.reference.array.object, cfpArr.object); +} + +static void +_catFunc3(given_, CFP_ARRAY_TYPE, _when_begin_expect_objectValid)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + CFP_ITER_TYPE cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr); + + assert_ptr_equal(cfpArrIter.array.object, cfpArr.object); + assert_int_equal(cfpArrIter.x, 0); + assert_int_equal(cfpArrIter.y, 0); +} + +static void +_catFunc3(given_, CFP_ARRAY_TYPE, _when_end_expect_objectValid)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + CFP_ITER_TYPE cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.end(cfpArr); + + assert_ptr_equal(cfpArrIter.array.object, cfpArr.object); + assert_int_equal(cfpArrIter.x, 0); + assert_int_equal(cfpArrIter.y, SIZE_Y); +} + + +// ############# +// cfp_ref tests +// ############# + +static void +_catFunc3(given_, CFP_REF_TYPE, _when_get_expect_entryReturned)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + size_t i = 1, j = 2; + CFP_REF_TYPE cfpArrRef = CFP_NAMESPACE.SUB_NAMESPACE.ref(cfpArr, i, j); + CFP_NAMESPACE.SUB_NAMESPACE.set(cfpArr, i, j, VAL); + + assert_true(CFP_NAMESPACE.SUB_NAMESPACE.reference.get(cfpArrRef) == (SCALAR)VAL); +} + +static void +_catFunc3(given_, CFP_REF_TYPE, _when_set_expect_arrayUpdated)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + size_t i = 1, j = 2; + CFP_REF_TYPE cfpArrRef = 
CFP_NAMESPACE.SUB_NAMESPACE.ref(cfpArr, i, j); + CFP_NAMESPACE.SUB_NAMESPACE.reference.set(cfpArrRef, VAL); + + assert_true(CFP_NAMESPACE.SUB_NAMESPACE.get(cfpArr, i, j) == (SCALAR)VAL); +} + +static void +_catFunc3(given_, CFP_REF_TYPE, _when_copy_expect_arrayUpdated)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + size_t i1 = 1, j1 = 2, i2 = 2, j2 = 1; + CFP_NAMESPACE.SUB_NAMESPACE.set(cfpArr, i1, j1, VAL); + CFP_REF_TYPE cfpArrRef_a = CFP_NAMESPACE.SUB_NAMESPACE.ref(cfpArr, i1, j1); + CFP_REF_TYPE cfpArrRef_b = CFP_NAMESPACE.SUB_NAMESPACE.ref(cfpArr, i2, j2); + CFP_NAMESPACE.SUB_NAMESPACE.reference.copy(cfpArrRef_b, cfpArrRef_a); + + assert_true(CFP_NAMESPACE.SUB_NAMESPACE.get(cfpArr, i2, j2) == (SCALAR)VAL); +} + +static void +_catFunc3(given_, CFP_REF_TYPE, _when_ptr_expect_addressMatches)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + size_t i = 1, j = 2; + CFP_REF_TYPE cfpArrRef = CFP_NAMESPACE.SUB_NAMESPACE.ref(cfpArr, i, j); + CFP_PTR_TYPE cfpArrPtr = CFP_NAMESPACE.SUB_NAMESPACE.reference.ptr(cfpArrRef); + + assert_ptr_equal(cfpArrRef.array.object, cfpArrPtr.reference.array.object); +} + + +// ############# +// cfp_ptr tests +// ############# + +static void +_catFunc3(given_, CFP_PTR_TYPE, _when_get_set_expect_correct)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + size_t i = 1, j = 2; + SCALAR val = 5; + CFP_PTR_TYPE cfpArrPtr = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i, j); + CFP_NAMESPACE.SUB_NAMESPACE.pointer.set(cfpArrPtr, val); + + assert_true(val - CFP_NAMESPACE.SUB_NAMESPACE.pointer.get(cfpArrPtr) < 1e-12); + assert_true(val - CFP_NAMESPACE.SUB_NAMESPACE.pointer.get(cfpArrPtr) > -1e-12); +} + +static void +_catFunc3(given_, CFP_PTR_TYPE, _when_get_at_set_at_expect_correct)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + size_t i = 1, j = 2, io = 3; + 
SCALAR val = 5; + CFP_PTR_TYPE cfpArrPtr = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i, j); + CFP_NAMESPACE.SUB_NAMESPACE.pointer.set_at(cfpArrPtr, io, val); + + assert_true(val - CFP_NAMESPACE.SUB_NAMESPACE.pointer.get_at(cfpArrPtr, io) < 1e-12); + assert_true(val - CFP_NAMESPACE.SUB_NAMESPACE.pointer.get_at(cfpArrPtr, io) > -1e-12); +} + +static void +_catFunc3(given_, CFP_PTR_TYPE, _when_ref_expect_addressMatches)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + size_t i = 1, j = 2; + CFP_PTR_TYPE cfpArrPtr = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i, j); + CFP_REF_TYPE cfpArrRef = CFP_NAMESPACE.SUB_NAMESPACE.pointer.ref(cfpArrPtr); + + assert_ptr_equal(cfpArrPtr.reference.array.object, cfpArrRef.array.object); +} + +static void +_catFunc3(given_, CFP_PTR_TYPE, _when_ref_at_expect_correct)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + size_t i = 1, j = 2; + size_t oi = 10; + CFP_PTR_TYPE cfpArrPtr = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i, j); + CFP_REF_TYPE cfpArrRef = CFP_NAMESPACE.SUB_NAMESPACE.pointer.ref_at(cfpArrPtr, oi); + + assert_int_equal(cfpArrPtr.reference.x + oi, cfpArrRef.x); + assert_ptr_equal(cfpArrPtr.reference.array.object, cfpArrRef.array.object); +} + +static void +_catFunc3(given_, CFP_PTR_TYPE, _when_lt_expect_less)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + size_t i1 = 1, i2 = 2; + size_t j1 = 1, j2 = 2; + CFP_PTR_TYPE cfpArrPtrA = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i1, j1); + CFP_PTR_TYPE cfpArrPtrB = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i2, j2); + + assert_true(CFP_NAMESPACE.SUB_NAMESPACE.pointer.lt(cfpArrPtrA, cfpArrPtrB)); +} + +static void +_catFunc3(given_, CFP_PTR_TYPE, _when_gt_expect_greater)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + size_t i1 = 1, i2 = 2; + size_t j1 = 1, j2 = 2; + CFP_PTR_TYPE cfpArrPtrA = 
CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i1, j1); + CFP_PTR_TYPE cfpArrPtrB = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i2, j2); + + assert_true(CFP_NAMESPACE.SUB_NAMESPACE.pointer.gt(cfpArrPtrB, cfpArrPtrA)); +} + +static void +_catFunc3(given_, CFP_PTR_TYPE, _when_leq_expect_less_or_equal)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + size_t i1 = 1, i2 = 2; + size_t j1 = 1, j2 = 2; + CFP_PTR_TYPE cfpArrPtrA = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i1, j1); + CFP_PTR_TYPE cfpArrPtrB = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i2, j2); + + assert_true(CFP_NAMESPACE.SUB_NAMESPACE.pointer.leq(cfpArrPtrA, cfpArrPtrA)); + assert_true(CFP_NAMESPACE.SUB_NAMESPACE.pointer.leq(cfpArrPtrA, cfpArrPtrB)); +} + +static void +_catFunc3(given_, CFP_PTR_TYPE, _when_geq_expect_greater_or_equal)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + size_t i1 = 1, i2 = 2; + size_t j1 = 1, j2 = 2; + CFP_PTR_TYPE cfpArrPtrA = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i1, j1); + CFP_PTR_TYPE cfpArrPtrB = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i2, j2); + + assert_true(CFP_NAMESPACE.SUB_NAMESPACE.pointer.geq(cfpArrPtrA, cfpArrPtrA)); + assert_true(CFP_NAMESPACE.SUB_NAMESPACE.pointer.geq(cfpArrPtrB, cfpArrPtrA)); +} + +static void +_catFunc3(given_, CFP_PTR_TYPE, _when_eq_expect_equal)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + size_t i1 = 1; + size_t j1 = 2; + CFP_PTR_TYPE cfpArrPtrA = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i1, j1); + + assert_true(CFP_NAMESPACE.SUB_NAMESPACE.pointer.eq(cfpArrPtrA, cfpArrPtrA)); +} + +static void +_catFunc3(given_, CFP_PTR_TYPE, _when_neq_expect_not_equal)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + size_t i1 = 1, i2 = 2; + size_t j1 = 2, j2 = 1; + CFP_PTR_TYPE cfpArrPtrA = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i1, j1); + CFP_PTR_TYPE cfpArrPtrB = 
CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i2, j2); + + assert_true(CFP_NAMESPACE.SUB_NAMESPACE.pointer.neq(cfpArrPtrA, cfpArrPtrB)); +} + +static void +_catFunc3(given_, CFP_PTR_TYPE, _when_distance_expect_correct)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + size_t i1 = 1, i2 = 2; + size_t j1 = 2, j2 = 1; + CFP_PTR_TYPE cfpArrPtrA = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i1, j1); + CFP_PTR_TYPE cfpArrPtrB = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i2, j2); + + assert_int_equal((int)CFP_NAMESPACE.SUB_NAMESPACE.pointer.distance(cfpArrPtrA, cfpArrPtrB), + (int)(i2 + + j2*CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr)) - + (int)(i1 + + j1*CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr))); + assert_ptr_equal(cfpArrPtrA.reference.array.object, cfpArrPtrB.reference.array.object); +} + +static void +_catFunc3(given_, CFP_PTR_TYPE, _when_next_expect_correct)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + size_t i = 1, j = 2, oi = 10; + CFP_PTR_TYPE cfpArrPtr = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i, j); + cfpArrPtr = CFP_NAMESPACE.SUB_NAMESPACE.pointer.next(cfpArrPtr, oi); + + size_t idx = (i + CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr) * j) + oi; + size_t x = idx % CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr); + size_t y = idx / CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr); + + assert_int_equal(cfpArrPtr.reference.x, x); + assert_int_equal(cfpArrPtr.reference.y, y); + assert_ptr_equal(cfpArrPtr.reference.array.object, CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i, j).reference.array.object); +} + +static void +_catFunc3(given_, CFP_PTR_TYPE, _when_prev_expect_correct)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + size_t i = 8, j = 2, oi = 5; + CFP_PTR_TYPE cfpArrPtr = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i, j); + cfpArrPtr = CFP_NAMESPACE.SUB_NAMESPACE.pointer.prev(cfpArrPtr, oi); + + size_t idx = (i + 
CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr) * j) - oi; + size_t x = idx % CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr); + size_t y = idx / CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr); + + assert_int_equal(cfpArrPtr.reference.x, x); + assert_int_equal(cfpArrPtr.reference.y, y); + assert_ptr_equal(cfpArrPtr.reference.array.object, CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i, j).reference.array.object); +} + +static void +_catFunc3(given_, CFP_PTR_TYPE, _when_inc_expect_correct)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + size_t i = 1, j = 2; + CFP_PTR_TYPE cfpArrPtr = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i, j); + cfpArrPtr = CFP_NAMESPACE.SUB_NAMESPACE.pointer.inc(cfpArrPtr); + + size_t idx = (i + CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr) * j) + 1; + size_t x = idx % CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr); + size_t y = idx / CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr); + + assert_int_equal(cfpArrPtr.reference.x, x); + assert_int_equal(cfpArrPtr.reference.y, y); + assert_ptr_equal(cfpArrPtr.reference.array.object, CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i, j).reference.array.object); +} + +static void +_catFunc3(given_, CFP_PTR_TYPE, _when_dec_expect_correct)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + size_t i = 1, j = 2; + CFP_PTR_TYPE cfpArrPtr = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i, j); + cfpArrPtr = CFP_NAMESPACE.SUB_NAMESPACE.pointer.dec(cfpArrPtr); + + size_t idx = (i + CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr) * j) - 1; + size_t x = idx % CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr); + size_t y = idx / CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr); + + assert_int_equal(cfpArrPtr.reference.x, x); + assert_int_equal(cfpArrPtr.reference.y, y); + assert_ptr_equal(cfpArrPtr.reference.array.object, CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i, j).reference.array.object); +} + + +// ############## +// cfp_iter tests +// ############## + +static void 
+_catFunc3(given_, CFP_ITER_TYPE, _when_ref_expect_correct)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + + CFP_ITER_TYPE cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr); + CFP_REF_TYPE cfpArrRef = CFP_NAMESPACE.SUB_NAMESPACE.iterator.ref(cfpArrIter); + + assert_ptr_equal(cfpArrRef.array.object, cfpArr.object); + assert_int_equal(cfpArrRef.x, 0); + assert_int_equal(cfpArrRef.y, 0); +} + +static void +_catFunc3(given_, CFP_ITER_TYPE, _when_ref_at_expect_correct)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + + size_t io = 21; + + CFP_ITER_TYPE cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr); + CFP_REF_TYPE cfpArrRef = CFP_NAMESPACE.SUB_NAMESPACE.iterator.ref_at(cfpArrIter, io); + + assert_ptr_equal(cfpArrRef.array.object, cfpArr.object); + assert_int_equal(cfpArrRef.x, 5); + assert_int_equal(cfpArrRef.y, 1); +} + +static void +_catFunc3(given_, CFP_ITER_TYPE, _when_ptr_expect_correct)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + + CFP_ITER_TYPE cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr); + CFP_PTR_TYPE cfpArrPtr = CFP_NAMESPACE.SUB_NAMESPACE.iterator.ptr(cfpArrIter); + + assert_ptr_equal(cfpArrPtr.reference.array.object, cfpArr.object); + assert_int_equal(cfpArrPtr.reference.x, 0); + assert_int_equal(cfpArrPtr.reference.y, 0); +} + +static void +_catFunc3(given_, CFP_ITER_TYPE, _when_ptr_at_expect_correct)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + + size_t io = 21; + + CFP_ITER_TYPE cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr); + CFP_PTR_TYPE cfpArrPtr = CFP_NAMESPACE.SUB_NAMESPACE.iterator.ptr_at(cfpArrIter, io); + + assert_ptr_equal(cfpArrPtr.reference.array.object, cfpArr.object); + assert_int_equal(cfpArrPtr.reference.x, 5); + assert_int_equal(cfpArrPtr.reference.y, 1); +} + +static void +_catFunc3(given_, 
CFP_ITER_TYPE, _when_inc_expect_correct)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + + CFP_ITER_TYPE cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr); + cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.iterator.inc(cfpArrIter); + + assert_ptr_equal(cfpArrIter.array.object, cfpArr.object); + assert_int_equal(cfpArrIter.x, 1); + assert_int_equal(cfpArrIter.y, 0); +} + +static void +_catFunc3(given_, CFP_ITER_TYPE, _when_dec_expect_correct)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + + CFP_ITER_TYPE cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr); + cfpArrIter.x = 4; + cfpArrIter.y = 0; + cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.iterator.dec(cfpArrIter); + + assert_ptr_equal(cfpArrIter.array.object, cfpArr.object); + assert_int_equal(cfpArrIter.x, 3); + assert_int_equal(cfpArrIter.y, 3); +} + +static void +_catFunc3(given_, CFP_ITER_TYPE, _when_next_expect_correct)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + + CFP_ITER_TYPE cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr); + cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.iterator.next(cfpArrIter, 16); + cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.iterator.next(cfpArrIter, 15); + + assert_ptr_equal(cfpArrIter.array.object, cfpArr.object); + assert_int_equal(cfpArrIter.x, 7); + assert_int_equal(cfpArrIter.y, 3); +} + +static void +_catFunc3(given_, CFP_ITER_TYPE, _when_prev_expect_correct)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + + CFP_ITER_TYPE cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr); + cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.iterator.next(cfpArrIter, 31); + cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.iterator.prev(cfpArrIter, 15); + + assert_ptr_equal(cfpArrIter.array.object, cfpArr.object); + assert_int_equal(cfpArrIter.x, 4); + assert_int_equal(cfpArrIter.y, 0); +} + +static void 
+_catFunc3(given_, CFP_ITER_TYPE, _when_distance_expect_correct)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + + CFP_ITER_TYPE cfpArrIter1 = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr); + CFP_ITER_TYPE cfpArrIter2 = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr); + cfpArrIter1 = CFP_NAMESPACE.SUB_NAMESPACE.iterator.next(cfpArrIter1, 15); + cfpArrIter2 = CFP_NAMESPACE.SUB_NAMESPACE.iterator.next(cfpArrIter2, 31); + + assert_int_equal(CFP_NAMESPACE.SUB_NAMESPACE.iterator.distance(cfpArrIter1, cfpArrIter2), 16); + assert_int_equal(CFP_NAMESPACE.SUB_NAMESPACE.iterator.distance(cfpArrIter2, CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr)), -31); +} + +static void +_catFunc3(given_, CFP_ITER_TYPE, _when_lt_expect_less)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + + CFP_ITER_TYPE cfpArrIter1 = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr); + CFP_ITER_TYPE cfpArrIter2 = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr); + cfpArrIter1 = CFP_NAMESPACE.SUB_NAMESPACE.iterator.next(cfpArrIter1, 15); + cfpArrIter2 = CFP_NAMESPACE.SUB_NAMESPACE.iterator.next(cfpArrIter2, 31); + + assert_true(CFP_NAMESPACE.SUB_NAMESPACE.iterator.lt(cfpArrIter1, cfpArrIter2)); +} + +static void +_catFunc3(given_, CFP_ITER_TYPE, _when_gt_expect_greater)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + + CFP_ITER_TYPE cfpArrIter1 = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr); + CFP_ITER_TYPE cfpArrIter2 = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr); + cfpArrIter1 = CFP_NAMESPACE.SUB_NAMESPACE.iterator.next(cfpArrIter1, 15); + cfpArrIter2 = CFP_NAMESPACE.SUB_NAMESPACE.iterator.next(cfpArrIter2, 31); + + assert_true(CFP_NAMESPACE.SUB_NAMESPACE.iterator.gt(cfpArrIter2, cfpArrIter1)); +} + +static void +_catFunc3(given_, CFP_ITER_TYPE, _when_leq_expect_less_or_equal)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + + CFP_ITER_TYPE 
cfpArrIter1 = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr); + CFP_ITER_TYPE cfpArrIter2 = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr); + cfpArrIter1 = CFP_NAMESPACE.SUB_NAMESPACE.iterator.next(cfpArrIter1, 15); + cfpArrIter2 = CFP_NAMESPACE.SUB_NAMESPACE.iterator.next(cfpArrIter2, 31); + + assert_true(CFP_NAMESPACE.SUB_NAMESPACE.iterator.leq(cfpArrIter1, cfpArrIter1)); + assert_true(CFP_NAMESPACE.SUB_NAMESPACE.iterator.leq(cfpArrIter1, cfpArrIter2)); +} + +static void +_catFunc3(given_, CFP_ITER_TYPE, _when_geq_expect_greater_or_equal)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + + CFP_ITER_TYPE cfpArrIter1 = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr); + CFP_ITER_TYPE cfpArrIter2 = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr); + cfpArrIter1 = CFP_NAMESPACE.SUB_NAMESPACE.iterator.next(cfpArrIter1, 15); + cfpArrIter2 = CFP_NAMESPACE.SUB_NAMESPACE.iterator.next(cfpArrIter2, 31); + + assert_true(CFP_NAMESPACE.SUB_NAMESPACE.iterator.geq(cfpArrIter1, cfpArrIter1)); + assert_true(CFP_NAMESPACE.SUB_NAMESPACE.iterator.geq(cfpArrIter2, cfpArrIter1)); +} + +static void +_catFunc3(given_, CFP_ITER_TYPE, _when_get_index_expect_correct)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + + CFP_ITER_TYPE cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr); + cfpArrIter.x = 1; + cfpArrIter.y = 3; + + size_t i_idx = CFP_NAMESPACE.SUB_NAMESPACE.iterator.i(cfpArrIter); + size_t j_idx = CFP_NAMESPACE.SUB_NAMESPACE.iterator.j(cfpArrIter); + + assert_int_equal(i_idx, 1u); + assert_int_equal(j_idx, 3u); +} diff --git a/tests/cfp/testCfpArray2d.c b/tests/cfp/testCfpArray2d.c new file mode 100644 index 00000000..c6b57b3f --- /dev/null +++ b/tests/cfp/testCfpArray2d.c @@ -0,0 +1,101 @@ +#include "src/traitsd.h" +#include "src/block2.h" + +#include "constants/2dDouble.h" + +#define CFP_ARRAY_TYPE cfp_array2d +#define CFP_REF_TYPE cfp_ref2d +#define CFP_PTR_TYPE cfp_ptr2d +#define CFP_ITER_TYPE 
cfp_iter2d +#define SUB_NAMESPACE array2d +#define SCALAR double +#define SCALAR_TYPE zfp_type_double +#define DIMENSIONALITY 2 + +#include "testCfpArray_source.c" +#include "testCfpArray2_source.c" + +int main() +{ + const struct CMUnitTest tests[] = { + cmocka_unit_test(when_seededRandomSmoothDataGenerated_expect_ChecksumMatches), + + cmocka_unit_test(given_cfp_array2d_when_defaultCtor_expect_returnsNonNullPtr), + cmocka_unit_test_setup_teardown(given_cfp_array2d_when_ctor_expect_paramsSet, setupCfpArrLargeComplete, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array2d_when_copyCtor_expect_paramsCopied, setupCfpArrLargeComplete, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array2d_when_copyCtor_expect_cacheCopied, setupCfpArrLargeComplete, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array2d_when_headerCtor_expect_copied, setupCfpArrLargeComplete, teardownCfpArr), + + cmocka_unit_test_setup_teardown(given_cfp_array2d_header_expect_matchingMetadata, setupCfpArrLargeComplete, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array2d_header_when_bufferCtor_expect_copied, setupCfpArrLargeComplete, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array2d_header_when_bufferCtor_expect_paramsCopied, setupCfpArrLargeComplete, teardownCfpArr), + + cmocka_unit_test_setup_teardown(given_cfp_array2d_when_setRate_expect_rateSet, setupCfpArrMinimal, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array2d_when_setCacheSize_expect_cacheSizeSet, setupCfpArrSmall, teardownCfpArr), + + cmocka_unit_test_setup_teardown(given_cfp_array2d_with_dirtyCache_when_flushCache_expect_cacheEntriesPersistedToMemory, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array2d_when_clearCache_expect_cacheCleared, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array2d_when_resize_expect_sizeChanged, setupCfpArrSmall, teardownCfpArr), + + 
cmocka_unit_test_setup_teardown(given_cfp_array2d_when_setFlat_expect_entryWrittenToCacheOnly, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_array2d_when_getFlat_expect_entryReturned, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_array2d_when_set_expect_entryWrittenToCacheOnly, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_array2d_when_get_expect_entryReturned, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_array2d_when_ref_expect_arrayObjectValid, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_array2d_when_ptr_expect_arrayObjectValid, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_array2d_when_ref_flat_expect_entryReturned, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_array2d_when_ptr_flat_expect_entryReturned, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_array2d_when_begin_expect_objectValid, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_array2d_when_end_expect_objectValid, setupCfpArrSmall, teardownCfpArr),
+
+    cmocka_unit_test_setup_teardown(given_cfp_ref2d_when_get_expect_entryReturned, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_ref2d_when_set_expect_arrayUpdated, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_ref2d_when_ptr_expect_addressMatches, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_ref2d_when_copy_expect_arrayUpdated, setupCfpArrSmall, teardownCfpArr),
+
+    cmocka_unit_test_setup_teardown(given_cfp_ptr2d_when_get_set_expect_correct, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_ptr2d_when_get_at_set_at_expect_correct, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_ptr2d_when_ref_expect_addressMatches, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_ptr2d_when_ref_at_expect_correct, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_ptr2d_when_lt_expect_less, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_ptr2d_when_gt_expect_greater, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_ptr2d_when_leq_expect_less_or_equal, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_ptr2d_when_geq_expect_greater_or_equal, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_ptr2d_when_eq_expect_equal, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_ptr2d_when_neq_expect_not_equal, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_ptr2d_when_distance_expect_correct, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_ptr2d_when_next_expect_correct, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_ptr2d_when_prev_expect_correct, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_ptr2d_when_inc_expect_correct, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_ptr2d_when_dec_expect_correct, setupCfpArrSmall, teardownCfpArr),
+
+    cmocka_unit_test_setup_teardown(given_cfp_iter2d_when_get_set_expect_correct, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_iter2d_when_get_at_set_at_expect_correct, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_iter2d_when_ref_expect_correct, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_iter2d_when_ref_at_expect_correct, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_iter2d_when_ptr_expect_correct, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_iter2d_when_ptr_at_expect_correct, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_iter2d_when_lt_expect_less, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_iter2d_when_gt_expect_greater, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_iter2d_when_leq_expect_less_or_equal, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_iter2d_when_geq_expect_greater_or_equal, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_iter2d_when_eq_expect_equal, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_iter2d_when_neq_expect_not_equal, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_iter2d_when_distance_expect_correct, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_iter2d_when_next_expect_correct, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_iter2d_when_prev_expect_correct, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_iter2d_when_inc_expect_correct, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_iter2d_when_dec_expect_correct, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_iter2d_when_iterate_touch_all, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_iter2d_when_get_index_expect_correct, setupCfpArrSmall, teardownCfpArr),
+
+    cmocka_unit_test_setup_teardown(given_cfp_array2d_when_setArray_expect_compressedStreamChecksumMatches, setupFixedRate0, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_array2d_when_setArray_expect_compressedStreamChecksumMatches, setupFixedRate1, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_array2d_when_setArray_expect_compressedStreamChecksumMatches, setupFixedRate2, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_array2d_when_getArray_expect_decompressedArrChecksumMatches, setupFixedRate0, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_array2d_when_getArray_expect_decompressedArrChecksumMatches, setupFixedRate1, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_array2d_when_getArray_expect_decompressedArrChecksumMatches, setupFixedRate2, teardownCfpArr),
+  };
+
+  return cmocka_run_group_tests(tests, prepCommonSetupVars, teardownCommonSetupVars);
+}
diff --git a/tests/cfp/testCfpArray2f.c b/tests/cfp/testCfpArray2f.c
new file mode 100644
index 00000000..b40a92ad
--- /dev/null
+++ b/tests/cfp/testCfpArray2f.c
@@ -0,0 +1,101 @@
+#include "src/traitsf.h"
+#include "src/block2.h"
+
+#include "constants/2dFloat.h"
+
+#define CFP_ARRAY_TYPE cfp_array2f
+#define CFP_REF_TYPE cfp_ref2f
+#define CFP_PTR_TYPE cfp_ptr2f
+#define CFP_ITER_TYPE cfp_iter2f
+#define SUB_NAMESPACE array2f
+#define SCALAR float
+#define SCALAR_TYPE zfp_type_float
+#define DIMENSIONALITY 2
+
+#include "testCfpArray_source.c"
+#include "testCfpArray2_source.c"
+
+int main()
+{
+  const struct CMUnitTest tests[] = {
+    cmocka_unit_test(when_seededRandomSmoothDataGenerated_expect_ChecksumMatches),
+
+    cmocka_unit_test(given_cfp_array2f_when_defaultCtor_expect_returnsNonNullPtr),
+    cmocka_unit_test_setup_teardown(given_cfp_array2f_when_ctor_expect_paramsSet, setupCfpArrLargeComplete, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_array2f_when_copyCtor_expect_paramsCopied, setupCfpArrLargeComplete, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_array2f_when_copyCtor_expect_cacheCopied, setupCfpArrLargeComplete, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_array2f_when_headerCtor_expect_copied, setupCfpArrLargeComplete, teardownCfpArr),
+
+    cmocka_unit_test_setup_teardown(given_cfp_array2f_header_expect_matchingMetadata, setupCfpArrLargeComplete, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_array2f_header_when_bufferCtor_expect_copied, setupCfpArrLargeComplete, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_array2f_header_when_bufferCtor_expect_paramsCopied, setupCfpArrLargeComplete, teardownCfpArr),
+
+    cmocka_unit_test_setup_teardown(given_cfp_array2f_when_setRate_expect_rateSet, setupCfpArrMinimal, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_array2f_when_setCacheSize_expect_cacheSizeSet, setupCfpArrSmall, teardownCfpArr),
+
+    cmocka_unit_test_setup_teardown(given_cfp_array2f_with_dirtyCache_when_flushCache_expect_cacheEntriesPersistedToMemory, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_array2f_when_clearCache_expect_cacheCleared, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_array2f_when_resize_expect_sizeChanged, setupCfpArrSmall, teardownCfpArr),
+
+    cmocka_unit_test_setup_teardown(given_cfp_array2f_when_setFlat_expect_entryWrittenToCacheOnly, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_array2f_when_getFlat_expect_entryReturned, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_array2f_when_set_expect_entryWrittenToCacheOnly, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_array2f_when_get_expect_entryReturned, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_array2f_when_ref_expect_arrayObjectValid, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_array2f_when_ptr_expect_arrayObjectValid, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_array2f_when_ref_flat_expect_entryReturned, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_array2f_when_ptr_flat_expect_entryReturned, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_array2f_when_begin_expect_objectValid, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_array2f_when_end_expect_objectValid, setupCfpArrSmall, teardownCfpArr),
+
+    cmocka_unit_test_setup_teardown(given_cfp_ref2f_when_get_expect_entryReturned, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_ref2f_when_set_expect_arrayUpdated, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_ref2f_when_ptr_expect_addressMatches, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_ref2f_when_copy_expect_arrayUpdated, setupCfpArrSmall, teardownCfpArr),
+
+    cmocka_unit_test_setup_teardown(given_cfp_ptr2f_when_get_set_expect_correct, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_ptr2f_when_get_at_set_at_expect_correct, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_ptr2f_when_ref_expect_addressMatches, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_ptr2f_when_ref_at_expect_correct, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_ptr2f_when_lt_expect_less, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_ptr2f_when_gt_expect_greater, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_ptr2f_when_leq_expect_less_or_equal, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_ptr2f_when_geq_expect_greater_or_equal, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_ptr2f_when_eq_expect_equal, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_ptr2f_when_neq_expect_not_equal, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_ptr2f_when_distance_expect_correct, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_ptr2f_when_next_expect_correct, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_ptr2f_when_prev_expect_correct, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_ptr2f_when_inc_expect_correct, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_ptr2f_when_dec_expect_correct, setupCfpArrSmall, teardownCfpArr),
+
+    cmocka_unit_test_setup_teardown(given_cfp_iter2f_when_get_set_expect_correct, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_iter2f_when_get_at_set_at_expect_correct, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_iter2f_when_ref_expect_correct, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_iter2f_when_ref_at_expect_correct, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_iter2f_when_ptr_expect_correct, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_iter2f_when_ptr_at_expect_correct, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_iter2f_when_lt_expect_less, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_iter2f_when_gt_expect_greater, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_iter2f_when_leq_expect_less_or_equal, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_iter2f_when_geq_expect_greater_or_equal, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_iter2f_when_eq_expect_equal, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_iter2f_when_neq_expect_not_equal, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_iter2f_when_distance_expect_correct, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_iter2f_when_next_expect_correct, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_iter2f_when_prev_expect_correct, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_iter2f_when_inc_expect_correct, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_iter2f_when_dec_expect_correct, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_iter2f_when_iterate_touch_all, setupCfpArrSmall, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_iter2f_when_get_index_expect_correct, setupCfpArrSmall, teardownCfpArr),
+
+    cmocka_unit_test_setup_teardown(given_cfp_array2f_when_setArray_expect_compressedStreamChecksumMatches, setupFixedRate0, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_array2f_when_setArray_expect_compressedStreamChecksumMatches, setupFixedRate1, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_array2f_when_setArray_expect_compressedStreamChecksumMatches, setupFixedRate2, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_array2f_when_getArray_expect_decompressedArrChecksumMatches, setupFixedRate0, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_array2f_when_getArray_expect_decompressedArrChecksumMatches, setupFixedRate1, teardownCfpArr),
+    cmocka_unit_test_setup_teardown(given_cfp_array2f_when_getArray_expect_decompressedArrChecksumMatches, setupFixedRate2, teardownCfpArr),
+  };
+
+  return cmocka_run_group_tests(tests, prepCommonSetupVars, teardownCommonSetupVars);
+}
diff --git a/tests/cfp/testCfpArray3_source.c b/tests/cfp/testCfpArray3_source.c
new file mode 100644
index 00000000..5fc36a3b
--- /dev/null
+++ b/tests/cfp/testCfpArray3_source.c
@@ -0,0 +1,662 @@
+// ###############
+// cfp_array tests
+// ###############
+
+static void
+_catFunc3(given_, CFP_ARRAY_TYPE, _when_ctor_expect_paramsSet)(void **state)
+{
+  struct setupVars *bundle = *state;
+  size_t csize = 300;
+  CFP_ARRAY_TYPE cfpArr = CFP_NAMESPACE.SUB_NAMESPACE.ctor(bundle->dataSideLen, bundle->dataSideLen, bundle->dataSideLen, bundle->rate, bundle->dataArr, csize);
+  assert_non_null(cfpArr.object);
+
+  assert_int_equal(CFP_NAMESPACE.SUB_NAMESPACE.size(cfpArr), bundle->totalDataLen);
+
+  assert_true(CFP_NAMESPACE.SUB_NAMESPACE.rate(cfpArr) >= bundle->rate);
+
+  uchar* compressedPtr = CFP_NAMESPACE.SUB_NAMESPACE.compressed_data(cfpArr);
+  size_t compressedSize = CFP_NAMESPACE.SUB_NAMESPACE.compressed_size(cfpArr);
+  assert_int_not_equal(hashBitstream((uint64*)compressedPtr, compressedSize), 0);
+
+  // sets a minimum cache size
+  assert_true(CFP_NAMESPACE.SUB_NAMESPACE.cache_size(cfpArr) >= csize);
+
+  CFP_NAMESPACE.SUB_NAMESPACE.dtor(cfpArr);
+}
+
+static void
+_catFunc3(given_, CFP_ARRAY_TYPE, _header_expect_matchingMetadata)(void **state)
+{
+  struct setupVars *bundle = *state;
+  CFP_ARRAY_TYPE srcCfpArr = bundle->cfpArr;
+  CFP_HEADER_TYPE srcCfpHdr = CFP_NAMESPACE.SUB_NAMESPACE.header.ctor(srcCfpArr);
+
+  assert_int_equal(CFP_NAMESPACE.SUB_NAMESPACE.header.scalar_type(srcCfpHdr), SCALAR_TYPE);
+  assert_int_equal(CFP_NAMESPACE.SUB_NAMESPACE.header.dimensionality(srcCfpHdr), 3);
+  assert_int_equal(CFP_NAMESPACE.SUB_NAMESPACE.header.rate(srcCfpHdr), CFP_NAMESPACE.SUB_NAMESPACE.rate(srcCfpArr));
+  assert_int_equal(CFP_NAMESPACE.SUB_NAMESPACE.header.size_x(srcCfpHdr), CFP_NAMESPACE.SUB_NAMESPACE.size_x(srcCfpArr));
+  assert_int_equal(CFP_NAMESPACE.SUB_NAMESPACE.header.size_y(srcCfpHdr), CFP_NAMESPACE.SUB_NAMESPACE.size_y(srcCfpArr));
+  assert_int_equal(CFP_NAMESPACE.SUB_NAMESPACE.header.size_z(srcCfpHdr), CFP_NAMESPACE.SUB_NAMESPACE.size_z(srcCfpArr));
+
+  // cleanup
+  CFP_NAMESPACE.SUB_NAMESPACE.header.dtor(srcCfpHdr);
+}
+
+static void
+_catFunc3(given_, CFP_ARRAY_TYPE, _when_resize_expect_sizeChanged)(void **state)
+{
+  struct setupVars *bundle = *state;
+  CFP_ARRAY_TYPE cfpArr = bundle->cfpArr;
+
+  size_t newSizeX = 81, newSizeY = 123, newSizeZ = 14;
+  assert_int_not_equal(CFP_NAMESPACE.SUB_NAMESPACE.size(cfpArr), newSizeX * newSizeY * newSizeZ);
+
+  CFP_NAMESPACE.SUB_NAMESPACE.resize(cfpArr, newSizeX, newSizeY, newSizeZ, 1);
+
+  assert_int_equal(CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr), newSizeX);
+  assert_int_equal(CFP_NAMESPACE.SUB_NAMESPACE.size_y(cfpArr), newSizeY);
+  assert_int_equal(CFP_NAMESPACE.SUB_NAMESPACE.size_z(cfpArr), newSizeZ);
+  assert_int_equal(CFP_NAMESPACE.SUB_NAMESPACE.size(cfpArr), newSizeX * newSizeY * newSizeZ);
+}
+
+static void
+_catFunc3(given_, CFP_ARRAY_TYPE, _when_set_expect_entryWrittenToCacheOnly)(void **state)
+{
+  struct setupVars *bundle = *state;
+
+  CFP_ARRAY_TYPE cfpArr = bundle->cfpArr;
+
+  // getting the ptr automatically flushes cache, so do this before setting an entry
+  uchar* compressedDataPtr = CFP_NAMESPACE.SUB_NAMESPACE.compressed_data(cfpArr);
+  size_t compressedSize = CFP_NAMESPACE.SUB_NAMESPACE.compressed_size(cfpArr);
+
+  uchar* oldMemory = malloc(compressedSize * sizeof(uchar));
+  memcpy(oldMemory, compressedDataPtr, compressedSize);
+
+  CFP_NAMESPACE.SUB_NAMESPACE.set(cfpArr, 1, 1, 1, (SCALAR)VAL);
+
+  assert_memory_equal(compressedDataPtr, oldMemory, compressedSize);
+  free(oldMemory);
+}
+
+static void
+_catFunc3(given_, CFP_ARRAY_TYPE, _when_get_expect_entryReturned)(void **state)
+{
+  struct setupVars *bundle = *state;
+  CFP_ARRAY_TYPE cfpArr = bundle->cfpArr;
+  size_t i = 1, j = 2, k = 1;
+  CFP_NAMESPACE.SUB_NAMESPACE.set(cfpArr, i, j, k, (SCALAR)VAL);
+
+  // dirty cache doesn't immediately apply compression
+  assert_true(CFP_NAMESPACE.SUB_NAMESPACE.get(cfpArr, i, j, k) == (SCALAR)VAL);
+}
+
+static void
+_catFunc3(given_, CFP_ARRAY_TYPE, _when_ref_expect_arrayObjectValid)(void **state)
+{
+  struct setupVars *bundle = *state;
+  CFP_ARRAY_TYPE cfpArr = bundle->cfpArr;
+  size_t i = 1, j = 2, k = 1;
+  CFP_REF_TYPE cfpArrRef = CFP_NAMESPACE.SUB_NAMESPACE.ref(cfpArr, i, j, k);
+
+  assert_ptr_equal(cfpArrRef.array.object, cfpArr.object);
+}
+
+static void
+_catFunc3(given_, CFP_ARRAY_TYPE, _when_ptr_expect_arrayObjectValid)(void **state)
+{
+  struct setupVars *bundle = *state;
+  CFP_ARRAY_TYPE cfpArr = bundle->cfpArr;
+  size_t i = 1, j = 2, k = 1;
+  CFP_PTR_TYPE cfpArrPtr = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i, j, k);
+
+  assert_ptr_equal(cfpArrPtr.reference.array.object, cfpArr.object);
+}
+
+static void
+_catFunc3(given_, CFP_ARRAY_TYPE, _when_begin_expect_objectValid)(void **state)
+{
+  struct setupVars *bundle = *state;
+  CFP_ARRAY_TYPE cfpArr = bundle->cfpArr;
+  CFP_ITER_TYPE cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr);
+
+  assert_ptr_equal(cfpArrIter.array.object, cfpArr.object);
+  assert_int_equal(cfpArrIter.x, 0);
+  assert_int_equal(cfpArrIter.y, 0);
+  assert_int_equal(cfpArrIter.z, 0);
+}
+
+static void
+_catFunc3(given_, CFP_ARRAY_TYPE, _when_end_expect_objectValid)(void **state)
+{
+  struct setupVars *bundle = *state;
+  CFP_ARRAY_TYPE cfpArr = bundle->cfpArr;
+  CFP_ITER_TYPE cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.end(cfpArr);
+
+  assert_ptr_equal(cfpArrIter.array.object, cfpArr.object);
+  assert_int_equal(cfpArrIter.x, 0);
+  assert_int_equal(cfpArrIter.y, 0);
+  assert_int_equal(cfpArrIter.z, SIZE_Z);
+}
+
+
+// #############
+// cfp_ref tests
+// #############
+
+static void
+_catFunc3(given_, CFP_REF_TYPE, _when_get_expect_entryReturned)(void **state)
+{
+  struct setupVars *bundle = *state;
+  CFP_ARRAY_TYPE cfpArr = bundle->cfpArr;
+  size_t i = 1, j = 2, k = 1;
+  CFP_REF_TYPE cfpArrRef = CFP_NAMESPACE.SUB_NAMESPACE.ref(cfpArr, i, j, k);
+  CFP_NAMESPACE.SUB_NAMESPACE.set(cfpArr, i, j, k, VAL);
+
+  assert_true(CFP_NAMESPACE.SUB_NAMESPACE.reference.get(cfpArrRef) == (SCALAR)VAL);
+}
+
+static void
+_catFunc3(given_, CFP_REF_TYPE, _when_set_expect_arrayUpdated)(void **state)
+{
+  struct setupVars *bundle = *state;
+  CFP_ARRAY_TYPE cfpArr = bundle->cfpArr;
+  size_t i = 1, j = 2, k = 1;
+  CFP_REF_TYPE cfpArrRef = CFP_NAMESPACE.SUB_NAMESPACE.ref(cfpArr, i, j, k);
+  CFP_NAMESPACE.SUB_NAMESPACE.reference.set(cfpArrRef, VAL);
+
+  assert_true(CFP_NAMESPACE.SUB_NAMESPACE.get(cfpArr, i, j, k) == (SCALAR)VAL);
+}
+
+static void
+_catFunc3(given_, CFP_REF_TYPE, _when_copy_expect_arrayUpdated)(void **state)
+{
+  struct setupVars *bundle = *state;
+  CFP_ARRAY_TYPE cfpArr = bundle->cfpArr;
+  size_t i1 = 1, j1 = 2, k1 = 1, i2 = 2, j2 = 1, k2 = 2;
+  CFP_NAMESPACE.SUB_NAMESPACE.set(cfpArr, i1, j1, k1, VAL);
+  CFP_REF_TYPE cfpArrRef_a = CFP_NAMESPACE.SUB_NAMESPACE.ref(cfpArr, i1, j1, k1);
+  CFP_REF_TYPE cfpArrRef_b = CFP_NAMESPACE.SUB_NAMESPACE.ref(cfpArr, i2, j2, k2);
+  CFP_NAMESPACE.SUB_NAMESPACE.reference.copy(cfpArrRef_b, cfpArrRef_a);
+
+  assert_true(CFP_NAMESPACE.SUB_NAMESPACE.get(cfpArr, i2, j2, k2) == (SCALAR)VAL);
+}
+
+static void
+_catFunc3(given_, CFP_REF_TYPE, _when_ptr_expect_addressMatches)(void **state)
+{
+  struct setupVars *bundle = *state;
+  CFP_ARRAY_TYPE cfpArr = bundle->cfpArr;
+  size_t i = 1, j = 2, k = 1;
+  CFP_REF_TYPE cfpArrRef = CFP_NAMESPACE.SUB_NAMESPACE.ref(cfpArr, i, j, k);
+  CFP_PTR_TYPE cfpArrPtr = CFP_NAMESPACE.SUB_NAMESPACE.reference.ptr(cfpArrRef);
+
+  assert_ptr_equal(cfpArrRef.array.object, cfpArrPtr.reference.array.object);
+}
+
+
+// #############
+// cfp_ptr tests
+// #############
+
+static void
+_catFunc3(given_, CFP_PTR_TYPE, _when_get_set_expect_correct)(void **state)
+{
+  struct setupVars *bundle = *state;
+  CFP_ARRAY_TYPE cfpArr = bundle->cfpArr;
+  size_t i = 1, j = 2, k = 3;
+  SCALAR val = 5;
+  CFP_PTR_TYPE cfpArrPtr = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i, j, k);
+  CFP_NAMESPACE.SUB_NAMESPACE.pointer.set(cfpArrPtr, val);
+
+  assert_true(val - CFP_NAMESPACE.SUB_NAMESPACE.pointer.get(cfpArrPtr) < 1e-12);
+  assert_true(val - CFP_NAMESPACE.SUB_NAMESPACE.pointer.get(cfpArrPtr) > -1e-12);
+}
+
+static void
+_catFunc3(given_, CFP_PTR_TYPE, _when_get_at_set_at_expect_correct)(void **state)
+{
+  struct setupVars *bundle = *state;
+  CFP_ARRAY_TYPE cfpArr = bundle->cfpArr;
+  size_t i = 1, j = 2, k = 3, io = 4;
+  SCALAR val = 5;
+  CFP_PTR_TYPE cfpArrPtr = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i, j, k);
+  CFP_NAMESPACE.SUB_NAMESPACE.pointer.set_at(cfpArrPtr, io, val);
+
+  assert_true(val - CFP_NAMESPACE.SUB_NAMESPACE.pointer.get_at(cfpArrPtr, io) < 1e-12);
+  assert_true(val - CFP_NAMESPACE.SUB_NAMESPACE.pointer.get_at(cfpArrPtr, io) > -1e-12);
+}
+
+static void
+_catFunc3(given_, CFP_PTR_TYPE, _when_ref_expect_addressMatches)(void **state)
+{
+  struct setupVars *bundle = *state;
+  CFP_ARRAY_TYPE cfpArr = bundle->cfpArr;
+  size_t i = 1, j = 2, k = 1;
+  CFP_PTR_TYPE cfpArrPtr = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i, j, k);
+  CFP_REF_TYPE cfpArrRef = CFP_NAMESPACE.SUB_NAMESPACE.pointer.ref(cfpArrPtr);
+
+  assert_ptr_equal(cfpArrPtr.reference.array.object, cfpArrRef.array.object);
+}
+
+static void
+_catFunc3(given_, CFP_PTR_TYPE, _when_ref_at_expect_correct)(void **state)
+{
+  struct setupVars *bundle = *state;
+  CFP_ARRAY_TYPE cfpArr = bundle->cfpArr;
+  size_t i = 1, j = 2, k = 1;
+  size_t oi = 10;
+  CFP_PTR_TYPE cfpArrPtr = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i, j, k);
+  CFP_REF_TYPE cfpArrRef = CFP_NAMESPACE.SUB_NAMESPACE.pointer.ref_at(cfpArrPtr, oi);
+
+  assert_int_equal(cfpArrPtr.reference.x + oi, cfpArrRef.x);
+  assert_ptr_equal(cfpArrPtr.reference.array.object, cfpArrRef.array.object);
+}
+
+static void
+_catFunc3(given_, CFP_PTR_TYPE, _when_lt_expect_less)(void **state)
+{
+  struct setupVars *bundle = *state;
+  CFP_ARRAY_TYPE cfpArr = bundle->cfpArr;
+  size_t i1 = 1, i2 = 2;
+  size_t j1 = 1, j2 = 2;
+  size_t k1 = 1, k2 = 2;
+  CFP_PTR_TYPE cfpArrPtrA = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i1, j1, k1);
+  CFP_PTR_TYPE cfpArrPtrB = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i2, j2, k2);
+
+  assert_true(CFP_NAMESPACE.SUB_NAMESPACE.pointer.lt(cfpArrPtrA, cfpArrPtrB));
+}
+
+static void
+_catFunc3(given_, CFP_PTR_TYPE, _when_gt_expect_greater)(void **state)
+{
+  struct setupVars *bundle = *state;
+  CFP_ARRAY_TYPE cfpArr = bundle->cfpArr;
+  size_t i1 = 1, i2 = 2;
+  size_t j1 = 1, j2 = 2;
+  size_t k1 = 1, k2 = 2;
+  CFP_PTR_TYPE cfpArrPtrA = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i1, j1, k1);
+  CFP_PTR_TYPE cfpArrPtrB = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i2, j2, k2);
+
+  assert_true(CFP_NAMESPACE.SUB_NAMESPACE.pointer.gt(cfpArrPtrB, cfpArrPtrA));
+}
+
+static void
+_catFunc3(given_, CFP_PTR_TYPE, _when_leq_expect_less_or_equal)(void **state)
+{
+  struct setupVars *bundle = *state;
+  CFP_ARRAY_TYPE cfpArr = bundle->cfpArr;
+  size_t i1 = 1, i2 = 2;
+  size_t j1 = 1, j2 = 2;
+  size_t k1 = 1, k2 = 2;
+  CFP_PTR_TYPE cfpArrPtrA = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i1, j1, k1);
+  CFP_PTR_TYPE cfpArrPtrB = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i2, j2, k2);
+
+  assert_true(CFP_NAMESPACE.SUB_NAMESPACE.pointer.leq(cfpArrPtrA, cfpArrPtrA));
+  assert_true(CFP_NAMESPACE.SUB_NAMESPACE.pointer.leq(cfpArrPtrA, cfpArrPtrB));
+}
+
+static void
+_catFunc3(given_, CFP_PTR_TYPE, _when_geq_expect_greater_or_equal)(void **state)
+{
+  struct setupVars *bundle = *state;
+  CFP_ARRAY_TYPE cfpArr = bundle->cfpArr;
+  size_t i1 = 1, i2 = 2;
+  size_t j1 = 1, j2 = 2;
+  size_t k1 = 1, k2 = 2;
+  CFP_PTR_TYPE cfpArrPtrA = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i1, j1, k1);
+  CFP_PTR_TYPE cfpArrPtrB = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i2, j2, k2);
+
+  assert_true(CFP_NAMESPACE.SUB_NAMESPACE.pointer.geq(cfpArrPtrA, cfpArrPtrA));
+  assert_true(CFP_NAMESPACE.SUB_NAMESPACE.pointer.geq(cfpArrPtrB, cfpArrPtrA));
+}
+
+static void
+_catFunc3(given_, CFP_PTR_TYPE, _when_eq_expect_equal)(void **state)
+{
+  struct setupVars *bundle = *state;
+  CFP_ARRAY_TYPE cfpArr = bundle->cfpArr;
+  size_t i1 = 1, j1 = 2, k1 = 1;
+  CFP_PTR_TYPE cfpArrPtrA = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i1, j1, k1);
+
+  assert_true(CFP_NAMESPACE.SUB_NAMESPACE.pointer.eq(cfpArrPtrA, cfpArrPtrA));
+}
+
+static void
+_catFunc3(given_, CFP_PTR_TYPE, _when_neq_expect_not_equal)(void **state)
+{
+  struct setupVars *bundle = *state;
+  CFP_ARRAY_TYPE cfpArr = bundle->cfpArr;
+  size_t i1 = 1, i2 = 2;
+  size_t j1 = 2, j2 = 1;
+  size_t k1 = 1, k2 = 2;
+  CFP_PTR_TYPE cfpArrPtrA = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i1, j1, k1);
+  CFP_PTR_TYPE cfpArrPtrB = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i2, j2, k2);
+
+  assert_true(CFP_NAMESPACE.SUB_NAMESPACE.pointer.neq(cfpArrPtrA, cfpArrPtrB));
+}
+
+static void
+_catFunc3(given_, CFP_PTR_TYPE, _when_distance_expect_correct)(void **state)
+{
+  struct setupVars *bundle = *state;
+  CFP_ARRAY_TYPE cfpArr = bundle->cfpArr;
+  size_t i1 = 1, i2 = 2, k1 = 1;
+  size_t j1 = 2, j2 = 1, k2 = 2;
+  CFP_PTR_TYPE cfpArrPtrA = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i1, j1, k1);
+  CFP_PTR_TYPE cfpArrPtrB = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i2, j2, k2);
+
+  assert_int_equal((int)CFP_NAMESPACE.SUB_NAMESPACE.pointer.distance(cfpArrPtrA, cfpArrPtrB),
+                   (int)(i2 +
+                         j2*CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr) +
+                         k2*CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr)*CFP_NAMESPACE.SUB_NAMESPACE.size_y(cfpArr)) -
+                   (int)(i1 +
+                         j1*CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr) +
+                         k1*CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr)*CFP_NAMESPACE.SUB_NAMESPACE.size_y(cfpArr)));
+  assert_ptr_equal(cfpArrPtrA.reference.array.object, cfpArrPtrB.reference.array.object);
+}
+
+static void
+_catFunc3(given_, CFP_PTR_TYPE, _when_next_expect_correct)(void **state)
+{
+  struct setupVars *bundle = *state;
+  CFP_ARRAY_TYPE cfpArr = bundle->cfpArr;
+  size_t i = 1, j = 2, k = 1, oi = 10;
+  CFP_PTR_TYPE cfpArrPtr = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i, j, k);
+  cfpArrPtr = CFP_NAMESPACE.SUB_NAMESPACE.pointer.next(cfpArrPtr, oi);
+
+  size_t idx = (i + CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr) * (j + CFP_NAMESPACE.SUB_NAMESPACE.size_y(cfpArr) * k)) + oi;
+  size_t x = idx % CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr);
+  size_t y = (idx / CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr)) % CFP_NAMESPACE.SUB_NAMESPACE.size_y(cfpArr);
+  size_t z = idx / (CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr) * CFP_NAMESPACE.SUB_NAMESPACE.size_y(cfpArr));
+
+  assert_int_equal(cfpArrPtr.reference.x, x);
+  assert_int_equal(cfpArrPtr.reference.y, y);
+  assert_int_equal(cfpArrPtr.reference.z, z);
+  assert_ptr_equal(cfpArrPtr.reference.array.object, CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i, j, k).reference.array.object);
+}
+
+static void
+_catFunc3(given_, CFP_PTR_TYPE, _when_prev_expect_correct)(void **state)
+{
+  struct setupVars *bundle = *state;
+  CFP_ARRAY_TYPE cfpArr = bundle->cfpArr;
+  size_t i = 8, j = 4, k = 1, oi = 10;
+  CFP_PTR_TYPE cfpArrPtr = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i, j, k);
+  cfpArrPtr = CFP_NAMESPACE.SUB_NAMESPACE.pointer.prev(cfpArrPtr, oi);
+
+  size_t idx = (i + CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr) * (j + CFP_NAMESPACE.SUB_NAMESPACE.size_y(cfpArr) * k)) - oi;
+  size_t x = idx % CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr);
+  size_t y = (idx / CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr)) % CFP_NAMESPACE.SUB_NAMESPACE.size_y(cfpArr);
+  size_t z = idx / (CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr) * CFP_NAMESPACE.SUB_NAMESPACE.size_y(cfpArr));
+
+  assert_int_equal(cfpArrPtr.reference.x, x);
+  assert_int_equal(cfpArrPtr.reference.y, y);
+  assert_int_equal(cfpArrPtr.reference.z, z);
+  assert_ptr_equal(cfpArrPtr.reference.array.object, CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i, j, k).reference.array.object);
+}
+
+static void
+_catFunc3(given_, CFP_PTR_TYPE, _when_inc_expect_correct)(void **state)
+{
+  struct setupVars *bundle = *state;
+  CFP_ARRAY_TYPE cfpArr = bundle->cfpArr;
+  size_t i = 1, j = 2, k = 1;
+  CFP_PTR_TYPE cfpArrPtr = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i, j, k);
+  cfpArrPtr = CFP_NAMESPACE.SUB_NAMESPACE.pointer.inc(cfpArrPtr);
+
+  size_t idx = (i + CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr) * (j + CFP_NAMESPACE.SUB_NAMESPACE.size_y(cfpArr) * k)) + 1;
+  size_t x = idx % CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr);
+  size_t y = (idx / CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr)) % CFP_NAMESPACE.SUB_NAMESPACE.size_y(cfpArr);
+  size_t z = idx / (CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr) * CFP_NAMESPACE.SUB_NAMESPACE.size_y(cfpArr));
+
+  assert_int_equal(cfpArrPtr.reference.x, x);
+  assert_int_equal(cfpArrPtr.reference.y, y);
+  assert_int_equal(cfpArrPtr.reference.z, z);
+  assert_ptr_equal(cfpArrPtr.reference.array.object, CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i, j, k).reference.array.object);
+}
+
+static void
+_catFunc3(given_, CFP_PTR_TYPE, _when_dec_expect_correct)(void **state)
+{
+  struct setupVars *bundle = *state;
+  CFP_ARRAY_TYPE cfpArr = bundle->cfpArr;
+  size_t i = 1, j = 2, k = 1;
+  CFP_PTR_TYPE cfpArrPtr = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i, j, k);
+  cfpArrPtr = CFP_NAMESPACE.SUB_NAMESPACE.pointer.dec(cfpArrPtr);
+
+  size_t idx = (i + CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr) * (j + CFP_NAMESPACE.SUB_NAMESPACE.size_y(cfpArr) * k)) - 1;
+  size_t x = idx % CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr);
+  size_t y = (idx / CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr)) % CFP_NAMESPACE.SUB_NAMESPACE.size_y(cfpArr);
+  size_t z = idx / (CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr) * CFP_NAMESPACE.SUB_NAMESPACE.size_y(cfpArr));
+
+  assert_int_equal(cfpArrPtr.reference.x, x);
+  assert_int_equal(cfpArrPtr.reference.y, y);
+  assert_int_equal(cfpArrPtr.reference.z, z);
+  assert_ptr_equal(cfpArrPtr.reference.array.object, CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i, j, k).reference.array.object);
+}
+
+
+// ##############
+// cfp_iter tests
+// ##############
+
+static void
+_catFunc3(given_, CFP_ITER_TYPE, _when_ref_expect_correct)(void **state)
+{
+  struct setupVars *bundle = *state;
+  CFP_ARRAY_TYPE cfpArr = bundle->cfpArr;
+
+  CFP_ITER_TYPE cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr);
+  CFP_REF_TYPE cfpArrRef = CFP_NAMESPACE.SUB_NAMESPACE.iterator.ref(cfpArrIter);
+
+  assert_ptr_equal(cfpArrRef.array.object, cfpArr.object);
+  assert_int_equal(cfpArrRef.x, 0);
+  assert_int_equal(cfpArrRef.y, 0);
+  assert_int_equal(cfpArrRef.z, 0);
+}
+
+static void
+_catFunc3(given_, CFP_ITER_TYPE, _when_ref_at_expect_correct)(void **state)
+{
+  struct setupVars *bundle = *state;
+  CFP_ARRAY_TYPE cfpArr = bundle->cfpArr;
+
+  size_t io = 1749;
+
+  CFP_ITER_TYPE cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr);
+  CFP_REF_TYPE cfpArrRef = CFP_NAMESPACE.SUB_NAMESPACE.iterator.ref_at(cfpArrIter, io);
+
+  assert_ptr_equal(cfpArrRef.array.object, cfpArr.object);
+  assert_int_equal(cfpArrRef.x, 5);
+  assert_int_equal(cfpArrRef.y, 1);
+  assert_int_equal(cfpArrRef.z, 4);
+}
+
+static void
+_catFunc3(given_, CFP_ITER_TYPE, _when_ptr_expect_correct)(void **state)
+{
+  struct setupVars *bundle = *state;
+  CFP_ARRAY_TYPE cfpArr = bundle->cfpArr;
+
+  CFP_ITER_TYPE cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr);
+  CFP_PTR_TYPE cfpArrPtr = CFP_NAMESPACE.SUB_NAMESPACE.iterator.ptr(cfpArrIter);
+
+  assert_ptr_equal(cfpArrPtr.reference.array.object, cfpArr.object);
+  assert_int_equal(cfpArrPtr.reference.x, 0);
+  assert_int_equal(cfpArrPtr.reference.y, 0);
+  assert_int_equal(cfpArrPtr.reference.z, 0);
+}
+
+static void
+_catFunc3(given_, CFP_ITER_TYPE, _when_ptr_at_expect_correct)(void **state)
+{
+  struct setupVars *bundle = *state;
+  CFP_ARRAY_TYPE cfpArr = bundle->cfpArr;
+
+  size_t io = 1749;
+
+  CFP_ITER_TYPE cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr);
+  CFP_PTR_TYPE cfpArrPtr = CFP_NAMESPACE.SUB_NAMESPACE.iterator.ptr_at(cfpArrIter, io);
+
+  assert_ptr_equal(cfpArrPtr.reference.array.object, cfpArr.object);
+  assert_int_equal(cfpArrPtr.reference.x, 5);
+  assert_int_equal(cfpArrPtr.reference.y, 1);
+  assert_int_equal(cfpArrPtr.reference.z, 4);
+}
+
+static void
+_catFunc3(given_, CFP_ITER_TYPE, _when_inc_expect_correct)(void **state)
+{
+  struct setupVars *bundle = *state;
+  CFP_ARRAY_TYPE cfpArr = bundle->cfpArr;
+
+  CFP_ITER_TYPE cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr);
+  cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.iterator.inc(cfpArrIter);
+
+  assert_int_equal(cfpArrIter.x, 1);
+  assert_ptr_equal(cfpArrIter.array.object, cfpArr.object);
+}
+
+static void
+_catFunc3(given_, CFP_ITER_TYPE, _when_dec_expect_correct)(void **state)
+{
+  struct setupVars *bundle = *state;
+  CFP_ARRAY_TYPE cfpArr = bundle->cfpArr;
+
+  CFP_ITER_TYPE cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr);
+  cfpArrIter.x = 4;
+  cfpArrIter.y = 0;
+  cfpArrIter.z = 0;
+  cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.iterator.dec(cfpArrIter);
+
+  assert_ptr_equal(cfpArrIter.array.object, cfpArr.object);
+  assert_int_equal(cfpArrIter.x, 3);
+  assert_int_equal(cfpArrIter.y, 3);
+  assert_int_equal(cfpArrIter.z, 3);
+}
+
+static void
+_catFunc3(given_, CFP_ITER_TYPE, _when_next_expect_correct)(void **state)
+{
+  struct setupVars *bundle = *state;
+  CFP_ARRAY_TYPE cfpArr = bundle->cfpArr;
+
+  CFP_ITER_TYPE cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr);
+  cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.iterator.next(cfpArrIter, 64);
+  cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.iterator.next(cfpArrIter, 63);
+
+  assert_ptr_equal(cfpArrIter.array.object, cfpArr.object);
+  assert_int_equal(cfpArrIter.x, 7);
+  assert_int_equal(cfpArrIter.y, 3);
+  assert_int_equal(cfpArrIter.z, 3);
+}
+
+static void
+_catFunc3(given_, CFP_ITER_TYPE, _when_prev_expect_correct)(void **state)
+{
+  struct setupVars *bundle = *state;
+  CFP_ARRAY_TYPE cfpArr = bundle->cfpArr;
+
+  CFP_ITER_TYPE cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr);
+  cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.iterator.next(cfpArrIter, 127);
+  cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.iterator.prev(cfpArrIter, 63);
+
+  assert_ptr_equal(cfpArrIter.array.object, cfpArr.object);
+  assert_int_equal(cfpArrIter.x, 4);
+  assert_int_equal(cfpArrIter.y, 0);
+  assert_int_equal(cfpArrIter.z, 0);
+}
+
+static void
+_catFunc3(given_, CFP_ITER_TYPE, _when_distance_expect_correct)(void **state)
+{
+  struct setupVars *bundle = *state;
+  CFP_ARRAY_TYPE cfpArr = bundle->cfpArr;
+
+  CFP_ITER_TYPE cfpArrIter1 = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr);
+  CFP_ITER_TYPE cfpArrIter2 = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr);
+  cfpArrIter1 = CFP_NAMESPACE.SUB_NAMESPACE.iterator.next(cfpArrIter1, 63);
+  cfpArrIter2 = CFP_NAMESPACE.SUB_NAMESPACE.iterator.next(cfpArrIter2, 127);
+
+  assert_int_equal(CFP_NAMESPACE.SUB_NAMESPACE.iterator.distance(cfpArrIter1, cfpArrIter2), 64);
+  assert_int_equal(CFP_NAMESPACE.SUB_NAMESPACE.iterator.distance(cfpArrIter2, CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr)), -127);
+}
+
+static void
+_catFunc3(given_, CFP_ITER_TYPE, _when_lt_expect_less)(void **state)
+{
+  struct setupVars *bundle = *state;
+  CFP_ARRAY_TYPE cfpArr = bundle->cfpArr;
+
+  CFP_ITER_TYPE cfpArrIter1 = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr);
+  CFP_ITER_TYPE cfpArrIter2 = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr);
+  cfpArrIter1 = CFP_NAMESPACE.SUB_NAMESPACE.iterator.next(cfpArrIter1, 63);
+  cfpArrIter2 = CFP_NAMESPACE.SUB_NAMESPACE.iterator.next(cfpArrIter2, 127);
+
+  assert_true(CFP_NAMESPACE.SUB_NAMESPACE.iterator.lt(cfpArrIter1, cfpArrIter2));
+}
+
+static void
+_catFunc3(given_, CFP_ITER_TYPE, _when_gt_expect_greater)(void **state)
+{
+  struct setupVars *bundle = *state;
+  CFP_ARRAY_TYPE cfpArr = bundle->cfpArr;
+
+  CFP_ITER_TYPE cfpArrIter1 = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr);
+  CFP_ITER_TYPE cfpArrIter2 = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr);
+  cfpArrIter1 = CFP_NAMESPACE.SUB_NAMESPACE.iterator.next(cfpArrIter1, 63);
+  cfpArrIter2 = CFP_NAMESPACE.SUB_NAMESPACE.iterator.next(cfpArrIter2, 127);
+
+  assert_true(CFP_NAMESPACE.SUB_NAMESPACE.iterator.gt(cfpArrIter2, cfpArrIter1));
+}
+
+static void
+_catFunc3(given_, CFP_ITER_TYPE, _when_leq_expect_less_or_equal)(void **state)
+{
+  struct setupVars *bundle = *state;
+  CFP_ARRAY_TYPE cfpArr = bundle->cfpArr;
+
+  CFP_ITER_TYPE cfpArrIter1 = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr);
+  CFP_ITER_TYPE cfpArrIter2 = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr);
+  cfpArrIter1 = CFP_NAMESPACE.SUB_NAMESPACE.iterator.next(cfpArrIter1, 63);
+  cfpArrIter2 = CFP_NAMESPACE.SUB_NAMESPACE.iterator.next(cfpArrIter2, 127);
+
+  assert_true(CFP_NAMESPACE.SUB_NAMESPACE.iterator.leq(cfpArrIter1, cfpArrIter1));
+  assert_true(CFP_NAMESPACE.SUB_NAMESPACE.iterator.leq(cfpArrIter1, cfpArrIter2));
+}
+
+static void
+_catFunc3(given_, CFP_ITER_TYPE, _when_geq_expect_greater_or_equal)(void **state)
+{
+  struct setupVars *bundle = *state;
+  CFP_ARRAY_TYPE cfpArr = bundle->cfpArr;
+
+  CFP_ITER_TYPE cfpArrIter1 = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr);
+  CFP_ITER_TYPE cfpArrIter2 = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr);
+  cfpArrIter1 = CFP_NAMESPACE.SUB_NAMESPACE.iterator.next(cfpArrIter1, 63);
+  cfpArrIter2 = CFP_NAMESPACE.SUB_NAMESPACE.iterator.next(cfpArrIter2, 127);
+
+  assert_true(CFP_NAMESPACE.SUB_NAMESPACE.iterator.geq(cfpArrIter1, cfpArrIter1));
+  assert_true(CFP_NAMESPACE.SUB_NAMESPACE.iterator.geq(cfpArrIter2, cfpArrIter1));
+}
+
+static void
+_catFunc3(given_, CFP_ITER_TYPE, _when_get_index_expect_correct)(void **state)
+{
+  struct setupVars *bundle = *state;
+  CFP_ARRAY_TYPE cfpArr = bundle->cfpArr;
+
+  CFP_ITER_TYPE cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr);
+  cfpArrIter.x = 1;
+  cfpArrIter.y = 3;
+  cfpArrIter.z = 2;
+
+  size_t i_idx = CFP_NAMESPACE.SUB_NAMESPACE.iterator.i(cfpArrIter);
+  size_t j_idx = CFP_NAMESPACE.SUB_NAMESPACE.iterator.j(cfpArrIter);
+  size_t k_idx = CFP_NAMESPACE.SUB_NAMESPACE.iterator.k(cfpArrIter);
+
+  assert_int_equal(i_idx, 1u);
+  assert_int_equal(j_idx, 3u);
+  assert_int_equal(k_idx, 2u);
+}
diff --git a/tests/cfp/testCfpArray3d.c b/tests/cfp/testCfpArray3d.c
new file mode 100644
index 00000000..bc0265f3
--- /dev/null
+++ b/tests/cfp/testCfpArray3d.c
@@ -0,0 +1,101 @@
+#include "src/traitsd.h"
+#include "src/block3.h"
+
+#include "constants/3dDouble.h"
+
+#define CFP_ARRAY_TYPE cfp_array3d
+#define CFP_REF_TYPE cfp_ref3d
+#define CFP_PTR_TYPE cfp_ptr3d
+#define CFP_ITER_TYPE cfp_iter3d
+#define
SUB_NAMESPACE array3d +#define SCALAR double +#define SCALAR_TYPE zfp_type_double +#define DIMENSIONALITY 3 + +#include "testCfpArray_source.c" +#include "testCfpArray3_source.c" + +int main() +{ + const struct CMUnitTest tests[] = { + cmocka_unit_test(when_seededRandomSmoothDataGenerated_expect_ChecksumMatches), + + cmocka_unit_test(given_cfp_array3d_when_defaultCtor_expect_returnsNonNullPtr), + cmocka_unit_test_setup_teardown(given_cfp_array3d_when_ctor_expect_paramsSet, setupCfpArrLargeComplete, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array3d_when_copyCtor_expect_paramsCopied, setupCfpArrLargeComplete, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array3d_when_copyCtor_expect_cacheCopied, setupCfpArrLargeComplete, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array3d_when_headerCtor_expect_copied, setupCfpArrLargeComplete, teardownCfpArr), + + cmocka_unit_test_setup_teardown(given_cfp_array3d_header_expect_matchingMetadata, setupCfpArrLargeComplete, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array3d_header_when_bufferCtor_expect_copied, setupCfpArrLargeComplete, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array3d_header_when_bufferCtor_expect_paramsCopied, setupCfpArrLargeComplete, teardownCfpArr), + + cmocka_unit_test_setup_teardown(given_cfp_array3d_when_setRate_expect_rateSet, setupCfpArrMinimal, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array3d_when_setCacheSize_expect_cacheSizeSet, setupCfpArrSmall, teardownCfpArr), + + cmocka_unit_test_setup_teardown(given_cfp_array3d_with_dirtyCache_when_flushCache_expect_cacheEntriesPersistedToMemory, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array3d_when_clearCache_expect_cacheCleared, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array3d_when_resize_expect_sizeChanged, setupCfpArrSmall, teardownCfpArr), + + 
cmocka_unit_test_setup_teardown(given_cfp_array3d_when_setFlat_expect_entryWrittenToCacheOnly, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array3d_when_getFlat_expect_entryReturned, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array3d_when_set_expect_entryWrittenToCacheOnly, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array3d_when_get_expect_entryReturned, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array3d_when_ref_expect_arrayObjectValid, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array3d_when_ptr_expect_arrayObjectValid, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array3d_when_ref_flat_expect_entryReturned, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array3d_when_ptr_flat_expect_entryReturned, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array3d_when_begin_expect_objectValid, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array3d_when_end_expect_objectValid, setupCfpArrSmall, teardownCfpArr), + + cmocka_unit_test_setup_teardown(given_cfp_ref3d_when_get_expect_entryReturned, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ref3d_when_set_expect_arrayUpdated, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ref3d_when_ptr_expect_addressMatches, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ref3d_when_copy_expect_arrayUpdated, setupCfpArrSmall, teardownCfpArr), + + cmocka_unit_test_setup_teardown(given_cfp_ptr3d_when_get_set_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr3d_when_get_at_set_at_expect_correct, setupCfpArrSmall, teardownCfpArr), + 
cmocka_unit_test_setup_teardown(given_cfp_ptr3d_when_ref_expect_addressMatches, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr3d_when_ref_at_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr3d_when_lt_expect_less, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr3d_when_gt_expect_greater, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr3d_when_leq_expect_less_or_equal, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr3d_when_geq_expect_greater_or_equal, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr3d_when_eq_expect_equal, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr3d_when_neq_expect_not_equal, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr3d_when_distance_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr3d_when_next_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr3d_when_prev_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr3d_when_inc_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr3d_when_dec_expect_correct, setupCfpArrSmall, teardownCfpArr), + + cmocka_unit_test_setup_teardown(given_cfp_iter3d_when_get_set_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter3d_when_get_at_set_at_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter3d_when_ref_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter3d_when_ref_at_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter3d_when_ptr_expect_correct, 
setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter3d_when_ptr_at_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter3d_when_lt_expect_less, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter3d_when_gt_expect_greater, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter3d_when_leq_expect_less_or_equal, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter3d_when_geq_expect_greater_or_equal, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter3d_when_eq_expect_equal, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter3d_when_neq_expect_not_equal, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter3d_when_distance_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter3d_when_next_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter3d_when_prev_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter3d_when_inc_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter3d_when_dec_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter3d_when_iterate_touch_all, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter3d_when_get_index_expect_correct, setupCfpArrSmall, teardownCfpArr), + + cmocka_unit_test_setup_teardown(given_cfp_array3d_when_setArray_expect_compressedStreamChecksumMatches, setupFixedRate0, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array3d_when_setArray_expect_compressedStreamChecksumMatches, setupFixedRate1, teardownCfpArr), + 
cmocka_unit_test_setup_teardown(given_cfp_array3d_when_setArray_expect_compressedStreamChecksumMatches, setupFixedRate2, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array3d_when_getArray_expect_decompressedArrChecksumMatches, setupFixedRate0, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array3d_when_getArray_expect_decompressedArrChecksumMatches, setupFixedRate1, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array3d_when_getArray_expect_decompressedArrChecksumMatches, setupFixedRate2, teardownCfpArr), + }; + + return cmocka_run_group_tests(tests, prepCommonSetupVars, teardownCommonSetupVars); +} diff --git a/tests/cfp/testCfpArray3f.c b/tests/cfp/testCfpArray3f.c new file mode 100644 index 00000000..451658df --- /dev/null +++ b/tests/cfp/testCfpArray3f.c @@ -0,0 +1,101 @@ +#include "src/traitsf.h" +#include "src/block3.h" + +#include "constants/3dFloat.h" + +#define CFP_ARRAY_TYPE cfp_array3f +#define CFP_REF_TYPE cfp_ref3f +#define CFP_PTR_TYPE cfp_ptr3f +#define CFP_ITER_TYPE cfp_iter3f +#define SUB_NAMESPACE array3f +#define SCALAR float +#define SCALAR_TYPE zfp_type_float +#define DIMENSIONALITY 3 + +#include "testCfpArray_source.c" +#include "testCfpArray3_source.c" + +int main() +{ + const struct CMUnitTest tests[] = { + cmocka_unit_test(when_seededRandomSmoothDataGenerated_expect_ChecksumMatches), + + cmocka_unit_test(given_cfp_array3f_when_defaultCtor_expect_returnsNonNullPtr), + cmocka_unit_test_setup_teardown(given_cfp_array3f_when_ctor_expect_paramsSet, setupCfpArrLargeComplete, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array3f_when_copyCtor_expect_paramsCopied, setupCfpArrLargeComplete, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array3f_when_copyCtor_expect_cacheCopied, setupCfpArrLargeComplete, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array3f_when_headerCtor_expect_copied, setupCfpArrLargeComplete, teardownCfpArr), + + 
cmocka_unit_test_setup_teardown(given_cfp_array3f_header_expect_matchingMetadata, setupCfpArrLargeComplete, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array3f_header_when_bufferCtor_expect_copied, setupCfpArrLargeComplete, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array3f_header_when_bufferCtor_expect_paramsCopied, setupCfpArrLargeComplete, teardownCfpArr), + + cmocka_unit_test_setup_teardown(given_cfp_array3f_when_setRate_expect_rateSet, setupCfpArrMinimal, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array3f_when_setCacheSize_expect_cacheSizeSet, setupCfpArrSmall, teardownCfpArr), + + cmocka_unit_test_setup_teardown(given_cfp_array3f_with_dirtyCache_when_flushCache_expect_cacheEntriesPersistedToMemory, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array3f_when_clearCache_expect_cacheCleared, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array3f_when_resize_expect_sizeChanged, setupCfpArrSmall, teardownCfpArr), + + cmocka_unit_test_setup_teardown(given_cfp_array3f_when_setFlat_expect_entryWrittenToCacheOnly, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array3f_when_getFlat_expect_entryReturned, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array3f_when_set_expect_entryWrittenToCacheOnly, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array3f_when_get_expect_entryReturned, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array3f_when_ref_expect_arrayObjectValid, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array3f_when_ptr_expect_arrayObjectValid, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array3f_when_ref_flat_expect_entryReturned, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array3f_when_ptr_flat_expect_entryReturned, 
setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array3f_when_begin_expect_objectValid, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array3f_when_end_expect_objectValid, setupCfpArrSmall, teardownCfpArr), + + cmocka_unit_test_setup_teardown(given_cfp_ref3f_when_get_expect_entryReturned, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ref3f_when_set_expect_arrayUpdated, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ref3f_when_ptr_expect_addressMatches, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ref3f_when_copy_expect_arrayUpdated, setupCfpArrSmall, teardownCfpArr), + + cmocka_unit_test_setup_teardown(given_cfp_ptr3f_when_get_set_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr3f_when_get_at_set_at_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr3f_when_ref_expect_addressMatches, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr3f_when_ref_at_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr3f_when_lt_expect_less, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr3f_when_gt_expect_greater, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr3f_when_leq_expect_less_or_equal, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr3f_when_geq_expect_greater_or_equal, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr3f_when_eq_expect_equal, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr3f_when_neq_expect_not_equal, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr3f_when_distance_expect_correct, setupCfpArrSmall, teardownCfpArr), + 
cmocka_unit_test_setup_teardown(given_cfp_ptr3f_when_next_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr3f_when_prev_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr3f_when_inc_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr3f_when_dec_expect_correct, setupCfpArrSmall, teardownCfpArr), + + cmocka_unit_test_setup_teardown(given_cfp_iter3f_when_get_set_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter3f_when_get_at_set_at_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter3f_when_ref_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter3f_when_ref_at_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter3f_when_ptr_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter3f_when_ptr_at_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter3f_when_lt_expect_less, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter3f_when_gt_expect_greater, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter3f_when_leq_expect_less_or_equal, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter3f_when_geq_expect_greater_or_equal, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter3f_when_eq_expect_equal, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter3f_when_neq_expect_not_equal, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter3f_when_distance_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter3f_when_next_expect_correct, 
setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter3f_when_prev_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter3f_when_inc_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter3f_when_dec_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter3f_when_iterate_touch_all, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter3f_when_get_index_expect_correct, setupCfpArrSmall, teardownCfpArr), + + cmocka_unit_test_setup_teardown(given_cfp_array3f_when_setArray_expect_compressedStreamChecksumMatches, setupFixedRate0, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array3f_when_setArray_expect_compressedStreamChecksumMatches, setupFixedRate1, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array3f_when_setArray_expect_compressedStreamChecksumMatches, setupFixedRate2, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array3f_when_getArray_expect_decompressedArrChecksumMatches, setupFixedRate0, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array3f_when_getArray_expect_decompressedArrChecksumMatches, setupFixedRate1, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array3f_when_getArray_expect_decompressedArrChecksumMatches, setupFixedRate2, teardownCfpArr), + }; + + return cmocka_run_group_tests(tests, prepCommonSetupVars, teardownCommonSetupVars); +} diff --git a/tests/cfp/testCfpArray4_source.c b/tests/cfp/testCfpArray4_source.c new file mode 100644 index 00000000..1f63ad0d --- /dev/null +++ b/tests/cfp/testCfpArray4_source.c @@ -0,0 +1,692 @@ +// ############### +// cfp_array tests +// ############### + +static void +_catFunc3(given_, CFP_ARRAY_TYPE, _when_ctor_expect_paramsSet)(void **state) +{ + struct setupVars *bundle = *state; + size_t csize = 300; + CFP_ARRAY_TYPE cfpArr = 
CFP_NAMESPACE.SUB_NAMESPACE.ctor(bundle->dataSideLen, bundle->dataSideLen, bundle->dataSideLen, bundle->dataSideLen, bundle->rate, bundle->dataArr, csize); + assert_non_null(cfpArr.object); + + assert_int_equal(CFP_NAMESPACE.SUB_NAMESPACE.size(cfpArr), bundle->totalDataLen); + + assert_true(CFP_NAMESPACE.SUB_NAMESPACE.rate(cfpArr) >= bundle->rate); + + uchar* compressedPtr = CFP_NAMESPACE.SUB_NAMESPACE.compressed_data(cfpArr); + size_t compressedSize = CFP_NAMESPACE.SUB_NAMESPACE.compressed_size(cfpArr); + assert_int_not_equal(hashBitstream((uint64*)compressedPtr, compressedSize), 0); + + // sets a minimum cache size + assert_true(CFP_NAMESPACE.SUB_NAMESPACE.cache_size(cfpArr) >= csize); + + CFP_NAMESPACE.SUB_NAMESPACE.dtor(cfpArr); +} + +static void +_catFunc3(given_, CFP_ARRAY_TYPE, _header_expect_matchingMetadata)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE srcCfpArr = bundle->cfpArr; + CFP_HEADER_TYPE srcCfpHdr = CFP_NAMESPACE.SUB_NAMESPACE.header.ctor(srcCfpArr); + + assert_int_equal(CFP_NAMESPACE.SUB_NAMESPACE.header.scalar_type(srcCfpHdr), SCALAR_TYPE); + assert_int_equal(CFP_NAMESPACE.SUB_NAMESPACE.header.dimensionality(srcCfpHdr), 4); + + assert_int_equal(CFP_NAMESPACE.SUB_NAMESPACE.header.rate(srcCfpHdr), CFP_NAMESPACE.SUB_NAMESPACE.rate(srcCfpArr)); + assert_int_equal(CFP_NAMESPACE.SUB_NAMESPACE.header.size_x(srcCfpHdr), CFP_NAMESPACE.SUB_NAMESPACE.size_x(srcCfpArr)); + assert_int_equal(CFP_NAMESPACE.SUB_NAMESPACE.header.size_y(srcCfpHdr), CFP_NAMESPACE.SUB_NAMESPACE.size_y(srcCfpArr)); + assert_int_equal(CFP_NAMESPACE.SUB_NAMESPACE.header.size_z(srcCfpHdr), CFP_NAMESPACE.SUB_NAMESPACE.size_z(srcCfpArr)); + assert_int_equal(CFP_NAMESPACE.SUB_NAMESPACE.header.size_w(srcCfpHdr), CFP_NAMESPACE.SUB_NAMESPACE.size_w(srcCfpArr)); + + // cleanup + CFP_NAMESPACE.SUB_NAMESPACE.header.dtor(srcCfpHdr); +} + +static void +_catFunc3(given_, CFP_ARRAY_TYPE, _when_resize_expect_sizeChanged)(void **state) +{ + struct setupVars *bundle = 
*state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + + size_t newSizeX = 81, newSizeY = 123, newSizeZ = 14, newSizeW = 6; + assert_int_not_equal(CFP_NAMESPACE.SUB_NAMESPACE.size(cfpArr), newSizeX * newSizeY * newSizeZ * newSizeW); + + CFP_NAMESPACE.SUB_NAMESPACE.resize(cfpArr, newSizeX, newSizeY, newSizeZ, newSizeW, 1); + + assert_int_equal(CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr), newSizeX); + assert_int_equal(CFP_NAMESPACE.SUB_NAMESPACE.size_y(cfpArr), newSizeY); + assert_int_equal(CFP_NAMESPACE.SUB_NAMESPACE.size_z(cfpArr), newSizeZ); + assert_int_equal(CFP_NAMESPACE.SUB_NAMESPACE.size_w(cfpArr), newSizeW); + assert_int_equal(CFP_NAMESPACE.SUB_NAMESPACE.size(cfpArr), newSizeX * newSizeY * newSizeZ * newSizeW); +} + +static void +_catFunc3(given_, CFP_ARRAY_TYPE, _when_set_expect_entryWrittenToCacheOnly)(void **state) +{ + struct setupVars *bundle = *state; + + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + + // getting the ptr automatically flushes cache, so do this before setting an entry + uchar* compressedDataPtr = CFP_NAMESPACE.SUB_NAMESPACE.compressed_data(cfpArr); + size_t compressedSize = CFP_NAMESPACE.SUB_NAMESPACE.compressed_size(cfpArr); + + uchar* oldMemory = malloc(compressedSize * sizeof(uchar)); + memcpy(oldMemory, compressedDataPtr, compressedSize); + + CFP_NAMESPACE.SUB_NAMESPACE.set(cfpArr, 1, 1, 1, 1, (SCALAR)VAL); + + assert_memory_equal(compressedDataPtr, oldMemory, compressedSize); + free(oldMemory); +} + +static void +_catFunc3(given_, CFP_ARRAY_TYPE, _when_get_expect_entryReturned)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + size_t i = 1, j = 2, k = 1, l = 1; + CFP_NAMESPACE.SUB_NAMESPACE.set(cfpArr, i, j, k, l, (SCALAR)VAL); + + // dirty cache doesn't immediately apply compression + assert_true(CFP_NAMESPACE.SUB_NAMESPACE.get(cfpArr, i, j, k, l) == (SCALAR)VAL); +} + +static void +_catFunc3(given_, CFP_ARRAY_TYPE, _when_ref_expect_arrayObjectValid)(void **state) +{ + struct setupVars 
*bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + size_t i = 1, j = 2, k = 1, l = 1; + CFP_REF_TYPE cfpArrRef = CFP_NAMESPACE.SUB_NAMESPACE.ref(cfpArr, i, j, k, l); + + assert_ptr_equal(cfpArrRef.array.object, cfpArr.object); +} + +static void +_catFunc3(given_, CFP_ARRAY_TYPE, _when_ptr_expect_arrayObjectValid)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + size_t i = 1, j = 2, k = 1, l = 1; + CFP_PTR_TYPE cfpArrPtr = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i, j, k, l); + + assert_ptr_equal(cfpArrPtr.reference.array.object, cfpArr.object); +} + +static void +_catFunc3(given_, CFP_ARRAY_TYPE, _when_begin_expect_objectValid)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + CFP_ITER_TYPE cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr); + + assert_ptr_equal(cfpArrIter.array.object, cfpArr.object); + assert_int_equal(cfpArrIter.x, 0); + assert_int_equal(cfpArrIter.y, 0); + assert_int_equal(cfpArrIter.z, 0); + assert_int_equal(cfpArrIter.w, 0); +} + +static void +_catFunc3(given_, CFP_ARRAY_TYPE, _when_end_expect_objectValid)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + CFP_ITER_TYPE cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.end(cfpArr); + + assert_ptr_equal(cfpArrIter.array.object, cfpArr.object); + assert_int_equal(cfpArrIter.x, 0); + assert_int_equal(cfpArrIter.y, 0); + assert_int_equal(cfpArrIter.z, 0); + assert_int_equal(cfpArrIter.w, SIZE_W); +} + +// ############# +// cfp_ref tests +// ############# + +static void +_catFunc3(given_, CFP_REF_TYPE, _when_get_expect_entryReturned)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + size_t i = 1, j = 2, k = 1, l = 1; + CFP_REF_TYPE cfpArrRef = CFP_NAMESPACE.SUB_NAMESPACE.ref(cfpArr, i, j, k, l); + CFP_NAMESPACE.SUB_NAMESPACE.set(cfpArr, i, j, k, l, VAL); + + 
assert_true(CFP_NAMESPACE.SUB_NAMESPACE.reference.get(cfpArrRef) == (SCALAR)VAL); +} + +static void +_catFunc3(given_, CFP_REF_TYPE, _when_set_expect_arrayUpdated)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + size_t i = 1, j = 2, k = 1, l = 1; + CFP_REF_TYPE cfpArrRef = CFP_NAMESPACE.SUB_NAMESPACE.ref(cfpArr, i, j, k, l); + CFP_NAMESPACE.SUB_NAMESPACE.reference.set(cfpArrRef, VAL); + + assert_true(CFP_NAMESPACE.SUB_NAMESPACE.get(cfpArr, i, j, k, l) == (SCALAR)VAL); +} + +static void +_catFunc3(given_, CFP_REF_TYPE, _when_copy_expect_arrayUpdated)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + size_t i1 = 1, j1 = 2, k1 = 1, l1 = 1, i2 = 2, j2 = 1, k2 = 2, l2 = 2; + CFP_NAMESPACE.SUB_NAMESPACE.set(cfpArr, i1, j1, k1, l1, VAL); + CFP_REF_TYPE cfpArrRef_a = CFP_NAMESPACE.SUB_NAMESPACE.ref(cfpArr, i1, j1, k1, l1); + CFP_REF_TYPE cfpArrRef_b = CFP_NAMESPACE.SUB_NAMESPACE.ref(cfpArr, i2, j2, k2, l2); + CFP_NAMESPACE.SUB_NAMESPACE.reference.copy(cfpArrRef_b, cfpArrRef_a); + + assert_true(CFP_NAMESPACE.SUB_NAMESPACE.get(cfpArr, i2, j2, k2, l2) == (SCALAR)VAL); +} + +static void +_catFunc3(given_, CFP_REF_TYPE, _when_ptr_expect_addressMatches)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + size_t i = 1, j = 2, k = 1, l = 1; + CFP_REF_TYPE cfpArrRef = CFP_NAMESPACE.SUB_NAMESPACE.ref(cfpArr, i, j, k, l); + CFP_PTR_TYPE cfpArrPtr = CFP_NAMESPACE.SUB_NAMESPACE.reference.ptr(cfpArrRef); + + assert_ptr_equal(cfpArrRef.array.object, cfpArrPtr.reference.array.object); +} + + +// ############# +// cfp_ptr tests +// ############# + +static void +_catFunc3(given_, CFP_PTR_TYPE, _when_get_set_expect_correct)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + size_t i = 1, j = 2, k = 3, l = 4; + SCALAR val = 5; + CFP_PTR_TYPE cfpArrPtr = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i, j, k, l); 
+ CFP_NAMESPACE.SUB_NAMESPACE.pointer.set(cfpArrPtr, val); + + assert_true(val - CFP_NAMESPACE.SUB_NAMESPACE.pointer.get(cfpArrPtr) < 1e-12); + assert_true(val - CFP_NAMESPACE.SUB_NAMESPACE.pointer.get(cfpArrPtr) > -1e-12); +} + +static void +_catFunc3(given_, CFP_PTR_TYPE, _when_get_at_set_at_expect_correct)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + size_t i = 1, j = 2, k = 3, l = 4, io = 5; + SCALAR val = 5; + CFP_PTR_TYPE cfpArrPtr = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i, j, k, l); + CFP_NAMESPACE.SUB_NAMESPACE.pointer.set_at(cfpArrPtr, io, val); + + assert_true(val - CFP_NAMESPACE.SUB_NAMESPACE.pointer.get_at(cfpArrPtr, io) < 1e-12); + assert_true(val - CFP_NAMESPACE.SUB_NAMESPACE.pointer.get_at(cfpArrPtr, io) > -1e-12); +} + +static void +_catFunc3(given_, CFP_PTR_TYPE, _when_ref_expect_addressMatches)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + size_t i = 1, j = 2, k = 1, l = 1; + CFP_PTR_TYPE cfpArrPtr = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i, j, k, l); + CFP_REF_TYPE cfpArrRef = CFP_NAMESPACE.SUB_NAMESPACE.pointer.ref(cfpArrPtr); + + assert_ptr_equal(cfpArrPtr.reference.array.object, cfpArrRef.array.object); +} + +static void +_catFunc3(given_, CFP_PTR_TYPE, _when_ref_at_expect_correct)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + size_t i = 1, j = 2, k = 1, l = 1; + size_t oi = 10; + CFP_PTR_TYPE cfpArrPtr = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i, j, k, l); + CFP_REF_TYPE cfpArrRef = CFP_NAMESPACE.SUB_NAMESPACE.pointer.ref_at(cfpArrPtr, oi); + + assert_int_equal(cfpArrPtr.reference.x + oi, cfpArrRef.x); + assert_ptr_equal(cfpArrPtr.reference.array.object, cfpArrRef.array.object); +} + +static void +_catFunc3(given_, CFP_PTR_TYPE, _when_lt_expect_less)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + size_t i1 = 1, i2 = 2; + size_t j1 = 1, 
j2 = 2; + size_t k1 = 1, k2 = 2; + size_t l1 = 1, l2 = 2; + CFP_PTR_TYPE cfpArrPtrA = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i1, j1, k1, l1); + CFP_PTR_TYPE cfpArrPtrB = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i2, j2, k2, l2); + + assert_true(CFP_NAMESPACE.SUB_NAMESPACE.pointer.lt(cfpArrPtrA, cfpArrPtrB)); +} + +static void +_catFunc3(given_, CFP_PTR_TYPE, _when_gt_expect_greater)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + size_t i1 = 1, i2 = 2; + size_t j1 = 1, j2 = 2; + size_t k1 = 1, k2 = 2; + size_t l1 = 1, l2 = 2; + CFP_PTR_TYPE cfpArrPtrA = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i1, j1, k1, l1); + CFP_PTR_TYPE cfpArrPtrB = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i2, j2, k2, l2); + + assert_true(CFP_NAMESPACE.SUB_NAMESPACE.pointer.gt(cfpArrPtrB, cfpArrPtrA)); +} + +static void +_catFunc3(given_, CFP_PTR_TYPE, _when_leq_expect_less_or_equal)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + size_t i1 = 1, i2 = 2; + size_t j1 = 1, j2 = 2; + size_t k1 = 1, k2 = 2; + size_t l1 = 1, l2 = 2; + CFP_PTR_TYPE cfpArrPtrA = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i1, j1, k1, l1); + CFP_PTR_TYPE cfpArrPtrB = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i2, j2, k2, l2); + + assert_true(CFP_NAMESPACE.SUB_NAMESPACE.pointer.leq(cfpArrPtrA, cfpArrPtrA)); + assert_true(CFP_NAMESPACE.SUB_NAMESPACE.pointer.leq(cfpArrPtrA, cfpArrPtrB)); +} + +static void +_catFunc3(given_, CFP_PTR_TYPE, _when_geq_expect_greater_or_equal)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + size_t i1 = 1, i2 = 2; + size_t j1 = 1, j2 = 2; + size_t k1 = 1, k2 = 2; + size_t l1 = 1, l2 = 2; + CFP_PTR_TYPE cfpArrPtrA = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i1, j1, k1, l1); + CFP_PTR_TYPE cfpArrPtrB = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i2, j2, k2, l2); + + assert_true(CFP_NAMESPACE.SUB_NAMESPACE.pointer.geq(cfpArrPtrA, cfpArrPtrA)); + 
assert_true(CFP_NAMESPACE.SUB_NAMESPACE.pointer.geq(cfpArrPtrB, cfpArrPtrA)); +} + +static void +_catFunc3(given_, CFP_PTR_TYPE, _when_eq_expect_equal)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + size_t i1 = 1, j1 = 2, k1 = 1, l1 = 1; + CFP_PTR_TYPE cfpArrPtrA = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i1, j1, k1, l1); + + assert_true(CFP_NAMESPACE.SUB_NAMESPACE.pointer.eq(cfpArrPtrA, cfpArrPtrA)); +} + +static void +_catFunc3(given_, CFP_PTR_TYPE, _when_neq_expect_not_equal)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + size_t i1 = 1, i2 = 2; + size_t j1 = 2, j2 = 1; + size_t k1 = 1, k2 = 2; + size_t l1 = 1, l2 = 2; + CFP_PTR_TYPE cfpArrPtrA = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i1, j1, k1, l1); + CFP_PTR_TYPE cfpArrPtrB = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i2, j2, k2, l2); + + assert_true(CFP_NAMESPACE.SUB_NAMESPACE.pointer.neq(cfpArrPtrA, cfpArrPtrB)); +} + +static void +_catFunc3(given_, CFP_PTR_TYPE, _when_distance_expect_correct)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + size_t i1 = 1, i2 = 2, k1 = 1, l1 = 1; + size_t j1 = 2, j2 = 1, k2 = 2, l2 = 2; + CFP_PTR_TYPE cfpArrPtrA = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i1, j1, k1, l1); + CFP_PTR_TYPE cfpArrPtrB = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i2, j2, k2, l2); + + assert_int_equal((int)CFP_NAMESPACE.SUB_NAMESPACE.pointer.distance(cfpArrPtrA, cfpArrPtrB), + (int)(i2 + + j2*CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr) + + k2*CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr)*CFP_NAMESPACE.SUB_NAMESPACE.size_y(cfpArr) + + l2*CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr)*CFP_NAMESPACE.SUB_NAMESPACE.size_y(cfpArr)*CFP_NAMESPACE.SUB_NAMESPACE.size_z(cfpArr)) - + (int)(i1 + + j1*CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr) + + k1*CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr)*CFP_NAMESPACE.SUB_NAMESPACE.size_y(cfpArr) + + 
l1*CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr)*CFP_NAMESPACE.SUB_NAMESPACE.size_y(cfpArr)*CFP_NAMESPACE.SUB_NAMESPACE.size_z(cfpArr))); + assert_ptr_equal(cfpArrPtrA.reference.array.object, cfpArrPtrB.reference.array.object); +} + +static void +_catFunc3(given_, CFP_PTR_TYPE, _when_next_expect_correct)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + size_t i = 1, j = 2, k = 1, l = 1, oi = 10; + CFP_PTR_TYPE cfpArrPtr = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i, j, k, l); + cfpArrPtr = CFP_NAMESPACE.SUB_NAMESPACE.pointer.next(cfpArrPtr, oi); + + size_t idx = (i + CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr) * (j + CFP_NAMESPACE.SUB_NAMESPACE.size_y(cfpArr) * (k + CFP_NAMESPACE.SUB_NAMESPACE.size_z(cfpArr) * l))) + oi; + size_t x = idx % CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr); + size_t y = ((idx - x) / CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr)) % CFP_NAMESPACE.SUB_NAMESPACE.size_y(cfpArr); + size_t z = ((idx - y * CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr) - x) / (CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr) * CFP_NAMESPACE.SUB_NAMESPACE.size_y(cfpArr))) % CFP_NAMESPACE.SUB_NAMESPACE.size_z(cfpArr); + size_t w = (idx - z * CFP_NAMESPACE.SUB_NAMESPACE.size_y(cfpArr) * CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr) - y * CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr) - x) / (CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr) * CFP_NAMESPACE.SUB_NAMESPACE.size_y(cfpArr) * CFP_NAMESPACE.SUB_NAMESPACE.size_z(cfpArr)); + + assert_int_equal(cfpArrPtr.reference.x, x); + assert_int_equal(cfpArrPtr.reference.y, y); + assert_int_equal(cfpArrPtr.reference.z, z); + assert_int_equal(cfpArrPtr.reference.w, w); + assert_ptr_equal(cfpArrPtr.reference.array.object, CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i, j, k, l).reference.array.object); +} + +static void +_catFunc3(given_, CFP_PTR_TYPE, _when_prev_expect_correct)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + size_t i = 8, j = 4, k = 1, l = 1, oi 
= 10; + CFP_PTR_TYPE cfpArrPtr = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i, j, k, l); + cfpArrPtr = CFP_NAMESPACE.SUB_NAMESPACE.pointer.prev(cfpArrPtr, oi); + + size_t idx = (i + CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr) * (j + CFP_NAMESPACE.SUB_NAMESPACE.size_y(cfpArr) * (k + CFP_NAMESPACE.SUB_NAMESPACE.size_z(cfpArr) * l))) - oi; + size_t x = idx % CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr); + size_t y = ((idx - x) / CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr)) % CFP_NAMESPACE.SUB_NAMESPACE.size_y(cfpArr); + size_t z = ((idx - y * CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr) - x) / (CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr) * CFP_NAMESPACE.SUB_NAMESPACE.size_y(cfpArr))) % CFP_NAMESPACE.SUB_NAMESPACE.size_z(cfpArr); + size_t w = (idx - z * CFP_NAMESPACE.SUB_NAMESPACE.size_y(cfpArr) * CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr) - y * CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr) - x) / (CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr) * CFP_NAMESPACE.SUB_NAMESPACE.size_y(cfpArr) * CFP_NAMESPACE.SUB_NAMESPACE.size_z(cfpArr)); + + assert_int_equal(cfpArrPtr.reference.x, x); + assert_int_equal(cfpArrPtr.reference.y, y); + assert_int_equal(cfpArrPtr.reference.z, z); + assert_int_equal(cfpArrPtr.reference.w, w); + assert_ptr_equal(cfpArrPtr.reference.array.object, CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i, j, k, l).reference.array.object); +} + +static void +_catFunc3(given_, CFP_PTR_TYPE, _when_inc_expect_correct)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + size_t i = 1, j = 2, k = 1, l = 1; + CFP_PTR_TYPE cfpArrPtr = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i, j, k, l); + cfpArrPtr = CFP_NAMESPACE.SUB_NAMESPACE.pointer.inc(cfpArrPtr); + + size_t idx = (i + CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr) * (j + CFP_NAMESPACE.SUB_NAMESPACE.size_y(cfpArr) * (k + CFP_NAMESPACE.SUB_NAMESPACE.size_z(cfpArr) * l))) + 1; + size_t x = idx % CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr); + size_t y = ((idx - x) / 
CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr)) % CFP_NAMESPACE.SUB_NAMESPACE.size_y(cfpArr); + size_t z = ((idx - y * CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr) - x) / (CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr) * CFP_NAMESPACE.SUB_NAMESPACE.size_y(cfpArr))) % CFP_NAMESPACE.SUB_NAMESPACE.size_z(cfpArr); + size_t w = (idx - z * CFP_NAMESPACE.SUB_NAMESPACE.size_y(cfpArr) * CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr) - y * CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr) - x) / (CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr) * CFP_NAMESPACE.SUB_NAMESPACE.size_y(cfpArr) * CFP_NAMESPACE.SUB_NAMESPACE.size_z(cfpArr)); + + assert_int_equal(cfpArrPtr.reference.x, x); + assert_int_equal(cfpArrPtr.reference.y, y); + assert_int_equal(cfpArrPtr.reference.z, z); + assert_int_equal(cfpArrPtr.reference.w, w); + assert_ptr_equal(cfpArrPtr.reference.array.object, CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i, j, k, l).reference.array.object); +} + +static void +_catFunc3(given_, CFP_PTR_TYPE, _when_dec_expect_correct)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + size_t i = 1, j = 2, k = 1, l = 1; + CFP_PTR_TYPE cfpArrPtr = CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i, j, k, l); + cfpArrPtr = CFP_NAMESPACE.SUB_NAMESPACE.pointer.dec(cfpArrPtr); + + size_t idx = (i + CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr) * (j + CFP_NAMESPACE.SUB_NAMESPACE.size_y(cfpArr) * (k + CFP_NAMESPACE.SUB_NAMESPACE.size_z(cfpArr) * l))) - 1; + size_t x = idx % CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr); + size_t y = ((idx - x) / CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr)) % CFP_NAMESPACE.SUB_NAMESPACE.size_y(cfpArr); + size_t z = ((idx - y * CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr) - x) / (CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr) * CFP_NAMESPACE.SUB_NAMESPACE.size_y(cfpArr))) % CFP_NAMESPACE.SUB_NAMESPACE.size_z(cfpArr); + size_t w = (idx - z * CFP_NAMESPACE.SUB_NAMESPACE.size_y(cfpArr) * CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr) - y * 
CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr) - x) / (CFP_NAMESPACE.SUB_NAMESPACE.size_x(cfpArr) * CFP_NAMESPACE.SUB_NAMESPACE.size_y(cfpArr) * CFP_NAMESPACE.SUB_NAMESPACE.size_z(cfpArr)); + + assert_int_equal(cfpArrPtr.reference.x, x); + assert_int_equal(cfpArrPtr.reference.y, y); + assert_int_equal(cfpArrPtr.reference.z, z); + assert_int_equal(cfpArrPtr.reference.w, w); + assert_ptr_equal(cfpArrPtr.reference.array.object, CFP_NAMESPACE.SUB_NAMESPACE.ptr(cfpArr, i, j, k, l).reference.array.object); +} + + +// ############## +// cfp_iter tests +// ############## + +static void +_catFunc3(given_, CFP_ITER_TYPE, _when_ref_expect_correct)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + + CFP_ITER_TYPE cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr); + CFP_REF_TYPE cfpArrRef = CFP_NAMESPACE.SUB_NAMESPACE.iterator.ref(cfpArrIter); + + assert_ptr_equal(cfpArrRef.array.object, cfpArr.object); + assert_int_equal(cfpArrRef.x, 0); + assert_int_equal(cfpArrRef.y, 0); + assert_int_equal(cfpArrRef.z, 0); + assert_int_equal(cfpArrRef.w, 0); +} + +static void +_catFunc3(given_, CFP_ITER_TYPE, _when_ref_at_expect_correct)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + + size_t io = 38709; + + CFP_ITER_TYPE cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr); + CFP_REF_TYPE cfpArrRef = CFP_NAMESPACE.SUB_NAMESPACE.iterator.ref_at(cfpArrIter, io); + + assert_ptr_equal(cfpArrRef.array.object, cfpArr.object); + assert_int_equal(cfpArrRef.x, 5); + assert_int_equal(cfpArrRef.y, 1); + assert_int_equal(cfpArrRef.z, 4); + assert_int_equal(cfpArrRef.w, 4); +} + +static void +_catFunc3(given_, CFP_ITER_TYPE, _when_ptr_expect_correct)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + + CFP_ITER_TYPE cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr); + CFP_PTR_TYPE cfpArrPtr = CFP_NAMESPACE.SUB_NAMESPACE.iterator.ptr(cfpArrIter); + + 
assert_ptr_equal(cfpArrPtr.reference.array.object, cfpArr.object); + assert_int_equal(cfpArrPtr.reference.x, 0); + assert_int_equal(cfpArrPtr.reference.y, 0); + assert_int_equal(cfpArrPtr.reference.z, 0); + assert_int_equal(cfpArrPtr.reference.w, 0); +} + +static void +_catFunc3(given_, CFP_ITER_TYPE, _when_ptr_at_expect_correct)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + + size_t io = 38709; + + CFP_ITER_TYPE cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr); + CFP_PTR_TYPE cfpArrPtr = CFP_NAMESPACE.SUB_NAMESPACE.iterator.ptr_at(cfpArrIter, io); + + assert_ptr_equal(cfpArrPtr.reference.array.object, cfpArr.object); + assert_int_equal(cfpArrPtr.reference.x, 5); + assert_int_equal(cfpArrPtr.reference.y, 1); + assert_int_equal(cfpArrPtr.reference.z, 4); + assert_int_equal(cfpArrPtr.reference.w, 4); +} + +static void +_catFunc3(given_, CFP_ITER_TYPE, _when_inc_expect_correct)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + + CFP_ITER_TYPE cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr); + cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.iterator.inc(cfpArrIter); + + assert_int_equal(cfpArrIter.x, 1); + assert_ptr_equal(cfpArrIter.array.object, cfpArr.object); +} + +static void +_catFunc3(given_, CFP_ITER_TYPE, _when_dec_expect_correct)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + + CFP_ITER_TYPE cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr); + cfpArrIter.x = 4; + cfpArrIter.y = 0; + cfpArrIter.z = 0; + cfpArrIter.w = 0; + cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.iterator.dec(cfpArrIter); + + assert_ptr_equal(cfpArrIter.array.object, cfpArr.object); + assert_int_equal(cfpArrIter.x, 3); + assert_int_equal(cfpArrIter.y, 3); + assert_int_equal(cfpArrIter.z, 3); + assert_int_equal(cfpArrIter.w, 3); +} + +static void +_catFunc3(given_, CFP_ITER_TYPE, _when_next_expect_correct)(void **state) +{ + struct 
setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + + CFP_ITER_TYPE cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr); + cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.iterator.next(cfpArrIter, 256); + cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.iterator.next(cfpArrIter, 255); + + assert_ptr_equal(cfpArrIter.array.object, cfpArr.object); + assert_int_equal(cfpArrIter.x, 7); + assert_int_equal(cfpArrIter.y, 3); + assert_int_equal(cfpArrIter.z, 3); + assert_int_equal(cfpArrIter.w, 3); +} + +static void +_catFunc3(given_, CFP_ITER_TYPE, _when_prev_expect_correct)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + + CFP_ITER_TYPE cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr); + cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.iterator.next(cfpArrIter, 511); + cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.iterator.prev(cfpArrIter, 255); + + assert_ptr_equal(cfpArrIter.array.object, cfpArr.object); + assert_int_equal(cfpArrIter.x, 4); + assert_int_equal(cfpArrIter.y, 0); + assert_int_equal(cfpArrIter.z, 0); + assert_int_equal(cfpArrIter.w, 0); +} + +static void +_catFunc3(given_, CFP_ITER_TYPE, _when_distance_expect_correct)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + + CFP_ITER_TYPE cfpArrIter1 = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr); + CFP_ITER_TYPE cfpArrIter2 = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr); + cfpArrIter1 = CFP_NAMESPACE.SUB_NAMESPACE.iterator.next(cfpArrIter1, 255); + cfpArrIter2 = CFP_NAMESPACE.SUB_NAMESPACE.iterator.next(cfpArrIter2, 511); + + assert_int_equal(CFP_NAMESPACE.SUB_NAMESPACE.iterator.distance(cfpArrIter1, cfpArrIter2), 256); + assert_int_equal(CFP_NAMESPACE.SUB_NAMESPACE.iterator.distance(cfpArrIter2, CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr)), -511); +} + +static void +_catFunc3(given_, CFP_ITER_TYPE, _when_lt_expect_less)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + + 
CFP_ITER_TYPE cfpArrIter1 = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr); + CFP_ITER_TYPE cfpArrIter2 = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr); + cfpArrIter1 = CFP_NAMESPACE.SUB_NAMESPACE.iterator.next(cfpArrIter1, 255); + cfpArrIter2 = CFP_NAMESPACE.SUB_NAMESPACE.iterator.next(cfpArrIter2, 511); + + assert_true(CFP_NAMESPACE.SUB_NAMESPACE.iterator.lt(cfpArrIter1, cfpArrIter2)); +} + +static void +_catFunc3(given_, CFP_ITER_TYPE, _when_gt_expect_greater)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + + CFP_ITER_TYPE cfpArrIter1 = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr); + CFP_ITER_TYPE cfpArrIter2 = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr); + cfpArrIter1 = CFP_NAMESPACE.SUB_NAMESPACE.iterator.next(cfpArrIter1, 255); + cfpArrIter2 = CFP_NAMESPACE.SUB_NAMESPACE.iterator.next(cfpArrIter2, 511); + + assert_true(CFP_NAMESPACE.SUB_NAMESPACE.iterator.gt(cfpArrIter2, cfpArrIter1)); +} + +static void +_catFunc3(given_, CFP_ITER_TYPE, _when_leq_expect_less_or_equal)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + + CFP_ITER_TYPE cfpArrIter1 = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr); + CFP_ITER_TYPE cfpArrIter2 = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr); + cfpArrIter1 = CFP_NAMESPACE.SUB_NAMESPACE.iterator.next(cfpArrIter1, 255); + cfpArrIter2 = CFP_NAMESPACE.SUB_NAMESPACE.iterator.next(cfpArrIter2, 511); + + assert_true(CFP_NAMESPACE.SUB_NAMESPACE.iterator.leq(cfpArrIter1, cfpArrIter1)); + assert_true(CFP_NAMESPACE.SUB_NAMESPACE.iterator.leq(cfpArrIter1, cfpArrIter2)); +} + +static void +_catFunc3(given_, CFP_ITER_TYPE, _when_geq_expect_greater_or_equal)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + + CFP_ITER_TYPE cfpArrIter1 = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr); + CFP_ITER_TYPE cfpArrIter2 = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr); + cfpArrIter1 = CFP_NAMESPACE.SUB_NAMESPACE.iterator.next(cfpArrIter1, 255); 
+ cfpArrIter2 = CFP_NAMESPACE.SUB_NAMESPACE.iterator.next(cfpArrIter2, 511); + + assert_true(CFP_NAMESPACE.SUB_NAMESPACE.iterator.geq(cfpArrIter1, cfpArrIter1)); + assert_true(CFP_NAMESPACE.SUB_NAMESPACE.iterator.geq(cfpArrIter2, cfpArrIter1)); +} + +static void +_catFunc3(given_, CFP_ITER_TYPE, _when_get_index_expect_correct)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + + CFP_ITER_TYPE cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr); + cfpArrIter.x = 1; + cfpArrIter.y = 3; + cfpArrIter.z = 2; + cfpArrIter.w = 1; + + size_t i_idx = CFP_NAMESPACE.SUB_NAMESPACE.iterator.i(cfpArrIter); + size_t j_idx = CFP_NAMESPACE.SUB_NAMESPACE.iterator.j(cfpArrIter); + size_t k_idx = CFP_NAMESPACE.SUB_NAMESPACE.iterator.k(cfpArrIter); + size_t l_idx = CFP_NAMESPACE.SUB_NAMESPACE.iterator.l(cfpArrIter); + + assert_int_equal(i_idx, 1u); + assert_int_equal(j_idx, 3u); + assert_int_equal(k_idx, 2u); + assert_int_equal(l_idx, 1u); +} diff --git a/tests/cfp/testCfpArray4d.c b/tests/cfp/testCfpArray4d.c new file mode 100644 index 00000000..61ed9391 --- /dev/null +++ b/tests/cfp/testCfpArray4d.c @@ -0,0 +1,98 @@ +#include "src/traitsd.h" +#include "src/block4.h" + +#include "constants/4dDouble.h" + +#define CFP_ARRAY_TYPE cfp_array4d +#define CFP_REF_TYPE cfp_ref4d +#define CFP_PTR_TYPE cfp_ptr4d +#define CFP_ITER_TYPE cfp_iter4d +#define SUB_NAMESPACE array4d +#define SCALAR double +#define SCALAR_TYPE zfp_type_double +#define DIMENSIONALITY 4 + +#include "testCfpArray_source.c" +#include "testCfpArray4_source.c" + +int main() +{ + const struct CMUnitTest tests[] = { + cmocka_unit_test(when_seededRandomSmoothDataGenerated_expect_ChecksumMatches), + + cmocka_unit_test(given_cfp_array4d_when_defaultCtor_expect_returnsNonNullPtr), + cmocka_unit_test_setup_teardown(given_cfp_array4d_when_ctor_expect_paramsSet, setupCfpArrLargeComplete, teardownCfpArr), + 
cmocka_unit_test_setup_teardown(given_cfp_array4d_when_copyCtor_expect_paramsCopied, setupCfpArrLargeComplete, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array4d_when_copyCtor_expect_cacheCopied, setupCfpArrLargeComplete, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array4d_when_headerCtor_expect_copied, setupCfpArrLargeComplete, teardownCfpArr), + + cmocka_unit_test_setup_teardown(given_cfp_array4d_header_expect_matchingMetadata, setupCfpArrLargeComplete, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array4d_header_when_bufferCtor_expect_copied, setupCfpArrLargeComplete, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array4d_header_when_bufferCtor_expect_paramsCopied, setupCfpArrLargeComplete, teardownCfpArr), + + cmocka_unit_test_setup_teardown(given_cfp_array4d_when_setRate_expect_rateSet, setupCfpArrMinimal, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array4d_when_setCacheSize_expect_cacheSizeSet, setupCfpArrSmall, teardownCfpArr), + + cmocka_unit_test_setup_teardown(given_cfp_array4d_with_dirtyCache_when_flushCache_expect_cacheEntriesPersistedToMemory, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array4d_when_clearCache_expect_cacheCleared, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array4d_when_resize_expect_sizeChanged, setupCfpArrSmall, teardownCfpArr), + + cmocka_unit_test_setup_teardown(given_cfp_array4d_when_setFlat_expect_entryWrittenToCacheOnly, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array4d_when_getFlat_expect_entryReturned, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array4d_when_set_expect_entryWrittenToCacheOnly, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array4d_when_get_expect_entryReturned, setupCfpArrSmall, teardownCfpArr), + 
cmocka_unit_test_setup_teardown(given_cfp_array4d_when_ref_expect_arrayObjectValid, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array4d_when_ptr_expect_arrayObjectValid, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array4d_when_ref_flat_expect_entryReturned, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array4d_when_ptr_flat_expect_entryReturned, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array4d_when_begin_expect_objectValid, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array4d_when_end_expect_objectValid, setupCfpArrSmall, teardownCfpArr), + + cmocka_unit_test_setup_teardown(given_cfp_ref4d_when_get_expect_entryReturned, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ref4d_when_set_expect_arrayUpdated, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ref4d_when_ptr_expect_addressMatches, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ref4d_when_copy_expect_arrayUpdated, setupCfpArrSmall, teardownCfpArr), + + cmocka_unit_test_setup_teardown(given_cfp_ptr4d_when_get_set_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr4d_when_get_at_set_at_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr4d_when_ref_expect_addressMatches, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr4d_when_ref_at_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr4d_when_lt_expect_less, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr4d_when_gt_expect_greater, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr4d_when_leq_expect_less_or_equal, setupCfpArrSmall, teardownCfpArr), + 
cmocka_unit_test_setup_teardown(given_cfp_ptr4d_when_geq_expect_greater_or_equal, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr4d_when_eq_expect_equal, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr4d_when_neq_expect_not_equal, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr4d_when_distance_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr4d_when_next_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr4d_when_prev_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr4d_when_inc_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr4d_when_dec_expect_correct, setupCfpArrSmall, teardownCfpArr), + + cmocka_unit_test_setup_teardown(given_cfp_iter4d_when_get_set_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter4d_when_get_at_set_at_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter4d_when_ref_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter4d_when_ref_at_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter4d_when_ptr_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter4d_when_ptr_at_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter4d_when_lt_expect_less, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter4d_when_gt_expect_greater, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter4d_when_leq_expect_less_or_equal, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter4d_when_geq_expect_greater_or_equal, 
setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter4d_when_eq_expect_equal, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter4d_when_neq_expect_not_equal, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter4d_when_distance_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter4d_when_next_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter4d_when_prev_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter4d_when_inc_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter4d_when_dec_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter4d_when_iterate_touch_all, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter4d_when_get_index_expect_correct, setupCfpArrSmall, teardownCfpArr), + + /* NOTE: 4D arrays only support 8bit rates so setupFixedRate1 and 2 aren't used for testing here */ + cmocka_unit_test_setup_teardown(given_cfp_array4d_when_setArray_expect_compressedStreamChecksumMatches, setupFixedRate0, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array4d_when_getArray_expect_decompressedArrChecksumMatches, setupFixedRate0, teardownCfpArr), + }; + + return cmocka_run_group_tests(tests, prepCommonSetupVars, teardownCommonSetupVars); +} diff --git a/tests/cfp/testCfpArray4f.c b/tests/cfp/testCfpArray4f.c new file mode 100644 index 00000000..56cc3817 --- /dev/null +++ b/tests/cfp/testCfpArray4f.c @@ -0,0 +1,98 @@ +#include "src/traitsf.h" +#include "src/block4.h" + +#include "constants/4dFloat.h" + +#define CFP_ARRAY_TYPE cfp_array4f +#define CFP_REF_TYPE cfp_ref4f +#define CFP_PTR_TYPE cfp_ptr4f +#define CFP_ITER_TYPE cfp_iter4f +#define SUB_NAMESPACE array4f +#define SCALAR float +#define SCALAR_TYPE 
zfp_type_float +#define DIMENSIONALITY 4 + +#include "testCfpArray_source.c" +#include "testCfpArray4_source.c" + +int main() +{ + const struct CMUnitTest tests[] = { + cmocka_unit_test(when_seededRandomSmoothDataGenerated_expect_ChecksumMatches), + + cmocka_unit_test(given_cfp_array4f_when_defaultCtor_expect_returnsNonNullPtr), + cmocka_unit_test_setup_teardown(given_cfp_array4f_when_ctor_expect_paramsSet, setupCfpArrLargeComplete, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array4f_when_copyCtor_expect_paramsCopied, setupCfpArrLargeComplete, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array4f_when_copyCtor_expect_cacheCopied, setupCfpArrLargeComplete, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array4f_when_headerCtor_expect_copied, setupCfpArrLargeComplete, teardownCfpArr), + + cmocka_unit_test_setup_teardown(given_cfp_array4f_header_expect_matchingMetadata, setupCfpArrLargeComplete, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array4f_header_when_bufferCtor_expect_copied, setupCfpArrLargeComplete, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array4f_header_when_bufferCtor_expect_paramsCopied, setupCfpArrLargeComplete, teardownCfpArr), + + cmocka_unit_test_setup_teardown(given_cfp_array4f_when_setRate_expect_rateSet, setupCfpArrMinimal, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array4f_when_setCacheSize_expect_cacheSizeSet, setupCfpArrSmall, teardownCfpArr), + + cmocka_unit_test_setup_teardown(given_cfp_array4f_with_dirtyCache_when_flushCache_expect_cacheEntriesPersistedToMemory, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array4f_when_clearCache_expect_cacheCleared, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array4f_when_resize_expect_sizeChanged, setupCfpArrSmall, teardownCfpArr), + + cmocka_unit_test_setup_teardown(given_cfp_array4f_when_setFlat_expect_entryWrittenToCacheOnly, 
setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array4f_when_getFlat_expect_entryReturned, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array4f_when_set_expect_entryWrittenToCacheOnly, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array4f_when_get_expect_entryReturned, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array4f_when_ref_expect_arrayObjectValid, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array4f_when_ptr_expect_arrayObjectValid, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array4f_when_ref_flat_expect_entryReturned, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array4f_when_ptr_flat_expect_entryReturned, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array4f_when_begin_expect_objectValid, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array4f_when_end_expect_objectValid, setupCfpArrSmall, teardownCfpArr), + + cmocka_unit_test_setup_teardown(given_cfp_ref4f_when_get_expect_entryReturned, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ref4f_when_set_expect_arrayUpdated, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ref4f_when_ptr_expect_addressMatches, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ref4f_when_copy_expect_arrayUpdated, setupCfpArrSmall, teardownCfpArr), + + cmocka_unit_test_setup_teardown(given_cfp_ptr4f_when_get_set_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr4f_when_get_at_set_at_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr4f_when_ref_expect_addressMatches, setupCfpArrSmall, teardownCfpArr), + 
cmocka_unit_test_setup_teardown(given_cfp_ptr4f_when_ref_at_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr4f_when_lt_expect_less, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr4f_when_gt_expect_greater, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr4f_when_leq_expect_less_or_equal, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr4f_when_geq_expect_greater_or_equal, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr4f_when_eq_expect_equal, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr4f_when_neq_expect_not_equal, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr4f_when_distance_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr4f_when_next_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr4f_when_prev_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr4f_when_inc_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_ptr4f_when_dec_expect_correct, setupCfpArrSmall, teardownCfpArr), + + cmocka_unit_test_setup_teardown(given_cfp_iter4f_when_get_set_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter4f_when_get_at_set_at_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter4f_when_ref_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter4f_when_ref_at_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter4f_when_ptr_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter4f_when_ptr_at_expect_correct, 
setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter4f_when_lt_expect_less, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter4f_when_gt_expect_greater, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter4f_when_leq_expect_less_or_equal, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter4f_when_geq_expect_greater_or_equal, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter4f_when_eq_expect_equal, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter4f_when_neq_expect_not_equal, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter4f_when_distance_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter4f_when_next_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter4f_when_prev_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter4f_when_inc_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter4f_when_dec_expect_correct, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter4f_when_iterate_touch_all, setupCfpArrSmall, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_iter4f_when_get_index_expect_correct, setupCfpArrSmall, teardownCfpArr), + + /* NOTE: 4D arrays only support 8bit rates so setupFixedRate1 and 2 aren't used for testing here */ + cmocka_unit_test_setup_teardown(given_cfp_array4f_when_setArray_expect_compressedStreamChecksumMatches, setupFixedRate0, teardownCfpArr), + cmocka_unit_test_setup_teardown(given_cfp_array4f_when_getArray_expect_decompressedArrChecksumMatches, setupFixedRate0, teardownCfpArr), + }; + + return cmocka_run_group_tests(tests, prepCommonSetupVars, teardownCommonSetupVars); +} diff --git 
a/tests/cfp/testCfpArray_source.c b/tests/cfp/testCfpArray_source.c new file mode 100644 index 00000000..de098341 --- /dev/null +++ b/tests/cfp/testCfpArray_source.c @@ -0,0 +1,643 @@ +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "zfp/array.h" +#include "zfp.h" + +#include "utils/genSmoothRandNums.h" +#include "utils/testMacros.h" +#include "utils/zfpChecksums.h" +#include "utils/zfpHash.h" + +#define SIZE_X 20 +#define SIZE_Y 21 +#define SIZE_Z 22 +#define SIZE_W 5 + +#define VAL 12345678.9 + +#define MIN_TOTAL_ELEMENTS 1000000 + +#define CFP_HEADER_TYPE cfp_header + +struct setupVars { + size_t dataSideLen; + size_t totalDataLen; + Scalar* dataArr; + Scalar* decompressedArr; + + // dimensions of data that gets compressed (currently same as dataSideLen) + size_t dimLens[4]; + + CFP_ARRAY_TYPE cfpArr; + + int paramNum; + double rate; + size_t csize; +}; + +// run this once per (datatype, DIM) combination for performance +static int +setupRandomData(void** state) +{ + struct setupVars *bundle = *state; + + switch(ZFP_TYPE) { + case zfp_type_float: + generateSmoothRandFloats(MIN_TOTAL_ELEMENTS, DIMS, (float**)&bundle->dataArr, &bundle->dataSideLen, &bundle->totalDataLen); + break; + + case zfp_type_double: + generateSmoothRandDoubles(MIN_TOTAL_ELEMENTS, DIMS, (double**)&bundle->dataArr, &bundle->dataSideLen, &bundle->totalDataLen); + break; + + default: + fail_msg("Invalid zfp_type during setupRandomData()"); + break; + } + assert_non_null(bundle->dataArr); + + // for now, entire randomly generated array always entirely compressed + int i; + for (i = 0; i < 4; i++) { + bundle->dimLens[i] = (i < DIMS) ? 
bundle->dataSideLen : 0; + } + + bundle->decompressedArr = malloc(bundle->totalDataLen * sizeof(Scalar)); + assert_non_null(bundle->decompressedArr); + + *state = bundle; + + return 0; +} + +static int +prepCommonSetupVars(void** state) +{ + struct setupVars *bundle = calloc(1, sizeof(struct setupVars)); + assert_non_null(bundle); + + bundle->rate = ZFP_RATE_PARAM_BITS; + bundle->csize = 300; + + *state = bundle; + + return setupRandomData(state); +} + +static int +teardownRandomData(void** state) +{ + struct setupVars *bundle = *state; + free(bundle->dataArr); + free(bundle->decompressedArr); + + return 0; +} + +static int +teardownCommonSetupVars(void** state) +{ + struct setupVars *bundle = *state; + + int result = teardownRandomData(state); + + free(bundle); + + return result; +} + +static int +setupCfpArrMinimal(void** state) +{ + struct setupVars *bundle = *state; + + bundle->cfpArr = CFP_NAMESPACE.SUB_NAMESPACE.ctor_default(); + assert_non_null(bundle->cfpArr.object); + + return 0; +} + +static int +setupCfpArrSizeRate(void** state, size_t sizeX, size_t sizeY, size_t sizeZ, size_t sizeW) +{ + struct setupVars *bundle = *state; + +#if DIMS == 1 + bundle->cfpArr = CFP_NAMESPACE.SUB_NAMESPACE.ctor(sizeX, bundle->rate, 0, 0); +#elif DIMS == 2 + bundle->cfpArr = CFP_NAMESPACE.SUB_NAMESPACE.ctor(sizeX, sizeY, bundle->rate, 0, 0); +#elif DIMS == 3 + bundle->cfpArr = CFP_NAMESPACE.SUB_NAMESPACE.ctor(sizeX, sizeY, sizeZ, bundle->rate, 0, 0); +#else + /* NOTE: 4d rate is capped at 8 bits */ + bundle->cfpArr = CFP_NAMESPACE.SUB_NAMESPACE.ctor(sizeX, sizeY, sizeZ, sizeW, 8, 0, 0); +#endif + + assert_non_null(bundle->cfpArr.object); + + return 0; +} + +static int +setupCfpArrLargeComplete(void **state) +{ + struct setupVars *bundle = *state; + +#if DIMS == 1 + bundle->cfpArr = CFP_NAMESPACE.SUB_NAMESPACE.ctor(bundle->dataSideLen, bundle->rate, bundle->dataArr, bundle->csize); +#elif DIMS == 2 + bundle->cfpArr = CFP_NAMESPACE.SUB_NAMESPACE.ctor(bundle->dataSideLen, 
bundle->dataSideLen, bundle->rate, bundle->dataArr, bundle->csize); +#elif DIMS == 3 + bundle->cfpArr = CFP_NAMESPACE.SUB_NAMESPACE.ctor(bundle->dataSideLen, bundle->dataSideLen, bundle->dataSideLen, bundle->rate, bundle->dataArr, bundle->csize); +#else + /* NOTE: 4d rate is capped at 8 bits */ + bundle->cfpArr = CFP_NAMESPACE.SUB_NAMESPACE.ctor(bundle->dataSideLen, bundle->dataSideLen, bundle->dataSideLen, bundle->dataSideLen, 8, bundle->dataArr, bundle->csize); +#endif + + assert_non_null(bundle->cfpArr.object); + + return 0; +} + +static int +setupCfpArrLarge(void** state) +{ + struct setupVars *bundle = *state; + return setupCfpArrSizeRate(state, bundle->dataSideLen, bundle->dataSideLen, bundle->dataSideLen, bundle->dataSideLen); +} + +static int +setupCfpArrSmall(void** state) +{ + return setupCfpArrSizeRate(state, SIZE_X, SIZE_Y, SIZE_Z, SIZE_W); +} + +static int +teardownCfpArr(void** state) +{ + struct setupVars *bundle = *state; + CFP_NAMESPACE.SUB_NAMESPACE.dtor(bundle->cfpArr); + + return 0; +} + +// assumes setupRandomData() already run (having set some setupVars members) +static int +loadFixedRateVars(void **state, int paramNum) +{ + struct setupVars *bundle = *state; + bundle->paramNum = paramNum; + +#if DIMS == 4 + // 4d (de)serialization rate limit + if (bundle->paramNum != 0) { + fail_msg("Unknown paramNum during loadFixedRateVars()"); + } +#else + if (bundle->paramNum > 2 || bundle->paramNum < 0) { + fail_msg("Unknown paramNum during loadFixedRateVars()"); + } +#endif + + bundle->rate = (double)(1u << (bundle->paramNum + 3)); + *state = bundle; + + return setupCfpArrLarge(state); +} + +static int +setupFixedRate0(void **state) +{ + return loadFixedRateVars(state, 0); +} + +static int +setupFixedRate1(void **state) +{ + return loadFixedRateVars(state, 1); +} + +static int +setupFixedRate2(void **state) +{ + return loadFixedRateVars(state, 2); +} + +// dataArr and the struct itself are freed in teardownCommonSetupVars() +static int +teardown(void 
**state) +{ + struct setupVars *bundle = *state; + free(bundle->decompressedArr); + + return 0; +} + +static void +when_seededRandomSmoothDataGenerated_expect_ChecksumMatches(void **state) +{ + struct setupVars *bundle = *state; + UInt checksum = _catFunc2(hashArray, SCALAR_BITS)((const UInt*)bundle->dataArr, bundle->totalDataLen, 1); + + uint64 key1, key2; + computeKeyOriginalInput(ARRAY_TEST, bundle->dimLens, &key1, &key2); + uint64 expectedChecksum = getChecksumByKey(DIMS, ZFP_TYPE, key1, key2); + + assert_int_equal(checksum, expectedChecksum); +} + +static void +_catFunc3(given_, CFP_ARRAY_TYPE, _when_defaultCtor_expect_returnsNonNullPtr)(void **state) +{ + CFP_ARRAY_TYPE cfpArr = CFP_NAMESPACE.SUB_NAMESPACE.ctor_default(); + assert_non_null(cfpArr.object); + + CFP_NAMESPACE.SUB_NAMESPACE.dtor(cfpArr); +} + +static void +_catFunc3(given_, CFP_ARRAY_TYPE, _when_copyCtor_expect_paramsCopied)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE srcCfpArr = bundle->cfpArr; + CFP_ARRAY_TYPE newCfpArr = CFP_NAMESPACE.SUB_NAMESPACE.ctor_copy(srcCfpArr); + + // verify size + assert_int_equal(CFP_NAMESPACE.SUB_NAMESPACE.size(newCfpArr), CFP_NAMESPACE.SUB_NAMESPACE.size(srcCfpArr)); + + // verify rate + assert_int_equal(CFP_NAMESPACE.SUB_NAMESPACE.rate(newCfpArr), CFP_NAMESPACE.SUB_NAMESPACE.rate(srcCfpArr)); + + // verify compressed size, data + size_t newDataSize = CFP_NAMESPACE.SUB_NAMESPACE.compressed_size(newCfpArr); + size_t srcDataSize = CFP_NAMESPACE.SUB_NAMESPACE.compressed_size(srcCfpArr); + assert_int_equal(newDataSize, srcDataSize); + + uchar* newData = CFP_NAMESPACE.SUB_NAMESPACE.compressed_data(newCfpArr); + uchar* srcData = CFP_NAMESPACE.SUB_NAMESPACE.compressed_data(srcCfpArr); + assert_memory_equal(newData, srcData, newDataSize); + + // verify cache size + assert_int_equal(CFP_NAMESPACE.SUB_NAMESPACE.cache_size(newCfpArr), CFP_NAMESPACE.SUB_NAMESPACE.cache_size(srcCfpArr)); + + CFP_NAMESPACE.SUB_NAMESPACE.dtor(newCfpArr); +} + +static 
void +_catFunc3(given_, CFP_ARRAY_TYPE, _when_copyCtor_expect_cacheCopied)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE srcCfpArr = bundle->cfpArr; + + // get ptr to compressed data (automatically flushes cache) + uchar* srcData = CFP_NAMESPACE.SUB_NAMESPACE.compressed_data(srcCfpArr); + + // create dirty cache + size_t i = 5; + CFP_NAMESPACE.SUB_NAMESPACE.set_flat(srcCfpArr, i, (SCALAR)VAL); + + // exec copy constructor + CFP_ARRAY_TYPE newCfpArr = CFP_NAMESPACE.SUB_NAMESPACE.ctor_copy(srcCfpArr); + + size_t newDataSize = CFP_NAMESPACE.SUB_NAMESPACE.compressed_size(newCfpArr); + size_t srcDataSize = CFP_NAMESPACE.SUB_NAMESPACE.compressed_size(srcCfpArr); + assert_int_equal(newDataSize, srcDataSize); + + // getting data ptr to copy-constructed array requires a flush (no way to avoid) + uchar* newData = CFP_NAMESPACE.SUB_NAMESPACE.compressed_data(newCfpArr); + assert_memory_not_equal(newData, srcData, newDataSize); + + // verify flush brings both to same state + CFP_NAMESPACE.SUB_NAMESPACE.flush_cache(srcCfpArr); + assert_memory_equal(newData, srcData, newDataSize); + + // verify compressed value is the same + assert_true(CFP_NAMESPACE.SUB_NAMESPACE.get_flat(newCfpArr, i) == CFP_NAMESPACE.SUB_NAMESPACE.get_flat(srcCfpArr, i)); + + CFP_NAMESPACE.SUB_NAMESPACE.dtor(newCfpArr); +} + +static void +_catFunc3(given_, CFP_ARRAY_TYPE, _when_headerCtor_expect_copied)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE srcCfpArr = bundle->cfpArr; + + // get header + CFP_HEADER_TYPE srcCfpHdr = CFP_NAMESPACE.SUB_NAMESPACE.header.ctor(srcCfpArr); + + // get compressed bitstream + void* srcBuff = (void*)CFP_NAMESPACE.SUB_NAMESPACE.compressed_data(srcCfpArr); + size_t srcSz = CFP_NAMESPACE.SUB_NAMESPACE.compressed_size(srcCfpArr); + + // exec construct from header + stream + CFP_ARRAY_TYPE newCfpArr = CFP_NAMESPACE.SUB_NAMESPACE.ctor_header(srcCfpHdr, srcBuff, srcSz); + + // verify reconstruction from header + stream results in 
equivalent array data + void* newBuff = (void*)CFP_NAMESPACE.SUB_NAMESPACE.compressed_data(newCfpArr); + size_t newSz = CFP_NAMESPACE.SUB_NAMESPACE.compressed_size(newCfpArr); + + assert_int_equal(srcSz, newSz); + assert_memory_equal(srcBuff, newBuff, newSz); + + // cleanup + CFP_NAMESPACE.SUB_NAMESPACE.header.dtor(srcCfpHdr); + CFP_NAMESPACE.SUB_NAMESPACE.dtor(newCfpArr); +} + +static void +_catFunc3(given_, CFP_ARRAY_TYPE, _header_when_bufferCtor_expect_copied)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE srcCfpArr = bundle->cfpArr; + + // get header + CFP_HEADER_TYPE srcCfpHdr = CFP_NAMESPACE.SUB_NAMESPACE.header.ctor(srcCfpArr); + const void* srcBuff = CFP_NAMESPACE.SUB_NAMESPACE.header.data(srcCfpHdr); + size_t srcSz = CFP_NAMESPACE.SUB_NAMESPACE.header.size_bytes(srcCfpHdr, ZFP_DATA_HEADER); + + // exec new header construct from source header + CFP_HEADER_TYPE newCfpHdr = CFP_NAMESPACE.SUB_NAMESPACE.header.ctor_buffer(srcBuff, srcSz); + + const void* newBuff = CFP_NAMESPACE.SUB_NAMESPACE.header.data(newCfpHdr); + size_t newSz = CFP_NAMESPACE.SUB_NAMESPACE.header.size_bytes(newCfpHdr, ZFP_DATA_HEADER); + + assert_int_equal(srcSz, newSz); + assert_memory_equal(srcBuff, newBuff, newSz); + + // cleanup + CFP_NAMESPACE.SUB_NAMESPACE.header.dtor(srcCfpHdr); + CFP_NAMESPACE.SUB_NAMESPACE.header.dtor(newCfpHdr); +} + +static void +_catFunc3(given_, CFP_ARRAY_TYPE, _header_when_bufferCtor_expect_paramsCopied)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE srcCfpArr = bundle->cfpArr; + + // get header + CFP_HEADER_TYPE srcCfpHdr = CFP_NAMESPACE.SUB_NAMESPACE.header.ctor(srcCfpArr); + const void* hBuff = CFP_NAMESPACE.SUB_NAMESPACE.header.data(srcCfpHdr); + size_t hSz = CFP_NAMESPACE.SUB_NAMESPACE.header.size_bytes(srcCfpHdr, ZFP_DATA_HEADER); + + // exec new header construct from source header + CFP_HEADER_TYPE newCfpHdr = CFP_NAMESPACE.SUB_NAMESPACE.header.ctor_buffer(hBuff, hSz); + + 
assert_int_equal(CFP_NAMESPACE.SUB_NAMESPACE.header.scalar_type(srcCfpHdr), CFP_NAMESPACE.SUB_NAMESPACE.header.scalar_type(newCfpHdr)); + assert_int_equal(CFP_NAMESPACE.SUB_NAMESPACE.header.dimensionality(srcCfpHdr), CFP_NAMESPACE.SUB_NAMESPACE.header.dimensionality(newCfpHdr)); + assert_int_equal(CFP_NAMESPACE.SUB_NAMESPACE.header.rate(srcCfpHdr), CFP_NAMESPACE.SUB_NAMESPACE.header.rate(newCfpHdr)); + assert_int_equal(CFP_NAMESPACE.SUB_NAMESPACE.header.size_bytes(srcCfpHdr, ZFP_DATA_HEADER), CFP_NAMESPACE.SUB_NAMESPACE.header.size_bytes(newCfpHdr, ZFP_DATA_HEADER)); + assert_int_equal(CFP_NAMESPACE.SUB_NAMESPACE.header.size_x(srcCfpHdr), CFP_NAMESPACE.SUB_NAMESPACE.header.size_x(newCfpHdr)); + assert_int_equal(CFP_NAMESPACE.SUB_NAMESPACE.header.size_y(srcCfpHdr), CFP_NAMESPACE.SUB_NAMESPACE.header.size_y(newCfpHdr)); + assert_int_equal(CFP_NAMESPACE.SUB_NAMESPACE.header.size_z(srcCfpHdr), CFP_NAMESPACE.SUB_NAMESPACE.header.size_z(newCfpHdr)); + assert_int_equal(CFP_NAMESPACE.SUB_NAMESPACE.header.size_w(srcCfpHdr), CFP_NAMESPACE.SUB_NAMESPACE.header.size_w(newCfpHdr)); + + // cleanup + CFP_NAMESPACE.SUB_NAMESPACE.header.dtor(srcCfpHdr); + CFP_NAMESPACE.SUB_NAMESPACE.header.dtor(newCfpHdr); +} + +static void +_catFunc3(given_, CFP_ARRAY_TYPE, _when_setRate_expect_rateSet)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + assert_int_equal(CFP_NAMESPACE.SUB_NAMESPACE.rate(cfpArr), 0); + + double rate = CFP_NAMESPACE.SUB_NAMESPACE.set_rate(cfpArr, bundle->rate); + assert_int_not_equal(CFP_NAMESPACE.SUB_NAMESPACE.rate(cfpArr), 0); + assert_true(CFP_NAMESPACE.SUB_NAMESPACE.rate(cfpArr) == rate); +} + +static void +_catFunc3(given_, CFP_ARRAY_TYPE, _when_setCacheSize_expect_cacheSizeSet)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + + size_t oldCsize = CFP_NAMESPACE.SUB_NAMESPACE.cache_size(cfpArr); + size_t newCsize = oldCsize + 999; + + // set_cache_size() accepts a 
minimum cache size + CFP_NAMESPACE.SUB_NAMESPACE.set_cache_size(cfpArr, newCsize); + assert_true(CFP_NAMESPACE.SUB_NAMESPACE.cache_size(cfpArr) >= newCsize); +} + +static void +_catFunc3(given_, CFP_ARRAY_TYPE, _with_dirtyCache_when_flushCache_expect_cacheEntriesPersistedToMemory)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + + // getting the ptr automatically flushes cache, so do this before setting an entry + uchar* compressedDataPtr = CFP_NAMESPACE.SUB_NAMESPACE.compressed_data(cfpArr); + size_t compressedSize = CFP_NAMESPACE.SUB_NAMESPACE.compressed_size(cfpArr); + + uchar* oldMemory = malloc(compressedSize * sizeof(uchar)); + memcpy(oldMemory, compressedDataPtr, compressedSize); + + CFP_NAMESPACE.SUB_NAMESPACE.set_flat(cfpArr, 0, (SCALAR)VAL); + + CFP_NAMESPACE.SUB_NAMESPACE.flush_cache(cfpArr); + + assert_memory_not_equal(compressedDataPtr, oldMemory, compressedSize); + free(oldMemory); +} + +static void +_catFunc3(given_, CFP_ARRAY_TYPE, _when_clearCache_expect_cacheCleared)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + + SCALAR prevVal = CFP_NAMESPACE.SUB_NAMESPACE.get_flat(cfpArr, 0); + CFP_NAMESPACE.SUB_NAMESPACE.set_flat(cfpArr, 0, (SCALAR)VAL); + + CFP_NAMESPACE.SUB_NAMESPACE.clear_cache(cfpArr); + + CFP_NAMESPACE.SUB_NAMESPACE.flush_cache(cfpArr); + assert_true(CFP_NAMESPACE.SUB_NAMESPACE.get_flat(cfpArr, 0) == prevVal); +} + +static void +_catFunc3(given_, CFP_ARRAY_TYPE, _when_setFlat_expect_entryWrittenToCacheOnly)(void **state) +{ + struct setupVars *bundle = *state; + + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + + // getting the ptr automatically flushes cache, so do this before setting an entry + uchar* compressedDataPtr = CFP_NAMESPACE.SUB_NAMESPACE.compressed_data(cfpArr); + size_t compressedSize = CFP_NAMESPACE.SUB_NAMESPACE.compressed_size(cfpArr); + + uchar* oldMemory = malloc(compressedSize * sizeof(uchar)); + memcpy(oldMemory, 
compressedDataPtr, compressedSize); + + CFP_NAMESPACE.SUB_NAMESPACE.set_flat(cfpArr, 0, (SCALAR)VAL); + + assert_memory_equal(compressedDataPtr, oldMemory, compressedSize); + free(oldMemory); +} + +static void +_catFunc3(given_, CFP_ARRAY_TYPE, _when_getFlat_expect_entryReturned)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + CFP_NAMESPACE.SUB_NAMESPACE.set_flat(cfpArr, 0, (SCALAR)VAL); + + // dirty cache preserves exact value (compression not applied until flush) + assert_true(CFP_NAMESPACE.SUB_NAMESPACE.get_flat(cfpArr, 0) == (SCALAR)VAL); +} + +static void +_catFunc3(given_, CFP_ARRAY_TYPE, _when_setArray_expect_compressedStreamChecksumMatches)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + + uchar* compressedPtr = CFP_NAMESPACE.SUB_NAMESPACE.compressed_data(cfpArr); + CFP_NAMESPACE.SUB_NAMESPACE.set_array(cfpArr, bundle->dataArr); + + size_t compressedSize = CFP_NAMESPACE.SUB_NAMESPACE.compressed_size(cfpArr); + uint64 checksum = hashBitstream((uint64*)compressedPtr, compressedSize); + + uint64 key1, key2; + computeKey(ARRAY_TEST, COMPRESSED_BITSTREAM, bundle->dimLens, zfp_mode_fixed_rate, bundle->paramNum, &key1, &key2); + uint64 expectedChecksum = getChecksumByKey(DIMS, ZFP_TYPE, key1, key2); + + assert_int_equal(checksum, expectedChecksum); +} + +static void +_catFunc3(given_, CFP_ARRAY_TYPE, _when_getArray_expect_decompressedArrChecksumMatches)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + + CFP_NAMESPACE.SUB_NAMESPACE.set_array(cfpArr, bundle->dataArr); + CFP_NAMESPACE.SUB_NAMESPACE.get_array(cfpArr, bundle->decompressedArr); + + UInt checksum = _catFunc2(hashArray, SCALAR_BITS)((UInt*)bundle->decompressedArr, bundle->totalDataLen, 1); + + uint64 key1, key2; + computeKey(ARRAY_TEST, DECOMPRESSED_ARRAY, bundle->dimLens, zfp_mode_fixed_rate, bundle->paramNum, &key1, &key2); + uint64 expectedChecksum = 
getChecksumByKey(DIMS, ZFP_TYPE, key1, key2); + + assert_int_equal(checksum, expectedChecksum); +} + +static void +_catFunc3(given_, CFP_ARRAY_TYPE, _when_ref_flat_expect_entryReturned)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + + size_t i = 10; + CFP_REF_TYPE cfpArrRef = CFP_NAMESPACE.SUB_NAMESPACE.ref_flat(cfpArr, i); + + assert_true(CFP_NAMESPACE.SUB_NAMESPACE.reference.get(cfpArrRef) == CFP_NAMESPACE.SUB_NAMESPACE.get_flat(cfpArr, i)); +} + +static void +_catFunc3(given_, CFP_ARRAY_TYPE, _when_ptr_flat_expect_entryReturned)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + + size_t i = 10; + CFP_PTR_TYPE cfpArrPtr = CFP_NAMESPACE.SUB_NAMESPACE.ptr_flat(cfpArr, i); + + assert_true(CFP_NAMESPACE.SUB_NAMESPACE.reference.get(cfpArrPtr.reference) == CFP_NAMESPACE.SUB_NAMESPACE.get_flat(cfpArr, i)); +} + +// ############## +// cfp_iter tests +// ############## + +static void +_catFunc3(given_, CFP_ITER_TYPE, _when_get_set_expect_correct)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + + SCALAR val = 5; + + CFP_ITER_TYPE cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr); + CFP_NAMESPACE.SUB_NAMESPACE.iterator.set(cfpArrIter, val); + + assert_int_equal(CFP_NAMESPACE.SUB_NAMESPACE.iterator.get(cfpArrIter), val); +} + +static void +_catFunc3(given_, CFP_ITER_TYPE, _when_get_at_set_at_expect_correct)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + + size_t i = 3; + SCALAR val = 5; + + CFP_ITER_TYPE cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr); + CFP_NAMESPACE.SUB_NAMESPACE.iterator.set_at(cfpArrIter, i, val); + + assert_int_equal(CFP_NAMESPACE.SUB_NAMESPACE.iterator.get_at(cfpArrIter, i), val); +} + +static void +_catFunc3(given_, CFP_ITER_TYPE, _when_iterate_touch_all)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = 
bundle->cfpArr; + CFP_ITER_TYPE cfpArrIter; + CFP_PTR_TYPE cfpArrPtr; + + SCALAR val = -1; + + for (cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr); + CFP_NAMESPACE.SUB_NAMESPACE.iterator.neq(cfpArrIter, CFP_NAMESPACE.SUB_NAMESPACE.end(cfpArr)); + cfpArrIter = CFP_NAMESPACE.SUB_NAMESPACE.iterator.inc(cfpArrIter)) + { + CFP_NAMESPACE.SUB_NAMESPACE.iterator.set(cfpArrIter, val); + } + + for (cfpArrPtr = CFP_NAMESPACE.SUB_NAMESPACE.ptr_flat(cfpArr, 0); + CFP_NAMESPACE.SUB_NAMESPACE.pointer.leq(cfpArrPtr, CFP_NAMESPACE.SUB_NAMESPACE.ptr_flat(cfpArr, CFP_NAMESPACE.SUB_NAMESPACE.size(cfpArr) - 1)); + cfpArrPtr = CFP_NAMESPACE.SUB_NAMESPACE.pointer.inc(cfpArrPtr)) + { + assert_true(CFP_NAMESPACE.SUB_NAMESPACE.pointer.get(cfpArrPtr) - val < 1e-12); + assert_true(CFP_NAMESPACE.SUB_NAMESPACE.pointer.get(cfpArrPtr) - val > -1e-12); + } +} + +static void +_catFunc3(given_, CFP_ITER_TYPE, _when_eq_expect_equal)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + + CFP_ITER_TYPE cfpArrIter1 = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr); + + assert_true(CFP_NAMESPACE.SUB_NAMESPACE.iterator.eq(cfpArrIter1, cfpArrIter1)); +} + +static void +_catFunc3(given_, CFP_ITER_TYPE, _when_neq_expect_not_equal)(void **state) +{ + struct setupVars *bundle = *state; + CFP_ARRAY_TYPE cfpArr = bundle->cfpArr; + + CFP_ITER_TYPE cfpArrIter1 = CFP_NAMESPACE.SUB_NAMESPACE.begin(cfpArr); + CFP_ITER_TYPE cfpArrIter2 = CFP_NAMESPACE.SUB_NAMESPACE.end(cfpArr); + + assert_true(CFP_NAMESPACE.SUB_NAMESPACE.iterator.neq(cfpArrIter1, cfpArrIter2)); +} diff --git a/tests/cfp/testCfpNamespace.c b/tests/cfp/testCfpNamespace.c new file mode 100644 index 00000000..87ac687f --- /dev/null +++ b/tests/cfp/testCfpNamespace.c @@ -0,0 +1,27 @@ +#include <stdarg.h> +#include <stddef.h> +#include <setjmp.h> +#include <cmocka.h> + +#include "zfp/array.h" + +/* only run this test when compiling with CFP_NAMESPACE=cfp2 */ + +/* test fails if compiler errors out */ +static void
+given_cfpCompiledWithNamespace_cfp2_when_linkToCfpLib_expect_namespacePersists(void** state) +{ + cfp_array1d arr = cfp2.array1d.ctor_default(); + assert_non_null(arr.object); + + cfp2.array1d.dtor(arr); +} + +int main() +{ + const struct CMUnitTest tests[] = { + cmocka_unit_test(given_cfpCompiledWithNamespace_cfp2_when_linkToCfpLib_expect_namespacePersists), + }; + + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/ci-utils/CMakeLists.txt b/tests/ci-utils/CMakeLists.txt new file mode 100644 index 00000000..e012e6cd --- /dev/null +++ b/tests/ci-utils/CMakeLists.txt @@ -0,0 +1,6 @@ +# This empty project is used to determine if OpenMP is available on CI machines +# without compiling any ZFP code. + +cmake_minimum_required(VERSION 3.9) + +find_package(OpenMP COMPONENTS C REQUIRED) diff --git a/tests/constants/1dDouble.h b/tests/constants/1dDouble.h new file mode 100644 index 00000000..e55b490f --- /dev/null +++ b/tests/constants/1dDouble.h @@ -0,0 +1,4 @@ +#include "universalConsts.h" +#include "doubleConsts.h" + +#define DIM_INT_STR 1dDouble diff --git a/tests/constants/1dFloat.h b/tests/constants/1dFloat.h new file mode 100644 index 00000000..feea2584 --- /dev/null +++ b/tests/constants/1dFloat.h @@ -0,0 +1,4 @@ +#include "universalConsts.h" +#include "floatConsts.h" + +#define DIM_INT_STR 1dFloat diff --git a/tests/constants/1dInt32.h b/tests/constants/1dInt32.h new file mode 100644 index 00000000..d2bffba3 --- /dev/null +++ b/tests/constants/1dInt32.h @@ -0,0 +1,4 @@ +#include "universalConsts.h" +#include "int32Consts.h" + +#define DIM_INT_STR 1dInt32 diff --git a/tests/constants/1dInt64.h b/tests/constants/1dInt64.h new file mode 100644 index 00000000..e1e0a48c --- /dev/null +++ b/tests/constants/1dInt64.h @@ -0,0 +1,4 @@ +#include "universalConsts.h" +#include "int64Consts.h" + +#define DIM_INT_STR 1dInt64 diff --git a/tests/constants/2dDouble.h b/tests/constants/2dDouble.h new file mode 100644 index 00000000..f8c66bac --- /dev/null +++ 
b/tests/constants/2dDouble.h @@ -0,0 +1,4 @@ +#include "universalConsts.h" +#include "doubleConsts.h" + +#define DIM_INT_STR 2dDouble diff --git a/tests/constants/2dFloat.h b/tests/constants/2dFloat.h new file mode 100644 index 00000000..cc9bd6e3 --- /dev/null +++ b/tests/constants/2dFloat.h @@ -0,0 +1,4 @@ +#include "universalConsts.h" +#include "floatConsts.h" + +#define DIM_INT_STR 2dFloat diff --git a/tests/constants/2dInt32.h b/tests/constants/2dInt32.h new file mode 100644 index 00000000..1792ea7a --- /dev/null +++ b/tests/constants/2dInt32.h @@ -0,0 +1,4 @@ +#include "universalConsts.h" +#include "int32Consts.h" + +#define DIM_INT_STR 2dInt32 diff --git a/tests/constants/2dInt64.h b/tests/constants/2dInt64.h new file mode 100644 index 00000000..f81b09b3 --- /dev/null +++ b/tests/constants/2dInt64.h @@ -0,0 +1,4 @@ +#include "universalConsts.h" +#include "int64Consts.h" + +#define DIM_INT_STR 2dInt64 diff --git a/tests/constants/3dDouble.h b/tests/constants/3dDouble.h new file mode 100644 index 00000000..cc45da6d --- /dev/null +++ b/tests/constants/3dDouble.h @@ -0,0 +1,4 @@ +#include "universalConsts.h" +#include "doubleConsts.h" + +#define DIM_INT_STR 3dDouble diff --git a/tests/constants/3dFloat.h b/tests/constants/3dFloat.h new file mode 100644 index 00000000..65fa0793 --- /dev/null +++ b/tests/constants/3dFloat.h @@ -0,0 +1,4 @@ +#include "universalConsts.h" +#include "floatConsts.h" + +#define DIM_INT_STR 3dFloat diff --git a/tests/constants/3dInt32.h b/tests/constants/3dInt32.h new file mode 100644 index 00000000..a966fc06 --- /dev/null +++ b/tests/constants/3dInt32.h @@ -0,0 +1,4 @@ +#include "universalConsts.h" +#include "int32Consts.h" + +#define DIM_INT_STR 3dInt32 diff --git a/tests/constants/3dInt64.h b/tests/constants/3dInt64.h new file mode 100644 index 00000000..dc89aaf8 --- /dev/null +++ b/tests/constants/3dInt64.h @@ -0,0 +1,4 @@ +#include "universalConsts.h" +#include "int64Consts.h" + +#define DIM_INT_STR 3dInt64 diff --git 
a/tests/constants/4dDouble.h b/tests/constants/4dDouble.h new file mode 100644 index 00000000..308a7f10 --- /dev/null +++ b/tests/constants/4dDouble.h @@ -0,0 +1,4 @@ +#include "universalConsts.h" +#include "doubleConsts.h" + +#define DIM_INT_STR 4dDouble diff --git a/tests/constants/4dFloat.h b/tests/constants/4dFloat.h new file mode 100644 index 00000000..75e1bc47 --- /dev/null +++ b/tests/constants/4dFloat.h @@ -0,0 +1,4 @@ +#include "universalConsts.h" +#include "floatConsts.h" + +#define DIM_INT_STR 4dFloat diff --git a/tests/constants/4dInt32.h b/tests/constants/4dInt32.h new file mode 100644 index 00000000..19087b57 --- /dev/null +++ b/tests/constants/4dInt32.h @@ -0,0 +1,4 @@ +#include "universalConsts.h" +#include "int32Consts.h" + +#define DIM_INT_STR 4dInt32 diff --git a/tests/constants/4dInt64.h b/tests/constants/4dInt64.h new file mode 100644 index 00000000..b42778ab --- /dev/null +++ b/tests/constants/4dInt64.h @@ -0,0 +1,4 @@ +#include "universalConsts.h" +#include "int64Consts.h" + +#define DIM_INT_STR 4dInt64 diff --git a/tests/constants/checksums/1dDouble.h b/tests/constants/checksums/1dDouble.h new file mode 100644 index 00000000..47aa497a --- /dev/null +++ b/tests/constants/checksums/1dDouble.h @@ -0,0 +1,37 @@ +static const checksum_tuples _1dDoubleChecksums[35] = { +{UINT64C(0x0), UINT64C(0x3), UINT64C(0xb519ca1b83e2b23f)}, +{UINT64C(0xa0), UINT64C(0x3), UINT64C(0xd1a4b883363919a6)}, +{UINT64C(0xd1), UINT64C(0x3), UINT64C(0x0)}, +{UINT64C(0xd2), UINT64C(0x3), UINT64C(0xc1de1da8)}, +{UINT64C(0xd3), UINT64C(0x3), UINT64C(0xeb469308)}, +{UINT64C(0xd4), UINT64C(0x3), UINT64C(0x97d201d8)}, +{UINT64C(0xd5), UINT64C(0x3), UINT64C(0x49dccd6ddfc3e6d0)}, +{UINT64C(0xd6), UINT64C(0x3), UINT64C(0xcfe894df52ba0b77)}, +{UINT64C(0xd7), UINT64C(0x3), UINT64C(0xdac7d74cdcc77f2)}, +{UINT64C(0xd8), UINT64C(0x3), UINT64C(0xaea40aaff9d6d766)}, +{UINT64C(0xd9), UINT64C(0x3), UINT64C(0xadd892805c539502)}, +{UINT64C(0xda), UINT64C(0x3), UINT64C(0x30cf22a9e4dafb50)}, 
+{UINT64C(0x2a0), UINT64C(0x3), UINT64C(0x7e0c5012d3011a34)}, +{UINT64C(0x120), UINT64C(0x3), UINT64C(0xf034a06e00000000)}, +{UINT64C(0x320), UINT64C(0x3), UINT64C(0x907a60b70d3a1692)}, +{UINT64C(0x400), UINT64C(0x100000), UINT64C(0x49d66cd3c1044484)}, +{UINT64C(0x4b0), UINT64C(0x100000), UINT64C(0xd19e1bd58ae7b771)}, +{UINT64C(0x530), UINT64C(0x100000), UINT64C(0xe823070a00000000)}, +{UINT64C(0x4b1), UINT64C(0x100000), UINT64C(0xd1de17cee7c8de3b)}, +{UINT64C(0x531), UINT64C(0x100000), UINT64C(0x174e113400000000)}, +{UINT64C(0x4b2), UINT64C(0x100000), UINT64C(0x89204000682034e7)}, +{UINT64C(0x532), UINT64C(0x100000), UINT64C(0xa8c3e7eef220a0e4)}, +{UINT64C(0x4a0), UINT64C(0x100000), UINT64C(0x713fc507f37f624d)}, +{UINT64C(0x520), UINT64C(0x100000), UINT64C(0xeecbcbd400000000)}, +{UINT64C(0x4a1), UINT64C(0x100000), UINT64C(0xa9c0457b722fce7c)}, +{UINT64C(0x521), UINT64C(0x100000), UINT64C(0x5763edcdef7122e3)}, +{UINT64C(0x4a2), UINT64C(0x100000), UINT64C(0xb6569815387d0248)}, +{UINT64C(0x522), UINT64C(0x100000), UINT64C(0x84a661bb59df99b6)}, +{UINT64C(0x4c0), UINT64C(0x100000), UINT64C(0x492797c144b2f5aa)}, +{UINT64C(0x540), UINT64C(0x100000), UINT64C(0x33931390025a6c7)}, +{UINT64C(0x4c1), UINT64C(0x100000), UINT64C(0x5f530b841d8ad3b2)}, +{UINT64C(0x541), UINT64C(0x100000), UINT64C(0x1245fb8d26d1004b)}, +{UINT64C(0x4c2), UINT64C(0x100000), UINT64C(0x8aaa2c3635420ca1)}, +{UINT64C(0x542), UINT64C(0x100000), UINT64C(0x495d680180ba02ab)}, +{UINT64C(0x4d0), UINT64C(0x100000), UINT64C(0x268fd6fbede5ed59)}, +}; diff --git a/tests/constants/checksums/1dFloat.h b/tests/constants/checksums/1dFloat.h new file mode 100644 index 00000000..38cbf1cc --- /dev/null +++ b/tests/constants/checksums/1dFloat.h @@ -0,0 +1,37 @@ +static const checksum_tuples _1dFloatChecksums[35] = { +{UINT64C(0x0), UINT64C(0x3), UINT64C(0xa35730c2)}, +{UINT64C(0xa0), UINT64C(0x3), UINT64C(0x40bdf65ac73b115c)}, +{UINT64C(0xd1), UINT64C(0x3), UINT64C(0x0)}, +{UINT64C(0xd2), UINT64C(0x3), 
UINT64C(0x7baad4bceaf3d5c4)}, +{UINT64C(0xd3), UINT64C(0x3), UINT64C(0xe3aab612eaf3d5c4)}, +{UINT64C(0xd4), UINT64C(0x3), UINT64C(0x71d55b09a42ac833)}, +{UINT64C(0xd5), UINT64C(0x3), UINT64C(0x8ca6efaedb5cd44e)}, +{UINT64C(0xd6), UINT64C(0x3), UINT64C(0x15845a4aad908133)}, +{UINT64C(0xd7), UINT64C(0x3), UINT64C(0xd277135bdcf92823)}, +{UINT64C(0xd8), UINT64C(0x3), UINT64C(0xed4fc9b68f3c00b8)}, +{UINT64C(0xd9), UINT64C(0x3), UINT64C(0x9fd129bea6d1bbd5)}, +{UINT64C(0xda), UINT64C(0x3), UINT64C(0xaee692dd4f340c9f)}, +{UINT64C(0x2a0), UINT64C(0x3), UINT64C(0xb4fe1804c2e28f46)}, +{UINT64C(0x120), UINT64C(0x3), UINT64C(0xe7f64d14)}, +{UINT64C(0x320), UINT64C(0x3), UINT64C(0x6bc01e0)}, +{UINT64C(0x400), UINT64C(0x100000), UINT64C(0x81123c83)}, +{UINT64C(0x4b0), UINT64C(0x100000), UINT64C(0x6698eeddef2c576f)}, +{UINT64C(0x530), UINT64C(0x100000), UINT64C(0x7582fd98)}, +{UINT64C(0x4b1), UINT64C(0x100000), UINT64C(0xd2f86ca7a1a270e6)}, +{UINT64C(0x531), UINT64C(0x100000), UINT64C(0xa6e9b884)}, +{UINT64C(0x4b2), UINT64C(0x100000), UINT64C(0x5b428a8dde9cc0c1)}, +{UINT64C(0x532), UINT64C(0x100000), UINT64C(0x81123c83)}, +{UINT64C(0x4a0), UINT64C(0x100000), UINT64C(0x4271157f4d1561e4)}, +{UINT64C(0x520), UINT64C(0x100000), UINT64C(0xc298af05)}, +{UINT64C(0x4a1), UINT64C(0x100000), UINT64C(0x450fcb1330dab01a)}, +{UINT64C(0x521), UINT64C(0x100000), UINT64C(0xfe1c110c)}, +{UINT64C(0x4a2), UINT64C(0x100000), UINT64C(0xae3f40d6903e54eb)}, +{UINT64C(0x522), UINT64C(0x100000), UINT64C(0x81123c83)}, +{UINT64C(0x4c0), UINT64C(0x100000), UINT64C(0xe8cef6c8c8ac1e62)}, +{UINT64C(0x540), UINT64C(0x100000), UINT64C(0xaef278e8)}, +{UINT64C(0x4c1), UINT64C(0x100000), UINT64C(0x83fe1bf6d49a1b6e)}, +{UINT64C(0x541), UINT64C(0x100000), UINT64C(0x60361242)}, +{UINT64C(0x4c2), UINT64C(0x100000), UINT64C(0xe7ab29faf14866d1)}, +{UINT64C(0x542), UINT64C(0x100000), UINT64C(0x67e8c596)}, +{UINT64C(0x4d0), UINT64C(0x100000), UINT64C(0xed4507b04c0b7919)}, +}; diff --git a/tests/constants/checksums/1dInt32.h 
b/tests/constants/checksums/1dInt32.h new file mode 100644 index 00000000..86273a5c --- /dev/null +++ b/tests/constants/checksums/1dInt32.h @@ -0,0 +1,21 @@ +static const checksum_tuples _1dInt32Checksums[19] = { +{UINT64C(0x0), UINT64C(0x3), UINT64C(0xf3e7c054)}, +{UINT64C(0xa0), UINT64C(0x3), UINT64C(0xc9d92bd5bdfd2c41)}, +{UINT64C(0x2a0), UINT64C(0x3), UINT64C(0x2b7ac04c5f2c27f9)}, +{UINT64C(0x120), UINT64C(0x3), UINT64C(0x4b38a824)}, +{UINT64C(0x320), UINT64C(0x3), UINT64C(0xfbfb6da8)}, +{UINT64C(0x400), UINT64C(0x1000), UINT64C(0x224cbf63)}, +{UINT64C(0x4b0), UINT64C(0x1000), UINT64C(0xd31e1d4f3028cea)}, +{UINT64C(0x530), UINT64C(0x1000), UINT64C(0xae502d39)}, +{UINT64C(0x4b1), UINT64C(0x1000), UINT64C(0x2d76d29099fb22ec)}, +{UINT64C(0x531), UINT64C(0x1000), UINT64C(0xdf369702)}, +{UINT64C(0x4b2), UINT64C(0x1000), UINT64C(0xb90d9da736a534a9)}, +{UINT64C(0x532), UINT64C(0x1000), UINT64C(0x8e2310b0)}, +{UINT64C(0x4a0), UINT64C(0x1000), UINT64C(0x804c71c729a559cf)}, +{UINT64C(0x520), UINT64C(0x1000), UINT64C(0xff2890c)}, +{UINT64C(0x4a1), UINT64C(0x1000), UINT64C(0xbe1ef33c903369a4)}, +{UINT64C(0x521), UINT64C(0x1000), UINT64C(0x35a6f08e)}, +{UINT64C(0x4a2), UINT64C(0x1000), UINT64C(0x8c1e4b2bdfca4bca)}, +{UINT64C(0x522), UINT64C(0x1000), UINT64C(0x8e2310b0)}, +{UINT64C(0x4d0), UINT64C(0x1000), UINT64C(0xcd449c2be8c8a337)}, +}; diff --git a/tests/constants/checksums/1dInt64.h b/tests/constants/checksums/1dInt64.h new file mode 100644 index 00000000..1eee00cb --- /dev/null +++ b/tests/constants/checksums/1dInt64.h @@ -0,0 +1,21 @@ +static const checksum_tuples _1dInt64Checksums[19] = { +{UINT64C(0x0), UINT64C(0x3), UINT64C(0x10decbfab896db77)}, +{UINT64C(0xa0), UINT64C(0x3), UINT64C(0x103c2fc57809b590)}, +{UINT64C(0x2a0), UINT64C(0x3), UINT64C(0x5a808f85fa746948)}, +{UINT64C(0x120), UINT64C(0x3), UINT64C(0x321e0ab000000000)}, +{UINT64C(0x320), UINT64C(0x3), UINT64C(0x1e0e4631271d520e)}, +{UINT64C(0x400), UINT64C(0x1000), UINT64C(0x261f22581146db18)}, 
+{UINT64C(0x4b0), UINT64C(0x1000), UINT64C(0xd31e1d4f3028cea)}, +{UINT64C(0x530), UINT64C(0x1000), UINT64C(0xae502d3900000000)}, +{UINT64C(0x4b1), UINT64C(0x1000), UINT64C(0x2d76d29099fb22ec)}, +{UINT64C(0x531), UINT64C(0x1000), UINT64C(0xdf36970200000000)}, +{UINT64C(0x4b2), UINT64C(0x1000), UINT64C(0x2fa06f3672c34330)}, +{UINT64C(0x532), UINT64C(0x1000), UINT64C(0xc64d5c7c923c2a4e)}, +{UINT64C(0x4a0), UINT64C(0x1000), UINT64C(0x804c71c729a559cf)}, +{UINT64C(0x520), UINT64C(0x1000), UINT64C(0xff2890c00000000)}, +{UINT64C(0x4a1), UINT64C(0x1000), UINT64C(0xdf50079b903369a4)}, +{UINT64C(0x521), UINT64C(0x1000), UINT64C(0xea935b1000000000)}, +{UINT64C(0x4a2), UINT64C(0x1000), UINT64C(0x9de253002800ea54)}, +{UINT64C(0x522), UINT64C(0x1000), UINT64C(0xebb9a3b522e681e)}, +{UINT64C(0x4d0), UINT64C(0x1000), UINT64C(0x718abd28a6b2f034)}, +}; diff --git a/tests/constants/checksums/2dDouble.h b/tests/constants/checksums/2dDouble.h new file mode 100644 index 00000000..2a1b6487 --- /dev/null +++ b/tests/constants/checksums/2dDouble.h @@ -0,0 +1,37 @@ +static const checksum_tuples _2dDoubleChecksums[35] = { +{UINT64C(0x0), UINT64C(0x3000003), UINT64C(0x1c772c230f3ccbb4)}, +{UINT64C(0xa0), UINT64C(0x3000003), UINT64C(0xc0a1814da6ce303b)}, +{UINT64C(0xd1), UINT64C(0x3000003), UINT64C(0x0)}, +{UINT64C(0xd2), UINT64C(0x3000003), UINT64C(0x83b0d73d)}, +{UINT64C(0xd3), UINT64C(0x3000003), UINT64C(0x289bae9d)}, +{UINT64C(0xd4), UINT64C(0x3000003), UINT64C(0x5d6e57cf)}, +{UINT64C(0xd5), UINT64C(0x3000003), UINT64C(0x52a77478bd871422)}, +{UINT64C(0xd6), UINT64C(0x3000003), UINT64C(0x5f99e005089267e0)}, +{UINT64C(0xd7), UINT64C(0x3000003), UINT64C(0x7d108dc9451d2cb7)}, +{UINT64C(0xd8), UINT64C(0x3000003), UINT64C(0x34c72b14e6ed9d8c)}, +{UINT64C(0xd9), UINT64C(0x3000003), UINT64C(0xba67c09098a3d01a)}, +{UINT64C(0xda), UINT64C(0x3000003), UINT64C(0x7d108dc9271c8a60)}, +{UINT64C(0x2a0), UINT64C(0x3000003), UINT64C(0xf47a9d0740fd12f1)}, +{UINT64C(0x120), UINT64C(0x3000003), 
UINT64C(0x7ac02ede00000000)}, +{UINT64C(0x320), UINT64C(0x3000003), UINT64C(0x12ef3cd64903bcca)}, +{UINT64C(0x400), UINT64C(0x400000400), UINT64C(0x856a073a7252dd4)}, +{UINT64C(0x4b0), UINT64C(0x400000400), UINT64C(0xe4efc0e6e0c4937f)}, +{UINT64C(0x530), UINT64C(0x400000400), UINT64C(0x8e010bbc00000000)}, +{UINT64C(0x4b1), UINT64C(0x400000400), UINT64C(0x26ab1ab12b69d8e7)}, +{UINT64C(0x531), UINT64C(0x400000400), UINT64C(0xa296ec5400000000)}, +{UINT64C(0x4b2), UINT64C(0x400000400), UINT64C(0xd7605316605ae257)}, +{UINT64C(0x532), UINT64C(0x400000400), UINT64C(0x626c78e0852013ee)}, +{UINT64C(0x4a0), UINT64C(0x400000400), UINT64C(0x10288c2054631266)}, +{UINT64C(0x520), UINT64C(0x400000400), UINT64C(0xd5495117b8fe1c02)}, +{UINT64C(0x4a1), UINT64C(0x400000400), UINT64C(0xb1d8865622fe6fc0)}, +{UINT64C(0x521), UINT64C(0x400000400), UINT64C(0x9437836903fc33a1)}, +{UINT64C(0x4a2), UINT64C(0x400000400), UINT64C(0x816b6359b90eaba1)}, +{UINT64C(0x522), UINT64C(0x400000400), UINT64C(0x124ac89d7f6e6511)}, +{UINT64C(0x4c0), UINT64C(0x400000400), UINT64C(0x7cb428be5481bd7b)}, +{UINT64C(0x540), UINT64C(0x400000400), UINT64C(0x2229f480c522c420)}, +{UINT64C(0x4c1), UINT64C(0x400000400), UINT64C(0xf94462ab31afa215)}, +{UINT64C(0x541), UINT64C(0x400000400), UINT64C(0x25f62aac2713f851)}, +{UINT64C(0x4c2), UINT64C(0x400000400), UINT64C(0x8beb41214f9ee0d6)}, +{UINT64C(0x542), UINT64C(0x400000400), UINT64C(0x94fd382138403fb1)}, +{UINT64C(0x4d0), UINT64C(0x400000400), UINT64C(0x1481e46e30d0f3ab)}, +}; diff --git a/tests/constants/checksums/2dFloat.h b/tests/constants/checksums/2dFloat.h new file mode 100644 index 00000000..801cc88d --- /dev/null +++ b/tests/constants/checksums/2dFloat.h @@ -0,0 +1,37 @@ +static const checksum_tuples _2dFloatChecksums[35] = { +{UINT64C(0x0), UINT64C(0x3000003), UINT64C(0xd61ebeeb)}, +{UINT64C(0xa0), UINT64C(0x3000003), UINT64C(0xda4c301a8e0f8cee)}, +{UINT64C(0xd1), UINT64C(0x3000003), UINT64C(0x0)}, +{UINT64C(0xd2), UINT64C(0x3000003), 
UINT64C(0xeabd0942eaf3d5c4)}, +{UINT64C(0xd3), UINT64C(0x3000003), UINT64C(0x37364bbaeaf3d5c4)}, +{UINT64C(0xd4), UINT64C(0x3000003), UINT64C(0x1bab25dda42ac833)}, +{UINT64C(0xd5), UINT64C(0x3000003), UINT64C(0x15efe9eb467df9de)}, +{UINT64C(0xd6), UINT64C(0x3000003), UINT64C(0x646c26ae1386d3f)}, +{UINT64C(0xd7), UINT64C(0x3000003), UINT64C(0x927724ec9f90816d)}, +{UINT64C(0xd8), UINT64C(0x3000003), UINT64C(0x8d27f49059a9fe98)}, +{UINT64C(0xd9), UINT64C(0x3000003), UINT64C(0x9d4930c42f82c1fb)}, +{UINT64C(0xda), UINT64C(0x3000003), UINT64C(0x11acf6d756257748)}, +{UINT64C(0x2a0), UINT64C(0x3000003), UINT64C(0x584942f81bec40fb)}, +{UINT64C(0x120), UINT64C(0x3000003), UINT64C(0xd183b619)}, +{UINT64C(0x320), UINT64C(0x3000003), UINT64C(0x713809a7)}, +{UINT64C(0x400), UINT64C(0x400000400), UINT64C(0xe4bfe4e)}, +{UINT64C(0x4b0), UINT64C(0x400000400), UINT64C(0x8417e3e4287f38b5)}, +{UINT64C(0x530), UINT64C(0x400000400), UINT64C(0x9b77e022)}, +{UINT64C(0x4b1), UINT64C(0x400000400), UINT64C(0xf5356ab8f5b59e8a)}, +{UINT64C(0x531), UINT64C(0x400000400), UINT64C(0x541a3433)}, +{UINT64C(0x4b2), UINT64C(0x400000400), UINT64C(0xa537f64220d1fc1d)}, +{UINT64C(0x532), UINT64C(0x400000400), UINT64C(0xe4bfe4e)}, +{UINT64C(0x4a0), UINT64C(0x400000400), UINT64C(0xd183e05b7c3be5eb)}, +{UINT64C(0x520), UINT64C(0x400000400), UINT64C(0x5198a34b)}, +{UINT64C(0x4a1), UINT64C(0x400000400), UINT64C(0x254679da05758c1a)}, +{UINT64C(0x521), UINT64C(0x400000400), UINT64C(0xb9126f4)}, +{UINT64C(0x4a2), UINT64C(0x400000400), UINT64C(0x72cd5c52aa46c2da)}, +{UINT64C(0x522), UINT64C(0x400000400), UINT64C(0xe4bfe4e)}, +{UINT64C(0x4c0), UINT64C(0x400000400), UINT64C(0x211f16ea5922b678)}, +{UINT64C(0x540), UINT64C(0x400000400), UINT64C(0x2e0e3c8b)}, +{UINT64C(0x4c1), UINT64C(0x400000400), UINT64C(0xf2a1526474d8ee29)}, +{UINT64C(0x541), UINT64C(0x400000400), UINT64C(0xb6a7efcb)}, +{UINT64C(0x4c2), UINT64C(0x400000400), UINT64C(0x53ed9feb9ca6dd1a)}, +{UINT64C(0x542), UINT64C(0x400000400), UINT64C(0x18bad4a1)}, 
+{UINT64C(0x4d0), UINT64C(0x400000400), UINT64C(0xe91cd56d5db78ef)}, +}; diff --git a/tests/constants/checksums/2dInt32.h b/tests/constants/checksums/2dInt32.h new file mode 100644 index 00000000..22c078df --- /dev/null +++ b/tests/constants/checksums/2dInt32.h @@ -0,0 +1,21 @@ +static const checksum_tuples _2dInt32Checksums[19] = { +{UINT64C(0x0), UINT64C(0x3000003), UINT64C(0x94aada73)}, +{UINT64C(0xa0), UINT64C(0x3000003), UINT64C(0x1264830f387e560)}, +{UINT64C(0x2a0), UINT64C(0x3000003), UINT64C(0xf09d2faf2ba66c16)}, +{UINT64C(0x120), UINT64C(0x3000003), UINT64C(0x3ece4105)}, +{UINT64C(0x320), UINT64C(0x3000003), UINT64C(0xbb514638)}, +{UINT64C(0x400), UINT64C(0x40000040), UINT64C(0xbafc4f7c)}, +{UINT64C(0x4b0), UINT64C(0x40000040), UINT64C(0x2a2ecc3532b9e47c)}, +{UINT64C(0x530), UINT64C(0x40000040), UINT64C(0xa0b51de9)}, +{UINT64C(0x4b1), UINT64C(0x40000040), UINT64C(0xa68050a6f03bbeac)}, +{UINT64C(0x531), UINT64C(0x40000040), UINT64C(0x8d1227ea)}, +{UINT64C(0x4b2), UINT64C(0x40000040), UINT64C(0x298cca6049cda102)}, +{UINT64C(0x532), UINT64C(0x40000040), UINT64C(0xb331c139)}, +{UINT64C(0x4a0), UINT64C(0x40000040), UINT64C(0x419666be07f8fd5b)}, +{UINT64C(0x520), UINT64C(0x40000040), UINT64C(0xc955273b)}, +{UINT64C(0x4a1), UINT64C(0x40000040), UINT64C(0x1bb735117e4b84c0)}, +{UINT64C(0x521), UINT64C(0x40000040), UINT64C(0xb2cff311)}, +{UINT64C(0x4a2), UINT64C(0x40000040), UINT64C(0x45e684a399d342bf)}, +{UINT64C(0x522), UINT64C(0x40000040), UINT64C(0xb331c139)}, +{UINT64C(0x4d0), UINT64C(0x40000040), UINT64C(0x55be045ea7268027)}, +}; diff --git a/tests/constants/checksums/2dInt64.h b/tests/constants/checksums/2dInt64.h new file mode 100644 index 00000000..459a53a5 --- /dev/null +++ b/tests/constants/checksums/2dInt64.h @@ -0,0 +1,21 @@ +static const checksum_tuples _2dInt64Checksums[19] = { +{UINT64C(0x0), UINT64C(0x3000003), UINT64C(0x60569371027435a7)}, +{UINT64C(0xa0), UINT64C(0x3000003), UINT64C(0x74905e21b1d68ae2)}, +{UINT64C(0x2a0), UINT64C(0x3000003), 
UINT64C(0xc83e2f319f07372e)}, +{UINT64C(0x120), UINT64C(0x3000003), UINT64C(0x8cdc228000000000)}, +{UINT64C(0x320), UINT64C(0x3000003), UINT64C(0x6bd17a493be325d1)}, +{UINT64C(0x400), UINT64C(0x40000040), UINT64C(0xf57fe1822b2a33c8)}, +{UINT64C(0x4b0), UINT64C(0x40000040), UINT64C(0x2a2ecc3532b9e47c)}, +{UINT64C(0x530), UINT64C(0x40000040), UINT64C(0xa0b51de900000000)}, +{UINT64C(0x4b1), UINT64C(0x40000040), UINT64C(0xa68050a6f03bbeac)}, +{UINT64C(0x531), UINT64C(0x40000040), UINT64C(0x8d1227ea00000000)}, +{UINT64C(0x4b2), UINT64C(0x40000040), UINT64C(0x7abe90820ae730a)}, +{UINT64C(0x532), UINT64C(0x40000040), UINT64C(0x4384aefdd310e015)}, +{UINT64C(0x4a0), UINT64C(0x40000040), UINT64C(0x419666be07f8fd5b)}, +{UINT64C(0x520), UINT64C(0x40000040), UINT64C(0xc955273b00000000)}, +{UINT64C(0x4a1), UINT64C(0x40000040), UINT64C(0xd9cd09fd7e4b84c0)}, +{UINT64C(0x521), UINT64C(0x40000040), UINT64C(0x74b2370100000000)}, +{UINT64C(0x4a2), UINT64C(0x40000040), UINT64C(0x8bed6d7ee10836ae)}, +{UINT64C(0x522), UINT64C(0x40000040), UINT64C(0xe0b475056c768219)}, +{UINT64C(0x4d0), UINT64C(0x40000040), UINT64C(0xaced76ad1c2ebb9)}, +}; diff --git a/tests/constants/checksums/3dDouble.h b/tests/constants/checksums/3dDouble.h new file mode 100644 index 00000000..298c8bf2 --- /dev/null +++ b/tests/constants/checksums/3dDouble.h @@ -0,0 +1,37 @@ +static const checksum_tuples _3dDoubleChecksums[35] = { +{UINT64C(0x0), UINT64C(0x300030003), UINT64C(0x5f9e82c4fef6f593)}, +{UINT64C(0xa0), UINT64C(0x300030003), UINT64C(0x20a6c761afd4380b)}, +{UINT64C(0xd1), UINT64C(0x300030003), UINT64C(0x0)}, +{UINT64C(0xd2), UINT64C(0x300030003), UINT64C(0x927724ecdcf219fb)}, +{UINT64C(0xd3), UINT64C(0x300030003), UINT64C(0x393a6de095b240e0)}, +{UINT64C(0xd4), UINT64C(0x300030003), UINT64C(0x9c9d36f01c36b045)}, +{UINT64C(0xd5), UINT64C(0x300030003), UINT64C(0xa71ba2fe0b649fc)}, +{UINT64C(0xd6), UINT64C(0x300030003), UINT64C(0x7e8c15054d871bd9)}, +{UINT64C(0xd7), UINT64C(0x300030003), 
UINT64C(0xe4eab78245c08a26)}, +{UINT64C(0xd8), UINT64C(0x300030003), UINT64C(0x5ac46921892607c6)}, +{UINT64C(0xd9), UINT64C(0x300030003), UINT64C(0xbfb026919c6944c4)}, +{UINT64C(0xda), UINT64C(0x300030003), UINT64C(0xf3420697931ed828)}, +{UINT64C(0x2a0), UINT64C(0x300030003), UINT64C(0x9a658e0fe05b9657)}, +{UINT64C(0x120), UINT64C(0x300030003), UINT64C(0x4f653444ff3fdbe4)}, +{UINT64C(0x320), UINT64C(0x300030003), UINT64C(0x7e8c64faafedcb18)}, +{UINT64C(0x400), UINT64C(0x8000800080), UINT64C(0xb29ddfb4a7719b6a)}, +{UINT64C(0x4b0), UINT64C(0x8000800080), UINT64C(0x6b5b0dab297c9d33)}, +{UINT64C(0x530), UINT64C(0x8000800080), UINT64C(0x1e497b1f00000000)}, +{UINT64C(0x4b1), UINT64C(0x8000800080), UINT64C(0xe933645e8cf7a7c9)}, +{UINT64C(0x531), UINT64C(0x8000800080), UINT64C(0xdce089f900000000)}, +{UINT64C(0x4b2), UINT64C(0x8000800080), UINT64C(0xc3d061d1944a8106)}, +{UINT64C(0x532), UINT64C(0x8000800080), UINT64C(0x3817c78441377d10)}, +{UINT64C(0x4a0), UINT64C(0x8000800080), UINT64C(0xd3b75ae8488a556d)}, +{UINT64C(0x520), UINT64C(0x8000800080), UINT64C(0xf4bd2afd74af921)}, +{UINT64C(0x4a1), UINT64C(0x8000800080), UINT64C(0x8d8d80142436d812)}, +{UINT64C(0x521), UINT64C(0x8000800080), UINT64C(0x9103ee0106602bb1)}, +{UINT64C(0x4a2), UINT64C(0x8000800080), UINT64C(0x64e50911ed54c0ef)}, +{UINT64C(0x522), UINT64C(0x8000800080), UINT64C(0xcd7da85356f9db40)}, +{UINT64C(0x4c0), UINT64C(0x8000800080), UINT64C(0xca9d4c2be9c2a15b)}, +{UINT64C(0x540), UINT64C(0x8000800080), UINT64C(0x8eaf0fa126b3de89)}, +{UINT64C(0x4c1), UINT64C(0x8000800080), UINT64C(0x8f79006fd9e45619)}, +{UINT64C(0x541), UINT64C(0x8000800080), UINT64C(0xb0dd4ed6a7196f47)}, +{UINT64C(0x4c2), UINT64C(0x8000800080), UINT64C(0x5c056eecba4d5349)}, +{UINT64C(0x542), UINT64C(0x8000800080), UINT64C(0x3262044561f9cceb)}, +{UINT64C(0x4d0), UINT64C(0x8000800080), UINT64C(0xaf95ff6301796621)}, +}; diff --git a/tests/constants/checksums/3dFloat.h b/tests/constants/checksums/3dFloat.h new file mode 100644 index 
00000000..edb5f525 --- /dev/null +++ b/tests/constants/checksums/3dFloat.h @@ -0,0 +1,37 @@ +static const checksum_tuples _3dFloatChecksums[35] = { +{UINT64C(0x0), UINT64C(0x300030003), UINT64C(0x54572f34)}, +{UINT64C(0xa0), UINT64C(0x300030003), UINT64C(0x6ad38b388f18d118)}, +{UINT64C(0xd1), UINT64C(0x300030003), UINT64C(0x0)}, +{UINT64C(0xd2), UINT64C(0x300030003), UINT64C(0x4e0632d046a7a0e)}, +{UINT64C(0xd3), UINT64C(0x300030003), UINT64C(0xf375ed06da7f218c)}, +{UINT64C(0xd4), UINT64C(0x300030003), UINT64C(0x79aaf683295b4527)}, +{UINT64C(0xd5), UINT64C(0x300030003), UINT64C(0xc71006bf172ec200)}, +{UINT64C(0xd6), UINT64C(0x300030003), UINT64C(0x163602c727dbbba2)}, +{UINT64C(0xd7), UINT64C(0x300030003), UINT64C(0xda199ff1947e73d2)}, +{UINT64C(0xd8), UINT64C(0x300030003), UINT64C(0x84db4dd6885773b5)}, +{UINT64C(0xd9), UINT64C(0x300030003), UINT64C(0x68a0b34799c2f1f8)}, +{UINT64C(0xda), UINT64C(0x300030003), UINT64C(0xd4b6310ae6d2d4de)}, +{UINT64C(0x2a0), UINT64C(0x300030003), UINT64C(0x256107e3209389ee)}, +{UINT64C(0x120), UINT64C(0x300030003), UINT64C(0xd822c66d)}, +{UINT64C(0x320), UINT64C(0x300030003), UINT64C(0xc3e0b4fb)}, +{UINT64C(0x400), UINT64C(0x8000800080), UINT64C(0xdbe7e231)}, +{UINT64C(0x4b0), UINT64C(0x8000800080), UINT64C(0xe3b79c04a6174576)}, +{UINT64C(0x530), UINT64C(0x8000800080), UINT64C(0x6ea0403c)}, +{UINT64C(0x4b1), UINT64C(0x8000800080), UINT64C(0xb666d473ca7d7e1c)}, +{UINT64C(0x531), UINT64C(0x8000800080), UINT64C(0xc2408604)}, +{UINT64C(0x4b2), UINT64C(0x8000800080), UINT64C(0xb0c2a41ec2111183)}, +{UINT64C(0x532), UINT64C(0x8000800080), UINT64C(0x97a819ae)}, +{UINT64C(0x4a0), UINT64C(0x8000800080), UINT64C(0x5ec963fda5ed8273)}, +{UINT64C(0x520), UINT64C(0x8000800080), UINT64C(0xb0695ba7)}, +{UINT64C(0x4a1), UINT64C(0x8000800080), UINT64C(0xa72b103d6027cbef)}, +{UINT64C(0x521), UINT64C(0x8000800080), UINT64C(0xcdd7c8b6)}, +{UINT64C(0x4a2), UINT64C(0x8000800080), UINT64C(0x4e2c7e0bf502c3a1)}, +{UINT64C(0x522), UINT64C(0x8000800080), 
UINT64C(0x97a819ae)}, +{UINT64C(0x4c0), UINT64C(0x8000800080), UINT64C(0xdb9351a2125e34e4)}, +{UINT64C(0x540), UINT64C(0x8000800080), UINT64C(0x3518c38f)}, +{UINT64C(0x4c1), UINT64C(0x8000800080), UINT64C(0x2fd5a60cdd2227e)}, +{UINT64C(0x541), UINT64C(0x8000800080), UINT64C(0x4f0985dd)}, +{UINT64C(0x4c2), UINT64C(0x8000800080), UINT64C(0x73829fdec12a0374)}, +{UINT64C(0x542), UINT64C(0x8000800080), UINT64C(0xcb6afbd)}, +{UINT64C(0x4d0), UINT64C(0x8000800080), UINT64C(0xb6475a8758f10fe0)}, +}; diff --git a/tests/constants/checksums/3dInt32.h b/tests/constants/checksums/3dInt32.h new file mode 100644 index 00000000..fb6c8435 --- /dev/null +++ b/tests/constants/checksums/3dInt32.h @@ -0,0 +1,21 @@ +static const checksum_tuples _3dInt32Checksums[19] = { +{UINT64C(0x0), UINT64C(0x300030003), UINT64C(0xab8e83e9)}, +{UINT64C(0xa0), UINT64C(0x300030003), UINT64C(0xda55ac5950c74c2)}, +{UINT64C(0x2a0), UINT64C(0x300030003), UINT64C(0xb85a3bd936a5c392)}, +{UINT64C(0x120), UINT64C(0x300030003), UINT64C(0xdbb57cfa)}, +{UINT64C(0x320), UINT64C(0x300030003), UINT64C(0x205d2fad)}, +{UINT64C(0x400), UINT64C(0x1000100010), UINT64C(0xad7ade47)}, +{UINT64C(0x4b0), UINT64C(0x1000100010), UINT64C(0xc92ee0e3f6e6aa91)}, +{UINT64C(0x530), UINT64C(0x1000100010), UINT64C(0xd2482c01)}, +{UINT64C(0x4b1), UINT64C(0x1000100010), UINT64C(0x21b0a7777c2c5b2d)}, +{UINT64C(0x531), UINT64C(0x1000100010), UINT64C(0x9436e0c7)}, +{UINT64C(0x4b2), UINT64C(0x1000100010), UINT64C(0xfe72d7ca4ce4cd2b)}, +{UINT64C(0x532), UINT64C(0x1000100010), UINT64C(0xea428b3e)}, +{UINT64C(0x4a0), UINT64C(0x1000100010), UINT64C(0x32942f0afdb349c2)}, +{UINT64C(0x520), UINT64C(0x1000100010), UINT64C(0xb3d2ff2c)}, +{UINT64C(0x4a1), UINT64C(0x1000100010), UINT64C(0x3a036901bbfdee14)}, +{UINT64C(0x521), UINT64C(0x1000100010), UINT64C(0xb9258768)}, +{UINT64C(0x4a2), UINT64C(0x1000100010), UINT64C(0x8a8ae9c57224ef8e)}, +{UINT64C(0x522), UINT64C(0x1000100010), UINT64C(0xea428b3e)}, +{UINT64C(0x4d0), UINT64C(0x1000100010), 
UINT64C(0xf0ab4d96d89cc545)}, +}; diff --git a/tests/constants/checksums/3dInt64.h b/tests/constants/checksums/3dInt64.h new file mode 100644 index 00000000..2a6fd4c9 --- /dev/null +++ b/tests/constants/checksums/3dInt64.h @@ -0,0 +1,21 @@ +static const checksum_tuples _3dInt64Checksums[19] = { +{UINT64C(0x0), UINT64C(0x300030003), UINT64C(0xcc5133515849571c)}, +{UINT64C(0xa0), UINT64C(0x300030003), UINT64C(0x6c0ff959c2207d41)}, +{UINT64C(0x2a0), UINT64C(0x300030003), UINT64C(0xd6b771a93e2404f4)}, +{UINT64C(0x120), UINT64C(0x300030003), UINT64C(0x4b9f52d500000000)}, +{UINT64C(0x320), UINT64C(0x300030003), UINT64C(0xc78c8cca00000000)}, +{UINT64C(0x400), UINT64C(0x1000100010), UINT64C(0xee34e487f557278f)}, +{UINT64C(0x4b0), UINT64C(0x1000100010), UINT64C(0xc92ee0e3f6e6aa91)}, +{UINT64C(0x530), UINT64C(0x1000100010), UINT64C(0xd2482c0100000000)}, +{UINT64C(0x4b1), UINT64C(0x1000100010), UINT64C(0x21b0a7777c2c5b2d)}, +{UINT64C(0x531), UINT64C(0x1000100010), UINT64C(0x9436e0c700000000)}, +{UINT64C(0x4b2), UINT64C(0x1000100010), UINT64C(0xa8b1239155fdd8ab)}, +{UINT64C(0x532), UINT64C(0x1000100010), UINT64C(0xc723b42e1e4f2274)}, +{UINT64C(0x4a0), UINT64C(0x1000100010), UINT64C(0x32942f0afdb349c2)}, +{UINT64C(0x520), UINT64C(0x1000100010), UINT64C(0xb3d2ff2c00000000)}, +{UINT64C(0x4a1), UINT64C(0x1000100010), UINT64C(0x84e238f16919a151)}, +{UINT64C(0x521), UINT64C(0x1000100010), UINT64C(0x879bc89700000000)}, +{UINT64C(0x4a2), UINT64C(0x1000100010), UINT64C(0x4e6417e960207269)}, +{UINT64C(0x522), UINT64C(0x1000100010), UINT64C(0xc348c52175d9ec77)}, +{UINT64C(0x4d0), UINT64C(0x1000100010), UINT64C(0x43c8d544f70dccc5)}, +}; diff --git a/tests/constants/checksums/4dDouble.h b/tests/constants/checksums/4dDouble.h new file mode 100644 index 00000000..8f4b776d --- /dev/null +++ b/tests/constants/checksums/4dDouble.h @@ -0,0 +1,37 @@ +static const checksum_tuples _4dDoubleChecksums[35] = { +{UINT64C(0x0), UINT64C(0x3003003003), UINT64C(0x61f9b8c3ddcbe9b)}, +{UINT64C(0xa0), 
UINT64C(0x3003003003), UINT64C(0x3bd0d8f2da9e9acf)}, +{UINT64C(0xd1), UINT64C(0x3003003003), UINT64C(0x0)}, +{UINT64C(0xd2), UINT64C(0x3003003003), UINT64C(0xd7e189562d39c484)}, +{UINT64C(0xd3), UINT64C(0x3003003003), UINT64C(0x10254b13cbda2b97)}, +{UINT64C(0xd4), UINT64C(0x3003003003), UINT64C(0x8126589d3735e9d)}, +{UINT64C(0xd5), UINT64C(0x3003003003), UINT64C(0x81c6ab2ae3cbfbac)}, +{UINT64C(0xd6), UINT64C(0x3003003003), UINT64C(0xb7521b04e50f0123)}, +{UINT64C(0xd7), UINT64C(0x3003003003), UINT64C(0x2d382b62747da555)}, +{UINT64C(0xd8), UINT64C(0x3003003003), UINT64C(0xdff214dccadfe445)}, +{UINT64C(0xd9), UINT64C(0x3003003003), UINT64C(0xdab8e4ea9761352b)}, +{UINT64C(0xda), UINT64C(0x3003003003), UINT64C(0x65fa916b3e3e928e)}, +{UINT64C(0x2a0), UINT64C(0x3003003003), UINT64C(0x1dbd79f0a52ec95b)}, +{UINT64C(0x120), UINT64C(0x3003003003), UINT64C(0xbf4272427fcd2646)}, +{UINT64C(0x320), UINT64C(0x3003003003), UINT64C(0xdf41e51abd93ea8a)}, +{UINT64C(0x400), UINT64C(0x20020020020), UINT64C(0xe1c8a968261e4559)}, +{UINT64C(0x4b0), UINT64C(0x20020020020), UINT64C(0x7a0d035888f5d7e3)}, +{UINT64C(0x530), UINT64C(0x20020020020), UINT64C(0x9940d71100000000)}, +{UINT64C(0x4b1), UINT64C(0x20020020020), UINT64C(0xc0f286466ade809e)}, +{UINT64C(0x531), UINT64C(0x20020020020), UINT64C(0xaead20c8a88f2622)}, +{UINT64C(0x4b2), UINT64C(0x20020020020), UINT64C(0xbfd8f5f591cb0f2d)}, +{UINT64C(0x532), UINT64C(0x20020020020), UINT64C(0x4de9a84f4ab886fa)}, +{UINT64C(0x4a0), UINT64C(0x20020020020), UINT64C(0x94219b32ec93e2a9)}, +{UINT64C(0x520), UINT64C(0x20020020020), UINT64C(0xa1ca18c21794908b)}, +{UINT64C(0x4a1), UINT64C(0x20020020020), UINT64C(0x464485425bf411aa)}, +{UINT64C(0x521), UINT64C(0x20020020020), UINT64C(0x7abae3fe33d0ce6a)}, +{UINT64C(0x4a2), UINT64C(0x20020020020), UINT64C(0x8ca875e7386e2cea)}, +{UINT64C(0x522), UINT64C(0x20020020020), UINT64C(0x6a44d79a5a33d47d)}, +{UINT64C(0x4c0), UINT64C(0x20020020020), UINT64C(0xc0b867f744b71cc0)}, +{UINT64C(0x540), UINT64C(0x20020020020), 
UINT64C(0xb8f1525cd842fbd5)}, +{UINT64C(0x4c1), UINT64C(0x20020020020), UINT64C(0x6a1908a569eb1a99)}, +{UINT64C(0x541), UINT64C(0x20020020020), UINT64C(0xb14abd4386fddb81)}, +{UINT64C(0x4c2), UINT64C(0x20020020020), UINT64C(0xb920196fca3513eb)}, +{UINT64C(0x542), UINT64C(0x20020020020), UINT64C(0x249da35a6e8ca411)}, +{UINT64C(0x4d0), UINT64C(0x20020020020), UINT64C(0xd72af5ab206ebe50)}, +}; diff --git a/tests/constants/checksums/4dFloat.h b/tests/constants/checksums/4dFloat.h new file mode 100644 index 00000000..dc3d44e1 --- /dev/null +++ b/tests/constants/checksums/4dFloat.h @@ -0,0 +1,37 @@ +static const checksum_tuples _4dFloatChecksums[35] = { +{UINT64C(0x0), UINT64C(0x3003003003), UINT64C(0x8c5867f7)}, +{UINT64C(0xa0), UINT64C(0x3003003003), UINT64C(0x26580b0af77ece38)}, +{UINT64C(0xd1), UINT64C(0x3003003003), UINT64C(0x0)}, +{UINT64C(0xd2), UINT64C(0x3003003003), UINT64C(0xbd6cb9cd6d2735e7)}, +{UINT64C(0xd3), UINT64C(0x3003003003), UINT64C(0x943b526033810b7b)}, +{UINT64C(0xd4), UINT64C(0x3003003003), UINT64C(0x4a1da930b29a371d)}, +{UINT64C(0xd5), UINT64C(0x3003003003), UINT64C(0x276e4805f1ee4de9)}, +{UINT64C(0xd6), UINT64C(0x3003003003), UINT64C(0xcd3a562a15f5f5e9)}, +{UINT64C(0xd7), UINT64C(0x3003003003), UINT64C(0x241372c19e9d3507)}, +{UINT64C(0xd8), UINT64C(0x3003003003), UINT64C(0xbfeec5a9344e5b48)}, +{UINT64C(0xd9), UINT64C(0x3003003003), UINT64C(0xeac7292d88f982bf)}, +{UINT64C(0xda), UINT64C(0x3003003003), UINT64C(0x667025a3a09f4198)}, +{UINT64C(0x2a0), UINT64C(0x3003003003), UINT64C(0xb47c6e115d00b400)}, +{UINT64C(0x120), UINT64C(0x3003003003), UINT64C(0xda2b72f9)}, +{UINT64C(0x320), UINT64C(0x3003003003), UINT64C(0xfc12ceb2)}, +{UINT64C(0x400), UINT64C(0x20020020020), UINT64C(0x725f89ff)}, +{UINT64C(0x4b0), UINT64C(0x20020020020), UINT64C(0x59e13fe363db5c6f)}, +{UINT64C(0x530), UINT64C(0x20020020020), UINT64C(0xb444287b)}, +{UINT64C(0x4b1), UINT64C(0x20020020020), UINT64C(0x23d6299eeaa79a9e)}, +{UINT64C(0x531), UINT64C(0x20020020020), 
UINT64C(0x52fe0450)}, +{UINT64C(0x4b2), UINT64C(0x20020020020), UINT64C(0x69ff59c816afd8bd)}, +{UINT64C(0x532), UINT64C(0x20020020020), UINT64C(0xd916b61)}, +{UINT64C(0x4a0), UINT64C(0x20020020020), UINT64C(0xeaae9de596da1479)}, +{UINT64C(0x520), UINT64C(0x20020020020), UINT64C(0xf29a4049)}, +{UINT64C(0x4a1), UINT64C(0x20020020020), UINT64C(0xf0e8cdfbb12d5bdb)}, +{UINT64C(0x521), UINT64C(0x20020020020), UINT64C(0x8bc47f0d)}, +{UINT64C(0x4a2), UINT64C(0x20020020020), UINT64C(0x669314f4f9637698)}, +{UINT64C(0x522), UINT64C(0x20020020020), UINT64C(0xd916b61)}, +{UINT64C(0x4c0), UINT64C(0x20020020020), UINT64C(0xc22cce8aa431fffb)}, +{UINT64C(0x540), UINT64C(0x20020020020), UINT64C(0x83f41e)}, +{UINT64C(0x4c1), UINT64C(0x20020020020), UINT64C(0x4b0af02b26351468)}, +{UINT64C(0x541), UINT64C(0x20020020020), UINT64C(0x425b2a0d)}, +{UINT64C(0x4c2), UINT64C(0x20020020020), UINT64C(0xedc915189e4764f2)}, +{UINT64C(0x542), UINT64C(0x20020020020), UINT64C(0x3ca6456a)}, +{UINT64C(0x4d0), UINT64C(0x20020020020), UINT64C(0xf2a9c72c87868054)}, +}; diff --git a/tests/constants/checksums/4dInt32.h b/tests/constants/checksums/4dInt32.h new file mode 100644 index 00000000..a6998a7f --- /dev/null +++ b/tests/constants/checksums/4dInt32.h @@ -0,0 +1,21 @@ +static const checksum_tuples _4dInt32Checksums[19] = { +{UINT64C(0x0), UINT64C(0x3003003003), UINT64C(0x8b21ff0)}, +{UINT64C(0xa0), UINT64C(0x3003003003), UINT64C(0xf89b3fdf64ff5b5b)}, +{UINT64C(0x2a0), UINT64C(0x3003003003), UINT64C(0x8d094f52b8fd6250)}, +{UINT64C(0x120), UINT64C(0x3003003003), UINT64C(0xcaa8e882)}, +{UINT64C(0x320), UINT64C(0x3003003003), UINT64C(0x86320cb4)}, +{UINT64C(0x400), UINT64C(0x8008008008), UINT64C(0x89f6c535)}, +{UINT64C(0x4b0), UINT64C(0x8008008008), UINT64C(0x38d58bf8bf7f5b07)}, +{UINT64C(0x530), UINT64C(0x8008008008), UINT64C(0xbd347efd)}, +{UINT64C(0x4b1), UINT64C(0x8008008008), UINT64C(0xb9f8a476db61b946)}, +{UINT64C(0x531), UINT64C(0x8008008008), UINT64C(0x6f0e9866)}, +{UINT64C(0x4b2), 
UINT64C(0x8008008008), UINT64C(0xb44975c2cdae2907)}, +{UINT64C(0x532), UINT64C(0x8008008008), UINT64C(0x539b74c9)}, +{UINT64C(0x4a0), UINT64C(0x8008008008), UINT64C(0xabd0b79d9c135337)}, +{UINT64C(0x520), UINT64C(0x8008008008), UINT64C(0x5a8a7db4)}, +{UINT64C(0x4a1), UINT64C(0x8008008008), UINT64C(0xe331fda805ba7319)}, +{UINT64C(0x521), UINT64C(0x8008008008), UINT64C(0xec560874)}, +{UINT64C(0x4a2), UINT64C(0x8008008008), UINT64C(0xc934178cb9e06ff5)}, +{UINT64C(0x522), UINT64C(0x8008008008), UINT64C(0x539b74c9)}, +{UINT64C(0x4d0), UINT64C(0x8008008008), UINT64C(0x8c888a65b12c884)}, +}; diff --git a/tests/constants/checksums/4dInt64.h b/tests/constants/checksums/4dInt64.h new file mode 100644 index 00000000..a04e7f89 --- /dev/null +++ b/tests/constants/checksums/4dInt64.h @@ -0,0 +1,21 @@ +static const checksum_tuples _4dInt64Checksums[19] = { +{UINT64C(0x0), UINT64C(0x3003003003), UINT64C(0xc9f0cadc2b040375)}, +{UINT64C(0xa0), UINT64C(0x3003003003), UINT64C(0xbe695ffaef2d6055)}, +{UINT64C(0x2a0), UINT64C(0x3003003003), UINT64C(0x3bf1627a5fd514a7)}, +{UINT64C(0x120), UINT64C(0x3003003003), UINT64C(0x83e0508600000000)}, +{UINT64C(0x320), UINT64C(0x3003003003), UINT64C(0xa194665700000000)}, +{UINT64C(0x400), UINT64C(0x8008008008), UINT64C(0x3c7a84c24a0d97db)}, +{UINT64C(0x4b0), UINT64C(0x8008008008), UINT64C(0x38d58bf8bf7f5b07)}, +{UINT64C(0x530), UINT64C(0x8008008008), UINT64C(0xbd347efd00000000)}, +{UINT64C(0x4b1), UINT64C(0x8008008008), UINT64C(0xb9f8a476db61b946)}, +{UINT64C(0x531), UINT64C(0x8008008008), UINT64C(0x6f0e986600000000)}, +{UINT64C(0x4b2), UINT64C(0x8008008008), UINT64C(0xf1324a2092943e33)}, +{UINT64C(0x532), UINT64C(0x8008008008), UINT64C(0x6b2d4650a70cb4be)}, +{UINT64C(0x4a0), UINT64C(0x8008008008), UINT64C(0xabd0b79d9c135337)}, +{UINT64C(0x520), UINT64C(0x8008008008), UINT64C(0x5a8a7db400000000)}, +{UINT64C(0x4a1), UINT64C(0x8008008008), UINT64C(0x4269d84b05ba7319)}, +{UINT64C(0x521), UINT64C(0x8008008008), UINT64C(0x73c78b5d00000000)}, 
+{UINT64C(0x4a2), UINT64C(0x8008008008), UINT64C(0x3009aef996d98fa)}, +{UINT64C(0x522), UINT64C(0x8008008008), UINT64C(0x230d83c5490fa7dd)}, +{UINT64C(0x4d0), UINT64C(0x8008008008), UINT64C(0x6e014f2638fd24d2)}, +}; diff --git a/tests/constants/doubleConsts.h b/tests/constants/doubleConsts.h new file mode 100644 index 00000000..a48c9a50 --- /dev/null +++ b/tests/constants/doubleConsts.h @@ -0,0 +1,3 @@ +#define FL_PT_DATA +#define SCALAR_BITS 64 +#define ZFP_TYPE zfp_type_double diff --git a/tests/constants/floatConsts.h b/tests/constants/floatConsts.h new file mode 100644 index 00000000..67916857 --- /dev/null +++ b/tests/constants/floatConsts.h @@ -0,0 +1,3 @@ +#define FL_PT_DATA +#define SCALAR_BITS 32 +#define ZFP_TYPE zfp_type_float diff --git a/tests/constants/int32Consts.h b/tests/constants/int32Consts.h new file mode 100644 index 00000000..69ec2d28 --- /dev/null +++ b/tests/constants/int32Consts.h @@ -0,0 +1,2 @@ +#define SCALAR_BITS 32 +#define ZFP_TYPE zfp_type_int32 diff --git a/tests/constants/int64Consts.h b/tests/constants/int64Consts.h new file mode 100644 index 00000000..6c1093c3 --- /dev/null +++ b/tests/constants/int64Consts.h @@ -0,0 +1,2 @@ +#define SCALAR_BITS 64 +#define ZFP_TYPE zfp_type_int64 diff --git a/tests/constants/universalConsts.h b/tests/constants/universalConsts.h new file mode 100644 index 00000000..f9a43d8c --- /dev/null +++ b/tests/constants/universalConsts.h @@ -0,0 +1,4 @@ +#define BLOCK_SIDE_LEN 4 + +#define ZFP_RATE_PARAM_BITS 19 +#define ZFP_PREC_PARAM_BITS 22 diff --git a/tests/fortran/CMakeLists.txt b/tests/fortran/CMakeLists.txt new file mode 100644 index 00000000..a0490c9c --- /dev/null +++ b/tests/fortran/CMakeLists.txt @@ -0,0 +1,18 @@ +enable_language(Fortran) + +if(CMAKE_Fortran_COMPILER_ID MATCHES "GNU") + set(dialect "-ffree-form -std=f2008 -fimplicit-none") + set(bounds "-fbounds-check") +endif() +if(CMAKE_Fortran_COMPILER_ID MATCHES "Intel") + set(dialect "-stand f08 -free -implicitnone") + set(bounds "-check 
bounds") +endif() + +set(CMAKE_Fortran_MODULE_DIRECTORY ${CMAKE_BINARY_DIR}/modules) +set(CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG} ${bounds}") +set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} ${dialect}") + +add_executable(testFortran testFortran.f) +target_link_libraries(testFortran zFORp) +add_test(NAME testFortran COMMAND testFortran) diff --git a/tests/fortran/testFortran.f b/tests/fortran/testFortran.f new file mode 100644 index 00000000..3e1771fc --- /dev/null +++ b/tests/fortran/testFortran.f @@ -0,0 +1,103 @@ +program main + use zfp + use iso_c_binding + + ! loop counters + integer i, j + + ! input/decompressed arrays + integer xLen, yLen + integer, dimension(:, :), allocatable, target :: input_array + integer, dimension(:, :), allocatable, target :: decompressed_array + type(c_ptr) :: array_c_ptr + integer error, max_abs_error + + ! zfp_field + type(zFORp_field) :: field + + ! bitstream + character, dimension(:), allocatable, target :: buffer + type(c_ptr) :: buffer_c_ptr + integer (kind=8) buffer_size_bytes, bitstream_offset_bytes + type(zFORp_bitstream) :: bitstream, queried_bitstream + + ! zfp_stream + type(zFORp_stream) :: stream + real (kind=8) :: desired_rate, rate_result + integer :: dims, wra + integer :: zfp_type + + ! initialize input and decompressed arrays + xLen = 8 + yLen = 8 + allocate(input_array(xLen, yLen)) + do i = 1, xLen + do j = 1, yLen + input_array(i, j) = i * i + j * (j + 1) + enddo + enddo + + allocate(decompressed_array(xLen, yLen)) + + ! setup zfp_field + array_c_ptr = c_loc(input_array) + zfp_type = zFORp_type_int32 + field = zFORp_field_2d(array_c_ptr, zfp_type, xLen, yLen) + + ! setup bitstream + buffer_size_bytes = 256 + allocate(buffer(buffer_size_bytes)) + buffer_c_ptr = c_loc(buffer) + bitstream = zFORp_bitstream_stream_open(buffer_c_ptr, buffer_size_bytes) + + ! 
setup zfp_stream + stream = zFORp_stream_open(bitstream) + + desired_rate = 8.0 + dims = 2 + wra = 0 + zfp_type = zFORp_type_float + rate_result = zFORp_stream_set_rate(stream, desired_rate, zfp_type, dims, wra) + + queried_bitstream = zFORp_stream_bit_stream(stream) + + ! compress + bitstream_offset_bytes = zFORp_compress(stream, field) + write(*, *) "After compression, bitstream offset at " + write(*, *) bitstream_offset_bytes + + ! decompress + call zFORp_stream_rewind(stream) + array_c_ptr = c_loc(decompressed_array) + call zFORp_field_set_pointer(field, array_c_ptr) + + bitstream_offset_bytes = zFORp_decompress(stream, field) + write(*, *) "After decompression, bitstream offset at " + write(*, *) bitstream_offset_bytes + + max_abs_error = 0 + do i = 1, xLen + do j = 1, yLen + error = abs(decompressed_array(i, j) - input_array(i, j)) + max_abs_error = max(error, max_abs_error) + enddo + enddo + write(*, *) "Max absolute error: " + write(*, *) max_abs_error + + write(*, *) "Absolute errors: " + write(*, *) abs(input_array - decompressed_array) + + ! zfp library info + write(*, *) zFORp_version_string + write(*, *) zFORp_meta_null + + ! 
deallocations + call zFORp_stream_close(stream) + call zFORp_bitstream_stream_close(queried_bitstream) + call zFORp_field_free(field) + + deallocate(buffer) + deallocate(input_array) + deallocate(decompressed_array) +end program main diff --git a/tests/gitlab/corona-jobs.yml b/tests/gitlab/corona-jobs.yml new file mode 100644 index 00000000..fe75a6e5 --- /dev/null +++ b/tests/gitlab/corona-jobs.yml @@ -0,0 +1,17 @@ +########### +# HIP GPU # +########### + +rocm-3.10.0_build: + variables: + ci_cmake: "cmake/3.21.1" + ci_cmp_mod: "rocm/3.10.0" + ci_cmp_path: "/opt/rocm-3.10.0/hip" + extends: [.hip, .corona_build_gpu] + needs: [] + +rocm-3.10.0_test: + variables: + ci_test_regex: "Hip" + extends: [.corona_test_gpu] + needs: [rocm-3.10.0_build] diff --git a/tests/gitlab/corona-templates.yml b/tests/gitlab/corona-templates.yml new file mode 100644 index 00000000..770bd5a8 --- /dev/null +++ b/tests/gitlab/corona-templates.yml @@ -0,0 +1,12 @@ +.corona_job: + tags: + - batch + - corona + +.corona_build_gpu: + extends: [.build_gpu, .corona_job] + +.corona_test_gpu: + variables: + ci_test_regex: "." 
+ extends: [.test_gpu, .corona_job] diff --git a/tests/gitlab/gitlab-ci.yml b/tests/gitlab/gitlab-ci.yml new file mode 100644 index 00000000..a544a6bc --- /dev/null +++ b/tests/gitlab/gitlab-ci.yml @@ -0,0 +1,143 @@ +##################### +# Global Parameters # +##################### + +variables: + GIT_SUBMODULE_STRATEGY: recursive + LLNL_SLURM_SCHEDULER_PARAMETERS: "--nodes=1 -A asccasc -t 00:20:00" + LLNL_SERVICE_USER: zfp + +stages: + - build + - test + + +#################### +# Global Templates # +#################### + +# Build Stage Templates + +.build: + stage: build + artifacts: + when: always + paths: + - build + +.build_cpu: + before_script: + - |- + if [ "$ci_c_cmp" != "gcc" ]; then + module --latest load gcc + if (( $(gcc -dumpversion | sed 's/\..*//') < 5 )); then + echo "unable to find new enough gcc to support ${ci_c_cmp} build" + exit 1 + fi + export GXX_PATH=$(dirname $(which gcc))/../ + fi + - module reset + - module load $ci_cmake + - module load $ci_cmp_mod + - |- + if [ "$ci_lang" == "cpp" ]; then + export CXX=$(which $ci_cxx_cmp) + export CC=$(which $ci_c_cmp) + if [ -z ${CXX} ]; then + echo "cxx compiler not set" + exit 1 + elif [ -z ${CC} ]; then + echo "c compiler not set" + exit 1 + fi + elif [ "$ci_lang" == "c" ]; then + export CC=$(which $ci_c_cmp) + if [ -z ${CC} ]; then + echo "c compiler not set" + exit 1 + fi + fi + script: + - mkdir build + - cd build + - |- + export ci_cmake_cmp_flags="" + if [ "$ci_c_cmp" == "icc" ]; then + export ci_cmake_cmp_flags="-DCMAKE_CXX_FLAGS=-gcc-name=${GXX_PATH}/bin/gcc -DCMAKE_C_FLAGS=-gcc-name=${GXX_PATH}/bin/gcc" + elif [ "$ci_c_cmp" == "clang" ]; then + export ci_cmake_cmp_flags="-DCMAKE_CXX_FLAGS=--gcc-toolchain=${GXX_PATH} -DCMAKE_C_FLAGS=--gcc-toolchain=${GXX_PATH}" + fi + - cmake -DBUILD_TESTING_FULL=ON -DBUILD_UTILITIES=OFF -DZFP_WITH_CUDA=OFF ${ci_cmake_flags} ${ci_cmake_cmp_flags} .. + - cmake --build . 
+ extends: [.build] + +.build_gpu: + before_script: + - module reset + - module load $ci_cmake + - module load $ci_cmp_mod + - module load $ci_gcc_mod + script: + - mkdir build + - cd build + - cmake -DBUILD_TESTING_FULL=ON -DZFP_WITH_OPENMP=OFF -DBUILD_UTILITIES=OFF ${ci_cmake_flags} .. + - make -j + extends: [.build] + + +# Test Stage Templates + +.test: + stage: test + artifacts: + when: on_failure + paths: + - build/Testing + +.test_cpu: + script: + - cd build + - ctest -E "(Cuda|Hip)" -R "${ci_test_regex}" + extends: [.test] + +.test_gpu: + script: + - cd build + - ctest -R "${ci_test_regex}" + extends: [.test] + + +# Language Templates + +.cpp: + variables: + ci_lang: "cpp" + ci_cmake_flags: "-DBUILD_CFP=OFF -DBUILD_ZFPY=OFF -DBUILD_ZFORP=OFF" + +.c: + variables: + ci_lang: "c" + ci_cmake_flags: "-DBUILD_CFP=ON -DBUILD_ZFPY=OFF -DBUILD_ZFORP=OFF -DZFP_WITH_OPENMP=OFF" + +.cuda: + variables: + ci_lang: "cuda" + ci_cmake_flags: "-DZFP_WITH_CUDA=ON" + +#.hip: +# variables: +# ci_lang: "hip" +# ci_cmake_flags: "-DZFP_WITH_HIP=ON -DHIP_PATH=${ci_cmp_path} -DCMAKE_CXX_STANDARD=11 -DCMAKE_C_STANDARD=11 -DCMAKE_C_COMPILER=hipcc -DCMAKE_CXX_COMPILER=hipcc" + + +############ +# Includes # +############ + +include: + - local: tests/gitlab/pascal-templates.yml + - local: tests/gitlab/pascal-jobs.yml + - local: tests/gitlab/quartz-templates.yml + - local: tests/gitlab/quartz-jobs.yml +# - local: tests/gitlab/corona-templates.yml +# - local: tests/gitlab/corona-jobs.yml diff --git a/tests/gitlab/pascal-jobs.yml b/tests/gitlab/pascal-jobs.yml new file mode 100644 index 00000000..7d73f17a --- /dev/null +++ b/tests/gitlab/pascal-jobs.yml @@ -0,0 +1,17 @@ +############ +# CUDA GPU # +############ + +cuda-11.8.0_build: + variables: + ci_cmake: "cmake/3.14.5" + ci_cmp_mod: "cuda/11.8.0" + ci_gcc_mod: "gcc/10.3.1" + extends: [.cuda, .pascal_build_gpu] + needs: [] + +cuda-11.8.0_test: + variables: + ci_test_regex: "Cuda" + extends: [.pascal_test_gpu] + needs: [cuda-11.8.0_build] 
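The CI jobs above lean entirely on GitLab's `extends` chains (e.g. `cuda-11.8.0_build` extends `.cuda` and `.pascal_build_gpu`, which in turn extends `.build_gpu` and `.pascal_job`). A rough Python sketch of how such a chain flattens into one effective job — simplified to the `variables` key only, with the dicts below transcribed by hand from the YAML in this patch, so treat it as an illustration rather than GitLab's actual merge algorithm:

```python
# Simplified model of GitLab CI `extends` resolution for the jobs above.
# Only `variables` is merged here; later entries in an `extends` list, and
# then the job's own keys, take precedence — mirroring GitLab's behavior.

templates = {
    ".cuda": {"variables": {"ci_lang": "cuda", "ci_cmake_flags": "-DZFP_WITH_CUDA=ON"}},
    ".pascal_job": {"variables": {}},
    ".build_gpu": {"variables": {}},
    ".pascal_build_gpu": {"extends": [".build_gpu", ".pascal_job"], "variables": {}},
    "cuda-11.8.0_build": {
        "extends": [".cuda", ".pascal_build_gpu"],
        "variables": {
            "ci_cmake": "cmake/3.14.5",
            "ci_cmp_mod": "cuda/11.8.0",
            "ci_gcc_mod": "gcc/10.3.1",
        },
    },
}

def resolve(name, defs):
    """Flatten an `extends` chain into one effective `variables` dict."""
    job = defs[name]
    merged = {}
    for parent in job.get("extends", []):
        merged.update(resolve(parent, defs)["variables"])
    merged.update(job.get("variables", {}))
    return {"variables": merged}

effective = resolve("cuda-11.8.0_build", templates)
```

Real GitLab resolution also merges `tags`, `script`, `artifacts`, and the rest; the point is only that the build job ends up seeing both its own module variables and the flags contributed by `.cuda`.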
diff --git a/tests/gitlab/pascal-templates.yml b/tests/gitlab/pascal-templates.yml new file mode 100644 index 00000000..30b26269 --- /dev/null +++ b/tests/gitlab/pascal-templates.yml @@ -0,0 +1,12 @@ +.pascal_job: + tags: + - batch + - pascal + +.pascal_build_gpu: + extends: [.build_gpu, .pascal_job] + +.pascal_test_gpu: + variables: + ci_test_regex: "." + extends: [.test_gpu, .pascal_job] diff --git a/tests/gitlab/quartz-jobs.yml b/tests/gitlab/quartz-jobs.yml new file mode 100644 index 00000000..672c68a5 --- /dev/null +++ b/tests/gitlab/quartz-jobs.yml @@ -0,0 +1,64 @@ +########### +# CXX CPU # +########### + +cpp_gnu-10.3.1_build: + variables: + ci_cmake: "cmake/3.14.5" + ci_cxx_cmp: "g++" + ci_c_cmp: "gcc" + ci_cmp_mod: "gcc/10.3.1" + extends: [.cpp, .quartz_build_cpu] + needs: [] + +cpp_gnu-10.3.1_test: + extends: [.quartz_test_cpu] + needs: [cpp_gnu-10.3.1_build] + + +cpp_clang-14.0.6_build: + variables: + ci_cmake: "cmake/3.14.5" + ci_cxx_cmp: "clang++" + ci_c_cmp: "clang" + ci_cmp_mod: "clang/14.0.6" + extends: [.cpp, .quartz_build_cpu] + needs: [] + +cpp_clang-14.0.6_test: + extends: [.quartz_test_cpu] + needs: [cpp_clang-14.0.6_build] + + +cpp_intel-2022.1.0_build: + variables: + ci_cmake: "cmake/3.14.5" + ci_cxx_cmp: "icpc" + ci_c_cmp: "icc" + ci_cmp_mod: "intel/2022.1.0" + extends: [.cpp, .quartz_build_cpu] + needs: [] + +cpp_intel-2022.1.0_test: + extends: [.quartz_test_cpu] + needs: [cpp_intel-2022.1.0_build] + + + +######### +# C CPU # +######### + +c_gnu-10.3.1_build: + variables: + ci_cmake: "cmake/3.14.5" + ci_c_cmp: "gcc" + ci_cmp_mod: "gcc/10.3.1" + extends: [.c, .quartz_build_cpu] + needs: [] + +c_gnu-10.3.1_test: + variables: + ci_test_regex: "Cfp" + extends: [.quartz_test_cpu] + needs: [c_gnu-10.3.1_build] diff --git a/tests/gitlab/quartz-templates.yml b/tests/gitlab/quartz-templates.yml new file mode 100644 index 00000000..d4d18533 --- /dev/null +++ b/tests/gitlab/quartz-templates.yml @@ -0,0 +1,12 @@ +.quartz_job: + tags: + - batch + - 
quartz + +.quartz_build_cpu: + extends: [.build_cpu, .quartz_job] + +.quartz_test_cpu: + variables: + ci_test_regex: "." + extends: [.test_cpu, .quartz_job] diff --git a/tests/python/CMakeLists.txt b/tests/python/CMakeLists.txt new file mode 100644 index 00000000..589ac8af --- /dev/null +++ b/tests/python/CMakeLists.txt @@ -0,0 +1,40 @@ +if (CMAKE_VERSION VERSION_GREATER_EQUAL 3.27.0) + cmake_policy(SET CMP0148 OLD) +endif () + +set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_SOURCE_DIR}/python/scikit-build-cmake) + +find_package(PythonInterp REQUIRED) +find_package(PythonLibs REQUIRED) +find_package(PythonExtensions REQUIRED) +find_package(Cython REQUIRED) +find_package(NumPy REQUIRED) + +include_directories(${ZFP_SOURCE_DIR}/include) +include_directories(${ZFP_SOURCE_DIR}/python) +include_directories(${NumPy_INCLUDE_DIR}) + +include_directories(${ZFP_SOURCE_DIR}/tests/python) +include_directories(${ZFP_SOURCE_DIR}/tests/utils) +include_directories(${ZFP_SOURCE_DIR}) +add_cython_target(test_utils test_utils.pyx C PY3) +add_library(test_utils MODULE ${test_utils}) +target_link_libraries(test_utils zfp genSmoothRandNumsLib stridedOperationsLib zfpCompressionParamsLib zfpChecksumsLib zfpHashLib) +python_extension_module(test_utils) +set_target_properties(test_utils PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${PYLIB_BUILD_DIR}) + +set(TEST_PYTHON_PATH "${PYLIB_BUILD_DIR}") +if(MSVC) + set(TEST_PYTHON_PATH "${TEST_PYTHON_PATH}/${CMAKE_BUILD_TYPE}") +endif() + +if(DEFINED ENV{PYTHONPATH}) + set(TEST_PYTHON_PATH "${TEST_PYTHON_PATH}:$ENV{PYTHONPATH}") +endif() + +add_test(NAME test_numpy + COMMAND ${PYTHON_EXECUTABLE} test_numpy.py + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) + +set_tests_properties(test_numpy PROPERTIES + ENVIRONMENT PYTHONPATH=${TEST_PYTHON_PATH}) diff --git a/tests/python/test_numpy.py b/tests/python/test_numpy.py new file mode 100644 index 00000000..e35ed043 --- /dev/null +++ b/tests/python/test_numpy.py @@ -0,0 +1,238 @@ +#!/usr/bin/env python + 
+import unittest + +import zfpy +import test_utils +import numpy as np +try: + from packaging.version import parse as version_parse +except ImportError: + version_parse = None + + +class TestNumpy(unittest.TestCase): + def lossless_round_trip(self, orig_array): + compressed_array = zfpy.compress_numpy(orig_array, write_header=True) + decompressed_array = zfpy.decompress_numpy(compressed_array) + self.assertIsNone(np.testing.assert_array_equal(decompressed_array, orig_array)) + + def test_different_dimensions(self): + for dimensions in range(1, 5): + shape = [5] * dimensions + c_array = np.random.rand(*shape) + self.lossless_round_trip(c_array) + + shape = range(2, 2 + dimensions) + c_array = np.random.rand(*shape) + self.lossless_round_trip(c_array) + + def test_different_dtypes(self): + shape = (5, 5) + num_elements = shape[0] * shape[1] + + for dtype in [np.float32, np.float64]: + elements = np.random.random_sample(num_elements) + elements = elements.astype(dtype, casting="same_kind") + array = np.reshape(elements, newshape=shape) + self.lossless_round_trip(array) + + if (version_parse is not None and + (version_parse(np.__version__) >= version_parse("1.11.0")) + ): + for dtype in [np.int32, np.int64]: + array = np.random.randint(2**30, size=shape, dtype=dtype) + self.lossless_round_trip(array) + else: + array = np.random.randint(2**30, size=shape) + self.lossless_round_trip(array) + + def test_advanced_decompression_checksum(self): + ndims = 2 + ztype = zfpy.type_float + random_array = test_utils.getRandNumpyArray(ndims, ztype) + mode = zfpy.mode_fixed_accuracy + compress_param_num = 1 + compression_kwargs = { + "tolerance": test_utils.computeParameterValue( + mode, + compress_param_num + ), + } + compressed_array = zfpy.compress_numpy( + random_array, + write_header=False, + **compression_kwargs + ) + + # Decompression using the "advanced" interface which enforces no header, + # and the user must provide all the metadata + decompressed_array = 
np.empty_like(random_array) + zfpy._decompress( + compressed_array, + ztype, + random_array.shape, + out=decompressed_array, + **compression_kwargs + ) + decompressed_array_dims = decompressed_array.shape + tuple(0 for i in range(4 - decompressed_array.ndim)) + decompressed_checksum = test_utils.getChecksumDecompArray( + decompressed_array_dims, + ztype, + mode, + compress_param_num + ) + actual_checksum = test_utils.hashNumpyArray( + decompressed_array + ) + self.assertEqual(decompressed_checksum, actual_checksum) + + def test_memview_advanced_decompression_checksum(self): + ndims = 2 + ztype = zfpy.type_float + random_array = test_utils.getRandNumpyArray(ndims, ztype) + mode = zfpy.mode_fixed_accuracy + compress_param_num = 1 + compression_kwargs = { + "tolerance": test_utils.computeParameterValue( + mode, + compress_param_num + ), + } + compressed_array_tmp = zfpy.compress_numpy( + random_array, + write_header=False, + **compression_kwargs + ) + mem = memoryview(compressed_array_tmp) + compressed_array = np.array(mem, copy=False) + # Decompression using the "advanced" interface which enforces no header, + # and the user must provide all the metadata + decompressed_array = np.empty_like(random_array) + zfpy._decompress( + compressed_array, + ztype, + random_array.shape, + out=decompressed_array, + **compression_kwargs + ) + decompressed_array_dims = decompressed_array.shape + tuple(0 for i in range(4 - decompressed_array.ndim)) + decompressed_checksum = test_utils.getChecksumDecompArray( + decompressed_array_dims, + ztype, + mode, + compress_param_num + ) + actual_checksum = test_utils.hashNumpyArray( + decompressed_array + ) + self.assertEqual(decompressed_checksum, actual_checksum) + + def test_advanced_decompression_nonsquare(self): + for dimensions in range(1, 5): + shape = range(2, 2 + dimensions) + random_array = np.random.rand(*shape) + + decompressed_array = np.empty_like(random_array) + compressed_array = zfpy.compress_numpy( + random_array, + 
write_header=False, + ) + zfpy._decompress( + compressed_array, + zfpy.dtype_to_ztype(random_array.dtype), + random_array.shape, + out= decompressed_array, + ) + self.assertIsNone(np.testing.assert_array_equal(decompressed_array, random_array)) + + def test_utils(self): + for ndims in range(1, 5): + for ztype, ztype_str in [ + (zfpy.type_float, "float"), + (zfpy.type_double, "double"), + (zfpy.type_int32, "int32"), + (zfpy.type_int64, "int64"), + ]: + orig_random_array = test_utils.getRandNumpyArray(ndims, ztype) + orig_random_array_dims = orig_random_array.shape + tuple(0 for i in range(4 - orig_random_array.ndim)) + orig_checksum = test_utils.getChecksumOrigArray(orig_random_array_dims, ztype) + actual_checksum = test_utils.hashNumpyArray(orig_random_array) + self.assertEqual(orig_checksum, actual_checksum) + + for stride_str, stride_config in [ + ("as_is", test_utils.stride_as_is), + ("permuted", test_utils.stride_permuted), + ("interleaved", test_utils.stride_interleaved), + #("reversed", test_utils.stride_reversed), + ]: + # permuting a 1D array is not supported + if stride_config == test_utils.stride_permuted and ndims == 1: + continue + random_array = test_utils.generateStridedRandomNumpyArray( + stride_config, + orig_random_array + ) + random_array_dims = random_array.shape + tuple(0 for i in range(4 - random_array.ndim)) + self.assertTrue(np.equal(orig_random_array, random_array).all()) + + for compress_param_num in range(3): + modes = [(zfpy.mode_fixed_accuracy, "tolerance"), + (zfpy.mode_fixed_precision, "precision"), + (zfpy.mode_fixed_rate, "rate")] + if ztype in [zfpy.type_int32, zfpy.type_int64]: + modes = [modes[-1]] # only fixed-rate is supported for integers + for mode, mode_str in modes: + # Compression + compression_kwargs = { + mode_str: test_utils.computeParameterValue( + mode, + compress_param_num + ), + } + + compressed_array = zfpy.compress_numpy( + random_array, + write_header=False, + **compression_kwargs + ) + compressed_checksum = 
test_utils.getChecksumCompArray( + random_array_dims, + ztype, + mode, + compress_param_num + ) + actual_checksum = test_utils.hashCompressedArray( + compressed_array + ) + self.assertEqual(compressed_checksum, actual_checksum) + + # Decompression + decompressed_checksum = test_utils.getChecksumDecompArray( + random_array_dims, + ztype, + mode, + compress_param_num + ) + + # Decompression using the "public" interface + # requires a header, so re-compress with the header + # included in the stream + compressed_array_tmp = zfpy.compress_numpy( + random_array, + write_header=True, + **compression_kwargs + ) + mem = memoryview(compressed_array_tmp) + compressed_array = np.array(mem, copy=False) + decompressed_array = zfpy.decompress_numpy( + compressed_array, + ) + actual_checksum = test_utils.hashNumpyArray( + decompressed_array + ) + self.assertEqual(decompressed_checksum, actual_checksum) + + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/tests/python/test_utils.pyx b/tests/python/test_utils.pyx new file mode 100644 index 00000000..e792e61e --- /dev/null +++ b/tests/python/test_utils.pyx @@ -0,0 +1,512 @@ +# TODO: update zfpChecksums cython +import cython +from libc.stdlib cimport malloc, free +cimport libc.stdint as stdint +from libc.stddef cimport ptrdiff_t +from cython cimport view +from itertools import islice, repeat, chain + +import zfpy +cimport zfpy + +import numpy as np +cimport numpy as np + +ctypedef stdint.int32_t int32_t +ctypedef stdint.int64_t int64_t +ctypedef stdint.uint32_t uint32_t +ctypedef stdint.uint64_t uint64_t + +cdef extern from "genSmoothRandNums.h": + size_t intPow(size_t base, int exponent) + void generateSmoothRandInts64(size_t minTotalElements, + int numDims, + int amplitudeExp, + int64_t** outputArr, + size_t* outputSideLen, + size_t* outputTotalLen) + void generateSmoothRandInts32(size_t minTotalElements, + int numDims, + int amplitudeExp, + int32_t** outputArr32Ptr, + size_t* outputSideLen, + size_t* 
outputTotalLen) + void generateSmoothRandFloats(size_t minTotalElements, + int numDims, + float** outputArrPtr, + size_t* outputSideLen, + size_t* outputTotalLen) + void generateSmoothRandDoubles(size_t minTotalElements, + int numDims, + double** outputArrPtr, + size_t* outputSideLen, + size_t* outputTotalLen) + +cdef extern from "stridedOperations.h": + ctypedef enum stride_config: + AS_IS = 0, + PERMUTED = 1, + INTERLEAVED = 2, + REVERSED = 3 + + void reverseArray(void* inputArr, + void* outputArr, + size_t inputArrLen, + zfpy.zfp_type zfpType) + void interleaveArray(void* inputArr, + void* outputArr, + size_t inputArrLen, + zfpy.zfp_type zfpType) + int permuteSquareArray(void* inputArr, + void* outputArr, + size_t sideLen, + int dims, + zfpy.zfp_type zfpType) + void getReversedStrides(int dims, + size_t n[4], + ptrdiff_t s[4]) + void getInterleavedStrides(int dims, + size_t n[4], + ptrdiff_t s[4]) + void getPermutedStrides(int dims, + size_t n[4], + ptrdiff_t s[4]) + +cdef extern from "zfpCompressionParams.h": + int computeFixedPrecisionParam(int param) + size_t computeFixedRateParam(int param) + double computeFixedAccuracyParam(int param) + +cdef extern from "zfp.h": + ctypedef enum zfp_type: + zfp_type_none = 0, + zfp_type_int32 = 1, + zfp_type_int64 = 2, + zfp_type_float = 3, + zfp_type_double = 4 + +cdef extern from "zfpChecksums.h": + ctypedef enum test_type: + BLOCK_FULL_TEST = 0, + BLOCK_PARTIAL_TEST = 1, + ARRAY_TEST = 2 + + ctypedef enum subject: + ORIGINAL_INPUT = 0, + COMPRESSED_BITSTREAM = 1, + DECOMPRESSED_ARRAY = 2, + + void computeKeyOriginalInput(test_type tt, + size_t n[4], + uint64_t* key1, + uint64_t* key2) + void computeKey(test_type tt, + subject sjt, + size_t n[4], + zfpy.zfp_mode mode, + int miscParam, + uint64_t* key1, + uint64_t* key2) + uint64_t getChecksumByKey(int dims, + zfp_type type, + uint64_t key1, + uint64_t key2) + +cdef extern from "zfpHash.h": + uint64_t hashBitstream(uint64_t* ptrStart, + size_t bufsizeBytes) + uint32_t 
hashArray32(const uint32_t* arr, + size_t nx, + ptrdiff_t sx) + uint32_t hashStridedArray32(const uint32_t* arr, + size_t n[4], + ptrdiff_t s[4]) + uint64_t hashArray64(const uint64_t* arr, + size_t nx, + ptrdiff_t sx) + uint64_t hashStridedArray64(const uint64_t* arr, + size_t n[4], + ptrdiff_t s[4]) + +# enums +stride_as_is = AS_IS +stride_permuted = PERMUTED +stride_interleaved = INTERLEAVED +stride_reversed = REVERSED + +# functions +cdef validate_num_dimensions(int dims): + if dims > 4 or dims < 1: + raise ValueError("Unsupported number of dimensions: {}".format(dims)) + +cdef validate_ztype(zfpy.zfp_type ztype): + if ztype not in [ + zfpy.type_float, + zfpy.type_double, + zfpy.type_int32, + zfpy.type_int64 + ]: + raise ValueError("Unsupported ztype: {}".format(ztype)) + +cdef validate_mode(zfpy.zfp_mode mode): + if mode not in [ + zfpy.mode_fixed_rate, + zfpy.mode_fixed_precision, + zfpy.mode_fixed_accuracy, + ]: + raise ValueError("Unsupported mode: {}".format(mode)) + +cdef validate_compress_param(int comp_param): + if comp_param not in range(3): # i.e., [0, 1, 2] + raise ValueError( + "Unsupported compression parameter number: {}".format(comp_param) + ) + +cpdef getRandNumpyArray( + int numDims, + zfpy.zfp_type ztype, +): + validate_num_dimensions(numDims) + validate_ztype(ztype) + + cdef size_t minTotalElements = 0 + cdef int amplitudeExp = 0 + + if ztype in [zfpy.type_float, zfpy.type_double]: + minTotalElements = 1000000 + elif ztype in [zfpy.type_int32, zfpy.type_int64]: + minTotalElements = 4096 + + cdef int64_t* outputArrInt64 = NULL + cdef int32_t* outputArrInt32 = NULL + cdef float* outputArrFloat = NULL + cdef double* outputArrDouble = NULL + cdef size_t outputSideLen = 0 + cdef size_t outputTotalLen = 0 + cdef view.array viewArr = None + + if ztype == zfpy.type_int64: + amplitudeExp = 64 - 2 + generateSmoothRandInts64(minTotalElements, + numDims, + amplitudeExp, + &outputArrInt64, + &outputSideLen, + &outputTotalLen) + if numDims == 1: + viewArr = <int64_t[:outputSideLen]> outputArrInt64 + elif numDims == 2: + viewArr = <int64_t[:outputSideLen, :outputSideLen]> outputArrInt64 + elif numDims == 3: + viewArr = <int64_t[:outputSideLen, :outputSideLen, :outputSideLen]> outputArrInt64 + elif numDims == 4: + viewArr = <int64_t[:outputSideLen, :outputSideLen, :outputSideLen, :outputSideLen]> outputArrInt64 + elif ztype == zfpy.type_int32: + amplitudeExp = 32 - 2 + generateSmoothRandInts32(minTotalElements, + numDims, + amplitudeExp, + &outputArrInt32, + &outputSideLen, + &outputTotalLen) + if numDims == 1: + viewArr = <int32_t[:outputSideLen]> outputArrInt32 + elif numDims == 2: + viewArr = <int32_t[:outputSideLen, :outputSideLen]> outputArrInt32 + elif numDims == 3: + viewArr = <int32_t[:outputSideLen, :outputSideLen, :outputSideLen]> outputArrInt32 + elif numDims == 4: + viewArr = <int32_t[:outputSideLen, :outputSideLen, :outputSideLen, :outputSideLen]> outputArrInt32 + elif ztype == zfpy.type_float: + generateSmoothRandFloats(minTotalElements, + numDims, + &outputArrFloat, + &outputSideLen, + &outputTotalLen) + if numDims == 1: + viewArr = <float[:outputSideLen]> outputArrFloat + elif numDims == 2: + viewArr = <float[:outputSideLen, :outputSideLen]> outputArrFloat + elif numDims == 3: + viewArr = <float[:outputSideLen, :outputSideLen, :outputSideLen]> outputArrFloat + elif numDims == 4: + viewArr = <float[:outputSideLen, :outputSideLen, :outputSideLen, :outputSideLen]> outputArrFloat + elif ztype == zfpy.type_double: + generateSmoothRandDoubles(minTotalElements, + numDims, + &outputArrDouble, + &outputSideLen, + &outputTotalLen) + if numDims == 1: + viewArr = <double[:outputSideLen]> outputArrDouble + elif numDims == 2: + viewArr = <double[:outputSideLen, :outputSideLen]> outputArrDouble + elif numDims == 3: + viewArr = <double[:outputSideLen, :outputSideLen, :outputSideLen]> outputArrDouble + elif numDims == 4: + viewArr = <double[:outputSideLen, :outputSideLen, :outputSideLen, :outputSideLen]> outputArrDouble + else: + raise ValueError("Unknown zfp_type: {}".format(ztype)) + + return np.asarray(viewArr) + +# ====================================================== +# TODO: examine best way to add python block level support +cdef uint64_t getChecksumOriginalDataBlock( + int dims, + zfpy.zfp_type ztype +): + return 0 + + +cdef uint64_t getChecksumEncodedBlock( + int dims, + zfpy.zfp_type ztype +): + return 0 + + +cdef uint64_t getChecksumEncodedPartialBlock( + int dims, + zfpy.zfp_type ztype +): + return 0 + + +cdef uint64_t getChecksumDecodedBlock( + int dims, + zfpy.zfp_type ztype +): + return 0 + + +cdef uint64_t getChecksumDecodedPartialBlock( + int dims, + zfpy.zfp_type ztype +): + return 0 +# ====================================================== + +cdef uint64_t 
getChecksumOriginalDataArray( + int ndims, + size_t[4] dims, + zfpy.zfp_type ztype +): + cdef uint64_t[1] key1, key2 + computeKeyOriginalInput(ARRAY_TEST, dims, key1, key2) + return getChecksumByKey(ndims, ztype, key1[0], key2[0]) + +cdef uint64_t getChecksumCompressedBitstream( + int ndims, + size_t[4] dims, + zfpy.zfp_type ztype, + zfpy.zfp_mode mode, + int compressParamNum +): + cdef uint64_t[1] key1, key2 + computeKey(ARRAY_TEST, COMPRESSED_BITSTREAM, dims, mode, compressParamNum, key1, key2) + return getChecksumByKey(ndims, ztype, key1[0], key2[0]) + +cdef uint64_t getChecksumDecompressedArray( + int ndims, + size_t[4] dims, + zfpy.zfp_type ztype, + zfpy.zfp_mode mode, + int compressParamNum +): + cdef uint64_t[1] key1, key2 + computeKey(ARRAY_TEST, DECOMPRESSED_ARRAY, dims, mode, compressParamNum, key1, key2) + return getChecksumByKey(ndims, ztype, key1[0], key2[0]) + + +cpdef uint64_t getChecksumOrigArray( + dims, + zfpy.zfp_type ztype +): + cdef int ndims = 4-dims.count(0) + validate_num_dimensions(ndims) + validate_ztype(ztype) + + cdef size_t[4] d + for i in range(len(dims)): + d[i] = dims[i] + return getChecksumOriginalDataArray(ndims, d, ztype) + +cpdef uint64_t getChecksumCompArray( + dims, + zfpy.zfp_type ztype, + zfpy.zfp_mode mode, + int compressParamNum +): + cdef int ndims = 4-dims.count(0) + validate_num_dimensions(ndims) + validate_ztype(ztype) + validate_mode(mode) + validate_compress_param(compressParamNum) + + cdef size_t[4] d + for i in range(len(dims)): + d[i] = dims[i] + return getChecksumCompressedBitstream(ndims, d, ztype, mode, compressParamNum) + +cpdef uint64_t getChecksumDecompArray( + dims, + zfpy.zfp_type ztype, + zfpy.zfp_mode mode, + int compressParamNum +): + cdef int ndims = 4-dims.count(0) + validate_num_dimensions(ndims) + validate_ztype(ztype) + validate_mode(mode) + validate_compress_param(compressParamNum) + + cdef size_t[4] d + for i in range(len(dims)): + d[i] = dims[i] + return getChecksumDecompressedArray(ndims, d, 
ztype, mode, compressParamNum) + + +cpdef computeParameterValue(zfpy.zfp_mode mode, int param): + validate_mode(mode) + validate_compress_param(param) + + if mode == zfpy.mode_fixed_accuracy: + return computeFixedAccuracyParam(param) + elif mode == zfpy.mode_fixed_precision: + return computeFixedPrecisionParam(param) + elif mode == zfpy.mode_fixed_rate: + return computeFixedRateParam(param) + +cpdef hashStridedArray( + bytes inarray, + zfpy.zfp_type ztype, + shape, + strides, +): + cdef char* array = inarray + cdef size_t[4] padded_shape + for i in range(4): + padded_shape[i] = zfpy.gen_padded_int_list(shape)[i] + cdef ptrdiff_t[4] padded_strides + for i in range(4): + padded_strides[i] = zfpy.gen_padded_int_list(strides)[i] + + if ztype == zfpy.type_int32 or ztype == zfpy.type_float: + return hashStridedArray32(<uint32_t*> array, padded_shape, padded_strides) + elif ztype == zfpy.type_int64 or ztype == zfpy.type_double: + return hashStridedArray64(<uint64_t*> array, padded_shape, padded_strides) + +cpdef hashNumpyArray( + np.ndarray nparray, + stride_config stride_conf = AS_IS, +): + dtype = nparray.dtype + if dtype not in [np.int32, np.float32, np.int64, np.float64]: + raise ValueError("Unsupported numpy type: {}".format(dtype)) + if stride_conf not in [AS_IS, PERMUTED, INTERLEAVED, REVERSED]: + raise ValueError("Unsupported stride config: {}".format(stride_conf)) + + size = int(nparray.size) + cdef ptrdiff_t[4] strides + cdef size_t[4] shape + if stride_conf in [AS_IS, INTERLEAVED]: + stride_width = 1 if stride_conf is AS_IS else 2 + if dtype == np.int32 or dtype == np.float32: + return hashArray32(<uint32_t*> nparray.data, size, stride_width) + elif dtype == np.int64 or dtype == np.float64: + return hashArray64(<uint64_t*> nparray.data, size, stride_width) + elif stride_conf in [REVERSED, PERMUTED]: + for i in range(4): + strides[i] = zfpy.gen_padded_int_list( + [x for x in nparray.strides[:nparray.ndim]] + )[i] + for i in range(4): + shape[i] = zfpy.gen_padded_int_list( + [x for x in nparray.shape[:nparray.ndim]] + )[i] + if dtype == np.int32 or dtype == np.float32: + return hashStridedArray32(<uint32_t*> nparray.data, shape, strides) + elif dtype == np.int64 or dtype == np.float64: + return hashStridedArray64(<uint64_t*> nparray.data, shape, strides) + + +cpdef hashCompressedArray( + bytes array, +): + cdef const char* c_array = array + return hashBitstream(<uint64_t*> c_array, len(array)) + + +cpdef generateStridedRandomNumpyArray( + stride_config stride, + np.ndarray randomArray, +): + cdef int ndim = randomArray.ndim + shape = [int(x) for x in randomArray.shape[:ndim]] + dtype = randomArray.dtype + cdef zfpy.zfp_type ztype = zfpy.dtype_to_ztype(dtype) + cdef ptrdiff_t[4] strides + for i in range(4): + strides[i] = 0 + cdef size_t[4] dims + for i in range(4): + dims[i] = zfpy.gen_padded_int_list(shape)[i] + cdef size_t inputLen = len(randomArray) + cdef void* output_array_ptr = NULL + cdef np.ndarray output_array = None + cdef view.array output_array_view = None + + if stride == AS_IS: + # return an unmodified copy + return randomArray.copy(order='K') + elif stride == PERMUTED: + if ndim == 1: + raise ValueError("Permutation not supported on 1D arrays") + output_array = np.empty_like(randomArray, order='K') + getPermutedStrides(ndim, dims, strides) + for i in range(4): + strides[i] = int(strides[i]) * (randomArray.itemsize) + ret = permuteSquareArray( + randomArray.data, + output_array.data, + dims[0], + ndim, + ztype + ) + if ret != 0: + raise RuntimeError("Error permuting square array") + + return np.lib.stride_tricks.as_strided( + output_array, + shape=[x for x in dims[:ndim]], + strides=reversed([x for x in strides[:ndim]]), + ) + + elif stride == INTERLEAVED: + num_elements = np.prod(shape) + new_shape = [x for x in dims if x > 0] + new_shape[-1] *= 2 + for i in range(4): + dims[i] = zfpy.gen_padded_int_list(new_shape, pad=0, length=4)[i] + + output_array = np.empty( + new_shape, + dtype=dtype + ) + interleaveArray( + randomArray.data, + output_array.data, + 
num_elements, + ztype + ) + getInterleavedStrides(ndim, dims, strides) + for i in range(4): + strides[i] = int(strides[i]) * (randomArray.itemsize) + return np.lib.stride_tricks.as_strided( + output_array, + shape=shape, + strides=reversed([x for x in strides[:ndim]]), + ) + else: + raise ValueError("Unsupported stride config: {}".format(stride)) diff --git a/tests/src/CMakeLists.txt b/tests/src/CMakeLists.txt new file mode 100644 index 00000000..81185f9b --- /dev/null +++ b/tests/src/CMakeLists.txt @@ -0,0 +1,10 @@ +# compile tests +if(NOT DEFINED ZFP_OMP_TESTS_ONLY) + add_subdirectory(inline) + add_subdirectory(misc) + add_subdirectory(encode) + add_subdirectory(decode) +endif() + +add_subdirectory(endtoend) +add_subdirectory(execPolicy) diff --git a/tests/src/decode/CMakeLists.txt b/tests/src/decode/CMakeLists.txt new file mode 100644 index 00000000..50e29df3 --- /dev/null +++ b/tests/src/decode/CMakeLists.txt @@ -0,0 +1,49 @@ +function(zfp_add_block_tests dims type bits) + set(block_test_name testZfpDecodeBlock${dims}d${type}) + add_executable(${block_test_name} ${block_test_name}.c) + target_link_libraries(${block_test_name} + cmocka zfp rand${bits}Lib zfpHashLib zfpChecksumsLib) + if(HAVE_LIBM_MATH) + target_link_libraries(${block_test_name} m) + endif() + + target_compile_definitions(${block_test_name} PRIVATE ${zfp_private_defs}) + if(PRINT_CHECKSUMS) + target_compile_definitions(${block_test_name} PUBLIC PRINT_CHECKSUMS) + endif() + + add_test(NAME ${block_test_name} COMMAND ${block_test_name}) + + set(strided_block_test_name testZfpDecodeBlockStrided${dims}d${type}) + add_executable(${strided_block_test_name} ${strided_block_test_name}.c) + target_link_libraries(${strided_block_test_name} + cmocka zfp rand${bits}Lib zfpHashLib zfpChecksumsLib) + if(HAVE_LIBM_MATH) + target_link_libraries(${strided_block_test_name} m) + endif() + + target_compile_definitions(${strided_block_test_name} PRIVATE ${zfp_private_defs}) + if(PRINT_CHECKSUMS) + 
target_compile_definitions(${strided_block_test_name} PUBLIC PRINT_CHECKSUMS) + endif() + + add_test(NAME ${strided_block_test_name} COMMAND ${strided_block_test_name}) +endfunction() + +zfp_add_block_tests(1 Int32 32) +zfp_add_block_tests(1 Int64 64) +zfp_add_block_tests(2 Int32 32) +zfp_add_block_tests(2 Int64 64) +zfp_add_block_tests(3 Int32 32) +zfp_add_block_tests(3 Int64 64) +zfp_add_block_tests(4 Int32 32) +zfp_add_block_tests(4 Int64 64) + +zfp_add_block_tests(1 Float 32) +zfp_add_block_tests(1 Double 64) +zfp_add_block_tests(2 Float 32) +zfp_add_block_tests(2 Double 64) +zfp_add_block_tests(3 Float 32) +zfp_add_block_tests(3 Double 64) +zfp_add_block_tests(4 Float 32) +zfp_add_block_tests(4 Double 64) diff --git a/tests/src/decode/testZfpDecodeBlock1dDouble.c b/tests/src/decode/testZfpDecodeBlock1dDouble.c new file mode 100644 index 00000000..98a19e4f --- /dev/null +++ b/tests/src/decode/testZfpDecodeBlock1dDouble.c @@ -0,0 +1,13 @@ +#include "src/decode1d.c" + +#include "constants/1dDouble.h" +#include "utils/rand64.h" +#include "zfpDecodeBlockBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/block.c" + }; + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/src/decode/testZfpDecodeBlock1dFloat.c b/tests/src/decode/testZfpDecodeBlock1dFloat.c new file mode 100644 index 00000000..3e386c1e --- /dev/null +++ b/tests/src/decode/testZfpDecodeBlock1dFloat.c @@ -0,0 +1,13 @@ +#include "src/decode1f.c" + +#include "constants/1dFloat.h" +#include "utils/rand32.h" +#include "zfpDecodeBlockBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/block.c" + }; + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/src/decode/testZfpDecodeBlock1dInt32.c b/tests/src/decode/testZfpDecodeBlock1dInt32.c new file mode 100644 index 00000000..4925159c --- /dev/null +++ b/tests/src/decode/testZfpDecodeBlock1dInt32.c @@ -0,0 +1,13 @@ +#include "src/decode1i.c" + +#include 
"constants/1dInt32.h" +#include "utils/rand32.h" +#include "zfpDecodeBlockBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/block.c" + }; + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/src/decode/testZfpDecodeBlock1dInt64.c b/tests/src/decode/testZfpDecodeBlock1dInt64.c new file mode 100644 index 00000000..028b32ff --- /dev/null +++ b/tests/src/decode/testZfpDecodeBlock1dInt64.c @@ -0,0 +1,13 @@ +#include "src/decode1l.c" + +#include "constants/1dInt64.h" +#include "utils/rand64.h" +#include "zfpDecodeBlockBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/block.c" + }; + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/src/decode/testZfpDecodeBlock2dDouble.c b/tests/src/decode/testZfpDecodeBlock2dDouble.c new file mode 100644 index 00000000..dc8973f2 --- /dev/null +++ b/tests/src/decode/testZfpDecodeBlock2dDouble.c @@ -0,0 +1,13 @@ +#include "src/decode2d.c" + +#include "constants/2dDouble.h" +#include "utils/rand64.h" +#include "zfpDecodeBlockBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/block.c" + }; + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/src/decode/testZfpDecodeBlock2dFloat.c b/tests/src/decode/testZfpDecodeBlock2dFloat.c new file mode 100644 index 00000000..8a0eb127 --- /dev/null +++ b/tests/src/decode/testZfpDecodeBlock2dFloat.c @@ -0,0 +1,13 @@ +#include "src/decode2f.c" + +#include "constants/2dFloat.h" +#include "utils/rand32.h" +#include "zfpDecodeBlockBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/block.c" + }; + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/src/decode/testZfpDecodeBlock2dInt32.c b/tests/src/decode/testZfpDecodeBlock2dInt32.c new file mode 100644 index 00000000..dd44b463 --- /dev/null +++ b/tests/src/decode/testZfpDecodeBlock2dInt32.c @@ -0,0 +1,13 @@ +#include "src/decode2i.c" + 
+#include "constants/2dInt32.h" +#include "utils/rand32.h" +#include "zfpDecodeBlockBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/block.c" + }; + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/src/decode/testZfpDecodeBlock2dInt64.c b/tests/src/decode/testZfpDecodeBlock2dInt64.c new file mode 100644 index 00000000..2f079bd0 --- /dev/null +++ b/tests/src/decode/testZfpDecodeBlock2dInt64.c @@ -0,0 +1,13 @@ +#include "src/decode2l.c" + +#include "constants/2dInt64.h" +#include "utils/rand64.h" +#include "zfpDecodeBlockBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/block.c" + }; + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/src/decode/testZfpDecodeBlock3dDouble.c b/tests/src/decode/testZfpDecodeBlock3dDouble.c new file mode 100644 index 00000000..1f3b5bd9 --- /dev/null +++ b/tests/src/decode/testZfpDecodeBlock3dDouble.c @@ -0,0 +1,13 @@ +#include "src/decode3d.c" + +#include "constants/3dDouble.h" +#include "utils/rand64.h" +#include "zfpDecodeBlockBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/block.c" + }; + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/src/decode/testZfpDecodeBlock3dFloat.c b/tests/src/decode/testZfpDecodeBlock3dFloat.c new file mode 100644 index 00000000..1f6e0471 --- /dev/null +++ b/tests/src/decode/testZfpDecodeBlock3dFloat.c @@ -0,0 +1,13 @@ +#include "src/decode3f.c" + +#include "constants/3dFloat.h" +#include "utils/rand32.h" +#include "zfpDecodeBlockBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/block.c" + }; + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/src/decode/testZfpDecodeBlock3dInt32.c b/tests/src/decode/testZfpDecodeBlock3dInt32.c new file mode 100644 index 00000000..35576d52 --- /dev/null +++ b/tests/src/decode/testZfpDecodeBlock3dInt32.c @@ -0,0 +1,13 @@ +#include 
"src/decode3i.c" + +#include "constants/3dInt32.h" +#include "utils/rand32.h" +#include "zfpDecodeBlockBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/block.c" + }; + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/src/decode/testZfpDecodeBlock3dInt64.c b/tests/src/decode/testZfpDecodeBlock3dInt64.c new file mode 100644 index 00000000..9d96cfee --- /dev/null +++ b/tests/src/decode/testZfpDecodeBlock3dInt64.c @@ -0,0 +1,13 @@ +#include "src/decode3l.c" + +#include "constants/3dInt64.h" +#include "utils/rand64.h" +#include "zfpDecodeBlockBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/block.c" + }; + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/src/decode/testZfpDecodeBlock4dDouble.c b/tests/src/decode/testZfpDecodeBlock4dDouble.c new file mode 100644 index 00000000..3c030800 --- /dev/null +++ b/tests/src/decode/testZfpDecodeBlock4dDouble.c @@ -0,0 +1,13 @@ +#include "src/decode4d.c" + +#include "constants/4dDouble.h" +#include "utils/rand64.h" +#include "zfpDecodeBlockBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/block.c" + }; + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/src/decode/testZfpDecodeBlock4dFloat.c b/tests/src/decode/testZfpDecodeBlock4dFloat.c new file mode 100644 index 00000000..ed373729 --- /dev/null +++ b/tests/src/decode/testZfpDecodeBlock4dFloat.c @@ -0,0 +1,13 @@ +#include "src/decode4f.c" + +#include "constants/4dFloat.h" +#include "utils/rand32.h" +#include "zfpDecodeBlockBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/block.c" + }; + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/src/decode/testZfpDecodeBlock4dInt32.c b/tests/src/decode/testZfpDecodeBlock4dInt32.c new file mode 100644 index 00000000..87c60dff --- /dev/null +++ b/tests/src/decode/testZfpDecodeBlock4dInt32.c @@ -0,0 +1,13 @@ 
+#include "src/decode4i.c" + +#include "constants/4dInt32.h" +#include "utils/rand32.h" +#include "zfpDecodeBlockBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/block.c" + }; + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/src/decode/testZfpDecodeBlock4dInt64.c b/tests/src/decode/testZfpDecodeBlock4dInt64.c new file mode 100644 index 00000000..84464d1c --- /dev/null +++ b/tests/src/decode/testZfpDecodeBlock4dInt64.c @@ -0,0 +1,13 @@ +#include "src/decode4l.c" + +#include "constants/4dInt64.h" +#include "utils/rand64.h" +#include "zfpDecodeBlockBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/block.c" + }; + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/src/decode/testZfpDecodeBlockStrided1dDouble.c b/tests/src/decode/testZfpDecodeBlockStrided1dDouble.c new file mode 100644 index 00000000..d6f9af3c --- /dev/null +++ b/tests/src/decode/testZfpDecodeBlockStrided1dDouble.c @@ -0,0 +1,13 @@ +#include "src/decode1d.c" + +#include "constants/1dDouble.h" +#include "utils/rand64.h" +#include "zfpDecodeBlockStridedBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/blockStrided.c" + }; + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/src/decode/testZfpDecodeBlockStrided1dFloat.c b/tests/src/decode/testZfpDecodeBlockStrided1dFloat.c new file mode 100644 index 00000000..a67fd330 --- /dev/null +++ b/tests/src/decode/testZfpDecodeBlockStrided1dFloat.c @@ -0,0 +1,13 @@ +#include "src/decode1f.c" + +#include "constants/1dFloat.h" +#include "utils/rand32.h" +#include "zfpDecodeBlockStridedBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/blockStrided.c" + }; + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/src/decode/testZfpDecodeBlockStrided1dInt32.c b/tests/src/decode/testZfpDecodeBlockStrided1dInt32.c new file mode 100644 index 
00000000..02ae01fb --- /dev/null +++ b/tests/src/decode/testZfpDecodeBlockStrided1dInt32.c @@ -0,0 +1,13 @@ +#include "src/decode1i.c" + +#include "constants/1dInt32.h" +#include "utils/rand32.h" +#include "zfpDecodeBlockStridedBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/blockStrided.c" + }; + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/src/decode/testZfpDecodeBlockStrided1dInt64.c b/tests/src/decode/testZfpDecodeBlockStrided1dInt64.c new file mode 100644 index 00000000..e66c0765 --- /dev/null +++ b/tests/src/decode/testZfpDecodeBlockStrided1dInt64.c @@ -0,0 +1,13 @@ +#include "src/decode1l.c" + +#include "constants/1dInt64.h" +#include "utils/rand64.h" +#include "zfpDecodeBlockStridedBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/blockStrided.c" + }; + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/src/decode/testZfpDecodeBlockStrided2dDouble.c b/tests/src/decode/testZfpDecodeBlockStrided2dDouble.c new file mode 100644 index 00000000..9e2691a9 --- /dev/null +++ b/tests/src/decode/testZfpDecodeBlockStrided2dDouble.c @@ -0,0 +1,13 @@ +#include "src/decode2d.c" + +#include "constants/2dDouble.h" +#include "utils/rand64.h" +#include "zfpDecodeBlockStridedBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/blockStrided.c" + }; + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/src/decode/testZfpDecodeBlockStrided2dFloat.c b/tests/src/decode/testZfpDecodeBlockStrided2dFloat.c new file mode 100644 index 00000000..ebf0b0bf --- /dev/null +++ b/tests/src/decode/testZfpDecodeBlockStrided2dFloat.c @@ -0,0 +1,13 @@ +#include "src/decode2f.c" + +#include "constants/2dFloat.h" +#include "utils/rand32.h" +#include "zfpDecodeBlockStridedBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/blockStrided.c" + }; + return cmocka_run_group_tests(tests, NULL, NULL); +} 
diff --git a/tests/src/decode/testZfpDecodeBlockStrided2dInt32.c b/tests/src/decode/testZfpDecodeBlockStrided2dInt32.c new file mode 100644 index 00000000..72659aa0 --- /dev/null +++ b/tests/src/decode/testZfpDecodeBlockStrided2dInt32.c @@ -0,0 +1,13 @@ +#include "src/decode2i.c" + +#include "constants/2dInt32.h" +#include "utils/rand32.h" +#include "zfpDecodeBlockStridedBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/blockStrided.c" + }; + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/src/decode/testZfpDecodeBlockStrided2dInt64.c b/tests/src/decode/testZfpDecodeBlockStrided2dInt64.c new file mode 100644 index 00000000..6244fd90 --- /dev/null +++ b/tests/src/decode/testZfpDecodeBlockStrided2dInt64.c @@ -0,0 +1,13 @@ +#include "src/decode2l.c" + +#include "constants/2dInt64.h" +#include "utils/rand64.h" +#include "zfpDecodeBlockStridedBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/blockStrided.c" + }; + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/src/decode/testZfpDecodeBlockStrided3dDouble.c b/tests/src/decode/testZfpDecodeBlockStrided3dDouble.c new file mode 100644 index 00000000..b823ba6e --- /dev/null +++ b/tests/src/decode/testZfpDecodeBlockStrided3dDouble.c @@ -0,0 +1,13 @@ +#include "src/decode3d.c" + +#include "constants/3dDouble.h" +#include "utils/rand64.h" +#include "zfpDecodeBlockStridedBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/blockStrided.c" + }; + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/src/decode/testZfpDecodeBlockStrided3dFloat.c b/tests/src/decode/testZfpDecodeBlockStrided3dFloat.c new file mode 100644 index 00000000..cb0d40f8 --- /dev/null +++ b/tests/src/decode/testZfpDecodeBlockStrided3dFloat.c @@ -0,0 +1,13 @@ +#include "src/decode3f.c" + +#include "constants/3dFloat.h" +#include "utils/rand32.h" +#include "zfpDecodeBlockStridedBase.c" + +int 
main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/blockStrided.c" + }; + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/src/decode/testZfpDecodeBlockStrided3dInt32.c b/tests/src/decode/testZfpDecodeBlockStrided3dInt32.c new file mode 100644 index 00000000..285cd490 --- /dev/null +++ b/tests/src/decode/testZfpDecodeBlockStrided3dInt32.c @@ -0,0 +1,13 @@ +#include "src/decode3i.c" + +#include "constants/3dInt32.h" +#include "utils/rand32.h" +#include "zfpDecodeBlockStridedBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/blockStrided.c" + }; + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/src/decode/testZfpDecodeBlockStrided3dInt64.c b/tests/src/decode/testZfpDecodeBlockStrided3dInt64.c new file mode 100644 index 00000000..7c98991b --- /dev/null +++ b/tests/src/decode/testZfpDecodeBlockStrided3dInt64.c @@ -0,0 +1,13 @@ +#include "src/decode3l.c" + +#include "constants/3dInt64.h" +#include "utils/rand64.h" +#include "zfpDecodeBlockStridedBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/blockStrided.c" + }; + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/src/decode/testZfpDecodeBlockStrided4dDouble.c b/tests/src/decode/testZfpDecodeBlockStrided4dDouble.c new file mode 100644 index 00000000..da43d419 --- /dev/null +++ b/tests/src/decode/testZfpDecodeBlockStrided4dDouble.c @@ -0,0 +1,13 @@ +#include "src/decode4d.c" + +#include "constants/4dDouble.h" +#include "utils/rand64.h" +#include "zfpDecodeBlockStridedBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/blockStrided.c" + }; + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/src/decode/testZfpDecodeBlockStrided4dFloat.c b/tests/src/decode/testZfpDecodeBlockStrided4dFloat.c new file mode 100644 index 00000000..bb7ed200 --- /dev/null +++ b/tests/src/decode/testZfpDecodeBlockStrided4dFloat.c @@ -0,0 
+1,13 @@ +#include "src/decode4f.c" + +#include "constants/4dFloat.h" +#include "utils/rand32.h" +#include "zfpDecodeBlockStridedBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/blockStrided.c" + }; + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/src/decode/testZfpDecodeBlockStrided4dInt32.c b/tests/src/decode/testZfpDecodeBlockStrided4dInt32.c new file mode 100644 index 00000000..89ec1eed --- /dev/null +++ b/tests/src/decode/testZfpDecodeBlockStrided4dInt32.c @@ -0,0 +1,13 @@ +#include "src/decode4i.c" + +#include "constants/4dInt32.h" +#include "utils/rand32.h" +#include "zfpDecodeBlockStridedBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/blockStrided.c" + }; + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/src/decode/testZfpDecodeBlockStrided4dInt64.c b/tests/src/decode/testZfpDecodeBlockStrided4dInt64.c new file mode 100644 index 00000000..6d22fc20 --- /dev/null +++ b/tests/src/decode/testZfpDecodeBlockStrided4dInt64.c @@ -0,0 +1,13 @@ +#include "src/decode4l.c" + +#include "constants/4dInt64.h" +#include "utils/rand64.h" +#include "zfpDecodeBlockStridedBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/blockStrided.c" + }; + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/src/decode/testcases/block.c b/tests/src/decode/testcases/block.c new file mode 100644 index 00000000..19fa3663 --- /dev/null +++ b/tests/src/decode/testcases/block.c @@ -0,0 +1,13 @@ +// requires #include "utils/testMacros.h", do outside of main() + +#ifndef PRINT_CHECKSUMS +_cmocka_unit_test_setup_teardown(when_seededRandomDataGenerated_expect_ChecksumMatches, setup, teardown), +#endif + +_cmocka_unit_test_setup_teardown(_catFunc3(given_, DIM_INT_STR, Block_when_DecodeBlock_expect_ReturnValReflectsNumBitsReadFromBitstream), setup, teardown), +_cmocka_unit_test_setup_teardown(_catFunc3(given_, DIM_INT_STR, 
Block_when_DecodeBlock_expect_ArrayChecksumMatches), setup, teardown), + +#ifdef FL_PT_DATA +// reversible compression and decompression of blocks containing special floating-point values +_cmocka_unit_test_setup_teardown(_catFunc3(given_, DIM_INT_STR, Block_when_DecodeSpecialBlocks_expect_ArraysMatchBitForBit), setupSpecial, teardown), +#endif diff --git a/tests/src/decode/testcases/blockStrided.c b/tests/src/decode/testcases/blockStrided.c new file mode 100644 index 00000000..d9e45733 --- /dev/null +++ b/tests/src/decode/testcases/blockStrided.c @@ -0,0 +1,18 @@ +// requires #include "utils/testMacros.h", do outside of main() + +// remove redundant checksum tests already run in non-strided tests +#ifndef PRINT_CHECKSUMS + +_cmocka_unit_test_setup_teardown(when_seededRandomDataGenerated_expect_ChecksumMatches, setup, teardown), + +_cmocka_unit_test_setup_teardown(_catFunc3(given_, DIM_INT_STR, Block_when_DecodeBlockStrided_expect_ReturnValReflectsNumBitsReadFromBitstream), setup, teardown), +_cmocka_unit_test_setup_teardown(_catFunc3(given_, DIM_INT_STR, Block_when_DecodeBlockStrided_expect_OnlyStridedEntriesChangedInDestinationArray), setup, teardown), +_cmocka_unit_test_setup_teardown(_catFunc3(given_, DIM_INT_STR, Block_when_DecodeBlockStrided_expect_ArrayChecksumMatches), setup, teardown), + +_cmocka_unit_test_setup_teardown(_catFunc3(given_, DIM_INT_STR, Block_when_DecodePartialBlockStrided_expect_ReturnValReflectsNumBitsReadFromBitstream), setup, teardown), +_cmocka_unit_test_setup_teardown(_catFunc3(given_, DIM_INT_STR, Block_when_DecodePartialBlockStrided_expect_NonStridedEntriesUnchangedInDestinationArray), setup, teardown), +_cmocka_unit_test_setup_teardown(_catFunc3(given_, DIM_INT_STR, Block_when_DecodePartialBlockStrided_expect_EntriesOutsidePartialBlockBoundsUnchangedInDestinationArray), setup, teardown), + +#endif + +_cmocka_unit_test_setup_teardown(_catFunc3(given_, DIM_INT_STR, Block_when_DecodePartialBlockStrided_expect_ArrayChecksumMatches), 
setup, teardown), diff --git a/tests/src/decode/zfpDecodeBlockBase.c b/tests/src/decode/zfpDecodeBlockBase.c new file mode 100644 index 00000000..caeb90a3 --- /dev/null +++ b/tests/src/decode/zfpDecodeBlockBase.c @@ -0,0 +1,278 @@ +#include <stdarg.h> +#include <stddef.h> +#include <setjmp.h> +#include <cmocka.h> + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "utils/testMacros.h" +#include "utils/zfpChecksums.h" +#include "utils/zfpHash.h" + +struct setupVars { + size_t dimLens[4]; + Scalar* dataArr; + void* buffer; + size_t bufsizeBytes; + zfp_stream* stream; +}; + +static void +populateInitialArray(Scalar** dataArrPtr) +{ + size_t i; + *dataArrPtr = malloc(sizeof(Scalar) * BLOCK_SIZE); + assert_non_null(*dataArrPtr); + + for (i = 0; i < BLOCK_SIZE; i++) { +#ifdef FL_PT_DATA + (*dataArrPtr)[i] = nextSignedRandFlPt(); +#else + (*dataArrPtr)[i] = nextSignedRandInt(); +#endif + } + +} + +static void +populateInitialArraySpecial(Scalar* dataArr, int index) +{ +#ifdef FL_PT_DATA + // IEEE-754 special values + static const uint32 special_float_values[] = { + 0x00000000u, // +0 + 0x80000000u, // -0 + 0x00000001u, // +FLT_TRUE_MIN + 0x80000001u, // -FLT_TRUE_MIN + 0x7f7fffffu, // +FLT_MAX + 0xff7fffffu, // -FLT_MAX + 0x7f800000u, // +infinity + 0xff800000u, // -infinity + 0x7fc00000u, // qNaN + 0x7fa00000u, // sNaN + }; + static const uint64 special_double_values[] = { + UINT64C(0x0000000000000000), // +0 + UINT64C(0x8000000000000000), // -0 + UINT64C(0x0000000000000001), // +DBL_TRUE_MIN + UINT64C(0x8000000000000001), // -DBL_TRUE_MIN + UINT64C(0x7fefffffffffffff), // +DBL_MAX + UINT64C(0xffefffffffffffff), // -DBL_MAX + UINT64C(0x7ff0000000000000), // +infinity + UINT64C(0xfff0000000000000), // -infinity + UINT64C(0x7ff8000000000000), // qNaN + UINT64C(0x7ff4000000000000), // sNaN + }; +#endif + size_t i; + + for (i = 0; i < BLOCK_SIZE; i++) { +#ifdef FL_PT_DATA + // generate special values + if ((i & 3u) == 0) { + switch(ZFP_TYPE) { + case zfp_type_float: + memcpy(dataArr + i, &special_float_values[index], 
sizeof(Scalar)); + break; + case zfp_type_double: + memcpy(dataArr + i, &special_double_values[index], sizeof(Scalar)); + break; + } + } + else + dataArr[i] = 0; +#else + dataArr[i] = nextSignedRandInt(); +#endif + } +} + +static void +setupZfpStream(struct setupVars* bundle, int specialValueIndex) +{ + memset(bundle->dimLens, 0, sizeof(bundle->dimLens)); +#if DIMS >= 1 + bundle->dimLens[0] = BLOCK_SIDE_LEN; +#endif +#if DIMS >= 2 + bundle->dimLens[1] = BLOCK_SIDE_LEN; +#endif +#if DIMS >= 3 + bundle->dimLens[2] = BLOCK_SIDE_LEN; +#endif +#if DIMS >= 4 + bundle->dimLens[3] = BLOCK_SIDE_LEN; +#endif + size_t* n = bundle->dimLens; + + zfp_type type = ZFP_TYPE; + zfp_field* field; + switch(DIMS) { + case 1: + field = zfp_field_1d(bundle->dataArr, type, n[0]); + break; + case 2: + field = zfp_field_2d(bundle->dataArr, type, n[0], n[1]); + break; + case 3: + field = zfp_field_3d(bundle->dataArr, type, n[0], n[1], n[2]); + break; + case 4: + field = zfp_field_4d(bundle->dataArr, type, n[0], n[1], n[2], n[3]); + break; + } + + zfp_stream* stream = zfp_stream_open(NULL); + if (specialValueIndex >= 0) { + zfp_stream_set_reversible(stream); + } else { + zfp_stream_set_rate(stream, ZFP_RATE_PARAM_BITS, type, DIMS, zfp_false); + } + + size_t bufsizeBytes = zfp_stream_maximum_size(stream, field); + char* buffer = calloc(bufsizeBytes, sizeof(char)); + assert_non_null(buffer); + + bitstream* s = stream_open(buffer, bufsizeBytes); + assert_non_null(s); + + zfp_stream_set_bit_stream(stream, s); + zfp_stream_rewind(stream); + zfp_field_free(field); + + bundle->bufsizeBytes = bufsizeBytes; + bundle->buffer = buffer; + bundle->stream = stream; +} + +static int +setup(void **state) +{ + struct setupVars *bundle = malloc(sizeof(struct setupVars)); + assert_non_null(bundle); + + resetRandGen(); + populateInitialArray(&bundle->dataArr); + setupZfpStream(bundle, -1); + + *state = bundle; + + return 0; +} + +static int +setupSpecial(void **state) +{ + struct setupVars *bundle = 
malloc(sizeof(struct setupVars)); + assert_non_null(bundle); + + bundle->dataArr = malloc(sizeof(Scalar) * BLOCK_SIZE); + assert_non_null(bundle->dataArr); + + resetRandGen(); + setupZfpStream(bundle, 0); + + *state = bundle; + + return 0; +} + +static int +teardown(void **state) +{ + struct setupVars *bundle = *state; + + stream_close(bundle->stream->stream); + zfp_stream_close(bundle->stream); + free(bundle->buffer); + free(bundle->dataArr); + free(bundle); + + return 0; +} + +static void +when_seededRandomDataGenerated_expect_ChecksumMatches(void **state) +{ + struct setupVars *bundle = *state; + UInt checksum = _catFunc2(hashArray, SCALAR_BITS)((const UInt*)bundle->dataArr, BLOCK_SIZE, 1); + uint64 key1, key2; + computeKeyOriginalInput(BLOCK_FULL_TEST, bundle->dimLens, &key1, &key2); + ASSERT_EQ_CHECKSUM(DIMS, ZFP_TYPE, checksum, key1, key2); +} + +static void +_catFunc3(given_, DIM_INT_STR, Block_when_DecodeBlock_expect_ReturnValReflectsNumBitsReadFromBitstream)(void **state) +{ + struct setupVars *bundle = *state; + zfp_stream* stream = bundle->stream; + bitstream* s = zfp_stream_bit_stream(stream); + + _t2(zfp_encode_block, Scalar, DIMS)(stream, bundle->dataArr); + zfp_stream_flush(stream); + zfp_stream_rewind(stream); + + size_t returnValBits = _t2(zfp_decode_block, Scalar, DIMS)(stream, bundle->dataArr); + + assert_int_equal(returnValBits, stream_rtell(s)); +} + +static void +_catFunc3(given_, DIM_INT_STR, Block_when_DecodeBlock_expect_ArrayChecksumMatches)(void **state) +{ + struct setupVars *bundle = *state; + zfp_stream* stream = bundle->stream; + + _t2(zfp_encode_block, Scalar, DIMS)(stream, bundle->dataArr); + zfp_stream_flush(stream); + zfp_stream_rewind(stream); + + Scalar* decodedDataArr = calloc(BLOCK_SIZE, sizeof(Scalar)); + assert_non_null(decodedDataArr); + _t2(zfp_decode_block, Scalar, DIMS)(stream, decodedDataArr); + + UInt checksum = _catFunc2(hashArray, SCALAR_BITS)((const UInt*)decodedDataArr, BLOCK_SIZE, 1); + free(decodedDataArr); + + 
uint64 key1, key2; + computeKey(BLOCK_FULL_TEST, DECOMPRESSED_ARRAY, bundle->dimLens, zfp_mode_fixed_rate, 0, &key1, &key2); + ASSERT_EQ_CHECKSUM(DIMS, ZFP_TYPE, checksum, key1, key2); +} + +static void +_catFunc3(given_, DIM_INT_STR, Block_when_DecodeSpecialBlocks_expect_ArraysMatchBitForBit)(void **state) +{ + struct setupVars *bundle = *state; + zfp_stream* stream = bundle->stream; + + int failures = 0; + int i; + for (i = 0; i < 10; i++) { + populateInitialArraySpecial(bundle->dataArr, i); + + _t2(zfp_encode_block, Scalar, DIMS)(stream, bundle->dataArr); + zfp_stream_flush(stream); + zfp_stream_rewind(stream); + + Scalar* decodedDataArr = calloc(BLOCK_SIZE, sizeof(Scalar)); + assert_non_null(decodedDataArr); + _t2(zfp_decode_block, Scalar, DIMS)(stream, decodedDataArr); + + if (memcmp(bundle->dataArr, decodedDataArr, BLOCK_SIZE * sizeof(Scalar)) != 0) { + printf("Decode special Block testcase %d failed\n", i); + failures++; + } + + free(decodedDataArr); + + // reset/zero bitstream, rewind for next iteration + memset(bundle->buffer, 0, bundle->bufsizeBytes); + zfp_stream_rewind(stream); + } + + if (failures > 0) { + fail_msg("At least 1 special block testcase failed\n"); + } +} diff --git a/tests/src/decode/zfpDecodeBlockStridedBase.c b/tests/src/decode/zfpDecodeBlockStridedBase.c new file mode 100644 index 00000000..cf921225 --- /dev/null +++ b/tests/src/decode/zfpDecodeBlockStridedBase.c @@ -0,0 +1,621 @@ +#include <stdarg.h> +#include <stddef.h> +#include <setjmp.h> +#include <cmocka.h> + +#include <stdlib.h> +#include <string.h> + +#include "utils/testMacros.h" +#include "utils/zfpChecksums.h" +#include "utils/zfpHash.h" + +#define SX 2 +#define SY (3 * BLOCK_SIDE_LEN*SX) +#define SZ (2 * BLOCK_SIDE_LEN*SY) +#define SW (3 * BLOCK_SIDE_LEN*SZ) +#define PX 1 +#define PY 2 +#define PZ 3 +#define PW 4 + +#define DUMMY_VAL 99 + +struct setupVars { + size_t dimLens[4]; + Scalar* dataArr; + Scalar* decodedDataArr; + void* buffer; + zfp_stream* stream; +}; + +// write random output to strided entries, dummyVal elsewhere +// 
returns number of elements in allocated array +size_t +initializeStridedArray(Scalar** dataArrPtr, Scalar dummyVal) +{ + size_t arrayLen; + + int i, j, k, l, countX, countY, countZ, countW; + // absolute entry (i,j,k,l) + // 0 <= i < countX, (same for j,countY and k,countZ and l,countW) + // strided entry iff + // i % countX/BLOCK_SIDE_LEN == 0 (and so on for j, k, l) + switch (DIMS) { + case 1: + countX = BLOCK_SIDE_LEN * SX; + arrayLen = countX; + + *dataArrPtr = malloc(sizeof(Scalar) * arrayLen); + assert_non_null(*dataArrPtr); + + for (i = 0; i < countX; i++) { + if (i % SX) { + (*dataArrPtr)[i] = dummyVal; + } else { +#ifdef FL_PT_DATA + (*dataArrPtr)[i] = nextSignedRandFlPt(); +#else + (*dataArrPtr)[i] = nextSignedRandInt(); +#endif + } + } + + break; + + case 2: + countX = BLOCK_SIDE_LEN * SX; + countY = SY / SX; + arrayLen = countX * countY; + + *dataArrPtr = malloc(sizeof(Scalar) * arrayLen); + assert_non_null(*dataArrPtr); + + for (j = 0; j < countY; j++) { + for (i = 0; i < countX; i++) { + size_t index = countX*j + i; + if (i % (countX/BLOCK_SIDE_LEN) + || j % (countY/BLOCK_SIDE_LEN)) { + (*dataArrPtr)[index] = dummyVal; + } else { +#ifdef FL_PT_DATA + (*dataArrPtr)[index] = nextSignedRandFlPt(); +#else + (*dataArrPtr)[index] = nextSignedRandInt(); +#endif + } + } + } + + break; + + case 3: + countX = BLOCK_SIDE_LEN * SX; + countY = SY / SX; + countZ = SZ / SY; + arrayLen = countX * countY * countZ; + + *dataArrPtr = malloc(sizeof(Scalar) * arrayLen); + assert_non_null(*dataArrPtr); + + for (k = 0; k < countZ; k++) { + for (j = 0; j < countY; j++) { + for (i = 0; i < countX; i++) { + size_t index = countX*countY*k + countX*j + i; + if (i % (countX/BLOCK_SIDE_LEN) + || j % (countY/BLOCK_SIDE_LEN) + || k % (countZ/BLOCK_SIDE_LEN)) { + (*dataArrPtr)[index] = dummyVal; + } else { +#ifdef FL_PT_DATA + (*dataArrPtr)[index] = nextSignedRandFlPt(); +#else + (*dataArrPtr)[index] = nextSignedRandInt(); +#endif + } + } + } + } + + break; + + case 4: + countX = 
BLOCK_SIDE_LEN * SX; + countY = SY / SX; + countZ = SZ / SY; + countW = SW / SZ; + arrayLen = countX * countY * countZ * countW; + + *dataArrPtr = malloc(sizeof(Scalar) * arrayLen); + assert_non_null(*dataArrPtr); + + for (l = 0; l < countW; l++) { + for (k = 0; k < countZ; k++) { + for (j = 0; j < countY; j++) { + for (i = 0; i < countX; i++) { + size_t index = countX*countY*countZ*l + countX*countY*k + countX*j + i; + if (i % (countX/BLOCK_SIDE_LEN) + || j % (countY/BLOCK_SIDE_LEN) + || k % (countZ/BLOCK_SIDE_LEN) + || l % (countW/BLOCK_SIDE_LEN)) { + (*dataArrPtr)[index] = dummyVal; + } else { +#ifdef FL_PT_DATA + (*dataArrPtr)[index] = nextSignedRandFlPt(); +#else + (*dataArrPtr)[index] = nextSignedRandInt(); +#endif + } + } + } + } + } + + break; + } + + return arrayLen; +} + +static void +setupZfpStream(struct setupVars* bundle) +{ + memset(bundle->dimLens, 0, sizeof(bundle->dimLens)); +#if DIMS >= 1 + bundle->dimLens[0] = BLOCK_SIDE_LEN; +#endif +#if DIMS >= 2 + bundle->dimLens[1] = BLOCK_SIDE_LEN; +#endif +#if DIMS >= 3 + bundle->dimLens[2] = BLOCK_SIDE_LEN; +#endif +#if DIMS >= 4 + bundle->dimLens[3] = BLOCK_SIDE_LEN; +#endif + size_t* n = bundle->dimLens; + + zfp_type type = ZFP_TYPE; + zfp_field* field; + switch(DIMS) { + case 1: + field = zfp_field_1d(bundle->dataArr, type, n[0]); + zfp_field_set_stride_1d(field, SX); + break; + case 2: + field = zfp_field_2d(bundle->dataArr, type, n[0], n[1]); + zfp_field_set_stride_2d(field, SX, SY); + break; + case 3: + field = zfp_field_3d(bundle->dataArr, type, n[0], n[1], n[2]); + zfp_field_set_stride_3d(field, SX, SY, SZ); + break; + case 4: + field = zfp_field_4d(bundle->dataArr, type, n[0], n[1], n[2], n[3]); + zfp_field_set_stride_4d(field, SX, SY, SZ, SW); + break; + } + + zfp_stream* stream = zfp_stream_open(NULL); + zfp_stream_set_rate(stream, ZFP_RATE_PARAM_BITS, type, DIMS, zfp_false); + + size_t bufsizeBytes = zfp_stream_maximum_size(stream, field); + char* buffer = calloc(bufsizeBytes, sizeof(char)); + 
assert_non_null(buffer); + + bitstream* s = stream_open(buffer, bufsizeBytes); + assert_non_null(s); + + zfp_stream_set_bit_stream(stream, s); + zfp_stream_rewind(stream); + zfp_field_free(field); + + bundle->buffer = buffer; + bundle->stream = stream; +} + +static int +setup(void **state) +{ + struct setupVars *bundle = malloc(sizeof(struct setupVars)); + assert_non_null(bundle); + + resetRandGen(); + + size_t arrayLen = initializeStridedArray(&bundle->dataArr, DUMMY_VAL); + bundle->decodedDataArr = calloc(arrayLen, sizeof(Scalar)); + assert_non_null(bundle->decodedDataArr); + + setupZfpStream(bundle); + + *state = bundle; + + return 0; +} + +static int +teardown(void **state) +{ + struct setupVars *bundle = *state; + + stream_close(bundle->stream->stream); + zfp_stream_close(bundle->stream); + free(bundle->buffer); + free(bundle->decodedDataArr); + free(bundle->dataArr); + free(bundle); + + return 0; +} + +UInt +hashStridedBlock(Scalar* dataArr) +{ + ptrdiff_t s[4] = {SX, SY, SZ, SW}; + size_t n[4]; + int i; + + for (i = 0; i < 4; i++) { + n[i] = (i < DIMS) ? 
BLOCK_SIDE_LEN : 0; + } + + return _catFunc2(hashStridedArray, SCALAR_BITS)((const UInt*)dataArr, n, s); +} + +size_t +encodeBlockStrided(zfp_stream* stream, Scalar* dataArr) +{ + size_t numBitsWritten; + switch (DIMS) { + case 1: + numBitsWritten = _t2(zfp_encode_block_strided, Scalar, 1)(stream, dataArr, SX); + break; + case 2: + numBitsWritten = _t2(zfp_encode_block_strided, Scalar, 2)(stream, dataArr, SX, SY); + break; + case 3: + numBitsWritten = _t2(zfp_encode_block_strided, Scalar, 3)(stream, dataArr, SX, SY, SZ); + break; + case 4: + numBitsWritten = _t2(zfp_encode_block_strided, Scalar, 4)(stream, dataArr, SX, SY, SZ, SW); + break; + } + + return numBitsWritten; +} + +size_t +encodePartialBlockStrided(zfp_stream* stream, Scalar* dataArr) +{ + size_t numBitsWritten; + switch (DIMS) { + case 1: + numBitsWritten = _t2(zfp_encode_partial_block_strided, Scalar, 1)(stream, dataArr, PX, SX); + break; + case 2: + numBitsWritten = _t2(zfp_encode_partial_block_strided, Scalar, 2)(stream, dataArr, PX, PY, SX, SY); + break; + case 3: + numBitsWritten = _t2(zfp_encode_partial_block_strided, Scalar, 3)(stream, dataArr, PX, PY, PZ, SX, SY, SZ); + break; + case 4: + numBitsWritten = _t2(zfp_encode_partial_block_strided, Scalar, 4)(stream, dataArr, PX, PY, PZ, PW, SX, SY, SZ, SW); + break; + } + + return numBitsWritten; +} + +size_t +decodeBlockStrided(zfp_stream* stream, Scalar* dataArr) +{ + size_t numBitsRead; + switch (DIMS) { + case 1: + numBitsRead = _t2(zfp_decode_block_strided, Scalar, 1)(stream, dataArr, SX); + break; + case 2: + numBitsRead = _t2(zfp_decode_block_strided, Scalar, 2)(stream, dataArr, SX, SY); + break; + case 3: + numBitsRead = _t2(zfp_decode_block_strided, Scalar, 3)(stream, dataArr, SX, SY, SZ); + break; + case 4: + numBitsRead = _t2(zfp_decode_block_strided, Scalar, 4)(stream, dataArr, SX, SY, SZ, SW); + break; + } + + return numBitsRead; +} + +size_t +decodePartialBlockStrided(zfp_stream* stream, Scalar* dataArr) +{ + size_t numBitsRead; + 
switch (DIMS) { + case 1: + numBitsRead = _t2(zfp_decode_partial_block_strided, Scalar, 1)(stream, dataArr, PX, SX); + break; + case 2: + numBitsRead = _t2(zfp_decode_partial_block_strided, Scalar, 2)(stream, dataArr, PX, PY, SX, SY); + break; + case 3: + numBitsRead = _t2(zfp_decode_partial_block_strided, Scalar, 3)(stream, dataArr, PX, PY, PZ, SX, SY, SZ); + break; + case 4: + numBitsRead = _t2(zfp_decode_partial_block_strided, Scalar, 4)(stream, dataArr, PX, PY, PZ, PW, SX, SY, SZ, SW); + break; + } + + return numBitsRead; +} + +void +assertNonStridedEntriesZero(Scalar* data) +{ + size_t i, j, k, l, countX, countY, countZ, countW; + switch (DIMS) { + case 1: + countX = BLOCK_SIDE_LEN * SX; + + for (i = 0; i < countX; i++) { + if (i % SX) { + assert_true(data[i] == 0.); + } + } + + break; + + case 2: + countX = BLOCK_SIDE_LEN * SX; + countY = SY / SX; + + for (j = 0; j < countY; j++) { + for (i = 0; i < countX; i++) { + if (i % (countX/BLOCK_SIDE_LEN) + || j % (countY/BLOCK_SIDE_LEN)) { + assert_true(data[countX*j + i] == 0.); + } + } + } + + break; + + case 3: + countX = BLOCK_SIDE_LEN * SX; + countY = SY / SX; + countZ = SZ / SY; + + for (k = 0; k < countZ; k++) { + for (j = 0; j < countY; j++) { + for (i = 0; i < countX; i++) { + if (i % (countX/BLOCK_SIDE_LEN) + || j % (countY/BLOCK_SIDE_LEN) + || k % (countZ/BLOCK_SIDE_LEN)) { + assert_true(data[countX*countY*k + countX*j + i] == 0.); + } + } + } + } + + break; + + case 4: + countX = BLOCK_SIDE_LEN * SX; + countY = SY / SX; + countZ = SZ / SY; + countW = SW / SZ; + + for (l = 0; l < countW; l++) { + for (k = 0; k < countZ; k++) { + for (j = 0; j < countY; j++) { + for (i = 0; i < countX; i++) { + if (i % (countX/BLOCK_SIDE_LEN) + || j % (countY/BLOCK_SIDE_LEN) + || k % (countZ/BLOCK_SIDE_LEN) + || l % (countW/BLOCK_SIDE_LEN)) { + assert_true(data[countX*countY*countZ*l + countX*countY*k + countX*j + i] == 0.); + } + } + } + } + } + + break; + } +} + +void +assertEntriesOutsidePartialBlockBoundsZero(Scalar* 
data) +{ + size_t i, j, k, l, countX, countY, countZ, countW; + switch (DIMS) { + case 1: + countX = BLOCK_SIDE_LEN * SX; + + for (i = 0; i < countX; i++) { + if (i/SX >= PX) { + assert_true(data[i] == 0.); + } + } + + break; + + case 2: + countX = BLOCK_SIDE_LEN * SX; + countY = SY / SX; + + for (j = 0; j < countY; j++) { + for (i = 0; i < countX; i++) { + if (i/(countX/BLOCK_SIDE_LEN) >= PX + || j/(countY/BLOCK_SIDE_LEN) >= PY) { + assert_true(data[countX*j + i] == 0.); + } + } + } + + break; + + case 3: + countX = BLOCK_SIDE_LEN * SX; + countY = SY / SX; + countZ = SZ / SY; + + for (k = 0; k < countZ; k++) { + for (j = 0; j < countY; j++) { + for (i = 0; i < countX; i++) { + if (i/(countX/BLOCK_SIDE_LEN) >= PX + || j/(countY/BLOCK_SIDE_LEN) >= PY + || k/(countZ/BLOCK_SIDE_LEN) >= PZ) { + assert_true(data[countX*countY*k + countX*j + i] == 0.); + } + } + } + } + + break; + + case 4: + countX = BLOCK_SIDE_LEN * SX; + countY = SY / SX; + countZ = SZ / SY; + countW = SW / SZ; + + for (l = 0; l < countW; l++) { + for (k = 0; k < countZ; k++) { + for (j = 0; j < countY; j++) { + for (i = 0; i < countX; i++) { + if (i/(countX/BLOCK_SIDE_LEN) >= PX + || j/(countY/BLOCK_SIDE_LEN) >= PY + || k/(countZ/BLOCK_SIDE_LEN) >= PZ + || l/(countW/BLOCK_SIDE_LEN) >= PW) { + assert_true(data[countX*countY*countZ*l + countX*countY*k + countX*j + i] == 0.); + } + } + } + } + } + + break; + } +} + +static void +when_seededRandomDataGenerated_expect_ChecksumMatches(void **state) +{ + struct setupVars *bundle = *state; + UInt checksum = hashStridedBlock(bundle->dataArr); + uint64 key1, key2; + computeKeyOriginalInput(BLOCK_FULL_TEST, bundle->dimLens, &key1, &key2); + ASSERT_EQ_CHECKSUM(DIMS, ZFP_TYPE, checksum, key1, key2); +} + +static void +_catFunc3(given_, DIM_INT_STR, Block_when_DecodeBlockStrided_expect_ReturnValReflectsNumBitsReadFromBitstream)(void **state) +{ + struct setupVars *bundle = *state; + zfp_stream* stream = bundle->stream; + bitstream* s = 
zfp_stream_bit_stream(stream); + + encodeBlockStrided(stream, bundle->dataArr); + zfp_stream_flush(stream); + zfp_stream_rewind(stream); + + size_t returnValBits = decodeBlockStrided(stream, bundle->decodedDataArr); + + assert_int_equal(returnValBits, stream_rtell(s)); +} + +static void +_catFunc3(given_, DIM_INT_STR, Block_when_DecodeBlockStrided_expect_OnlyStridedEntriesChangedInDestinationArray)(void **state) +{ + struct setupVars *bundle = *state; + zfp_stream* stream = bundle->stream; + + encodeBlockStrided(stream, bundle->dataArr); + zfp_stream_flush(stream); + zfp_stream_rewind(stream); + decodeBlockStrided(stream, bundle->decodedDataArr); + + assertNonStridedEntriesZero(bundle->decodedDataArr); +} + +static void +_catFunc3(given_, DIM_INT_STR, Block_when_DecodeBlockStrided_expect_ArrayChecksumMatches)(void **state) +{ + struct setupVars *bundle = *state; + zfp_stream* stream = bundle->stream; + + encodeBlockStrided(stream, bundle->dataArr); + zfp_stream_flush(stream); + zfp_stream_rewind(stream); + + decodeBlockStrided(stream, bundle->decodedDataArr); + + UInt checksum = hashStridedBlock(bundle->decodedDataArr); + uint64 key1, key2; + computeKey(BLOCK_FULL_TEST, DECOMPRESSED_ARRAY, bundle->dimLens, zfp_mode_fixed_rate, 0, &key1, &key2); + ASSERT_EQ_CHECKSUM(DIMS, ZFP_TYPE, checksum, key1, key2); +} + +static void +_catFunc3(given_, DIM_INT_STR, Block_when_DecodePartialBlockStrided_expect_ReturnValReflectsNumBitsReadFromBitstream)(void **state) +{ + struct setupVars *bundle = *state; + zfp_stream* stream = bundle->stream; + bitstream* s = zfp_stream_bit_stream(stream); + + encodePartialBlockStrided(stream, bundle->dataArr); + zfp_stream_flush(stream); + zfp_stream_rewind(stream); + + size_t returnValBits = decodePartialBlockStrided(stream, bundle->decodedDataArr); + + assert_int_equal(returnValBits, stream_rtell(s)); +} + +static void +_catFunc3(given_, DIM_INT_STR, 
Block_when_DecodePartialBlockStrided_expect_NonStridedEntriesUnchangedInDestinationArray)(void **state) +{ + struct setupVars *bundle = *state; + zfp_stream* stream = bundle->stream; + + encodePartialBlockStrided(stream, bundle->dataArr); + zfp_stream_flush(stream); + zfp_stream_rewind(stream); + decodePartialBlockStrided(stream, bundle->decodedDataArr); + + assertNonStridedEntriesZero(bundle->decodedDataArr); +} + +static void +_catFunc3(given_, DIM_INT_STR, Block_when_DecodePartialBlockStrided_expect_EntriesOutsidePartialBlockBoundsUnchangedInDestinationArray)(void **state) +{ + struct setupVars *bundle = *state; + zfp_stream* stream = bundle->stream; + + encodePartialBlockStrided(stream, bundle->dataArr); + zfp_stream_flush(stream); + zfp_stream_rewind(stream); + decodePartialBlockStrided(stream, bundle->decodedDataArr); + + assertEntriesOutsidePartialBlockBoundsZero(bundle->decodedDataArr); +} + +static void +_catFunc3(given_, DIM_INT_STR, Block_when_DecodePartialBlockStrided_expect_ArrayChecksumMatches)(void **state) +{ + struct setupVars *bundle = *state; + zfp_stream* stream = bundle->stream; + + encodePartialBlockStrided(stream, bundle->dataArr); + zfp_stream_flush(stream); + zfp_stream_rewind(stream); + + decodePartialBlockStrided(stream, bundle->decodedDataArr); + + UInt checksum = hashStridedBlock(bundle->decodedDataArr); + uint64 key1, key2; + computeKey(BLOCK_PARTIAL_TEST, DECOMPRESSED_ARRAY, bundle->dimLens, zfp_mode_fixed_rate, 0, &key1, &key2); + ASSERT_EQ_CHECKSUM(DIMS, ZFP_TYPE, checksum, key1, key2); +} diff --git a/tests/src/encode/CMakeLists.txt b/tests/src/encode/CMakeLists.txt new file mode 100644 index 00000000..195e4566 --- /dev/null +++ b/tests/src/encode/CMakeLists.txt @@ -0,0 +1,49 @@ +function(zfp_add_block_tests dims type bits) + set(block_test_name testZfpEncodeBlock${dims}d${type}) + add_executable(${block_test_name} ${block_test_name}.c) + target_link_libraries(${block_test_name} + cmocka zfp rand${bits}Lib zfpHashLib 
zfpChecksumsLib) + if(HAVE_LIBM_MATH) + target_link_libraries(${block_test_name} m) + endif() + + target_compile_definitions(${block_test_name} PRIVATE ${zfp_private_defs}) + if(PRINT_CHECKSUMS) + target_compile_definitions(${block_test_name} PUBLIC PRINT_CHECKSUMS) + endif() + + add_test(NAME ${block_test_name} COMMAND ${block_test_name}) + + set(strided_block_test_name testZfpEncodeBlockStrided${dims}d${type}) + add_executable(${strided_block_test_name} ${strided_block_test_name}.c) + target_link_libraries(${strided_block_test_name} + cmocka zfp rand${bits}Lib zfpHashLib zfpChecksumsLib) + if(HAVE_LIBM_MATH) + target_link_libraries(${strided_block_test_name} m) + endif() + + target_compile_definitions(${strided_block_test_name} PRIVATE ${zfp_private_defs}) + if(PRINT_CHECKSUMS) + target_compile_definitions(${strided_block_test_name} PUBLIC PRINT_CHECKSUMS) + endif() + + add_test(NAME ${strided_block_test_name} COMMAND ${strided_block_test_name}) +endfunction() + +zfp_add_block_tests(1 Int32 32) +zfp_add_block_tests(1 Int64 64) +zfp_add_block_tests(2 Int32 32) +zfp_add_block_tests(2 Int64 64) +zfp_add_block_tests(3 Int32 32) +zfp_add_block_tests(3 Int64 64) +zfp_add_block_tests(4 Int32 32) +zfp_add_block_tests(4 Int64 64) + +zfp_add_block_tests(1 Float 32) +zfp_add_block_tests(1 Double 64) +zfp_add_block_tests(2 Float 32) +zfp_add_block_tests(2 Double 64) +zfp_add_block_tests(3 Float 32) +zfp_add_block_tests(3 Double 64) +zfp_add_block_tests(4 Float 32) +zfp_add_block_tests(4 Double 64) diff --git a/tests/src/encode/testZfpEncodeBlock1dDouble.c b/tests/src/encode/testZfpEncodeBlock1dDouble.c new file mode 100644 index 00000000..eeb43f25 --- /dev/null +++ b/tests/src/encode/testZfpEncodeBlock1dDouble.c @@ -0,0 +1,13 @@ +#include "src/encode1d.c" + +#include "constants/1dDouble.h" +#include "utils/rand64.h" +#include "zfpEncodeBlockBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/block.c" + }; + return 
cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/src/encode/testZfpEncodeBlock1dFloat.c b/tests/src/encode/testZfpEncodeBlock1dFloat.c new file mode 100644 index 00000000..6584318b --- /dev/null +++ b/tests/src/encode/testZfpEncodeBlock1dFloat.c @@ -0,0 +1,13 @@ +#include "src/encode1f.c" + +#include "constants/1dFloat.h" +#include "utils/rand32.h" +#include "zfpEncodeBlockBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/block.c" + }; + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/src/encode/testZfpEncodeBlock1dInt32.c b/tests/src/encode/testZfpEncodeBlock1dInt32.c new file mode 100644 index 00000000..8a93ebe2 --- /dev/null +++ b/tests/src/encode/testZfpEncodeBlock1dInt32.c @@ -0,0 +1,13 @@ +#include "src/encode1i.c" + +#include "constants/1dInt32.h" +#include "utils/rand32.h" +#include "zfpEncodeBlockBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/block.c" + }; + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/src/encode/testZfpEncodeBlock1dInt64.c b/tests/src/encode/testZfpEncodeBlock1dInt64.c new file mode 100644 index 00000000..3e8f2d75 --- /dev/null +++ b/tests/src/encode/testZfpEncodeBlock1dInt64.c @@ -0,0 +1,13 @@ +#include "src/encode1l.c" + +#include "constants/1dInt64.h" +#include "utils/rand64.h" +#include "zfpEncodeBlockBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/block.c" + }; + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/src/encode/testZfpEncodeBlock2dDouble.c b/tests/src/encode/testZfpEncodeBlock2dDouble.c new file mode 100644 index 00000000..2986dfbd --- /dev/null +++ b/tests/src/encode/testZfpEncodeBlock2dDouble.c @@ -0,0 +1,13 @@ +#include "src/encode2d.c" + +#include "constants/2dDouble.h" +#include "utils/rand64.h" +#include "zfpEncodeBlockBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/block.c" + }; + 
return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/src/encode/testZfpEncodeBlock2dFloat.c b/tests/src/encode/testZfpEncodeBlock2dFloat.c new file mode 100644 index 00000000..84d52ff3 --- /dev/null +++ b/tests/src/encode/testZfpEncodeBlock2dFloat.c @@ -0,0 +1,13 @@ +#include "src/encode2f.c" + +#include "constants/2dFloat.h" +#include "utils/rand32.h" +#include "zfpEncodeBlockBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/block.c" + }; + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/src/encode/testZfpEncodeBlock2dInt32.c b/tests/src/encode/testZfpEncodeBlock2dInt32.c new file mode 100644 index 00000000..e8c46a3d --- /dev/null +++ b/tests/src/encode/testZfpEncodeBlock2dInt32.c @@ -0,0 +1,13 @@ +#include "src/encode2i.c" + +#include "constants/2dInt32.h" +#include "utils/rand32.h" +#include "zfpEncodeBlockBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/block.c" + }; + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/src/encode/testZfpEncodeBlock2dInt64.c b/tests/src/encode/testZfpEncodeBlock2dInt64.c new file mode 100644 index 00000000..54ee9e59 --- /dev/null +++ b/tests/src/encode/testZfpEncodeBlock2dInt64.c @@ -0,0 +1,13 @@ +#include "src/encode2l.c" + +#include "constants/2dInt64.h" +#include "utils/rand64.h" +#include "zfpEncodeBlockBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/block.c" + }; + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/src/encode/testZfpEncodeBlock3dDouble.c b/tests/src/encode/testZfpEncodeBlock3dDouble.c new file mode 100644 index 00000000..cd0593a6 --- /dev/null +++ b/tests/src/encode/testZfpEncodeBlock3dDouble.c @@ -0,0 +1,13 @@ +#include "src/encode3d.c" + +#include "constants/3dDouble.h" +#include "utils/rand64.h" +#include "zfpEncodeBlockBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/block.c" + 
}; + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/src/encode/testZfpEncodeBlock3dFloat.c b/tests/src/encode/testZfpEncodeBlock3dFloat.c new file mode 100644 index 00000000..dca90736 --- /dev/null +++ b/tests/src/encode/testZfpEncodeBlock3dFloat.c @@ -0,0 +1,13 @@ +#include "src/encode3f.c" + +#include "constants/3dFloat.h" +#include "utils/rand32.h" +#include "zfpEncodeBlockBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/block.c" + }; + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/src/encode/testZfpEncodeBlock3dInt32.c b/tests/src/encode/testZfpEncodeBlock3dInt32.c new file mode 100644 index 00000000..0524302d --- /dev/null +++ b/tests/src/encode/testZfpEncodeBlock3dInt32.c @@ -0,0 +1,13 @@ +#include "src/encode3i.c" + +#include "constants/3dInt32.h" +#include "utils/rand32.h" +#include "zfpEncodeBlockBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/block.c" + }; + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/src/encode/testZfpEncodeBlock3dInt64.c b/tests/src/encode/testZfpEncodeBlock3dInt64.c new file mode 100644 index 00000000..a6b381e2 --- /dev/null +++ b/tests/src/encode/testZfpEncodeBlock3dInt64.c @@ -0,0 +1,13 @@ +#include "src/encode3l.c" + +#include "constants/3dInt64.h" +#include "utils/rand64.h" +#include "zfpEncodeBlockBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/block.c" + }; + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/src/encode/testZfpEncodeBlock4dDouble.c b/tests/src/encode/testZfpEncodeBlock4dDouble.c new file mode 100644 index 00000000..2306de9a --- /dev/null +++ b/tests/src/encode/testZfpEncodeBlock4dDouble.c @@ -0,0 +1,13 @@ +#include "src/encode4d.c" + +#include "constants/4dDouble.h" +#include "utils/rand64.h" +#include "zfpEncodeBlockBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include 
"testcases/block.c" + }; + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/src/encode/testZfpEncodeBlock4dFloat.c b/tests/src/encode/testZfpEncodeBlock4dFloat.c new file mode 100644 index 00000000..997cf307 --- /dev/null +++ b/tests/src/encode/testZfpEncodeBlock4dFloat.c @@ -0,0 +1,13 @@ +#include "src/encode4f.c" + +#include "constants/4dFloat.h" +#include "utils/rand32.h" +#include "zfpEncodeBlockBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/block.c" + }; + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/src/encode/testZfpEncodeBlock4dInt32.c b/tests/src/encode/testZfpEncodeBlock4dInt32.c new file mode 100644 index 00000000..4cad3599 --- /dev/null +++ b/tests/src/encode/testZfpEncodeBlock4dInt32.c @@ -0,0 +1,13 @@ +#include "src/encode4i.c" + +#include "constants/4dInt32.h" +#include "utils/rand32.h" +#include "zfpEncodeBlockBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/block.c" + }; + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/src/encode/testZfpEncodeBlock4dInt64.c b/tests/src/encode/testZfpEncodeBlock4dInt64.c new file mode 100644 index 00000000..01002391 --- /dev/null +++ b/tests/src/encode/testZfpEncodeBlock4dInt64.c @@ -0,0 +1,13 @@ +#include "src/encode4l.c" + +#include "constants/4dInt64.h" +#include "utils/rand64.h" +#include "zfpEncodeBlockBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/block.c" + }; + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/src/encode/testZfpEncodeBlockStrided1dDouble.c b/tests/src/encode/testZfpEncodeBlockStrided1dDouble.c new file mode 100644 index 00000000..59e52e80 --- /dev/null +++ b/tests/src/encode/testZfpEncodeBlockStrided1dDouble.c @@ -0,0 +1,13 @@ +#include "src/encode1d.c" + +#include "constants/1dDouble.h" +#include "utils/rand64.h" +#include "zfpEncodeBlockStridedBase.c" + +int main() +{ + const struct 
CMUnitTest tests[] = { + #include "testcases/blockStrided.c" + }; + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/src/encode/testZfpEncodeBlockStrided1dFloat.c b/tests/src/encode/testZfpEncodeBlockStrided1dFloat.c new file mode 100644 index 00000000..9e2cc08b --- /dev/null +++ b/tests/src/encode/testZfpEncodeBlockStrided1dFloat.c @@ -0,0 +1,13 @@ +#include "src/encode1f.c" + +#include "constants/1dFloat.h" +#include "utils/rand32.h" +#include "zfpEncodeBlockStridedBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/blockStrided.c" + }; + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/src/encode/testZfpEncodeBlockStrided1dInt32.c b/tests/src/encode/testZfpEncodeBlockStrided1dInt32.c new file mode 100644 index 00000000..0f8ba3fa --- /dev/null +++ b/tests/src/encode/testZfpEncodeBlockStrided1dInt32.c @@ -0,0 +1,13 @@ +#include "src/encode1i.c" + +#include "constants/1dInt32.h" +#include "utils/rand32.h" +#include "zfpEncodeBlockStridedBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/blockStrided.c" + }; + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/src/encode/testZfpEncodeBlockStrided1dInt64.c b/tests/src/encode/testZfpEncodeBlockStrided1dInt64.c new file mode 100644 index 00000000..1b3d7ca5 --- /dev/null +++ b/tests/src/encode/testZfpEncodeBlockStrided1dInt64.c @@ -0,0 +1,13 @@ +#include "src/encode1l.c" + +#include "constants/1dInt64.h" +#include "utils/rand64.h" +#include "zfpEncodeBlockStridedBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/blockStrided.c" + }; + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/src/encode/testZfpEncodeBlockStrided2dDouble.c b/tests/src/encode/testZfpEncodeBlockStrided2dDouble.c new file mode 100644 index 00000000..7cbc3036 --- /dev/null +++ b/tests/src/encode/testZfpEncodeBlockStrided2dDouble.c @@ -0,0 +1,13 @@ +#include 
"src/encode2d.c" + +#include "constants/2dDouble.h" +#include "utils/rand64.h" +#include "zfpEncodeBlockStridedBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/blockStrided.c" + }; + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/src/encode/testZfpEncodeBlockStrided2dFloat.c b/tests/src/encode/testZfpEncodeBlockStrided2dFloat.c new file mode 100644 index 00000000..4421f625 --- /dev/null +++ b/tests/src/encode/testZfpEncodeBlockStrided2dFloat.c @@ -0,0 +1,13 @@ +#include "src/encode2f.c" + +#include "constants/2dFloat.h" +#include "utils/rand32.h" +#include "zfpEncodeBlockStridedBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/blockStrided.c" + }; + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/src/encode/testZfpEncodeBlockStrided2dInt32.c b/tests/src/encode/testZfpEncodeBlockStrided2dInt32.c new file mode 100644 index 00000000..ee898c82 --- /dev/null +++ b/tests/src/encode/testZfpEncodeBlockStrided2dInt32.c @@ -0,0 +1,13 @@ +#include "src/encode2i.c" + +#include "constants/2dInt32.h" +#include "utils/rand32.h" +#include "zfpEncodeBlockStridedBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/blockStrided.c" + }; + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/src/encode/testZfpEncodeBlockStrided2dInt64.c b/tests/src/encode/testZfpEncodeBlockStrided2dInt64.c new file mode 100644 index 00000000..82617f02 --- /dev/null +++ b/tests/src/encode/testZfpEncodeBlockStrided2dInt64.c @@ -0,0 +1,13 @@ +#include "src/encode2l.c" + +#include "constants/2dInt64.h" +#include "utils/rand64.h" +#include "zfpEncodeBlockStridedBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/blockStrided.c" + }; + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/src/encode/testZfpEncodeBlockStrided3dDouble.c b/tests/src/encode/testZfpEncodeBlockStrided3dDouble.c 
new file mode 100644 index 00000000..98eb0563 --- /dev/null +++ b/tests/src/encode/testZfpEncodeBlockStrided3dDouble.c @@ -0,0 +1,13 @@ +#include "src/encode3d.c" + +#include "constants/3dDouble.h" +#include "utils/rand64.h" +#include "zfpEncodeBlockStridedBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/blockStrided.c" + }; + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/src/encode/testZfpEncodeBlockStrided3dFloat.c b/tests/src/encode/testZfpEncodeBlockStrided3dFloat.c new file mode 100644 index 00000000..6bb49a7e --- /dev/null +++ b/tests/src/encode/testZfpEncodeBlockStrided3dFloat.c @@ -0,0 +1,13 @@ +#include "src/encode3f.c" + +#include "constants/3dFloat.h" +#include "utils/rand32.h" +#include "zfpEncodeBlockStridedBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/blockStrided.c" + }; + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/src/encode/testZfpEncodeBlockStrided3dInt32.c b/tests/src/encode/testZfpEncodeBlockStrided3dInt32.c new file mode 100644 index 00000000..54d0741d --- /dev/null +++ b/tests/src/encode/testZfpEncodeBlockStrided3dInt32.c @@ -0,0 +1,13 @@ +#include "src/encode3i.c" + +#include "constants/3dInt32.h" +#include "utils/rand32.h" +#include "zfpEncodeBlockStridedBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/blockStrided.c" + }; + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/src/encode/testZfpEncodeBlockStrided3dInt64.c b/tests/src/encode/testZfpEncodeBlockStrided3dInt64.c new file mode 100644 index 00000000..7e3bfa3e --- /dev/null +++ b/tests/src/encode/testZfpEncodeBlockStrided3dInt64.c @@ -0,0 +1,13 @@ +#include "src/encode3l.c" + +#include "constants/3dInt64.h" +#include "utils/rand64.h" +#include "zfpEncodeBlockStridedBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/blockStrided.c" + }; + return 
cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/src/encode/testZfpEncodeBlockStrided4dDouble.c b/tests/src/encode/testZfpEncodeBlockStrided4dDouble.c new file mode 100644 index 00000000..71d578c2 --- /dev/null +++ b/tests/src/encode/testZfpEncodeBlockStrided4dDouble.c @@ -0,0 +1,13 @@ +#include "src/encode4d.c" + +#include "constants/4dDouble.h" +#include "utils/rand64.h" +#include "zfpEncodeBlockStridedBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/blockStrided.c" + }; + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/src/encode/testZfpEncodeBlockStrided4dFloat.c b/tests/src/encode/testZfpEncodeBlockStrided4dFloat.c new file mode 100644 index 00000000..a002e0cb --- /dev/null +++ b/tests/src/encode/testZfpEncodeBlockStrided4dFloat.c @@ -0,0 +1,13 @@ +#include "src/encode4f.c" + +#include "constants/4dFloat.h" +#include "utils/rand32.h" +#include "zfpEncodeBlockStridedBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/blockStrided.c" + }; + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/src/encode/testZfpEncodeBlockStrided4dInt32.c b/tests/src/encode/testZfpEncodeBlockStrided4dInt32.c new file mode 100644 index 00000000..c48410bd --- /dev/null +++ b/tests/src/encode/testZfpEncodeBlockStrided4dInt32.c @@ -0,0 +1,13 @@ +#include "src/encode4i.c" + +#include "constants/4dInt32.h" +#include "utils/rand32.h" +#include "zfpEncodeBlockStridedBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/blockStrided.c" + }; + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/src/encode/testZfpEncodeBlockStrided4dInt64.c b/tests/src/encode/testZfpEncodeBlockStrided4dInt64.c new file mode 100644 index 00000000..e9f8c622 --- /dev/null +++ b/tests/src/encode/testZfpEncodeBlockStrided4dInt64.c @@ -0,0 +1,13 @@ +#include "src/encode4l.c" + +#include "constants/4dInt64.h" +#include "utils/rand64.h" 
+#include "zfpEncodeBlockStridedBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/blockStrided.c" + }; + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/src/encode/testcases/block.c b/tests/src/encode/testcases/block.c new file mode 100644 index 00000000..d576e318 --- /dev/null +++ b/tests/src/encode/testcases/block.c @@ -0,0 +1,11 @@ +// requires #include "utils/testMacros.h", do outside of main() + +_cmocka_unit_test_setup_teardown(when_seededRandomDataGenerated_expect_ChecksumMatches, setup, teardown), + +_cmocka_unit_test_setup_teardown(_catFunc3(given_, DIM_INT_STR, Block_when_EncodeBlock_expect_ReturnValReflectsNumBitsWrittenToBitstream), setup, teardown), +_cmocka_unit_test_setup_teardown(_catFunc3(given_, DIM_INT_STR, Block_when_EncodeBlock_expect_BitstreamChecksumMatches), setup, teardown), + +#ifdef FL_PT_DATA +// reversible compression of blocks containing special floating-point values +_cmocka_unit_test_setup_teardown(_catFunc3(given_, DIM_INT_STR, Block_when_EncodeSpecialBlocks_expect_BitstreamChecksumMatches), setupSpecial, teardown), +#endif diff --git a/tests/src/encode/testcases/blockStrided.c b/tests/src/encode/testcases/blockStrided.c new file mode 100644 index 00000000..b3d82a66 --- /dev/null +++ b/tests/src/encode/testcases/blockStrided.c @@ -0,0 +1,18 @@ +// requires #include "utils/testMacros.h", do outside of main() + +// omit redundant checksums covered in non-strided block tests +#ifndef PRINT_CHECKSUMS + +_cmocka_unit_test_setup_teardown(when_seededRandomDataGenerated_expect_ChecksumMatches, setup, teardown), + +_cmocka_unit_test_setup_teardown(_catFunc3(given_, DIM_INT_STR, Block_when_EncodeBlockStrided_expect_ReturnValReflectsNumBitsWrittenToBitstream), setup, teardown), +_cmocka_unit_test_setup_teardown(_catFunc3(given_, DIM_INT_STR, Block_when_EncodeBlockStrided_expect_OnlyStridedEntriesUsed), setup, teardown), +_cmocka_unit_test_setup_teardown(_catFunc3(given_, DIM_INT_STR, 
Block_when_EncodeBlockStrided_expect_BitstreamChecksumMatches), setup, teardown), + +_cmocka_unit_test_setup_teardown(_catFunc3(given_, DIM_INT_STR, Block_when_EncodePartialBlockStrided_expect_ReturnValReflectsNumBitsWrittenToBitstream), setup, teardown), +_cmocka_unit_test_setup_teardown(_catFunc3(given_, DIM_INT_STR, Block_when_EncodePartialBlockStrided_expect_OnlyStridedEntriesUsed), setup, teardown), +_cmocka_unit_test_setup_teardown(_catFunc3(given_, DIM_INT_STR, Block_when_EncodePartialBlockStrided_expect_OnlyEntriesWithinPartialBlockBoundsUsed), setup, teardown), + +#endif + +_cmocka_unit_test_setup_teardown(_catFunc3(given_, DIM_INT_STR, Block_when_EncodePartialBlockStrided_expect_BitstreamChecksumMatches), setup, teardown), diff --git a/tests/src/encode/zfpEncodeBlockBase.c b/tests/src/encode/zfpEncodeBlockBase.c new file mode 100644 index 00000000..23917c62 --- /dev/null +++ b/tests/src/encode/zfpEncodeBlockBase.c @@ -0,0 +1,266 @@ +#include <stdarg.h> +#include <stddef.h> +#include <setjmp.h> +#include <cmocka.h> + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "utils/testMacros.h" +#include "utils/zfpChecksums.h" +#include "utils/zfpHash.h" + +struct setupVars { + size_t dimLens[4]; + Scalar* dataArr; + void* buffer; + size_t bufsizeBytes; + zfp_stream* stream; +}; + +static void +populateInitialArray(Scalar** dataArrPtr) +{ + size_t i; + *dataArrPtr = malloc(sizeof(Scalar) * BLOCK_SIZE); + assert_non_null(*dataArrPtr); + + for (i = 0; i < BLOCK_SIZE; i++) { +#ifdef FL_PT_DATA + (*dataArrPtr)[i] = nextSignedRandFlPt(); +#else + (*dataArrPtr)[i] = nextSignedRandInt(); +#endif + } +} + +static void +populateInitialArraySpecial(Scalar* dataArr, int index) +{ +#ifdef FL_PT_DATA + // IEEE-754 special values + static const uint32 special_float_values[] = { + 0x00000000u, // +0 + 0x80000000u, // -0 + 0x00000001u, // +FLT_TRUE_MIN + 0x80000001u, // -FLT_TRUE_MIN + 0x7f7fffffu, // +FLT_MAX + 0xff7fffffu, // -FLT_MAX + 0x7f800000u, // +infinity + 0xff800000u, // -infinity + 0x7fc00000u, // qNaN + 0x7fa00000u, 
// sNaN + }; + static const uint64 special_double_values[] = { + UINT64C(0x0000000000000000), // +0 + UINT64C(0x8000000000000000), // -0 + UINT64C(0x0000000000000001), // +DBL_TRUE_MIN + UINT64C(0x8000000000000001), // -DBL_TRUE_MIN + UINT64C(0x7fefffffffffffff), // +DBL_MAX + UINT64C(0xffefffffffffffff), // -DBL_MAX + UINT64C(0x7ff0000000000000), // +infinity + UINT64C(0xfff0000000000000), // -infinity + UINT64C(0x7ff8000000000000), // qNaN + UINT64C(0x7ff4000000000000), // sNaN + }; +#endif + size_t i; + + for (i = 0; i < BLOCK_SIZE; i++) { +#ifdef FL_PT_DATA + // generate special values + if ((i & 3u) == 0) { + switch(ZFP_TYPE) { + case zfp_type_float: + memcpy(dataArr + i, &special_float_values[index], sizeof(Scalar)); + break; + case zfp_type_double: + memcpy(dataArr + i, &special_double_values[index], sizeof(Scalar)); + break; + } + } + else + dataArr[i] = 0; +#else + dataArr[i] = nextSignedRandInt(); +#endif + } +} + +// specialValueIndex -1 implies test without special values +static void +setupZfpStream(struct setupVars* bundle, int specialValueIndex) +{ + memset(bundle->dimLens, 0, sizeof(bundle->dimLens)); +#if DIMS >= 1 + bundle->dimLens[0] = BLOCK_SIDE_LEN; +#endif +#if DIMS >= 2 + bundle->dimLens[1] = BLOCK_SIDE_LEN; +#endif +#if DIMS >= 3 + bundle->dimLens[2] = BLOCK_SIDE_LEN; +#endif +#if DIMS >= 4 + bundle->dimLens[3] = BLOCK_SIDE_LEN; +#endif + size_t* n = bundle->dimLens; + + zfp_type type = ZFP_TYPE; + zfp_field* field; + switch (DIMS) { + case 1: + field = zfp_field_1d(bundle->dataArr, type, n[0]); + break; + case 2: + field = zfp_field_2d(bundle->dataArr, type, n[0], n[1]); + break; + case 3: + field = zfp_field_3d(bundle->dataArr, type, n[0], n[1], n[2]); + break; + case 4: + field = zfp_field_4d(bundle->dataArr, type, n[0], n[1], n[2], n[3]); + break; + } + + zfp_stream* stream = zfp_stream_open(NULL); + if (specialValueIndex >= 0) { + zfp_stream_set_reversible(stream); + } else { + zfp_stream_set_rate(stream, ZFP_RATE_PARAM_BITS, type, 
DIMS, zfp_false); + } + + size_t bufsizeBytes = zfp_stream_maximum_size(stream, field); + char* buffer = calloc(bufsizeBytes, sizeof(char)); + assert_non_null(buffer); + + bitstream* s = stream_open(buffer, bufsizeBytes); + assert_non_null(s); + + zfp_stream_set_bit_stream(stream, s); + zfp_stream_rewind(stream); + zfp_field_free(field); + + bundle->bufsizeBytes = bufsizeBytes; + bundle->buffer = buffer; + bundle->stream = stream; +} + +static int +setup(void **state) +{ + struct setupVars *bundle = malloc(sizeof(struct setupVars)); + assert_non_null(bundle); + + resetRandGen(); + populateInitialArray(&bundle->dataArr); + setupZfpStream(bundle, -1); + + *state = bundle; + + return 0; +} + +static int +setupSpecial(void **state) +{ + struct setupVars *bundle = malloc(sizeof(struct setupVars)); + assert_non_null(bundle); + + bundle->dataArr = malloc(sizeof(Scalar) * BLOCK_SIZE); + assert_non_null(bundle->dataArr); + + setupZfpStream(bundle, 0); + + *state = bundle; + + return 0; +} + +static int +teardown(void **state) +{ + struct setupVars *bundle = *state; + + stream_close(bundle->stream->stream); + zfp_stream_close(bundle->stream); + free(bundle->buffer); + free(bundle->dataArr); + free(bundle); + + return 0; +} + +static void +when_seededRandomDataGenerated_expect_ChecksumMatches(void **state) +{ + struct setupVars *bundle = *state; + UInt checksum = _catFunc2(hashArray, SCALAR_BITS)((const UInt*)bundle->dataArr, BLOCK_SIZE, 1); + uint64 key1, key2; + computeKeyOriginalInput(BLOCK_FULL_TEST, bundle->dimLens, &key1, &key2); + // random data checksum only run for full block, and for non-special values + ASSERT_EQ_CHECKSUM(DIMS, ZFP_TYPE, checksum, key1, key2); +} + +static void +_catFunc3(given_, DIM_INT_STR, Block_when_EncodeBlock_expect_ReturnValReflectsNumBitsWrittenToBitstream)(void **state) +{ + struct setupVars *bundle = *state; + zfp_stream* stream = bundle->stream; + bitstream* s = zfp_stream_bit_stream(stream); + + size_t returnValBits = 
_t2(zfp_encode_block, Scalar, DIMS)(stream, bundle->dataArr); + // do not flush, otherwise extra zeros included in count + + assert_int_equal(returnValBits, stream_wtell(s)); +} + +static void +_catFunc3(given_, DIM_INT_STR, Block_when_EncodeBlock_expect_BitstreamChecksumMatches)(void **state) +{ + struct setupVars *bundle = *state; + zfp_stream* stream = bundle->stream; + bitstream* s = zfp_stream_bit_stream(stream); + + _t2(zfp_encode_block, Scalar, DIMS)(stream, bundle->dataArr); + zfp_stream_flush(stream); + + uint64 checksum = hashBitstream(stream_data(s), stream_size(s)); + uint64 key1, key2; + computeKey(BLOCK_FULL_TEST, COMPRESSED_BITSTREAM, bundle->dimLens, zfp_mode_fixed_rate, 0, &key1, &key2); + ASSERT_EQ_CHECKSUM(DIMS, ZFP_TYPE, checksum, key1, key2); +} + +static void +_catFunc3(given_, DIM_INT_STR, Block_when_EncodeSpecialBlocks_expect_BitstreamChecksumMatches)(void **state) +{ + struct setupVars *bundle = *state; + zfp_stream* stream = bundle->stream; + bitstream* s = zfp_stream_bit_stream(stream); + + int failures = 0; + int i; + for (i = 0; i < 10; i++) { + populateInitialArraySpecial(bundle->dataArr, i); + + _t2(zfp_encode_block, Scalar, DIMS)(stream, bundle->dataArr); + zfp_stream_flush(stream); + + uint64 checksum = hashBitstream(stream_data(s), stream_size(s)); + uint64 key1, key2; + computeKey(BLOCK_FULL_TEST, COMPRESSED_BITSTREAM, bundle->dimLens, zfp_mode_reversible, i + 1, &key1, &key2); + if (COMPARE_NEQ_CHECKSUM(DIMS, ZFP_TYPE, checksum, key1, key2)) { + printf("Special Block testcase %d failed\n", i); + failures++; + } + + // reset/zero bitstream, rewind for next iteration + memset(bundle->buffer, 0, bundle->bufsizeBytes); + zfp_stream_rewind(stream); + } + + if (failures > 0) { + fail_msg("At least 1 special block testcase failed\n"); + } +} diff --git a/tests/src/encode/zfpEncodeBlockStridedBase.c b/tests/src/encode/zfpEncodeBlockStridedBase.c new file mode 100644 index 00000000..10294ac2 --- /dev/null +++ 
b/tests/src/encode/zfpEncodeBlockStridedBase.c @@ -0,0 +1,490 @@ +#include <stdarg.h> +#include <stddef.h> +#include <setjmp.h> +#include <cmocka.h> + +#include <stdlib.h> +#include <string.h> + +#include "utils/testMacros.h" +#include "utils/zfpChecksums.h" +#include "utils/zfpHash.h" + +#define SX 2 +#define SY (3 * BLOCK_SIDE_LEN*SX) +#define SZ (2 * BLOCK_SIDE_LEN*SY) +#define SW (3 * BLOCK_SIDE_LEN*SZ) +#define PX 1 +#define PY 2 +#define PZ 3 +#define PW 4 + +#define DUMMY_VAL 99 + +struct setupVars { + size_t dimLens[4]; + Scalar* dataArr; + void* buffer; + zfp_stream* stream; +}; + +// write random output to strided entries, dummyVal elsewhere +void +initializeStridedArray(Scalar** dataArrPtr, Scalar dummyVal) +{ + size_t i, j, k, l, countX, countY, countZ, countW; + // absolute entry (i,j,k,l) + // 0 <= i < countX, (same for j,countY and k,countZ and l,countW) + // strided entry iff + // i % countX/BLOCK_SIDE_LEN == 0 (and so on for j,k,l) + switch(DIMS) { + case 1: + countX = BLOCK_SIDE_LEN * SX; + *dataArrPtr = malloc(sizeof(Scalar) * countX); + assert_non_null(*dataArrPtr); + + for (i = 0; i < countX; i++) { + if (i % SX) { + (*dataArrPtr)[i] = dummyVal; + } else { +#ifdef FL_PT_DATA + (*dataArrPtr)[i] = nextSignedRandFlPt(); +#else + (*dataArrPtr)[i] = nextSignedRandInt(); +#endif + } + } + + break; + + case 2: + countX = BLOCK_SIDE_LEN * SX; + countY = SY / SX; + *dataArrPtr = malloc(sizeof(Scalar) * countX * countY); + assert_non_null(*dataArrPtr); + + for (j = 0; j < countY; j++) { + for (i = 0; i < countX; i++) { + size_t index = countX*j + i; + if (i % (countX/BLOCK_SIDE_LEN) + || j % (countY/BLOCK_SIDE_LEN)) { + (*dataArrPtr)[index] = dummyVal; + } else { +#ifdef FL_PT_DATA + (*dataArrPtr)[index] = nextSignedRandFlPt(); +#else + (*dataArrPtr)[index] = nextSignedRandInt(); +#endif + } + } + } + + break; + + case 3: + countX = BLOCK_SIDE_LEN * SX; + countY = SY / SX; + countZ = SZ / SY; + *dataArrPtr = malloc(sizeof(Scalar) * countX * countY * countZ); + assert_non_null(*dataArrPtr); + + for (k = 0; k < countZ; 
k++) { + for (j = 0; j < countY; j++) { + for (i = 0; i < countX; i++) { + size_t index = countX*countY*k + countX*j + i; + if (i % (countX/BLOCK_SIDE_LEN) + || j % (countY/BLOCK_SIDE_LEN) + || k % (countZ/BLOCK_SIDE_LEN)) { + (*dataArrPtr)[index] = dummyVal; + } else { +#ifdef FL_PT_DATA + (*dataArrPtr)[index] = nextSignedRandFlPt(); +#else + (*dataArrPtr)[index] = nextSignedRandInt(); +#endif + } + } + } + } + + break; + + case 4: + countX = BLOCK_SIDE_LEN * SX; + countY = SY / SX; + countZ = SZ / SY; + countW = SW / SZ; + *dataArrPtr = malloc(sizeof(Scalar) * countX * countY * countZ * countW); + assert_non_null(*dataArrPtr); + + for (l = 0; l < countW; l++) { + for (k = 0; k < countZ; k++) { + for (j = 0; j < countY; j++) { + for (i = 0; i < countX; i++) { + size_t index = countX*countY*countZ*l + countX*countY*k + countX*j + i; + if (i % (countX/BLOCK_SIDE_LEN) + || j % (countY/BLOCK_SIDE_LEN) + || k % (countZ/BLOCK_SIDE_LEN) + || l % (countW/BLOCK_SIDE_LEN)) { + (*dataArrPtr)[index] = dummyVal; + } else { +#ifdef FL_PT_DATA + (*dataArrPtr)[index] = nextSignedRandFlPt(); +#else + (*dataArrPtr)[index] = nextSignedRandInt(); +#endif + } + } + } + } + } + + break; + } + +} + +static void +setupZfpStream(struct setupVars* bundle) +{ + memset(bundle->dimLens, 0, sizeof(bundle->dimLens)); +#if DIMS >= 1 + bundle->dimLens[0] = BLOCK_SIDE_LEN; +#endif +#if DIMS >= 2 + bundle->dimLens[1] = BLOCK_SIDE_LEN; +#endif +#if DIMS >= 3 + bundle->dimLens[2] = BLOCK_SIDE_LEN; +#endif +#if DIMS >= 4 + bundle->dimLens[3] = BLOCK_SIDE_LEN; +#endif + size_t* n = bundle->dimLens; + + zfp_type type = ZFP_TYPE; + zfp_field* field; + switch(DIMS) { + case 1: + field = zfp_field_1d(bundle->dataArr, type, n[0]); + zfp_field_set_stride_1d(field, SX); + break; + case 2: + field = zfp_field_2d(bundle->dataArr, type, n[0], n[1]); + zfp_field_set_stride_2d(field, SX, SY); + break; + case 3: + field = zfp_field_3d(bundle->dataArr, type, n[0], n[1], n[2]); + zfp_field_set_stride_3d(field, SX, 
SY, SZ); + break; + case 4: + field = zfp_field_4d(bundle->dataArr, type, n[0], n[1], n[2], n[3]); + zfp_field_set_stride_4d(field, SX, SY, SZ, SW); + break; + } + + zfp_stream* stream = zfp_stream_open(NULL); + zfp_stream_set_rate(stream, ZFP_RATE_PARAM_BITS, type, DIMS, zfp_false); + + size_t bufsizeBytes = zfp_stream_maximum_size(stream, field); + char* buffer = calloc(bufsizeBytes, sizeof(char)); + assert_non_null(buffer); + + bitstream* s = stream_open(buffer, bufsizeBytes); + assert_non_null(s); + + zfp_stream_set_bit_stream(stream, s); + zfp_stream_rewind(stream); + zfp_field_free(field); + + bundle->buffer = buffer; + bundle->stream = stream; +} + +static int +setup(void **state) +{ + struct setupVars *bundle = malloc(sizeof(struct setupVars)); + assert_non_null(bundle); + + resetRandGen(); + initializeStridedArray(&bundle->dataArr, DUMMY_VAL); + setupZfpStream(bundle); + + *state = bundle; + + return 0; +} + +static int +teardown(void **state) +{ + struct setupVars *bundle = *state; + + stream_close(bundle->stream->stream); + zfp_stream_close(bundle->stream); + free(bundle->buffer); + free(bundle->dataArr); + free(bundle); + + return 0; +} + +size_t +encodeBlockStrided(zfp_stream* stream, Scalar* dataArr) +{ + size_t numBitsWritten; + switch (DIMS) { + case 1: + numBitsWritten = _t2(zfp_encode_block_strided, Scalar, 1)(stream, dataArr, SX); + break; + case 2: + numBitsWritten = _t2(zfp_encode_block_strided, Scalar, 2)(stream, dataArr, SX, SY); + break; + case 3: + numBitsWritten = _t2(zfp_encode_block_strided, Scalar, 3)(stream, dataArr, SX, SY, SZ); + break; + case 4: + numBitsWritten = _t2(zfp_encode_block_strided, Scalar, 4)(stream, dataArr, SX, SY, SZ, SW); + break; + } + + return numBitsWritten; +} + +size_t +encodePartialBlockStrided(zfp_stream* stream, Scalar* dataArr) +{ + size_t numBitsWritten; + switch (DIMS) { + case 1: + numBitsWritten = _t2(zfp_encode_partial_block_strided, Scalar, 1)(stream, dataArr, PX, SX); + break; + case 2: + 
numBitsWritten = _t2(zfp_encode_partial_block_strided, Scalar, 2)(stream, dataArr, PX, PY, SX, SY); + break; + case 3: + numBitsWritten = _t2(zfp_encode_partial_block_strided, Scalar, 3)(stream, dataArr, PX, PY, PZ, SX, SY, SZ); + break; + case 4: + numBitsWritten = _t2(zfp_encode_partial_block_strided, Scalar, 4)(stream, dataArr, PX, PY, PZ, PW, SX, SY, SZ, SW); + break; + } + + return numBitsWritten; +} + +static void +when_seededRandomDataGenerated_expect_ChecksumMatches(void **state) +{ + struct setupVars *bundle = *state; + ptrdiff_t s[4] = {SX, SY, SZ, SW}; + size_t n[4]; + int i; + + for (i = 0; i < 4; i++) { + n[i] = (i < DIMS) ? BLOCK_SIDE_LEN : 0; + } + + UInt checksum = _catFunc2(hashStridedArray, SCALAR_BITS)((const UInt*)bundle->dataArr, n, s); + uint64 key1, key2; + computeKeyOriginalInput(BLOCK_FULL_TEST, bundle->dimLens, &key1, &key2); + // entire block is populated, but later tests restrict to reading partial block + ASSERT_EQ_CHECKSUM(DIMS, ZFP_TYPE, checksum, key1, key2); +} + +static void +_catFunc3(given_, DIM_INT_STR, Block_when_EncodeBlockStrided_expect_ReturnValReflectsNumBitsWrittenToBitstream)(void **state) +{ + struct setupVars *bundle = *state; + zfp_stream* stream = bundle->stream; + bitstream* s = zfp_stream_bit_stream(stream); + + size_t returnValBits = encodeBlockStrided(stream, bundle->dataArr); + // do not flush, otherwise extra zeros included in count + + assert_int_equal(returnValBits, stream_wtell(s)); +} + +static void +_catFunc3(given_, DIM_INT_STR, Block_when_EncodeBlockStrided_expect_OnlyStridedEntriesUsed)(void **state) +{ + struct setupVars *bundle = *state; + zfp_stream* stream = bundle->stream; + bitstream* s = zfp_stream_bit_stream(stream); + + // encode original block + encodeBlockStrided(stream, bundle->dataArr); + zfp_stream_flush(stream); + uint64 originalChecksum = hashBitstream(stream_data(s), stream_size(s)); + + // zero bitstream's memory + size_t writtenBits = stream_wtell(s); + stream_rewind(s); + 
stream_pad(s, (uint)writtenBits); + stream_rewind(s); + + // tweak non-strided (unused) entries + resetRandGen(); + free(bundle->dataArr); + initializeStridedArray(&bundle->dataArr, DUMMY_VAL + 1); + + // encode new block + encodeBlockStrided(stream, bundle->dataArr); + zfp_stream_flush(stream); + uint64 newChecksum = hashBitstream(stream_data(s), stream_size(s)); + + // do not use ASSERT_CHECKSUM macro because both always computed locally + assert_int_equal(newChecksum, originalChecksum); +} + +static void +_catFunc3(given_, DIM_INT_STR, Block_when_EncodeBlockStrided_expect_BitstreamChecksumMatches)(void **state) +{ + struct setupVars *bundle = *state; + zfp_stream* stream = bundle->stream; + bitstream* s = zfp_stream_bit_stream(stream); + + encodeBlockStrided(stream, bundle->dataArr); + zfp_stream_flush(stream); + + uint64 checksum = hashBitstream(stream_data(s), stream_size(s)); + uint64 key1, key2; + computeKey(BLOCK_FULL_TEST, COMPRESSED_BITSTREAM, bundle->dimLens, zfp_mode_fixed_rate, 0, &key1, &key2); + ASSERT_EQ_CHECKSUM(DIMS, ZFP_TYPE, checksum, key1, key2); +} + +static void +_catFunc3(given_, DIM_INT_STR, Block_when_EncodePartialBlockStrided_expect_ReturnValReflectsNumBitsWrittenToBitstream)(void **state) +{ + struct setupVars *bundle = *state; + zfp_stream* stream = bundle->stream; + bitstream* s = zfp_stream_bit_stream(stream); + + size_t returnValBits = encodePartialBlockStrided(stream, bundle->dataArr); + // do not flush, otherwise extra zeros included in count + + assert_int_equal(returnValBits, stream_wtell(s)); +} + +static void +_catFunc3(given_, DIM_INT_STR, Block_when_EncodePartialBlockStrided_expect_OnlyStridedEntriesUsed)(void **state) +{ + struct setupVars *bundle = *state; + zfp_stream* stream = bundle->stream; + bitstream* s = zfp_stream_bit_stream(stream); + + // encode original block + encodePartialBlockStrided(stream, bundle->dataArr); + zfp_stream_flush(stream); + uint64 originalChecksum = hashBitstream(stream_data(s), stream_size(s)); 
+ + // zero bitstream's memory + size_t writtenBits = stream_wtell(s); + stream_rewind(s); + stream_pad(s, (uint)writtenBits); + stream_rewind(s); + + // tweak non-strided (unused) entries + resetRandGen(); + free(bundle->dataArr); + initializeStridedArray(&bundle->dataArr, DUMMY_VAL + 1); + + // encode new block + encodePartialBlockStrided(stream, bundle->dataArr); + zfp_stream_flush(stream); + uint64 newChecksum = hashBitstream(stream_data(s), stream_size(s)); + + // do not use ASSERT_CHECKSUM macro because both always computed locally + assert_int_equal(newChecksum, originalChecksum); +} + +static void +_catFunc3(given_, DIM_INT_STR, Block_when_EncodePartialBlockStrided_expect_OnlyEntriesWithinPartialBlockBoundsUsed)(void **state) +{ + struct setupVars *bundle = *state; + zfp_stream* stream = bundle->stream; + bitstream* s = zfp_stream_bit_stream(stream); + + // encode original block + encodePartialBlockStrided(stream, bundle->dataArr); + zfp_stream_flush(stream); + uint64 originalChecksum = hashBitstream(stream_data(s), stream_size(s)); + + // zero bitstream's memory + size_t writtenBits = stream_wtell(s); + stream_rewind(s); + stream_pad(s, (uint)writtenBits); + stream_rewind(s); + + // tweak block entries outside partial block subset + // block entry (i, j, k, l) + size_t i, j, k, l; + switch(DIMS) { + case 1: + for (i = PX; i < BLOCK_SIDE_LEN; i++) { + bundle->dataArr[SX*i] = DUMMY_VAL; + } + break; + + case 2: + for (j = 0; j < BLOCK_SIDE_LEN; j++) { + for (i = 0; i < BLOCK_SIDE_LEN; i++) { + if (i >= PX || j >= PY) { + bundle->dataArr[SY*j + SX*i] = DUMMY_VAL; + } + } + } + break; + + case 3: + for (k = 0; k < BLOCK_SIDE_LEN; k++) { + for (j = 0; j < BLOCK_SIDE_LEN; j++) { + for (i = 0; i < BLOCK_SIDE_LEN; i++) { + if (i >= PX || j >= PY || k >= PZ) { + bundle->dataArr[SZ*k + SY*j + SX*i] = DUMMY_VAL; + } + } + } + } + break; + + case 4: + for (l = 0; l < BLOCK_SIDE_LEN; l++) { + for (k = 0; k < BLOCK_SIDE_LEN; k++) { + for (j = 0; j < BLOCK_SIDE_LEN; j++) 
{ + for (i = 0; i < BLOCK_SIDE_LEN; i++) { + if (i >= PX || j >= PY || k >= PZ || l >= PW) { + bundle->dataArr[SW*l + SZ*k + SY*j + SX*i] = DUMMY_VAL; + } + } + } + } + } + break; + } + + // encode new block + encodePartialBlockStrided(stream, bundle->dataArr); + zfp_stream_flush(stream); + uint64 newChecksum = hashBitstream(stream_data(s), stream_size(s)); + + // do not use ASSERT_CHECKSUM macro because both always computed locally + assert_int_equal(newChecksum, originalChecksum); +} + +static void +_catFunc3(given_, DIM_INT_STR, Block_when_EncodePartialBlockStrided_expect_BitstreamChecksumMatches)(void **state) +{ + struct setupVars *bundle = *state; + zfp_stream* stream = bundle->stream; + bitstream* s = zfp_stream_bit_stream(stream); + + encodePartialBlockStrided(stream, bundle->dataArr); + zfp_stream_flush(stream); + + uint64 checksum = hashBitstream(stream_data(s), stream_size(s)); + uint64 key1, key2; + computeKey(BLOCK_PARTIAL_TEST, COMPRESSED_BITSTREAM, bundle->dimLens, zfp_mode_fixed_rate, 0, &key1, &key2); + ASSERT_EQ_CHECKSUM(DIMS, ZFP_TYPE, checksum, key1, key2); +} diff --git a/tests/src/endtoend/CMakeLists.txt b/tests/src/endtoend/CMakeLists.txt new file mode 100644 index 00000000..5c9aca20 --- /dev/null +++ b/tests/src/endtoend/CMakeLists.txt @@ -0,0 +1,70 @@ +function(zfp_add_test dims type bits) + if(NOT DEFINED ZFP_OMP_TESTS_ONLY) + set(serial_test_name testZfpSerial${dims}d${type}) + add_executable(${serial_test_name} ${serial_test_name}.c) + target_link_libraries(${serial_test_name} + cmocka zfp zfpHashLib genSmoothRandNumsLib stridedOperationsLib + zfpChecksumsLib zfpCompressionParamsLib zfpTimerLib) + if(HAVE_LIBM_MATH) + target_link_libraries(${serial_test_name} m) + endif() + + target_compile_definitions(${serial_test_name} PRIVATE ${zfp_private_defs}) + if(PRINT_CHECKSUMS) + target_compile_definitions(${serial_test_name} PUBLIC PRINT_CHECKSUMS) + endif() + + add_test(NAME ${serial_test_name} COMMAND ${serial_test_name}) + endif() + + 
if(ZFP_WITH_OPENMP) + set(omp_test_name testZfpOmp${dims}d${type}) + add_executable(${omp_test_name} ${omp_test_name}.c) + target_compile_definitions(${omp_test_name} PRIVATE ${zfp_private_defs}) + target_link_libraries(${omp_test_name} + cmocka zfp zfpHashLib genSmoothRandNumsLib stridedOperationsLib + zfpChecksumsLib zfpTimerLib zfpCompressionParamsLib + OpenMP::OpenMP_C) + if(HAVE_LIBM_MATH) + target_link_libraries(${omp_test_name} m) + endif() + add_test(NAME ${omp_test_name} COMMAND ${omp_test_name}) + set_property(TEST ${omp_test_name} PROPERTY RUN_SERIAL TRUE) + endif() + + if(NOT DEFINED ZFP_OMP_TESTS_ONLY) + if(ZFP_WITH_CUDA) + add_definitions(-DZFP_WITH_CUDA) + + set(cuda_test_name testZfpCuda${dims}d${type}) + add_executable(${cuda_test_name} ${cuda_test_name}.c) + target_compile_definitions(${cuda_test_name} PRIVATE ${zfp_private_defs}) + target_link_libraries(${cuda_test_name} + cmocka zfp zfpHashLib genSmoothRandNumsLib stridedOperationsLib + zfpChecksumsLib zfpTimerLib zfpCompressionParamsLib) + if(HAVE_LIBM_MATH) + target_link_libraries(${cuda_test_name} m) + endif() + add_test(NAME ${cuda_test_name} COMMAND ${cuda_test_name}) + set_property(TEST ${cuda_test_name} PROPERTY RUN_SERIAL TRUE) + endif() + endif() +endfunction() + +zfp_add_test(1 Int32 32) +zfp_add_test(1 Int64 64) +zfp_add_test(2 Int32 32) +zfp_add_test(2 Int64 64) +zfp_add_test(3 Int32 32) +zfp_add_test(3 Int64 64) +zfp_add_test(4 Int32 32) +zfp_add_test(4 Int64 64) + +zfp_add_test(1 Float 32) +zfp_add_test(1 Double 64) +zfp_add_test(2 Float 32) +zfp_add_test(2 Double 64) +zfp_add_test(3 Float 32) +zfp_add_test(3 Double 64) +zfp_add_test(4 Float 32) +zfp_add_test(4 Double 64) diff --git a/tests/src/endtoend/cudaExecBase.c b/tests/src/endtoend/cudaExecBase.c new file mode 100644 index 00000000..6e675032 --- /dev/null +++ b/tests/src/endtoend/cudaExecBase.c @@ -0,0 +1,202 @@ +#ifdef ZFP_WITH_CUDA + +#include + +#define PREPEND_CUDA(x) Cuda_ ## x +#define DESCRIPTOR_INTERMEDIATE(x) 
PREPEND_CUDA(x) +#define DESCRIPTOR DESCRIPTOR_INTERMEDIATE(DIM_INT_STR) + +#define ZFP_TEST_CUDA +#include "zfpEndtoendBase.c" + +// cuda entry functions +static void +_catFunc3(given_, DESCRIPTOR, ReversedArray_when_ZfpCompressDecompressFixedRate_expect_BitstreamAndArrayChecksumsMatch)(void **state) +{ + struct setupVars *bundle = *state; + if (bundle->stride != REVERSED) { + fail_msg("Invalid stride during test"); + } + + runCompressDecompressTests(state, zfp_mode_fixed_rate, 1); +} + +static void +_catFunc3(given_, DESCRIPTOR, PermutedArray_when_ZfpCompressDecompressFixedRate_expect_BitstreamAndArrayChecksumsMatch)(void **state) +{ + struct setupVars *bundle = *state; + if (bundle->stride != PERMUTED) { + fail_msg("Invalid stride during test"); + } + + runCompressDecompressTests(state, zfp_mode_fixed_rate, 1); +} + +// returns 0 on success, 1 on test failure +static int +runZfpCompressDecompressIsNoop(void **state) +{ + struct setupVars *bundle = *state; + zfp_field* field = bundle->field; + zfp_stream* stream = bundle->stream; + bitstream* s = zfp_stream_bit_stream(stream); + + // grab bitstream member vars + bitstream_count bits = s->bits; + bitstream_word buffer = s->buffer; + bitstream_word* ptr = s->ptr; + size_t streamSize = stream_size(s); + + // perform compression, expect bitstream not to advance + if (zfp_compress(stream, field) != streamSize) { + printf("Compression advanced the bitstream when expected to be a no-op\n"); + return 1; + } + + // expect bitstream untouched + if ((s->bits != bits) || + (s->buffer != buffer) || + (s->ptr != ptr) || + (*s->ptr != *ptr)) { + printf("Compression modified the bitstream when expected to be a no-op\n"); + return 1; + } + + // perform decompression, expect bitstream not to advance + if (zfp_decompress(stream, field) != streamSize) { + printf("Decompression advanced the bitstream when expected to be a no-op\n"); + return 1; + } + + // expect bitstream untouched + if ((s->bits != bits) || + (s->buffer != buffer) 
|| + (s->ptr != ptr) || + (*s->ptr != *ptr)) { + printf("Decompression modified the bitstream when expected to be a no-op\n"); + return 1; + } + + return 0; +} + +static void +runCompressDecompressNoopTest(void **state, zfp_mode mode) +{ + struct setupVars *bundle = *state; + if (setupCompressParam(bundle, mode, 1) == 1) { + fail_msg("ERROR while setting zfp mode"); + } + + if (runZfpCompressDecompressIsNoop(state) == 1) { + fail_msg("Compression/Decompression no-op test failed"); + } +} + +static void +_catFunc3(given_, DESCRIPTOR, InterleavedArray_when_ZfpCompressDecompressFixedRate_expect_BitstreamUntouchedAndReturnsZero)(void **state) +{ + struct setupVars *bundle = *state; + if (bundle->stride != INTERLEAVED) { + fail_msg("Invalid stride during test"); + } + + runCompressDecompressNoopTest(state, zfp_mode_fixed_rate); +} + +static void +_catFunc3(given_, DESCRIPTOR, Array_when_ZfpCompressDecompressFixedRate_expect_BitstreamAndArrayChecksumsMatch)(void **state) +{ + runCompressDecompressTests(state, zfp_mode_fixed_rate, 3); +} + +// cover all non-fixed-rate modes (except expert) +static void +_catFunc3(given_, DESCRIPTOR, Array_when_ZfpCompressDecompressNonFixedRate_expect_BitstreamUntouchedAndReturnsZero)(void **state) +{ + struct setupVars *bundle = *state; + + // loop over fixed prec, fixed acc, reversible + zfp_mode mode; + int failures = 0; + for (mode = zfp_mode_fixed_precision; mode <= zfp_mode_reversible; mode++) { + zfp_type type = zfp_field_type(bundle->field); + if ((mode == zfp_mode_fixed_accuracy) && (type == zfp_type_int32 || type == zfp_type_int64)) { + // skip fixed accuracy when unsupported + continue; + } + + if (setupCompressParam(bundle, mode, 1) == 1) { + failures++; + continue; + } + + if (runZfpCompressDecompressIsNoop(state) == 1) { + failures++; + } + } + + if (failures > 0) { + fail_msg("Compression/Decompression no-op test failed\n"); + } +} + +static void +_catFunc3(given_, DESCRIPTOR, 
InterleavedArray_when_ZfpCompressFixedRate_expect_BitstreamUntouchedAndReturnsZero)(void **state) +{ + struct setupVars *bundle = *state; + if (bundle->stride != INTERLEAVED) { + fail_msg("Invalid stride during test"); + } else if (zfp_stream_compression_mode(bundle->stream) != zfp_mode_fixed_rate) { + fail_msg("Invalid zfp mode during test"); + } + + runCompressDecompressNoopTest(state, zfp_mode_fixed_rate); +} + +#if DIMS == 4 +static void +_catFunc3(given_Cuda_, DIM_INT_STR, Array_when_ZfpCompressDecompress_expect_BitstreamUntouchedAndReturnsZero)(void **state) +{ + runCompressDecompressNoopTest(state, zfp_mode_fixed_rate); +} +#endif + +/* setup functions */ + +static int +setupCudaConfig(void **state, stride_config stride) +{ + int result = initZfpStreamAndField(state, stride); + + struct setupVars *bundle = *state; + assert_int_equal(zfp_stream_set_execution(bundle->stream, zfp_exec_cuda), 1); + + return result; +} + +static int +setupPermuted(void **state) +{ + return setupCudaConfig(state, PERMUTED); +} + +static int +setupInterleaved(void **state) +{ + return setupCudaConfig(state, INTERLEAVED); +} + +static int +setupReversed(void **state) +{ + return setupCudaConfig(state, REVERSED); +} + +static int +setupDefaultStride(void **state) +{ + return setupCudaConfig(state, AS_IS); +} + +#endif diff --git a/tests/src/endtoend/ompExecBase.c b/tests/src/endtoend/ompExecBase.c new file mode 100644 index 00000000..98628663 --- /dev/null +++ b/tests/src/endtoend/ompExecBase.c @@ -0,0 +1,235 @@ +#ifdef _OPENMP + +#include <omp.h> +#include <stdio.h> + +#define PREPEND_OPENMP(x) OpenMP_ ## x +#define DESCRIPTOR_INTERMEDIATE(x) PREPEND_OPENMP(x) +#define DESCRIPTOR DESCRIPTOR_INTERMEDIATE(DIM_INT_STR) + +#define ZFP_TEST_OMP +#include "zfpEndtoendBase.c" + +static size_t +computeTotalBlocks(zfp_field* field) +{ + size_t bx = 1; + size_t by = 1; + size_t bz = 1; + size_t bw = 1; + switch (zfp_field_dimensionality(field)) { + case 4: + bw = (field->nw + 3) / 4; + case 3: + bz = 
(field->nz + 3) / 4; + case 2: + by = (field->ny + 3) / 4; + case 1: + bx = (field->nx + 3) / 4; + return bx * by * bz * bw; + } + + return 0; +} + +/* returns actual chunk size (in blocks), not the parameter stored (zero implies failure) */ +static uint +setChunkSize(void **state, uint threadCount, int param) +{ + struct setupVars *bundle = *state; + zfp_stream* stream = bundle->stream; + + uint chunk_size = 0; + switch (param) { + case 2: + // largest chunk size: total num blocks + chunk_size = (uint)computeTotalBlocks(bundle->field); + break; + + case 1: + // smallest chunk size: 1 block + chunk_size = 1u; + break; + + case 0: + // default chunk size (0 implies 1 chunk per thread) + chunk_size = (uint)((computeTotalBlocks(bundle->field) + threadCount - 1) / threadCount); + break; + + default: + printf("Unsupported chunkParam\n"); + return 0; + } + + if (chunk_size == 0) { + printf("Chunk size was computed to be 0 blocks\n"); + return 0; + } else if (zfp_stream_set_omp_chunk_size(stream, chunk_size) == 0) { + printf("zfp_stream_set_omp_chunk_size(stream, %u) failed (returned 0)\n", chunk_size); + return 0; + } + + return chunk_size; +} + +static uint +setThreadCount(struct setupVars *bundle, int param) +{ + zfp_stream* stream = bundle->stream; + + uint threadParam = (uint)param; + uint actualThreadCount = threadParam ? 
threadParam : omp_get_max_threads(); + + if (zfp_stream_set_omp_threads(stream, threadParam) == 0) { + return 0; + } else { + return actualThreadCount; + } +} + +// OpenMP endtoend entry functions +// pass doDecompress=0 because decompression not yet supported +// loop across 3 compression parameters + +// returns 0 on success, 1 on test failure +static int +runCompressAcrossThreadsChunks(void **state, zfp_mode mode) +{ + struct setupVars *bundle = *state; + + int failures = 0; + int threadParam, chunkParam; + // run across 3 thread counts + for (threadParam = 0; threadParam < 3; threadParam++) { + uint threadCount = setThreadCount(bundle, threadParam); + if (threadCount == 0) { + printf("Threadcount was 0\n"); + failures += 3; + continue; + } else { + printf("\t\tThread count: %u\n", threadCount); + } + + for (chunkParam = 0; chunkParam < 3; chunkParam++) { + uint chunkSize = setChunkSize(state, threadCount, chunkParam); + if (chunkSize == 0) { + printf("ERROR: Computed chunk size was 0 blocks\n"); + failures++; + continue; + } else { + printf("\t\t\tChunk size: %u blocks\n", chunkSize); + } + + int numCompressParams = (mode == zfp_mode_reversible) ? 
1 : 3; + failures += runCompressDecompressAcrossParamsGivenMode(state, 0, mode, numCompressParams); + } + } + + if (failures > 0) { + fail_msg("Overall compress/decompress test failure\n"); + } + + return failures > 0; +} + +static void +_catFunc3(given_, DESCRIPTOR, Array_when_ZfpCompressFixedPrecision_expect_BitstreamChecksumsMatch)(void **state) +{ + runCompressAcrossThreadsChunks(state, zfp_mode_fixed_precision); +} + +static void +_catFunc3(given_, DESCRIPTOR, Array_when_ZfpCompressFixedRate_expect_BitstreamChecksumsMatch)(void **state) +{ + runCompressAcrossThreadsChunks(state, zfp_mode_fixed_rate); +} + +#ifdef FL_PT_DATA +static void +_catFunc3(given_, DESCRIPTOR, Array_when_ZfpCompressFixedAccuracy_expect_BitstreamChecksumsMatch)(void **state) +{ + runCompressAcrossThreadsChunks(state, zfp_mode_fixed_accuracy); +} +#endif + +static void +_catFunc3(given_, DESCRIPTOR, Array_when_ZfpCompressReversible_expect_BitstreamChecksumsMatch)(void **state) +{ + runCompressAcrossThreadsChunks(state, zfp_mode_reversible); +} + +static void +_catFunc3(given_, DESCRIPTOR, ReversedArray_when_ZfpCompressFixedPrecision_expect_BitstreamChecksumsMatch)(void **state) +{ + struct setupVars *bundle = *state; + if (bundle->stride != REVERSED) { + fail_msg("Invalid stride during test"); + } + + runCompressAcrossThreadsChunks(state, zfp_mode_fixed_precision); +} + +static void +_catFunc3(given_, DESCRIPTOR, InterleavedArray_when_ZfpCompressFixedPrecision_expect_BitstreamChecksumsMatch)(void **state) +{ + struct setupVars *bundle = *state; + if (bundle->stride != INTERLEAVED) { + fail_msg("Invalid stride during test"); + } + + runCompressAcrossThreadsChunks(state, zfp_mode_fixed_precision); +} + +static void +_catFunc3(given_, DESCRIPTOR, PermutedArray_when_ZfpCompressFixedPrecision_expect_BitstreamChecksumsMatch)(void **state) +{ + struct setupVars *bundle = *state; + if (bundle->stride != PERMUTED) { + fail_msg("Invalid stride during test"); + } + + 
runCompressAcrossThreadsChunks(state, zfp_mode_fixed_precision); +} + + +/* setup functions (pre-test) */ + +static int +setupOmpConfig(void **state, stride_config stride) +{ + int result = initZfpStreamAndField(state, stride); + + struct setupVars *bundle = *state; + assert_int_equal(zfp_stream_set_execution(bundle->stream, zfp_exec_omp), 1); + + return result; +} + +/* entry functions */ + +static int +setupPermuted(void **state) +{ + return setupOmpConfig(state, PERMUTED); +} + +static int +setupInterleaved(void **state) +{ + return setupOmpConfig(state, INTERLEAVED); +} + +static int +setupReversed(void **state) +{ + return setupOmpConfig(state, REVERSED); +} + +static int +setupDefaultStride(void **state) +{ + return setupOmpConfig(state, AS_IS); +} + +// end #ifdef _OPENMP +#endif diff --git a/tests/src/endtoend/serialExecBase.c b/tests/src/endtoend/serialExecBase.c new file mode 100644 index 00000000..067957b1 --- /dev/null +++ b/tests/src/endtoend/serialExecBase.c @@ -0,0 +1,299 @@ +#define DESCRIPTOR DIM_INT_STR +#define ZFP_TEST_SERIAL +#include "zfpEndtoendBase.c" + +// entry functions +static void +_catFunc3(given_, DESCRIPTOR, Array_when_ZfpCompressDecompressFixedPrecision_expect_BitstreamAndArrayChecksumsMatch)(void **state) +{ + runCompressDecompressTests(state, zfp_mode_fixed_precision, 3); +} + +static void +_catFunc3(given_, DESCRIPTOR, Array_when_ZfpCompressDecompressFixedRate_expect_BitstreamAndArrayChecksumsMatch)(void **state) +{ + runCompressDecompressTests(state, zfp_mode_fixed_rate, 3); +} + +#ifdef FL_PT_DATA +static void +_catFunc3(given_, DESCRIPTOR, Array_when_ZfpCompressDecompressFixedAccuracy_expect_BitstreamAndArrayChecksumsMatch)(void **state) +{ + runCompressDecompressTests(state, zfp_mode_fixed_accuracy, 3); +} +#endif + +static void +_catFunc3(given_, DESCRIPTOR, Array_when_ZfpCompressDecompressReversible_expect_BitstreamAndArrayChecksumsMatch)(void **state) +{ + struct setupVars *bundle = *state; + if (setupCompressParam(bundle, 
zfp_mode_reversible, 0) == 1) { + fail_msg("ERROR setting zfp mode"); + } + + runCompressDecompressReversible(bundle, 1); +} + +static void +_catFunc3(given_, DESCRIPTOR, ReversedArray_when_ZfpCompressDecompressFixedPrecision_expect_BitstreamAndArrayChecksumsMatch)(void **state) +{ + struct setupVars *bundle = *state; + if (bundle->stride != REVERSED) { + fail_msg("Invalid stride during test"); + } + + runCompressDecompressTests(state, zfp_mode_fixed_precision, 1); +} + +static void +_catFunc3(given_, DESCRIPTOR, InterleavedArray_when_ZfpCompressDecompressFixedPrecision_expect_BitstreamAndArrayChecksumsMatch)(void **state) +{ + struct setupVars *bundle = *state; + if (bundle->stride != INTERLEAVED) { + fail_msg("Invalid stride during test"); + } + + runCompressDecompressTests(state, zfp_mode_fixed_precision, 1); +} + +static void +_catFunc3(given_, DESCRIPTOR, PermutedArray_when_ZfpCompressDecompressFixedPrecision_expect_BitstreamAndArrayChecksumsMatch)(void **state) +{ + struct setupVars *bundle = *state; + if (bundle->stride != PERMUTED) { + fail_msg("Invalid stride during test"); + } + + runCompressDecompressTests(state, zfp_mode_fixed_precision, 1); +} + +static void +_catFunc3(given_, DESCRIPTOR, ZfpStream_when_SetRateWithWriteRandomAccess_expect_RateRoundedUpProperly)(void **state) +{ + zfp_stream* zfp = zfp_stream_open(NULL); + + // align currently requires blocks to start at the beginning of a word + // rate will be rounded up such that a block fills the rest of the word + // (would be wasted space otherwise, padded with zeros) + double rateWithoutAlign = zfp_stream_set_rate(zfp, ZFP_RATE_PARAM_BITS, ZFP_TYPE, DIMS, zfp_false); + double rateWithAlign = zfp_stream_set_rate(zfp, ZFP_RATE_PARAM_BITS, ZFP_TYPE, DIMS, zfp_true); + if (!(rateWithAlign >= rateWithoutAlign)) { + fail_msg("rateWithAlign (%lf) >= rateWithoutAlign (%lf) failed\n", rateWithAlign, rateWithoutAlign); + } + + uint bitsPerBlock = (uint)floor(rateWithAlign * intPow(4, DIMS) + 0.5); + 
assert_int_equal(0, bitsPerBlock % stream_word_bits); + + zfp_stream_close(zfp); +} + +// returns 0 on success, 1 on test failure +static int +isCompressedBitrateComparableToChosenRate(struct setupVars* bundle) +{ + zfp_field* field = bundle->field; + zfp_stream* stream = bundle->stream; + + // integer arithmetic allows exact comparison + size_t compressedBytes = zfp_compress(stream, field); + if (compressedBytes == 0) { + printf("Compression failed\n"); + return 1; + } + size_t compressedBits = compressedBytes * 8; + + // compute padded lengths (multiples of block-side-len, 4) + size_t paddedNx = (bundle->randomGenArrSideLen[0] + 3) & ~0x3; + size_t paddedNy = (bundle->randomGenArrSideLen[1] + 3) & ~0x3; + size_t paddedNz = (bundle->randomGenArrSideLen[2] + 3) & ~0x3; + size_t paddedNw = (bundle->randomGenArrSideLen[3] + 3) & ~0x3; + + size_t paddedArrayLen = 1; + switch (DIMS) { + case 4: + paddedArrayLen *= paddedNw; + case 3: + paddedArrayLen *= paddedNz; + case 2: + paddedArrayLen *= paddedNy; + case 1: + paddedArrayLen *= paddedNx; + } + + // expect bitrate to scale wrt padded array length + size_t expectedTotalBits = bundle->rateParam * paddedArrayLen; + // account for zfp_compress() ending with stream_flush() + expectedTotalBits = (expectedTotalBits + stream_word_bits - 1) & ~(stream_word_bits - 1); + + if(compressedBits != expectedTotalBits) { + printf("compressedBits (%lu) == expectedTotalBits (%lu) failed, given fixed-rate %zu\n", (unsigned long)compressedBits, (unsigned long)expectedTotalBits, bundle->rateParam); + return 1; + } else { + return 0; + } +} + +static void +_catFunc3(given_, DESCRIPTOR, Array_when_ZfpCompressFixedRate_expect_CompressedBitrateComparableToChosenRate)(void **state) +{ + struct setupVars *bundle = *state; + + int failures = 0; + int compressParam; + for (compressParam = 0; compressParam < 3; compressParam++) { + if (setupCompressParam(bundle, zfp_mode_fixed_rate, compressParam) == 1) { + failures++; + continue; + } + + failures 
+= isCompressedBitrateComparableToChosenRate(bundle); + + zfp_stream_rewind(bundle->stream); + memset(bundle->buffer, 0, bundle->bufsizeBytes); + } + + if (failures > 0) { + fail_msg("Compressed bitrate test failure\n"); + } +} + +#ifdef FL_PT_DATA +// returns 0 on all tests pass, 1 on test failure +static int +isCompressedValuesWithinAccuracy(struct setupVars* bundle) +{ + zfp_field* field = bundle->field; + zfp_stream* stream = bundle->stream; + + size_t compressedBytes = zfp_compress(stream, field); + if (compressedBytes == 0) { + printf("Compression failed\n"); + return 1; + } + + // zfp_decompress() will write to bundle->decompressedArr + // assert bitstream ends in same location + zfp_stream_rewind(stream); + size_t result = zfp_decompress(stream, bundle->decompressField); + if (result != compressedBytes) { + printf("Decompression advanced the bitstream to a different position than after compression: %zu != %zu\n", result, compressedBytes); + return 1; + } + + ptrdiff_t strides[4]; + zfp_field_stride(field, strides); + + // apply strides + ptrdiff_t offset = 0; + size_t* n = bundle->randomGenArrSideLen; + float maxDiffF = 0; + double maxDiffD = 0; + + size_t i, j, k, l; + for (l = (n[3] ? n[3] : 1); l--; offset += strides[3] - n[2]*strides[2]) { + for (k = (n[2] ? n[2] : 1); k--; offset += strides[2] - n[1]*strides[1]) { + for (j = (n[1] ? n[1] : 1); j--; offset += strides[1] - n[0]*strides[0]) { + for (i = (n[0] ? 
n[0] : 1); i--; offset += strides[0]) { + float absDiffF; + double absDiffD; + + switch(ZFP_TYPE) { + case zfp_type_float: + absDiffF = fabsf((float)bundle->decompressedArr[offset] - (float)bundle->compressedArr[offset]); + + if(absDiffF > bundle->accParam) { + printf("Compressed error %f was greater than supplied tolerance %lf\n", absDiffF, bundle->accParam); + return 1; + } + + if (absDiffF > maxDiffF) { + maxDiffF = absDiffF; + } + + break; + + case zfp_type_double: + absDiffD = fabs(bundle->decompressedArr[offset] - bundle->compressedArr[offset]); + + if(absDiffD > bundle->accParam) { + printf("Compressed error %lf was greater than supplied tolerance %lf\n", absDiffD, bundle->accParam); + return 1; + } + + + if (absDiffD > maxDiffD) { + maxDiffD = absDiffD; + } + + break; + + default: + printf("Test requires zfp_type float or double\n"); + return 1; + } + } + } + } + } + + if (ZFP_TYPE == zfp_type_float) { + printf("\t\t\t\tMax abs error: %f\n", maxDiffF); + } else { + printf("\t\t\t\tMax abs error: %lf\n", maxDiffD); + } + + return 0; +} + +static void +_catFunc3(given_, DESCRIPTOR, Array_when_ZfpCompressFixedAccuracy_expect_CompressedValuesWithinAccuracy)(void **state) +{ + struct setupVars *bundle = *state; + + int failures = 0; + int compressParam; + for (compressParam = 0; compressParam < 3; compressParam++) { + if (setupCompressParam(bundle, zfp_mode_fixed_accuracy, compressParam) == 1) { + failures++; + continue; + } + + failures += isCompressedValuesWithinAccuracy(bundle); + + zfp_stream_rewind(bundle->stream); + memset(bundle->buffer, 0, bundle->bufsizeBytes); + } + + if (failures > 0) { + fail_msg("Compressed value accuracy test failure\n"); + } + +} + +// #endif FL_PT_DATA +#endif + +// setup functions +static int +setupPermuted(void **state) +{ + return initZfpStreamAndField(state, PERMUTED); +} + +static int +setupInterleaved(void **state) +{ + return initZfpStreamAndField(state, INTERLEAVED); +} + +static int +setupReversed(void **state) +{ + 
return initZfpStreamAndField(state, REVERSED); +} + +static int +setupDefaultStride(void **state) +{ + return initZfpStreamAndField(state, AS_IS); +} diff --git a/tests/src/endtoend/testZfpCuda1dDouble.c b/tests/src/endtoend/testZfpCuda1dDouble.c new file mode 100644 index 00000000..d6d77293 --- /dev/null +++ b/tests/src/endtoend/testZfpCuda1dDouble.c @@ -0,0 +1,13 @@ +#include "src/encode1d.c" + +#include "constants/1dDouble.h" +#include "cudaExecBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/cuda.c" + }; + + return cmocka_run_group_tests(tests, setupRandomData, teardownRandomData); +} diff --git a/tests/src/endtoend/testZfpCuda1dFloat.c b/tests/src/endtoend/testZfpCuda1dFloat.c new file mode 100644 index 00000000..1bd9e251 --- /dev/null +++ b/tests/src/endtoend/testZfpCuda1dFloat.c @@ -0,0 +1,13 @@ +#include "src/encode1f.c" + +#include "constants/1dFloat.h" +#include "cudaExecBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/cuda.c" + }; + + return cmocka_run_group_tests(tests, setupRandomData, teardownRandomData); +} diff --git a/tests/src/endtoend/testZfpCuda1dInt32.c b/tests/src/endtoend/testZfpCuda1dInt32.c new file mode 100644 index 00000000..bbc59716 --- /dev/null +++ b/tests/src/endtoend/testZfpCuda1dInt32.c @@ -0,0 +1,13 @@ +#include "src/encode1i.c" + +#include "constants/1dInt32.h" +#include "cudaExecBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/cuda.c" + }; + + return cmocka_run_group_tests(tests, setupRandomData, teardownRandomData); +} diff --git a/tests/src/endtoend/testZfpCuda1dInt64.c b/tests/src/endtoend/testZfpCuda1dInt64.c new file mode 100644 index 00000000..ed0a3f5c --- /dev/null +++ b/tests/src/endtoend/testZfpCuda1dInt64.c @@ -0,0 +1,13 @@ +#include "src/encode1l.c" + +#include "constants/1dInt64.h" +#include "cudaExecBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/cuda.c" + }; + + return 
cmocka_run_group_tests(tests, setupRandomData, teardownRandomData); +} diff --git a/tests/src/endtoend/testZfpCuda2dDouble.c b/tests/src/endtoend/testZfpCuda2dDouble.c new file mode 100644 index 00000000..75c36bb4 --- /dev/null +++ b/tests/src/endtoend/testZfpCuda2dDouble.c @@ -0,0 +1,13 @@ +#include "src/encode2d.c" + +#include "constants/2dDouble.h" +#include "cudaExecBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/cuda.c" + }; + + return cmocka_run_group_tests(tests, setupRandomData, teardownRandomData); +} diff --git a/tests/src/endtoend/testZfpCuda2dFloat.c b/tests/src/endtoend/testZfpCuda2dFloat.c new file mode 100644 index 00000000..a12533ae --- /dev/null +++ b/tests/src/endtoend/testZfpCuda2dFloat.c @@ -0,0 +1,13 @@ +#include "src/encode2f.c" + +#include "constants/2dFloat.h" +#include "cudaExecBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/cuda.c" + }; + + return cmocka_run_group_tests(tests, setupRandomData, teardownRandomData); +} diff --git a/tests/src/endtoend/testZfpCuda2dInt32.c b/tests/src/endtoend/testZfpCuda2dInt32.c new file mode 100644 index 00000000..183c42ad --- /dev/null +++ b/tests/src/endtoend/testZfpCuda2dInt32.c @@ -0,0 +1,13 @@ +#include "src/encode2i.c" + +#include "constants/2dInt32.h" +#include "cudaExecBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/cuda.c" + }; + + return cmocka_run_group_tests(tests, setupRandomData, teardownRandomData); +} diff --git a/tests/src/endtoend/testZfpCuda2dInt64.c b/tests/src/endtoend/testZfpCuda2dInt64.c new file mode 100644 index 00000000..daf7f0f5 --- /dev/null +++ b/tests/src/endtoend/testZfpCuda2dInt64.c @@ -0,0 +1,13 @@ +#include "src/encode2l.c" + +#include "constants/2dInt64.h" +#include "cudaExecBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/cuda.c" + }; + + return cmocka_run_group_tests(tests, setupRandomData, teardownRandomData); +} diff --git 
a/tests/src/endtoend/testZfpCuda3dDouble.c b/tests/src/endtoend/testZfpCuda3dDouble.c new file mode 100644 index 00000000..630feba7 --- /dev/null +++ b/tests/src/endtoend/testZfpCuda3dDouble.c @@ -0,0 +1,13 @@ +#include "src/encode3d.c" + +#include "constants/3dDouble.h" +#include "cudaExecBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/cuda.c" + }; + + return cmocka_run_group_tests(tests, setupRandomData, teardownRandomData); +} diff --git a/tests/src/endtoend/testZfpCuda3dFloat.c b/tests/src/endtoend/testZfpCuda3dFloat.c new file mode 100644 index 00000000..6c727685 --- /dev/null +++ b/tests/src/endtoend/testZfpCuda3dFloat.c @@ -0,0 +1,13 @@ +#include "src/encode3f.c" + +#include "constants/3dFloat.h" +#include "cudaExecBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/cuda.c" + }; + + return cmocka_run_group_tests(tests, setupRandomData, teardownRandomData); +} diff --git a/tests/src/endtoend/testZfpCuda3dInt32.c b/tests/src/endtoend/testZfpCuda3dInt32.c new file mode 100644 index 00000000..dd01c900 --- /dev/null +++ b/tests/src/endtoend/testZfpCuda3dInt32.c @@ -0,0 +1,13 @@ +#include "src/encode3i.c" + +#include "constants/3dInt32.h" +#include "cudaExecBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/cuda.c" + }; + + return cmocka_run_group_tests(tests, setupRandomData, teardownRandomData); +} diff --git a/tests/src/endtoend/testZfpCuda3dInt64.c b/tests/src/endtoend/testZfpCuda3dInt64.c new file mode 100644 index 00000000..ee8fbdc0 --- /dev/null +++ b/tests/src/endtoend/testZfpCuda3dInt64.c @@ -0,0 +1,13 @@ +#include "src/encode3l.c" + +#include "constants/3dInt64.h" +#include "cudaExecBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/cuda.c" + }; + + return cmocka_run_group_tests(tests, setupRandomData, teardownRandomData); +} diff --git a/tests/src/endtoend/testZfpCuda4dDouble.c 
b/tests/src/endtoend/testZfpCuda4dDouble.c new file mode 100644 index 00000000..afd05944 --- /dev/null +++ b/tests/src/endtoend/testZfpCuda4dDouble.c @@ -0,0 +1,13 @@ +#include "src/encode4d.c" + +#include "constants/4dDouble.h" +#include "cudaExecBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/cuda.c" + }; + + return cmocka_run_group_tests(tests, setupRandomData, teardownRandomData); +} diff --git a/tests/src/endtoend/testZfpCuda4dFloat.c b/tests/src/endtoend/testZfpCuda4dFloat.c new file mode 100644 index 00000000..d0ce3ba8 --- /dev/null +++ b/tests/src/endtoend/testZfpCuda4dFloat.c @@ -0,0 +1,13 @@ +#include "src/encode4f.c" + +#include "constants/4dFloat.h" +#include "cudaExecBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/cuda.c" + }; + + return cmocka_run_group_tests(tests, setupRandomData, teardownRandomData); +} diff --git a/tests/src/endtoend/testZfpCuda4dInt32.c b/tests/src/endtoend/testZfpCuda4dInt32.c new file mode 100644 index 00000000..bad538fb --- /dev/null +++ b/tests/src/endtoend/testZfpCuda4dInt32.c @@ -0,0 +1,13 @@ +#include "src/encode4i.c" + +#include "constants/4dInt32.h" +#include "cudaExecBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/cuda.c" + }; + + return cmocka_run_group_tests(tests, setupRandomData, teardownRandomData); +} diff --git a/tests/src/endtoend/testZfpCuda4dInt64.c b/tests/src/endtoend/testZfpCuda4dInt64.c new file mode 100644 index 00000000..eb7c24d2 --- /dev/null +++ b/tests/src/endtoend/testZfpCuda4dInt64.c @@ -0,0 +1,13 @@ +#include "src/encode4l.c" + +#include "constants/4dInt64.h" +#include "cudaExecBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/cuda.c" + }; + + return cmocka_run_group_tests(tests, setupRandomData, teardownRandomData); +} diff --git a/tests/src/endtoend/testZfpOmp1dDouble.c b/tests/src/endtoend/testZfpOmp1dDouble.c new file mode 100644 index 
00000000..266d355b --- /dev/null +++ b/tests/src/endtoend/testZfpOmp1dDouble.c @@ -0,0 +1,13 @@ +#include "src/encode1d.c" + +#include "constants/1dDouble.h" +#include "ompExecBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/omp.c" + }; + + return cmocka_run_group_tests(tests, setupRandomData, teardownRandomData); +} diff --git a/tests/src/endtoend/testZfpOmp1dFloat.c b/tests/src/endtoend/testZfpOmp1dFloat.c new file mode 100644 index 00000000..5cc1dcf9 --- /dev/null +++ b/tests/src/endtoend/testZfpOmp1dFloat.c @@ -0,0 +1,13 @@ +#include "src/encode1f.c" + +#include "constants/1dFloat.h" +#include "ompExecBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/omp.c" + }; + + return cmocka_run_group_tests(tests, setupRandomData, teardownRandomData); +} diff --git a/tests/src/endtoend/testZfpOmp1dInt32.c b/tests/src/endtoend/testZfpOmp1dInt32.c new file mode 100644 index 00000000..45e1181f --- /dev/null +++ b/tests/src/endtoend/testZfpOmp1dInt32.c @@ -0,0 +1,13 @@ +#include "src/encode1i.c" + +#include "constants/1dInt32.h" +#include "ompExecBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/omp.c" + }; + + return cmocka_run_group_tests(tests, setupRandomData, teardownRandomData); +} diff --git a/tests/src/endtoend/testZfpOmp1dInt64.c b/tests/src/endtoend/testZfpOmp1dInt64.c new file mode 100644 index 00000000..c6d5bf5f --- /dev/null +++ b/tests/src/endtoend/testZfpOmp1dInt64.c @@ -0,0 +1,13 @@ +#include "src/encode1l.c" + +#include "constants/1dInt64.h" +#include "ompExecBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/omp.c" + }; + + return cmocka_run_group_tests(tests, setupRandomData, teardownRandomData); +} diff --git a/tests/src/endtoend/testZfpOmp2dDouble.c b/tests/src/endtoend/testZfpOmp2dDouble.c new file mode 100644 index 00000000..9c57161a --- /dev/null +++ b/tests/src/endtoend/testZfpOmp2dDouble.c @@ -0,0 +1,13 @@ 
+#include "src/encode2d.c" + +#include "constants/2dDouble.h" +#include "ompExecBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/omp.c" + }; + + return cmocka_run_group_tests(tests, setupRandomData, teardownRandomData); +} diff --git a/tests/src/endtoend/testZfpOmp2dFloat.c b/tests/src/endtoend/testZfpOmp2dFloat.c new file mode 100644 index 00000000..2f3fa175 --- /dev/null +++ b/tests/src/endtoend/testZfpOmp2dFloat.c @@ -0,0 +1,13 @@ +#include "src/encode2f.c" + +#include "constants/2dFloat.h" +#include "ompExecBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/omp.c" + }; + + return cmocka_run_group_tests(tests, setupRandomData, teardownRandomData); +} diff --git a/tests/src/endtoend/testZfpOmp2dInt32.c b/tests/src/endtoend/testZfpOmp2dInt32.c new file mode 100644 index 00000000..e9358235 --- /dev/null +++ b/tests/src/endtoend/testZfpOmp2dInt32.c @@ -0,0 +1,13 @@ +#include "src/encode2i.c" + +#include "constants/2dInt32.h" +#include "ompExecBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/omp.c" + }; + + return cmocka_run_group_tests(tests, setupRandomData, teardownRandomData); +} diff --git a/tests/src/endtoend/testZfpOmp2dInt64.c b/tests/src/endtoend/testZfpOmp2dInt64.c new file mode 100644 index 00000000..05ee2e68 --- /dev/null +++ b/tests/src/endtoend/testZfpOmp2dInt64.c @@ -0,0 +1,13 @@ +#include "src/encode2l.c" + +#include "constants/2dInt64.h" +#include "ompExecBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/omp.c" + }; + + return cmocka_run_group_tests(tests, setupRandomData, teardownRandomData); +} diff --git a/tests/src/endtoend/testZfpOmp3dDouble.c b/tests/src/endtoend/testZfpOmp3dDouble.c new file mode 100644 index 00000000..c6621ee7 --- /dev/null +++ b/tests/src/endtoend/testZfpOmp3dDouble.c @@ -0,0 +1,13 @@ +#include "src/encode3d.c" + +#include "constants/3dDouble.h" +#include "ompExecBase.c" + +int main() 
+{ + const struct CMUnitTest tests[] = { + #include "testcases/omp.c" + }; + + return cmocka_run_group_tests(tests, setupRandomData, teardownRandomData); +} diff --git a/tests/src/endtoend/testZfpOmp3dFloat.c b/tests/src/endtoend/testZfpOmp3dFloat.c new file mode 100644 index 00000000..ff5f9a34 --- /dev/null +++ b/tests/src/endtoend/testZfpOmp3dFloat.c @@ -0,0 +1,13 @@ +#include "src/encode3f.c" + +#include "constants/3dFloat.h" +#include "ompExecBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/omp.c" + }; + + return cmocka_run_group_tests(tests, setupRandomData, teardownRandomData); +} diff --git a/tests/src/endtoend/testZfpOmp3dInt32.c b/tests/src/endtoend/testZfpOmp3dInt32.c new file mode 100644 index 00000000..190a6638 --- /dev/null +++ b/tests/src/endtoend/testZfpOmp3dInt32.c @@ -0,0 +1,13 @@ +#include "src/encode3i.c" + +#include "constants/3dInt32.h" +#include "ompExecBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/omp.c" + }; + + return cmocka_run_group_tests(tests, setupRandomData, teardownRandomData); +} diff --git a/tests/src/endtoend/testZfpOmp3dInt64.c b/tests/src/endtoend/testZfpOmp3dInt64.c new file mode 100644 index 00000000..cb5ca925 --- /dev/null +++ b/tests/src/endtoend/testZfpOmp3dInt64.c @@ -0,0 +1,13 @@ +#include "src/encode3l.c" + +#include "constants/3dInt64.h" +#include "ompExecBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/omp.c" + }; + + return cmocka_run_group_tests(tests, setupRandomData, teardownRandomData); +} diff --git a/tests/src/endtoend/testZfpOmp4dDouble.c b/tests/src/endtoend/testZfpOmp4dDouble.c new file mode 100644 index 00000000..83b69e03 --- /dev/null +++ b/tests/src/endtoend/testZfpOmp4dDouble.c @@ -0,0 +1,13 @@ +#include "src/encode4d.c" + +#include "constants/4dDouble.h" +#include "ompExecBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/omp.c" + }; + + return 
cmocka_run_group_tests(tests, setupRandomData, teardownRandomData); +} diff --git a/tests/src/endtoend/testZfpOmp4dFloat.c b/tests/src/endtoend/testZfpOmp4dFloat.c new file mode 100644 index 00000000..ccff0c4e --- /dev/null +++ b/tests/src/endtoend/testZfpOmp4dFloat.c @@ -0,0 +1,13 @@ +#include "src/encode4f.c" + +#include "constants/4dFloat.h" +#include "ompExecBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/omp.c" + }; + + return cmocka_run_group_tests(tests, setupRandomData, teardownRandomData); +} diff --git a/tests/src/endtoend/testZfpOmp4dInt32.c b/tests/src/endtoend/testZfpOmp4dInt32.c new file mode 100644 index 00000000..b4478b81 --- /dev/null +++ b/tests/src/endtoend/testZfpOmp4dInt32.c @@ -0,0 +1,13 @@ +#include "src/encode4i.c" + +#include "constants/4dInt32.h" +#include "ompExecBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/omp.c" + }; + + return cmocka_run_group_tests(tests, setupRandomData, teardownRandomData); +} diff --git a/tests/src/endtoend/testZfpOmp4dInt64.c b/tests/src/endtoend/testZfpOmp4dInt64.c new file mode 100644 index 00000000..1e1765a9 --- /dev/null +++ b/tests/src/endtoend/testZfpOmp4dInt64.c @@ -0,0 +1,13 @@ +#include "src/encode4l.c" + +#include "constants/4dInt64.h" +#include "ompExecBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/omp.c" + }; + + return cmocka_run_group_tests(tests, setupRandomData, teardownRandomData); +} diff --git a/tests/src/endtoend/testZfpSerial1dDouble.c b/tests/src/endtoend/testZfpSerial1dDouble.c new file mode 100644 index 00000000..35b98aef --- /dev/null +++ b/tests/src/endtoend/testZfpSerial1dDouble.c @@ -0,0 +1,13 @@ +#include "src/encode1d.c" + +#include "constants/1dDouble.h" +#include "serialExecBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/serial.c" + }; + + return cmocka_run_group_tests(tests, setupRandomData, teardownRandomData); +} diff --git 
a/tests/src/endtoend/testZfpSerial1dFloat.c b/tests/src/endtoend/testZfpSerial1dFloat.c new file mode 100644 index 00000000..8acf5533 --- /dev/null +++ b/tests/src/endtoend/testZfpSerial1dFloat.c @@ -0,0 +1,13 @@ +#include "src/encode1f.c" + +#include "constants/1dFloat.h" +#include "serialExecBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/serial.c" + }; + + return cmocka_run_group_tests(tests, setupRandomData, teardownRandomData); +} diff --git a/tests/src/endtoend/testZfpSerial1dInt32.c b/tests/src/endtoend/testZfpSerial1dInt32.c new file mode 100644 index 00000000..c16904e0 --- /dev/null +++ b/tests/src/endtoend/testZfpSerial1dInt32.c @@ -0,0 +1,13 @@ +#include "src/encode1i.c" + +#include "constants/1dInt32.h" +#include "serialExecBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/serial.c" + }; + + return cmocka_run_group_tests(tests, setupRandomData, teardownRandomData); +} diff --git a/tests/src/endtoend/testZfpSerial1dInt64.c b/tests/src/endtoend/testZfpSerial1dInt64.c new file mode 100644 index 00000000..4e815fb0 --- /dev/null +++ b/tests/src/endtoend/testZfpSerial1dInt64.c @@ -0,0 +1,13 @@ +#include "src/encode1l.c" + +#include "constants/1dInt64.h" +#include "serialExecBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/serial.c" + }; + + return cmocka_run_group_tests(tests, setupRandomData, teardownRandomData); +} diff --git a/tests/src/endtoend/testZfpSerial2dDouble.c b/tests/src/endtoend/testZfpSerial2dDouble.c new file mode 100644 index 00000000..b293d905 --- /dev/null +++ b/tests/src/endtoend/testZfpSerial2dDouble.c @@ -0,0 +1,13 @@ +#include "src/encode2d.c" + +#include "constants/2dDouble.h" +#include "serialExecBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/serial.c" + }; + + return cmocka_run_group_tests(tests, setupRandomData, teardownRandomData); +} diff --git a/tests/src/endtoend/testZfpSerial2dFloat.c 
b/tests/src/endtoend/testZfpSerial2dFloat.c new file mode 100644 index 00000000..3f770904 --- /dev/null +++ b/tests/src/endtoend/testZfpSerial2dFloat.c @@ -0,0 +1,13 @@ +#include "src/encode2f.c" + +#include "constants/2dFloat.h" +#include "serialExecBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/serial.c" + }; + + return cmocka_run_group_tests(tests, setupRandomData, teardownRandomData); +} diff --git a/tests/src/endtoend/testZfpSerial2dInt32.c b/tests/src/endtoend/testZfpSerial2dInt32.c new file mode 100644 index 00000000..61f6a493 --- /dev/null +++ b/tests/src/endtoend/testZfpSerial2dInt32.c @@ -0,0 +1,13 @@ +#include "src/encode2i.c" + +#include "constants/2dInt32.h" +#include "serialExecBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/serial.c" + }; + + return cmocka_run_group_tests(tests, setupRandomData, teardownRandomData); +} diff --git a/tests/src/endtoend/testZfpSerial2dInt64.c b/tests/src/endtoend/testZfpSerial2dInt64.c new file mode 100644 index 00000000..c07e9b3b --- /dev/null +++ b/tests/src/endtoend/testZfpSerial2dInt64.c @@ -0,0 +1,13 @@ +#include "src/encode2l.c" + +#include "constants/2dInt64.h" +#include "serialExecBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/serial.c" + }; + + return cmocka_run_group_tests(tests, setupRandomData, teardownRandomData); +} diff --git a/tests/src/endtoend/testZfpSerial3dDouble.c b/tests/src/endtoend/testZfpSerial3dDouble.c new file mode 100644 index 00000000..e224ae8a --- /dev/null +++ b/tests/src/endtoend/testZfpSerial3dDouble.c @@ -0,0 +1,13 @@ +#include "src/encode3d.c" + +#include "constants/3dDouble.h" +#include "serialExecBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/serial.c" + }; + + return cmocka_run_group_tests(tests, setupRandomData, teardownRandomData); +} diff --git a/tests/src/endtoend/testZfpSerial3dFloat.c b/tests/src/endtoend/testZfpSerial3dFloat.c 
new file mode 100644 index 00000000..9ae89c67 --- /dev/null +++ b/tests/src/endtoend/testZfpSerial3dFloat.c @@ -0,0 +1,13 @@ +#include "src/encode3f.c" + +#include "constants/3dFloat.h" +#include "serialExecBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/serial.c" + }; + + return cmocka_run_group_tests(tests, setupRandomData, teardownRandomData); +} diff --git a/tests/src/endtoend/testZfpSerial3dInt32.c b/tests/src/endtoend/testZfpSerial3dInt32.c new file mode 100644 index 00000000..9b0e4fc3 --- /dev/null +++ b/tests/src/endtoend/testZfpSerial3dInt32.c @@ -0,0 +1,13 @@ +#include "src/encode3i.c" + +#include "constants/3dInt32.h" +#include "serialExecBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/serial.c" + }; + + return cmocka_run_group_tests(tests, setupRandomData, teardownRandomData); +} diff --git a/tests/src/endtoend/testZfpSerial3dInt64.c b/tests/src/endtoend/testZfpSerial3dInt64.c new file mode 100644 index 00000000..5effe046 --- /dev/null +++ b/tests/src/endtoend/testZfpSerial3dInt64.c @@ -0,0 +1,13 @@ +#include "src/encode3l.c" + +#include "constants/3dInt64.h" +#include "serialExecBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/serial.c" + }; + + return cmocka_run_group_tests(tests, setupRandomData, teardownRandomData); +} diff --git a/tests/src/endtoend/testZfpSerial4dDouble.c b/tests/src/endtoend/testZfpSerial4dDouble.c new file mode 100644 index 00000000..ea515cd4 --- /dev/null +++ b/tests/src/endtoend/testZfpSerial4dDouble.c @@ -0,0 +1,13 @@ +#include "src/encode4d.c" + +#include "constants/4dDouble.h" +#include "serialExecBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/serial.c" + }; + + return cmocka_run_group_tests(tests, setupRandomData, teardownRandomData); +} diff --git a/tests/src/endtoend/testZfpSerial4dFloat.c b/tests/src/endtoend/testZfpSerial4dFloat.c new file mode 100644 index 
00000000..891b02bb --- /dev/null +++ b/tests/src/endtoend/testZfpSerial4dFloat.c @@ -0,0 +1,13 @@ +#include "src/encode4f.c" + +#include "constants/4dFloat.h" +#include "serialExecBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/serial.c" + }; + + return cmocka_run_group_tests(tests, setupRandomData, teardownRandomData); +} diff --git a/tests/src/endtoend/testZfpSerial4dInt32.c b/tests/src/endtoend/testZfpSerial4dInt32.c new file mode 100644 index 00000000..bd9067e2 --- /dev/null +++ b/tests/src/endtoend/testZfpSerial4dInt32.c @@ -0,0 +1,13 @@ +#include "src/encode4i.c" + +#include "constants/4dInt32.h" +#include "serialExecBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/serial.c" + }; + + return cmocka_run_group_tests(tests, setupRandomData, teardownRandomData); +} diff --git a/tests/src/endtoend/testZfpSerial4dInt64.c b/tests/src/endtoend/testZfpSerial4dInt64.c new file mode 100644 index 00000000..a8a49646 --- /dev/null +++ b/tests/src/endtoend/testZfpSerial4dInt64.c @@ -0,0 +1,13 @@ +#include "src/encode4l.c" + +#include "constants/4dInt64.h" +#include "serialExecBase.c" + +int main() +{ + const struct CMUnitTest tests[] = { + #include "testcases/serial.c" + }; + + return cmocka_run_group_tests(tests, setupRandomData, teardownRandomData); +} diff --git a/tests/src/endtoend/testcases/cuda.c b/tests/src/endtoend/testcases/cuda.c new file mode 100644 index 00000000..0af0341f --- /dev/null +++ b/tests/src/endtoend/testcases/cuda.c @@ -0,0 +1,24 @@ +// requires #include "utils/testMacros.h", do outside of main() + +#if DIMS < 4 +_cmocka_unit_test(when_seededRandomSmoothDataGenerated_expect_ChecksumMatches), + +/* strided */ +/* contiguous layout supported */ +_cmocka_unit_test_setup_teardown(_catFunc3(given_Cuda_, DIM_INT_STR, ReversedArray_when_ZfpCompressDecompressFixedRate_expect_BitstreamAndArrayChecksumsMatch), setupReversed, teardown), +#if DIMS >= 2 
+_cmocka_unit_test_setup_teardown(_catFunc3(given_Cuda_, DIM_INT_STR, PermutedArray_when_ZfpCompressDecompressFixedRate_expect_BitstreamAndArrayChecksumsMatch), setupPermuted, teardown), +#endif + +/* non-contiguous unsupported */ +_cmocka_unit_test_setup_teardown(_catFunc3(given_Cuda_, DIM_INT_STR, InterleavedArray_when_ZfpCompressDecompressFixedRate_expect_BitstreamUntouchedAndReturnsZero), setupInterleaved, teardown), + +/* fixed-rate */ +_cmocka_unit_test_setup_teardown(_catFunc3(given_Cuda_, DIM_INT_STR, Array_when_ZfpCompressDecompressFixedRate_expect_BitstreamAndArrayChecksumsMatch), setupDefaultStride, teardown), + +/* non fixed-rate modes unsupported */ +_cmocka_unit_test_setup_teardown(_catFunc3(given_Cuda_, DIM_INT_STR, Array_when_ZfpCompressDecompressNonFixedRate_expect_BitstreamUntouchedAndReturnsZero), setupDefaultStride, teardown), +#else +/* 4d compression unsupported */ +_cmocka_unit_test_setup_teardown(_catFunc3(given_Cuda_, DIM_INT_STR, Array_when_ZfpCompressDecompress_expect_BitstreamUntouchedAndReturnsZero), setupDefaultStride, teardown), +#endif diff --git a/tests/src/endtoend/testcases/omp.c b/tests/src/endtoend/testcases/omp.c new file mode 100644 index 00000000..3578ef52 --- /dev/null +++ b/tests/src/endtoend/testcases/omp.c @@ -0,0 +1,20 @@ +// requires #include "utils/testMacros.h", do outside of main() + +_cmocka_unit_test(when_seededRandomSmoothDataGenerated_expect_ChecksumMatches), + +// OpenMP decompression not yet supported + +/* strided tests */ +_cmocka_unit_test_setup_teardown(_catFunc3(given_OpenMP_, DIM_INT_STR, ReversedArray_when_ZfpCompressFixedPrecision_expect_BitstreamChecksumsMatch), setupReversed, teardown), +_cmocka_unit_test_setup_teardown(_catFunc3(given_OpenMP_, DIM_INT_STR, InterleavedArray_when_ZfpCompressFixedPrecision_expect_BitstreamChecksumsMatch), setupInterleaved, teardown), +#if DIMS >= 2 +_cmocka_unit_test_setup_teardown(_catFunc3(given_OpenMP_, DIM_INT_STR, 
PermutedArray_when_ZfpCompressFixedPrecision_expect_BitstreamChecksumsMatch), setupPermuted, teardown), +#endif + +/* non-strided tests */ +_cmocka_unit_test_setup_teardown(_catFunc3(given_OpenMP_, DIM_INT_STR, Array_when_ZfpCompressFixedPrecision_expect_BitstreamChecksumsMatch), setupDefaultStride, teardown), +_cmocka_unit_test_setup_teardown(_catFunc3(given_OpenMP_, DIM_INT_STR, Array_when_ZfpCompressFixedRate_expect_BitstreamChecksumsMatch), setupDefaultStride, teardown), +#ifdef FL_PT_DATA +_cmocka_unit_test_setup_teardown(_catFunc3(given_OpenMP_, DIM_INT_STR, Array_when_ZfpCompressFixedAccuracy_expect_BitstreamChecksumsMatch), setupDefaultStride, teardown), +#endif +_cmocka_unit_test_setup_teardown(_catFunc3(given_OpenMP_, DIM_INT_STR, Array_when_ZfpCompressReversible_expect_BitstreamChecksumsMatch), setupDefaultStride, teardown), diff --git a/tests/src/endtoend/testcases/serial.c b/tests/src/endtoend/testcases/serial.c new file mode 100644 index 00000000..196a296c --- /dev/null +++ b/tests/src/endtoend/testcases/serial.c @@ -0,0 +1,31 @@ +// requires #include "utils/testMacros.h", do outside of main() + +_cmocka_unit_test(when_seededRandomSmoothDataGenerated_expect_ChecksumMatches), + +#ifndef PRINT_CHECKSUMS + +/* strided tests */ +_cmocka_unit_test_setup_teardown(_catFunc3(given_, DIM_INT_STR, ReversedArray_when_ZfpCompressDecompressFixedPrecision_expect_BitstreamAndArrayChecksumsMatch), setupReversed, teardown), +_cmocka_unit_test_setup_teardown(_catFunc3(given_, DIM_INT_STR, InterleavedArray_when_ZfpCompressDecompressFixedPrecision_expect_BitstreamAndArrayChecksumsMatch), setupInterleaved, teardown), +#if DIMS >= 2 +_cmocka_unit_test_setup_teardown(_catFunc3(given_, DIM_INT_STR, PermutedArray_when_ZfpCompressDecompressFixedPrecision_expect_BitstreamAndArrayChecksumsMatch), setupPermuted, teardown), +#endif + +#endif + +/* fixed-precision */ +_cmocka_unit_test_setup_teardown(_catFunc3(given_, DIM_INT_STR, 
Array_when_ZfpCompressDecompressFixedPrecision_expect_BitstreamAndArrayChecksumsMatch), setupDefaultStride, teardown), + +/* fixed-rate */ +_cmocka_unit_test(_catFunc3(given_, DIM_INT_STR, ZfpStream_when_SetRateWithWriteRandomAccess_expect_RateRoundedUpProperly)), +_cmocka_unit_test_setup_teardown(_catFunc3(given_, DIM_INT_STR, Array_when_ZfpCompressDecompressFixedRate_expect_BitstreamAndArrayChecksumsMatch), setupDefaultStride, teardown), +_cmocka_unit_test_setup_teardown(_catFunc3(given_, DIM_INT_STR, Array_when_ZfpCompressFixedRate_expect_CompressedBitrateComparableToChosenRate), setupDefaultStride, teardown), + +#ifdef FL_PT_DATA +/* fixed-accuracy */ +_cmocka_unit_test_setup_teardown(_catFunc3(given_, DIM_INT_STR, Array_when_ZfpCompressDecompressFixedAccuracy_expect_BitstreamAndArrayChecksumsMatch), setupDefaultStride, teardown), +_cmocka_unit_test_setup_teardown(_catFunc3(given_, DIM_INT_STR, Array_when_ZfpCompressFixedAccuracy_expect_CompressedValuesWithinAccuracy), setupDefaultStride, teardown), +#endif + +/* reversible */ +_cmocka_unit_test_setup_teardown(_catFunc3(given_, DIM_INT_STR, Array_when_ZfpCompressDecompressReversible_expect_BitstreamAndArrayChecksumsMatch), setupDefaultStride, teardown), diff --git a/tests/src/endtoend/zfpEndtoendBase.c b/tests/src/endtoend/zfpEndtoendBase.c new file mode 100644 index 00000000..5069824f --- /dev/null +++ b/tests/src/endtoend/zfpEndtoendBase.c @@ -0,0 +1,591 @@ +#include <stdarg.h> +#include <stddef.h> +#include <setjmp.h> +#include <cmocka.h> + +#include <math.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "utils/genSmoothRandNums.h" +#include "utils/stridedOperations.h" +#include "utils/testMacros.h" +#include "utils/zfpChecksums.h" +#include "utils/zfpCompressionParams.h" +#include "utils/zfpHash.h" +#include "utils/zfpTimer.h" + +#ifdef FL_PT_DATA + #define MIN_TOTAL_ELEMENTS 1000000 +#else + #define MIN_TOTAL_ELEMENTS 4096 +#endif + +struct setupVars { + // randomly generated array + // this entire dataset eventually gets compressed + // its data gets copied and
possibly rearranged, into compressedArr + size_t randomGenArrSideLen[4]; + size_t totalRandomGenArrLen; + Scalar* randomGenArr; + + // these arrays/dims may include stride-space + Scalar* compressedArr; + Scalar* decompressedArr; + + size_t bufsizeBytes; + void* buffer; + // dimensions of data that gets compressed (currently same as randomGenArrSideLen) + size_t dimLens[4]; + zfp_field* field; + zfp_field* decompressField; + zfp_stream* stream; + zfp_mode mode; + + // compressParamNum is 0, 1, or 2 + // used to compute fixed mode param + // and to select proper checksum to compare against + int compressParamNum; + size_t rateParam; + int precParam; + double accParam; + + stride_config stride; + + zfp_timer* timer; +}; + +// run this once per (datatype, DIM) combination for performance +static int +setupRandomData(void** state) +{ + int i; + struct setupVars *bundle = calloc(1, sizeof(struct setupVars)); + assert_non_null(bundle); + + switch (ZFP_TYPE) { + +#ifdef FL_PT_DATA + case zfp_type_float: + generateSmoothRandFloats(MIN_TOTAL_ELEMENTS, DIMS, (float**)&bundle->randomGenArr, &bundle->randomGenArrSideLen[0], &bundle->totalRandomGenArrLen); + break; + + case zfp_type_double: + generateSmoothRandDoubles(MIN_TOTAL_ELEMENTS, DIMS, (double**)&bundle->randomGenArr, &bundle->randomGenArrSideLen[0], &bundle->totalRandomGenArrLen); + break; +#else + case zfp_type_int32: + generateSmoothRandInts32(MIN_TOTAL_ELEMENTS, DIMS, 32 - 2, (int32**)&bundle->randomGenArr, &bundle->randomGenArrSideLen[0], &bundle->totalRandomGenArrLen); + break; + + case zfp_type_int64: + generateSmoothRandInts64(MIN_TOTAL_ELEMENTS, DIMS, 64 - 2, (int64**)&bundle->randomGenArr, &bundle->randomGenArrSideLen[0], &bundle->totalRandomGenArrLen); + break; +#endif + + default: + fail_msg("Invalid zfp_type during setupRandomData()"); + break; + } + assert_non_null(bundle->randomGenArr); + + // set remaining indices (square for now) + for (i = 0; i < 4; i++) { + bundle->randomGenArrSideLen[i] = (i < DIMS) 
? bundle->randomGenArrSideLen[0] : 0; + // for now, entire randomly generated array always entirely compressed + bundle->dimLens[i] = bundle->randomGenArrSideLen[i]; + } + + *state = bundle; + + return 0; +} + +static int +teardownRandomData(void** state) +{ + struct setupVars *bundle = *state; + free(bundle->randomGenArr); + free(bundle); + + return 0; +} + +static void +setupZfpFields(struct setupVars* bundle, ptrdiff_t s[4]) +{ + size_t* n = bundle->dimLens; + + // setup zfp_fields: source/destination arrays for compression/decompression + zfp_type type = ZFP_TYPE; + zfp_field* field; + zfp_field* decompressField; + + switch (DIMS) { + case 1: + field = zfp_field_1d(bundle->compressedArr, type, n[0]); + zfp_field_set_stride_1d(field, s[0]); + + decompressField = zfp_field_1d(bundle->decompressedArr, type, n[0]); + zfp_field_set_stride_1d(decompressField, s[0]); + break; + + case 2: + field = zfp_field_2d(bundle->compressedArr, type, n[0], n[1]); + zfp_field_set_stride_2d(field, s[0], s[1]); + + decompressField = zfp_field_2d(bundle->decompressedArr, type, n[0], n[1]); + zfp_field_set_stride_2d(decompressField, s[0], s[1]); + break; + + case 3: + field = zfp_field_3d(bundle->compressedArr, type, n[0], n[1], n[2]); + zfp_field_set_stride_3d(field, s[0], s[1], s[2]); + + decompressField = zfp_field_3d(bundle->decompressedArr, type, n[0], n[1], n[2]); + zfp_field_set_stride_3d(decompressField, s[0], s[1], s[2]); + break; + + case 4: + field = zfp_field_4d(bundle->compressedArr, type, n[0], n[1], n[2], n[3]); + zfp_field_set_stride_4d(field, s[0], s[1], s[2], s[3]); + + decompressField = zfp_field_4d(bundle->decompressedArr, type, n[0], n[1], n[2], n[3]); + zfp_field_set_stride_4d(decompressField, s[0], s[1], s[2], s[3]); + break; + } + + bundle->field = field; + bundle->decompressField = decompressField; +} + +static void +allocateFieldArrays(stride_config stride, size_t totalRandomGenArrLen, Scalar** compressedArrPtr, Scalar** decompressedArrPtr) +{ + size_t 
totalEntireDataLen = totalRandomGenArrLen; + if (stride == INTERLEAVED) + totalEntireDataLen *= 2; + + // allocate arrays which we directly compress or decompress into + *compressedArrPtr = calloc(totalEntireDataLen, sizeof(Scalar)); + assert_non_null(*compressedArrPtr); + + *decompressedArrPtr = malloc(sizeof(Scalar) * totalEntireDataLen); + assert_non_null(*decompressedArrPtr); +} + +static void +generateStridedRandomArray(stride_config stride, Scalar* randomGenArr, zfp_type type, size_t n[4], ptrdiff_t s[4], Scalar** compressedArrPtr, Scalar** decompressedArrPtr) +{ + int dims, i; + for (i = 0; i < 4; i++) { + if (n[i] == 0) { + break; + } + } + dims = i; + + size_t totalRandomGenArrLen = 1; + for (i = 0; i < dims; i++) { + totalRandomGenArrLen *= n[i]; + } + + // identify strides and produce compressedArr + switch(stride) { + case REVERSED: + getReversedStrides(dims, n, s); + + reverseArray(randomGenArr, *compressedArrPtr, totalRandomGenArrLen, type); + + // adjust pointer to last element, so strided traverse is valid + *compressedArrPtr += totalRandomGenArrLen - 1; + *decompressedArrPtr += totalRandomGenArrLen - 1; + break; + + case INTERLEAVED: + getInterleavedStrides(dims, n, s); + + interleaveArray(randomGenArr, *compressedArrPtr, totalRandomGenArrLen, ZFP_TYPE); + break; + + case PERMUTED: + getPermutedStrides(dims, n, s); + + if (permuteSquareArray(randomGenArr, *compressedArrPtr, n[0], dims, type)) { + fail_msg("Unexpected dims value in permuteSquareArray()"); + } + break; + + case AS_IS: + // no-op + memcpy(*compressedArrPtr, randomGenArr, totalRandomGenArrLen * sizeof(Scalar)); + break; + } +} + +static void +initStridedFields(struct setupVars* bundle, stride_config stride) +{ + // apply stride permutations on randomGenArr, into compressedArr, which gets compressed + bundle->stride = stride; + + allocateFieldArrays(stride, bundle->totalRandomGenArrLen, &bundle->compressedArr, &bundle->decompressedArr); + + ptrdiff_t s[4] = {0}; + 
generateStridedRandomArray(stride, bundle->randomGenArr, ZFP_TYPE, bundle->randomGenArrSideLen, s, &bundle->compressedArr, &bundle->decompressedArr); + + setupZfpFields(bundle, s); +} + +static void +setupZfpStream(struct setupVars* bundle) +{ + // setup zfp_stream (compression settings) + zfp_stream* stream = zfp_stream_open(NULL); + assert_non_null(stream); + + bundle->bufsizeBytes = zfp_stream_maximum_size(stream, bundle->field); + char* buffer = calloc(bundle->bufsizeBytes, sizeof(char)); + assert_non_null(buffer); + + bitstream* s = stream_open(buffer, bundle->bufsizeBytes); + assert_non_null(s); + + zfp_stream_set_bit_stream(stream, s); + zfp_stream_rewind(stream); + + bundle->stream = stream; + bundle->buffer = buffer; +} + +// returns 1 on failure, 0 on success +static int +setupCompressParam(struct setupVars* bundle, zfp_mode zfpMode, int compressParamNum) +{ + bundle->mode = zfpMode; + + // set compression mode for this compressParamNum + if (compressParamNum > 2 || compressParamNum < 0) { + printf("ERROR: Unknown compressParamNum %d during setupCompressParam()\n", compressParamNum); + return 1; + } + bundle->compressParamNum = compressParamNum; + + switch(zfpMode) { + case zfp_mode_fixed_precision: + bundle->precParam = computeFixedPrecisionParam(bundle->compressParamNum); + zfp_stream_set_precision(bundle->stream, bundle->precParam); + printf("\t\t\t\tFixed precision param: %u\n", bundle->precParam); + + break; + + case zfp_mode_fixed_rate: + bundle->rateParam = computeFixedRateParam(bundle->compressParamNum); + zfp_stream_set_rate(bundle->stream, (double)bundle->rateParam, ZFP_TYPE, DIMS, zfp_false); + printf("\t\t\t\tFixed rate param: %lu\n", (unsigned long)bundle->rateParam); + + break; + +#ifdef FL_PT_DATA + case zfp_mode_fixed_accuracy: + bundle->accParam = computeFixedAccuracyParam(bundle->compressParamNum); + zfp_stream_set_accuracy(bundle->stream, bundle->accParam); + printf("\t\t\t\tFixed accuracy param: %lf\n", bundle->accParam); + + break; 
+#endif + + case zfp_mode_reversible: + zfp_stream_set_reversible(bundle->stream); + printf("\t\t\t\tReversible mode\n"); + + break; + + default: + printf("ERROR: Invalid zfp mode %d during setupCompressParam()\n", zfpMode); + return 1; + } + + return 0; +} + +// assumes setupRandomData() already run (having set some setupVars members) +static int +initZfpStreamAndField(void **state, stride_config stride) +{ + struct setupVars *bundle = *state; + + initStridedFields(bundle, stride); + setupZfpStream(bundle); + + bundle->timer = zfp_timer_alloc(); + + *state = bundle; + + return 0; +} + +// randomGenArr and the struct itself are freed in teardownRandomData() +static int +teardown(void **state) +{ + struct setupVars *bundle = *state; + stream_close(bundle->stream->stream); + zfp_stream_close(bundle->stream); + zfp_field_free(bundle->field); + zfp_field_free(bundle->decompressField); + free(bundle->buffer); + + if (bundle->stride == REVERSED) { + // for convenience, we adjusted negative strided arrays to point to last element + bundle->compressedArr -= bundle->totalRandomGenArrLen - 1; + bundle->decompressedArr -= bundle->totalRandomGenArrLen - 1; + } + free(bundle->decompressedArr); + free(bundle->compressedArr); + + zfp_timer_free(bundle->timer); + + return 0; +} + +static void +when_seededRandomSmoothDataGenerated_expect_ChecksumMatches(void **state) +{ + struct setupVars *bundle = *state; + UInt checksum = _catFunc2(hashArray, SCALAR_BITS)((const UInt*)bundle->randomGenArr, bundle->totalRandomGenArrLen, 1); + uint64 key1, key2; + computeKeyOriginalInput(ARRAY_TEST, bundle->dimLens, &key1, &key2); + ASSERT_EQ_CHECKSUM(DIMS, ZFP_TYPE, checksum, key1, key2); +} + +// returns 1 on failure, 0 on success +static int +runZfpCompress(zfp_stream* stream, const zfp_field* field, zfp_timer* timer, size_t* compressedBytes) +{ + // perform compression and time it + if (zfp_timer_start(timer)) { + printf("ERROR: Unknown platform (none of linux, win, osx) when starting 
timer\n"); + return 1; + } + + *compressedBytes = zfp_compress(stream, field); + double time = zfp_timer_stop(timer); + printf("\t\t\t\t\tCompress time (s): %lf\n", time); + + if (*compressedBytes == 0) { + printf("ERROR: Compression failed, nothing was written to bitstream\n"); + return 1; + } else { + return 0; + } +} + +// returns 1 on failure, 0 on success +static int +isCompressedBitstreamChecksumsMatch(zfp_stream* stream, bitstream* bs, size_t dimLens[4], zfp_mode mode, int compressParamNum) +{ + uint64 checksum = hashBitstream(stream_data(bs), stream_size(bs)); + uint64 key1, key2; + computeKey(ARRAY_TEST, COMPRESSED_BITSTREAM, dimLens, mode, compressParamNum, &key1, &key2); + + if (COMPARE_NEQ_CHECKSUM(DIMS, ZFP_TYPE, checksum, key1, key2)) { + printf("ERROR: Compressed bitstream checksums were different: 0x%"UINT64PRIx" != 0x%"UINT64PRIx"\n", checksum, getChecksumByKey(DIMS, ZFP_TYPE, key1, key2)); + return 1; + } else { + return 0; + } +} + +// returns 1 on failure, 0 on success +static int +runZfpDecompress(zfp_stream* stream, zfp_field* decompressField, zfp_timer* timer, size_t compressedBytes) +{ + // zfp_decompress() will write to bundle->decompressedArr + // assert bitstream ends in same location + if (zfp_timer_start(timer)) { + printf("ERROR: Unknown platform (none of linux, win, osx)\n"); + return 1; + } + + size_t result = zfp_decompress(stream, decompressField); + double time = zfp_timer_stop(timer); + printf("\t\t\t\t\tDecompress time (s): %lf\n", time); + + if (compressedBytes != result) { + printf("ERROR: Decompression advanced the bitstream to a different position than after compression: %zu != %zu\n", result, compressedBytes); + return 1; + } else { + return 0; + } +} + +// returns 1 on failure, 0 on success +static int +isDecompressedArrayChecksumsMatch(struct setupVars* bundle) +{ + zfp_field* field = bundle->field; + + // hash decompressedArr + const UInt* arr = (const UInt*)bundle->decompressedArr; + ptrdiff_t strides[4] = {0, 0, 0, 0};
+ zfp_field_stride(field, strides); + + uint64 checksum = 0; + switch(bundle->stride) { + case REVERSED: + // arr already points to last element (so strided traverse is legal) + checksum = _catFunc2(hashStridedArray, SCALAR_BITS)(arr, bundle->randomGenArrSideLen, strides); + break; + + case INTERLEAVED: + checksum = _catFunc2(hashArray, SCALAR_BITS)(arr, bundle->totalRandomGenArrLen, 2); + break; + + case PERMUTED: + checksum = _catFunc2(hashStridedArray, SCALAR_BITS)(arr, bundle->randomGenArrSideLen, strides); + break; + + case AS_IS: + checksum = _catFunc2(hashArray, SCALAR_BITS)(arr, bundle->totalRandomGenArrLen, 1); + break; + } + + uint64 key1, key2; + computeKey(ARRAY_TEST, DECOMPRESSED_ARRAY, bundle->dimLens, bundle->mode, bundle->compressParamNum, &key1, &key2); + + if (COMPARE_NEQ_CHECKSUM(DIMS, ZFP_TYPE, checksum, key1, key2)) { + printf("ERROR: Decompressed array checksums were different: 0x%"UINT64PRIx" != 0x%"UINT64PRIx"\n", checksum, getChecksumByKey(DIMS, ZFP_TYPE, key1, key2)); + return 1; + } else { + return 0; + } +} + +// returns 0 on all tests pass, 1 on test failure +// will skip decompression if compression fails +static int +isZfpCompressDecompressChecksumsMatch(void **state, int doDecompress) +{ + struct setupVars *bundle = *state; + zfp_stream* stream = bundle->stream; + zfp_field* field = bundle->field; + zfp_timer* timer = bundle->timer; + + size_t compressedBytes; + if (runZfpCompress(stream, field, timer, &compressedBytes) == 1) { + return 1; + } + + bitstream* bs = zfp_stream_bit_stream(stream); + if (isCompressedBitstreamChecksumsMatch(stream, bs, bundle->dimLens, bundle->mode, bundle->compressParamNum) == 1) { + return 1; + } + + if (doDecompress == 0) { + return 0; + } + + // rewind stream for decompression + zfp_stream_rewind(stream); + if (runZfpDecompress(stream, bundle->decompressField, timer, compressedBytes) == 1) { + return 1; + } + + if (isDecompressedArrayChecksumsMatch(bundle) == 1) { + return 1; + } + + return 0; +} + +// 
this test is run by itself as its own test case, so it can use fail_msg() instead of accumulating error counts +// will skip decompression if compression fails +static void +runCompressDecompressReversible(struct setupVars* bundle, int doDecompress) +{ + zfp_stream* stream = bundle->stream; + zfp_field* field = bundle->field; + zfp_timer* timer = bundle->timer; + + size_t compressedBytes; + if (runZfpCompress(stream, field, timer, &compressedBytes) == 1) { + fail_msg("Reversible test failed."); + } + + bitstream* bs = zfp_stream_bit_stream(stream); + if (isCompressedBitstreamChecksumsMatch(stream, bs, bundle->dimLens, zfp_mode_reversible, bundle->compressParamNum) == 1) { + fail_msg("Reversible test failed."); + } + + if (doDecompress == 0) { + return; + } + + // rewind stream for decompression + zfp_stream_rewind(stream); + if (runZfpDecompress(stream, bundle->decompressField, timer, compressedBytes) == 1) { + fail_msg("Reversible test failed."); + } + + // verify that uncompressed and decompressed arrays match bit for bit + switch(bundle->stride) { + case REVERSED: + case INTERLEAVED: + case PERMUTED: { + // test one scalar at a time for bitwise equality + const size_t* n = bundle->randomGenArrSideLen; + ptrdiff_t strides[4]; + ptrdiff_t offset = 0; + size_t i, j, k, l; + zfp_field_stride(field, strides); + for (l = (n[3] ? n[3] : 1); l--; offset += strides[3] - n[2]*strides[2]) { + for (k = (n[2] ? n[2] : 1); k--; offset += strides[2] - n[1]*strides[1]) { + for (j = (n[1] ? n[1] : 1); j--; offset += strides[1] - n[0]*strides[0]) { + for (i = (n[0] ? 
n[0] : 1); i--; offset += strides[0]) { + assert_memory_equal(&bundle->compressedArr[offset], &bundle->decompressedArr[offset], sizeof(Scalar)); + } + } + } + } + } + break; + + case AS_IS: + assert_memory_equal(bundle->compressedArr, bundle->decompressedArr, bundle->totalRandomGenArrLen * sizeof(Scalar)); + break; + } +} + +// returns number of testcase failures +// (not allowed to call fail_msg() because all tests must run before signaling test failure) +static int +runCompressDecompressAcrossParamsGivenMode(void** state, int doDecompress, zfp_mode mode, int numCompressParams) +{ + struct setupVars *bundle = *state; + + int failures = 0; + int compressParam; + for (compressParam = 0; compressParam < numCompressParams; compressParam++) { + if (setupCompressParam(bundle, mode, compressParam) == 1) { + failures++; + continue; + } + + failures += isZfpCompressDecompressChecksumsMatch(state, doDecompress); + + zfp_stream_rewind(bundle->stream); + memset(bundle->buffer, 0, bundle->bufsizeBytes); + } + + return failures; +} + +static void +runCompressDecompressTests(void** state, zfp_mode mode, int numCompressParams) +{ + if (runCompressDecompressAcrossParamsGivenMode(state, 1, mode, numCompressParams) > 0) { + fail_msg("Overall compress/decompress test failure\n"); + } +} diff --git a/tests/src/execPolicy/CMakeLists.txt b/tests/src/execPolicy/CMakeLists.txt new file mode 100644 index 00000000..5e49a46c --- /dev/null +++ b/tests/src/execPolicy/CMakeLists.txt @@ -0,0 +1,19 @@ +add_executable(testOmp testOmp.c) +target_link_libraries(testOmp cmocka zfp) +add_test(NAME testOmp COMMAND testOmp) +if(ZFP_WITH_OPENMP) + target_link_libraries(testOmp OpenMP::OpenMP_C) + set_property(TEST testOmp PROPERTY RUN_SERIAL TRUE) +endif() + +if(ZFP_WITH_OPENMP) + add_executable(testOmpInternal testOmpInternal.c) + target_link_libraries(testOmpInternal cmocka zfp OpenMP::OpenMP_C) + add_test(NAME testOmpInternal COMMAND testOmpInternal) +endif() + +if(ZFP_WITH_CUDA AND NOT DEFINED 
ZFP_OMP_TESTS_ONLY) + add_executable(testCuda testCuda.c) + target_link_libraries(testCuda cmocka zfp) + add_test(NAME testCuda COMMAND testCuda) +endif() diff --git a/tests/src/execPolicy/testCuda.c b/tests/src/execPolicy/testCuda.c new file mode 100644 index 00000000..83d2d1fc --- /dev/null +++ b/tests/src/execPolicy/testCuda.c @@ -0,0 +1,95 @@ +#include "zfp.h" + +#include <stdarg.h> +#include <stddef.h> +#include <setjmp.h> +#include <cmocka.h> + +#include <stdlib.h> +#include <string.h> + +struct setupVars { + zfp_stream* stream; + zfp_field* field; + bitstream* bs; + void* buffer; + size_t streamSize; +}; + +static int +setup(void **state) +{ + struct setupVars *bundle = malloc(sizeof(struct setupVars)); + assert_non_null(bundle); + + bundle->stream = zfp_stream_open(NULL); + assert_non_null(bundle->stream); + + /* create a bitstream with buffer */ + size_t bufferSize = 50 * sizeof(int); + bundle->buffer = malloc(bufferSize); + assert_non_null(bundle->buffer); + memset(bundle->buffer, 0, bufferSize); + + /* offset bitstream, so we can distinguish 0 from stream_size() returned from zfp_decompress() */ + bundle->bs = stream_open(bundle->buffer, bufferSize); + stream_skip(bundle->bs, stream_word_bits + 1); + + bundle->streamSize = stream_size(bundle->bs); + assert_int_not_equal(bundle->streamSize, 0); + + /* set cuda policy */ + assert_int_equal(1, zfp_stream_set_execution(bundle->stream, zfp_exec_cuda)); + + /* create 4d field */ + bundle->field = zfp_field_4d(NULL, zfp_type_int32, 9, 5, 4, 4); + assert_non_null(bundle->field); + assert_int_equal(4, zfp_field_dimensionality(bundle->field)); + + *state = bundle; + + return 0; +} + +static int +teardown(void **state) +{ + struct setupVars *bundle = *state; + + zfp_field_free(bundle->field); + + stream_close(bundle->bs); + free(bundle->buffer); + zfp_stream_close(bundle->stream); + + free(bundle); + + return 0; +} + +static void +given_withCuda_when_4dCompressCudaPolicy_expect_noop(void **state) +{ + struct setupVars *bundle = *state; + + assert_int_equal(zfp_compress(bundle->stream,
bundle->field), 0); + assert_int_equal(stream_size(bundle->bs), bundle->streamSize); +} + +static void +given_withCuda_when_4dDecompressCudaPolicy_expect_noop(void **state) +{ + struct setupVars *bundle = *state; + + assert_int_equal(zfp_decompress(bundle->stream, bundle->field), 0); + assert_int_equal(stream_size(bundle->bs), bundle->streamSize); +} + +int main() +{ + const struct CMUnitTest tests[] = { + cmocka_unit_test_setup_teardown(given_withCuda_when_4dCompressCudaPolicy_expect_noop, setup, teardown), + cmocka_unit_test_setup_teardown(given_withCuda_when_4dDecompressCudaPolicy_expect_noop, setup, teardown), + }; + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/src/execPolicy/testOmp.c b/tests/src/execPolicy/testOmp.c new file mode 100644 index 00000000..29ab5d57 --- /dev/null +++ b/tests/src/execPolicy/testOmp.c @@ -0,0 +1,213 @@ +#include "zfp.h" + +#include <stdarg.h> +#include <stddef.h> +#include <setjmp.h> +#include <cmocka.h> + +#include <stdlib.h> +#include <string.h> + +struct setupVars { + zfp_stream* stream; + zfp_field* field; + bitstream* bs; + void* buffer; + size_t streamSize; +}; + +static int +setup(void **state) +{ + struct setupVars *bundle = malloc(sizeof(struct setupVars)); + assert_non_null(bundle); + + bundle->stream = zfp_stream_open(NULL); + *state = bundle; + + return 0; +} + +static int +teardown(void **state) +{ + struct setupVars *bundle = *state; + + zfp_stream_close(bundle->stream); + free(bundle); + + return 0; +} + +static int +setupForCompress(void **state) +{ + if (setup(state)) + return 1; + + struct setupVars *bundle = *state; + + /* create a bitstream with buffer */ + size_t bufferSize = 50 * sizeof(int); + bundle->buffer = malloc(bufferSize); + assert_non_null(bundle->buffer); + memset(bundle->buffer, 0, bufferSize); + + /* offset bitstream, so we can distinguish 0 from stream_size() returned from zfp_decompress() */ + bundle->bs = stream_open(bundle->buffer, bufferSize); + stream_skip(bundle->bs, (uint)(stream_word_bits + 1)); + + bundle->streamSize =
stream_size(bundle->bs); + assert_int_not_equal(bundle->streamSize, 0); + + /* manually set omp policy (needed for tests compiled without openmp) */ + bundle->stream->exec.policy = zfp_exec_omp; + + bundle->field = zfp_field_1d(NULL, zfp_type_int32, 9); + assert_non_null(bundle->field); + + return 0; +} + +static int +teardownForCompress(void **state) +{ + struct setupVars *bundle = *state; + + zfp_field_free(bundle->field); + stream_close(bundle->bs); + free(bundle->buffer); + + return teardown(state); +} + +#ifdef _OPENMP +static void +given_withOpenMP_when_setExecutionOmp_expect_set(void **state) +{ + struct setupVars *bundle = *state; + zfp_stream* stream = bundle->stream; + + assert_int_equal(zfp_stream_set_execution(stream, zfp_exec_omp), 1); + assert_int_equal(zfp_stream_execution(stream), zfp_exec_omp); +} + +static void +given_withOpenMP_when_setOmpThreads_expect_set(void **state) +{ + struct setupVars *bundle = *state; + zfp_stream* stream = bundle->stream; + uint threads = 5; + + assert_int_equal(zfp_stream_set_omp_threads(stream, threads), 1); + assert_int_equal(zfp_stream_omp_threads(stream), threads); +} + +static void +given_withOpenMP_serialExec_when_setOmpThreads_expect_setToExecOmp(void **state) +{ + struct setupVars *bundle = *state; + zfp_stream* stream = bundle->stream; + assert_int_equal(zfp_stream_execution(stream), zfp_exec_serial); + + assert_int_equal(zfp_stream_set_omp_threads(stream, 5), 1); + + assert_int_equal(zfp_stream_execution(stream), zfp_exec_omp); +} + +static void +given_withOpenMP_when_setOmpChunkSize_expect_set(void **state) +{ + struct setupVars *bundle = *state; + zfp_stream* stream = bundle->stream; + uint chunk_size = 0x2u; + + assert_int_equal(zfp_stream_set_omp_chunk_size(stream, chunk_size), 1); + assert_int_equal(zfp_stream_omp_chunk_size(stream), chunk_size); +} + +static void +given_withOpenMP_serialExec_when_setOmpChunkSize_expect_setToExecOmp(void **state) +{ + struct setupVars *bundle = *state; + zfp_stream* 
stream = bundle->stream; + assert_int_equal(zfp_stream_execution(stream), zfp_exec_serial); + + assert_int_equal(zfp_stream_set_omp_chunk_size(stream, 0x200u), 1); + + assert_int_equal(zfp_stream_execution(stream), zfp_exec_omp); +} + +static void +given_withOpenMP_whenDecompressOmpPolicy_expect_noop(void **state) +{ + struct setupVars *bundle = *state; + + assert_int_equal(zfp_decompress(bundle->stream, bundle->field), 0); + assert_int_equal(stream_size(bundle->bs), bundle->streamSize); +} + +#else +static void +given_withoutOpenMP_when_setExecutionOmp_expect_unableTo(void **state) +{ + struct setupVars *bundle = *state; + zfp_stream* stream = bundle->stream; + + assert_int_equal(zfp_stream_set_execution(stream, zfp_exec_omp), 0); + assert_int_equal(zfp_stream_execution(stream), zfp_exec_serial); +} + +static void +given_withoutOpenMP_when_setOmpParams_expect_unableTo(void **state) +{ + struct setupVars *bundle = *state; + zfp_stream* stream = bundle->stream; + + assert_int_equal(zfp_stream_set_omp_threads(stream, 5), 0); + assert_int_equal(zfp_stream_set_omp_chunk_size(stream, 0x200u), 0); + + assert_int_equal(zfp_stream_execution(stream), zfp_exec_serial); +} + +static void +given_withoutOpenMP_whenCompressOmpPolicy_expect_noop(void **state) +{ + struct setupVars *bundle = *state; + + assert_int_equal(zfp_compress(bundle->stream, bundle->field), 0); + assert_int_equal(stream_size(bundle->bs), bundle->streamSize); +} + +static void +given_withoutOpenMP_whenDecompressOmpPolicy_expect_noop(void **state) +{ + struct setupVars *bundle = *state; + + assert_int_equal(zfp_decompress(bundle->stream, bundle->field), 0); + assert_int_equal(stream_size(bundle->bs), bundle->streamSize); +} + +#endif + +int main() +{ + const struct CMUnitTest tests[] = { +#ifdef _OPENMP + cmocka_unit_test_setup_teardown(given_withOpenMP_when_setExecutionOmp_expect_set, setup, teardown), + cmocka_unit_test_setup_teardown(given_withOpenMP_when_setOmpThreads_expect_set, setup, teardown), + 
cmocka_unit_test_setup_teardown(given_withOpenMP_serialExec_when_setOmpThreads_expect_setToExecOmp, setup, teardown), + cmocka_unit_test_setup_teardown(given_withOpenMP_when_setOmpChunkSize_expect_set, setup, teardown), + cmocka_unit_test_setup_teardown(given_withOpenMP_serialExec_when_setOmpChunkSize_expect_setToExecOmp, setup, teardown), + + cmocka_unit_test_setup_teardown(given_withOpenMP_whenDecompressOmpPolicy_expect_noop, setupForCompress, teardownForCompress), +#else + cmocka_unit_test_setup_teardown(given_withoutOpenMP_when_setExecutionOmp_expect_unableTo, setup, teardown), + cmocka_unit_test_setup_teardown(given_withoutOpenMP_when_setOmpParams_expect_unableTo, setup, teardown), + + cmocka_unit_test_setup_teardown(given_withoutOpenMP_whenCompressOmpPolicy_expect_noop, setupForCompress, teardownForCompress), + cmocka_unit_test_setup_teardown(given_withoutOpenMP_whenDecompressOmpPolicy_expect_noop, setupForCompress, teardownForCompress), +#endif + }; + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/src/execPolicy/testOmpInternal.c b/tests/src/execPolicy/testOmpInternal.c new file mode 100644 index 00000000..dfa89eb5 --- /dev/null +++ b/tests/src/execPolicy/testOmpInternal.c @@ -0,0 +1,84 @@ +#include "zfp.h" +#include "zfp/internal/zfp/macros.h" +#include "src/share/omp.c" + +#include <stdarg.h> +#include <stddef.h> +#include <setjmp.h> +#include <cmocka.h> + +#include <stdlib.h> + +struct setupVars { + zfp_stream* stream; +}; + +static int +setup(void **state) +{ + struct setupVars *bundle = malloc(sizeof(struct setupVars)); + assert_non_null(bundle); + + zfp_stream* stream = zfp_stream_open(NULL); + bundle->stream = stream; + + *state = bundle; + + return 0; +} + +static int
teardown(void **state) +{ + struct setupVars *bundle = *state; + zfp_stream_close(bundle->stream); + free(bundle); + + return 0; +} + +static void +given_withOpenMP_zfpStreamOmpThreadsZero_when_threadCountOmp_expect_returnsOmpMaxThreadCount(void **state) +{ + struct setupVars *bundle = *state; + zfp_stream* stream
= bundle->stream; + assert_int_equal(zfp_stream_set_omp_threads(stream, 0), 1); + + assert_int_equal(thread_count_omp(stream), omp_get_max_threads()); +} + +static void +given_withOpenMP_zfpStreamOmpChunkSizeZero_when_chunkCountOmp_expect_returnsOneChunkPerThread(void **state) +{ + struct setupVars *bundle = *state; + zfp_stream* stream = bundle->stream; + uint threads = 3; + assert_int_equal(zfp_stream_set_omp_threads(stream, threads), 1); + + uint blocks = 50; + assert_int_equal(chunk_count_omp(stream, blocks, threads), threads); +} + +static void +given_withOpenMP_zfpStreamOmpChunkSizeNonzero_when_chunkCountOmp_expect_returnsNumChunks(void **state) +{ + struct setupVars *bundle = *state; + zfp_stream* stream = bundle->stream; + + uint blocks = 51; + uint chunkSize = 3; + assert_int_equal(zfp_stream_set_omp_chunk_size(stream, chunkSize), 1); + + // the MIN(chunks, blocks) will always return chunks + assert_int_equal(chunk_count_omp(stream, blocks, thread_count_omp(stream)), (blocks + chunkSize - 1) / chunkSize); +} + +int main() +{ + const struct CMUnitTest tests[] = { + cmocka_unit_test_setup_teardown(given_withOpenMP_zfpStreamOmpThreadsZero_when_threadCountOmp_expect_returnsOmpMaxThreadCount, setup, teardown), + cmocka_unit_test_setup_teardown(given_withOpenMP_zfpStreamOmpChunkSizeZero_when_chunkCountOmp_expect_returnsOneChunkPerThread, setup, teardown), + cmocka_unit_test_setup_teardown(given_withOpenMP_zfpStreamOmpChunkSizeNonzero_when_chunkCountOmp_expect_returnsNumChunks, setup, teardown), + }; + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/src/inline/CMakeLists.txt b/tests/src/inline/CMakeLists.txt new file mode 100644 index 00000000..63e44989 --- /dev/null +++ b/tests/src/inline/CMakeLists.txt @@ -0,0 +1,11 @@ +add_executable(testBitstream testBitstream.c) +target_link_libraries(testBitstream cmocka zfp) +add_test(NAME testBitstream COMMAND testBitstream) + +add_executable(testBitstreamSmallWsize testBitstreamSmallWsize.c) 
+target_link_libraries(testBitstreamSmallWsize cmocka) +add_test(NAME testBitstreamSmallWsize COMMAND testBitstreamSmallWsize) + +add_executable(testBitstreamStrided testBitstreamStrided.c) +target_link_libraries(testBitstreamStrided cmocka) +add_test(NAME testBitstreamStrided COMMAND testBitstreamStrided) diff --git a/tests/src/inline/testBitstream.c b/tests/src/inline/testBitstream.c new file mode 100644 index 00000000..c025d95f --- /dev/null +++ b/tests/src/inline/testBitstream.c @@ -0,0 +1,674 @@ +#include <stdarg.h> +#include <stddef.h> +#include <setjmp.h> +#include <cmocka.h> + +#include "include/zfp/bitstream.h" +#include "include/zfp/bitstream.inl" + +#define STREAM_WORD_CAPACITY 3 + +#define WORD_MASK ((bitstream_word)(-1)) +#define WORD1 WORD_MASK +#define WORD2 (0x5555555555555555 & WORD_MASK) + +struct setupVars { + void* buffer; + bitstream* b; +}; + +static int +setup(void **state) +{ + struct setupVars *s = malloc(sizeof(struct setupVars)); + assert_non_null(s); + + s->buffer = calloc(STREAM_WORD_CAPACITY, sizeof(bitstream_word)); + assert_non_null(s->buffer); + + s->b = stream_open(s->buffer, STREAM_WORD_CAPACITY * sizeof(bitstream_word)); + assert_non_null(s->b); + + *state = s; + + return 0; +} + +static int +teardown(void **state) +{ + struct setupVars *s = *state; + free(s->buffer); + free(s->b); + free(s); + + return 0; +} + +static void +when_StreamCopy_expect_BitsCopiedToDestBitstream(void **state) +{ + const uint SRC_OFFSET = wsize - 6; + const uint DST_OFFSET = 5; + const uint COPY_BITS = wsize + 4; + + const uint NUM_WORD2_BITS_WRITTEN_TO_WORD = DST_OFFSET + (wsize - SRC_OFFSET); + const bitstream_word EXPECTED_WRITTEN_WORD = ((WORD1 >> SRC_OFFSET) << DST_OFFSET) + + (WORD2 << NUM_WORD2_BITS_WRITTEN_TO_WORD); + const uint EXPECTED_BITS = (DST_OFFSET + COPY_BITS) % wsize; + const bitstream_word EXPECTED_BUFFER = (WORD2 >> (NUM_WORD2_BITS_WRITTEN_TO_WORD)) + & ((1u << EXPECTED_BITS) - 1); + + bitstream* src = ((struct setupVars *)*state)->b; + stream_write_word(src, WORD1); +
stream_write_word(src, WORD2); + stream_flush(src); + stream_rseek(src, SRC_OFFSET); + + void* buffer = calloc(STREAM_WORD_CAPACITY, sizeof(bitstream_word)); + bitstream* dst = stream_open(buffer, STREAM_WORD_CAPACITY * sizeof(bitstream_word)); + stream_wseek(dst, DST_OFFSET); + + stream_copy(dst, src, COPY_BITS); + + assert_ptr_equal(dst->ptr, dst->begin + 1); + assert_int_equal(dst->bits, (DST_OFFSET + COPY_BITS) % wsize); + assert_int_equal(*dst->begin, EXPECTED_WRITTEN_WORD); + assert_int_equal(dst->buffer, EXPECTED_BUFFER); + + stream_close(dst); + free(buffer); +} + +static void +when_Flush_expect_PaddedWordWrittenToStream(void **state) +{ + const uint PREV_BUFFER_BIT_COUNT = 8; + + bitstream* s = ((struct setupVars *)*state)->b; + stream_write_bits(s, WORD1, wsize); + stream_write_bits(s, WORD1, wsize); + + stream_rewind(s); + stream_write_bits(s, WORD2, PREV_BUFFER_BIT_COUNT); + bitstream_word *prevPtr = s->ptr; + + size_t padCount = stream_flush(s); + + assert_ptr_equal(s->ptr, prevPtr + 1); + assert_int_equal(s->bits, 0); + assert_int_equal(s->buffer, 0); + assert_int_equal(padCount, wsize - PREV_BUFFER_BIT_COUNT); +} + +static void +given_EmptyBuffer_when_Flush_expect_NOP(void **state) +{ + bitstream* s = ((struct setupVars *)*state)->b; + bitstream_word *prevPtr = s->ptr; + bitstream_count prevBits = s->bits; + bitstream_word prevBuffer = s->buffer; + + size_t padCount = stream_flush(s); + + assert_ptr_equal(s->ptr, prevPtr); + assert_int_equal(s->bits, prevBits); + assert_int_equal(s->buffer, prevBuffer); + assert_int_equal(padCount, 0); +} + +static void +when_Align_expect_BufferEmptyBitsZero(void **state) +{ + const uint READ_BIT_COUNT = 3; + + bitstream* s = ((struct setupVars *)*state)->b; + stream_write_bits(s, WORD1, wsize); + stream_write_bits(s, WORD2, wsize); + + stream_rewind(s); + stream_read_bits(s, READ_BIT_COUNT); + bitstream_word *prevPtr = s->ptr; + + stream_align(s); + + assert_ptr_equal(s->ptr, prevPtr); + assert_int_equal(s->bits, 
0); + assert_int_equal(s->buffer, 0); +} + +static void +when_SkipPastBufferEnd_expect_NewMaskedWordInBuffer(void **state) +{ + const uint READ_BIT_COUNT = 3; + const uint SKIP_COUNT = wsize + 5; + const uint TOTAL_OFFSET = READ_BIT_COUNT + SKIP_COUNT; + const uint EXPECTED_BITS = wsize - (TOTAL_OFFSET % wsize); + const bitstream_word EXPECTED_BUFFER = WORD2 >> (TOTAL_OFFSET % wsize); + + bitstream* s = ((struct setupVars *)*state)->b; + stream_write_bits(s, WORD1, wsize); + stream_write_bits(s, WORD2, wsize); + + stream_rewind(s); + stream_read_bits(s, READ_BIT_COUNT); + + stream_skip(s, SKIP_COUNT); + + assert_ptr_equal(s->ptr, s->begin + 2); + assert_int_equal(s->bits, EXPECTED_BITS); + assert_int_equal(s->buffer, EXPECTED_BUFFER); +} + +static void +when_SkipWithinBuffer_expect_MaskedBuffer(void **state) +{ + const uint READ_BIT_COUNT = 3; + const uint SKIP_COUNT = 5; + const uint TOTAL_OFFSET = READ_BIT_COUNT + SKIP_COUNT; + const uint EXPECTED_BITS = wsize - (TOTAL_OFFSET % wsize); + const bitstream_word EXPECTED_BUFFER = WORD1 >> (TOTAL_OFFSET % wsize); + + bitstream* s = ((struct setupVars *)*state)->b; + stream_write_bits(s, WORD1, wsize); + + stream_rewind(s); + stream_read_bits(s, READ_BIT_COUNT); + bitstream_word *prevPtr = s->ptr; + + stream_skip(s, SKIP_COUNT); + + assert_ptr_equal(s->ptr, prevPtr); + assert_int_equal(s->bits, EXPECTED_BITS); + assert_int_equal(s->buffer, EXPECTED_BUFFER); +} + +static void +when_SkipZeroBits_expect_NOP(void **state) +{ + bitstream* s = ((struct setupVars *)*state)->b; + stream_write_bits(s, WORD1, wsize); + stream_write_bits(s, WORD2, wsize); + + stream_rewind(s); + stream_read_bits(s, 2); + + bitstream_word* prevPtr = s->ptr; + bitstream_word prevBits = s->bits; + bitstream_word prevBuffer = s->buffer; + + stream_skip(s, 0); + + assert_ptr_equal(s->ptr, prevPtr); + assert_int_equal(s->bits, prevBits); + assert_int_equal(s->buffer, prevBuffer); +} + +static void 
+when_RseekToNonMultipleOfWsize_expect_MaskedWordLoadedToBuffer(void **state) +{ + const uint BIT_OFFSET = wsize + 5; + const uint EXPECTED_BITS = wsize - (BIT_OFFSET % wsize); + const bitstream_word EXPECTED_BUFFER = WORD2 >> (BIT_OFFSET % wsize); + + bitstream* s = ((struct setupVars *)*state)->b; + stream_write_bits(s, WORD1, wsize); + stream_write_bits(s, WORD2, wsize); + + stream_rseek(s, BIT_OFFSET); + + assert_ptr_equal(s->ptr, s->begin + 2); + assert_int_equal(s->bits, EXPECTED_BITS); + assert_int_equal(s->buffer, EXPECTED_BUFFER); +} + +static void +when_RseekToMultipleOfWsize_expect_PtrAlignedBufferEmpty(void **state) +{ + bitstream* s = ((struct setupVars *)*state)->b; + stream_write_bits(s, WORD1, wsize); + stream_write_bits(s, WORD2, wsize); + + stream_rseek(s, wsize); + + assert_ptr_equal(s->ptr, s->begin + 1); + assert_int_equal(s->bits, 0); + assert_int_equal(s->buffer, 0); +} + +static void +when_WseekToNonMultipleOfWsize_expect_MaskedWordLoadedToBuffer(void **state) +{ + const uint BIT_OFFSET = wsize + 5; + const bitstream_word MASK = 0x1f; + + bitstream* s = ((struct setupVars *)*state)->b; + stream_write_bits(s, WORD1, wsize); + stream_write_bits(s, WORD2, wsize); + + stream_wseek(s, BIT_OFFSET); + + assert_ptr_equal(s->ptr, s->begin + 1); + assert_int_equal(s->bits, BIT_OFFSET % wsize); + assert_int_equal(s->buffer, WORD2 & MASK); +} + +static void +when_WseekToMultipleOfWsize_expect_PtrAlignedBufferEmpty(void **state) +{ + bitstream* s = ((struct setupVars *)*state)->b; + stream_write_bits(s, WORD1, wsize); + stream_write_bits(s, WORD2, wsize); + + stream_wseek(s, wsize); + + assert_ptr_equal(s->ptr, s->begin + 1); + assert_int_equal(s->bits, 0); + assert_int_equal(s->buffer, 0); +} + +static void +when_Rtell_expect_ReturnsReadBitCount(void **state) +{ + const uint READ_BIT_COUNT1 = wsize - 6; + const uint READ_BIT_COUNT2 = wsize; + + bitstream* s = ((struct setupVars *)*state)->b; + stream_write_bits(s, WORD1, wsize); + stream_write_bits(s, 
WORD1, wsize); + + stream_rewind(s); + stream_read_bits(s, READ_BIT_COUNT1); + stream_read_bits(s, READ_BIT_COUNT2); + + assert_int_equal(stream_rtell(s), READ_BIT_COUNT1 + READ_BIT_COUNT2); +} + +static void +when_Wtell_expect_ReturnsWrittenBitCount(void **state) +{ + const uint WRITE_BIT_COUNT1 = wsize; + const uint WRITE_BIT_COUNT2 = 6; + + bitstream* s = ((struct setupVars *)*state)->b; + stream_write_bits(s, WORD1, WRITE_BIT_COUNT1); + stream_write_bits(s, WORD1, WRITE_BIT_COUNT2); + + assert_int_equal(stream_wtell(s), WRITE_BIT_COUNT1 + WRITE_BIT_COUNT2); +} + +static void +when_ReadBitsSpreadsAcrossTwoWords_expect_BitsCombinedFromBothWords(void **state) +{ + const uint READ_BIT_COUNT = wsize - 3; + const uint PARTIAL_WORD_BIT_COUNT = 16; + const uint NUM_OVERFLOWED_BITS = READ_BIT_COUNT - PARTIAL_WORD_BIT_COUNT; + const uint EXPECTED_BUFFER_BIT_COUNT = wsize - NUM_OVERFLOWED_BITS; + + const bitstream_word PARTIAL_WORD1 = WORD1 & 0xffff; + const bitstream_word PARTIAL_WORD2 = WORD2 & 0x1fffffffffff << PARTIAL_WORD_BIT_COUNT; + + bitstream* s = ((struct setupVars *)*state)->b; + stream_write_bits(s, PARTIAL_WORD1, wsize); + stream_write_bits(s, WORD2, wsize); + + stream_rewind(s); + s->buffer = stream_read_word(s); + s->bits = PARTIAL_WORD_BIT_COUNT; + + uint64 readBits = stream_read_bits(s, READ_BIT_COUNT); + + assert_int_equal(s->bits, EXPECTED_BUFFER_BIT_COUNT); + assert_int_equal(readBits, PARTIAL_WORD1 + PARTIAL_WORD2); + assert_int_equal(s->buffer, WORD2 >> NUM_OVERFLOWED_BITS); +} + +static void +given_BitstreamBufferEmptyWithNextWordAvailable_when_ReadBitsWsize_expect_EntireNextWordReturned(void **state) +{ + const uint READ_BIT_COUNT = wsize; + + bitstream* s = ((struct setupVars *)*state)->b; + stream_write_bits(s, 0, wsize); + stream_write_bits(s, WORD1, wsize); + + stream_rewind(s); + s->buffer = stream_read_word(s); + s->bits = 0; + + uint64 readBits = stream_read_bits(s, READ_BIT_COUNT); + + assert_int_equal(s->bits, 0); + 
assert_int_equal(readBits, WORD1); + assert_int_equal(s->buffer, 0); +} + +static void +when_ReadBits_expect_BitsReadInOrderLSB(void **state) +{ + const uint BITS_TO_READ = 2; + const bitstream_word MASK = 0x3; + + bitstream* s = ((struct setupVars *)*state)->b; + s->buffer = WORD2; + s->bits = wsize; + + uint64 readBits = stream_read_bits(s, BITS_TO_READ); + + assert_int_equal(s->bits, wsize - BITS_TO_READ); + assert_int_equal(readBits, WORD2 & MASK); + assert_int_equal(s->buffer, WORD2 >> BITS_TO_READ); +} + +static void +when_ReadZeroBits_expect_NOP(void **state) +{ + bitstream* s = ((struct setupVars *)*state)->b; + s->buffer = WORD1; + s->bits = wsize; + + uint64 readBits = stream_read_bits(s, 0); + + assert_int_equal(s->bits, wsize); + assert_int_equal(readBits, 0); + assert_int_equal(s->buffer, WORD1); +} + +// overflow refers to what will land in the buffer +// more significant bits than overflow are returned by stream_write_bits() +static void +when_WriteBitsOverflowsBuffer_expect_OverflowWrittenToNewBuffer(void **state) +{ + const uint EXISTING_BIT_COUNT = 5; + const uint NUM_BITS_TO_WRITE = wsize - 1; + const uint OVERFLOW_BIT_COUNT = NUM_BITS_TO_WRITE - (wsize - EXISTING_BIT_COUNT); + // 0x1101 0101 0101 ... 
0101 allows stream_write_bits() to return non-zero + const bitstream_word WORD_TO_WRITE = WORD2 + 0x8000000000000000; + const bitstream_word OVERFLOWED_BITS = WORD_TO_WRITE >> (wsize - EXISTING_BIT_COUNT); + const bitstream_word EXPECTED_BUFFER_RESULT = OVERFLOWED_BITS & 0xf; + + bitstream* s = ((struct setupVars *)*state)->b; + stream_write_bits(s, WORD1, EXISTING_BIT_COUNT); + + uint64 remainingBits = stream_write_bits(s, WORD_TO_WRITE, NUM_BITS_TO_WRITE); + + assert_int_equal(s->bits, OVERFLOW_BIT_COUNT); + assert_int_equal(s->buffer, EXPECTED_BUFFER_RESULT); + assert_int_equal(remainingBits, WORD_TO_WRITE >> NUM_BITS_TO_WRITE); +} + +static void +when_WriteBitsFillsBufferExactly_expect_WordWrittenToStream(void **state) +{ + const uint EXISTING_BIT_COUNT = 5; + const uint NUM_BITS_TO_WRITE = wsize - EXISTING_BIT_COUNT; + const bitstream_word COMPLETING_WORD = WORD2 & 0x07ffffffffffffff; + + bitstream* s = ((struct setupVars *)*state)->b; + stream_write_bits(s, WORD1, EXISTING_BIT_COUNT); + uint64 remainingBits = stream_write_bits(s, COMPLETING_WORD, NUM_BITS_TO_WRITE); + + stream_rewind(s); + bitstream_word readWord = stream_read_word(s); + + assert_int_equal(readWord, 0x1f + 0xaaaaaaaaaaaaaaa0); + assert_int_equal(remainingBits, 0); +} + +static void +when_WriteBits_expect_BitsWrittenToBufferFromLSB(void **state) +{ + const uint NUM_BITS_TO_WRITE = 3; + const uint MASK = 0x7; + + bitstream* s = ((struct setupVars *)*state)->b; + uint64 remainingBits = stream_write_bits(s, WORD1, NUM_BITS_TO_WRITE); + + assert_int_equal(s->bits, NUM_BITS_TO_WRITE); + assert_int_equal(s->buffer, WORD1 & MASK); + assert_int_equal(remainingBits, WORD1 >> NUM_BITS_TO_WRITE); +} + +static void +when_WriteZeroBits_expect_NOP(void **state) +{ + bitstream* s = ((struct setupVars *)*state)->b; + + uint64 remainingBits = stream_write_bits(s, WORD1, 0); + + assert_int_equal(s->bits, 0); + assert_int_equal(s->buffer, 0); + assert_int_equal(remainingBits, WORD1); +} + +static void
+given_BitstreamWithEmptyBuffer_when_ReadBit_expect_LoadNextWordToBuffer(void **state) +{ + bitstream* s = ((struct setupVars *)*state)->b; + stream_write_word(s, 0); + stream_write_word(s, WORD1); + + stream_rewind(s); + s->buffer = stream_read_word(s); + s->bits = 0; + + assert_int_equal(s->buffer, 0); + assert_int_equal(stream_read_bit(s), 1); + assert_int_equal(s->bits, wsize - 1); + assert_int_equal(s->buffer, WORD1 >> 1); +} + +static void +given_BitstreamWithBitInBuffer_when_ReadBit_expect_OneBitReadFromLSB(void **state) +{ + bitstream* s = ((struct setupVars *)*state)->b; + stream_write_bit(s, 1); + + bitstream_count prevBits = s->bits; + bitstream_word prevBuffer = s->buffer; + + assert_int_equal(stream_read_bit(s), 1); + assert_int_equal(s->bits, prevBits - 1); + assert_int_equal(s->buffer, prevBuffer >> 1); +} + +static void +given_BitstreamBufferOneBitFromFull_when_WriteBit_expect_BitWrittenToBufferWrittenToStreamAndBufferReset(void **state) +{ + const uint PLACE = wsize - 1; + + bitstream* s = ((struct setupVars *)*state)->b; + s->bits = PLACE; + + stream_write_bit(s, 1); + + assert_int_equal(stream_size(s), sizeof(bitstream_word)); + assert_int_equal(*s->begin, (bitstream_word)1 << PLACE); + assert_int_equal(s->buffer, 0); +} + +static void +when_WriteBit_expect_BitWrittenToBufferFromLSB(void **state) +{ + const uint PLACE = 3; + + bitstream* s = ((struct setupVars *)*state)->b; + s->bits = PLACE; + + stream_write_bit(s, 1); + + assert_int_equal(s->bits, PLACE + 1); + assert_int_equal(s->buffer, (bitstream_word)1 << PLACE); +} + +static void +given_StartedBuffer_when_StreamPadOverflowsBuffer_expect_ProperWordsWritten(void **state) +{ + const uint NUM_WORDS = 2; + const uint EXISTING_BIT_COUNT = 12; + const bitstream_word EXISTING_BUFFER = 0xfff; + const uint PAD_AMOUNT = NUM_WORDS * wsize - EXISTING_BIT_COUNT; + + bitstream* s = ((struct setupVars *)*state)->b; + stream_write_word(s, 0); + stream_write_word(s, WORD1); + + stream_rewind(s); + s->buffer 
= EXISTING_BUFFER; + s->bits = EXISTING_BIT_COUNT; + size_t prevStreamSize = stream_size(s); + + stream_pad(s, PAD_AMOUNT); + + assert_int_equal(stream_size(s), prevStreamSize + NUM_WORDS * sizeof(bitstream_word)); + stream_rewind(s); + assert_int_equal(stream_read_word(s), EXISTING_BUFFER); + assert_int_equal(stream_read_word(s), 0); +} + +static void +given_StartedBuffer_when_StreamPad_expect_PaddedWordWritten(void **state) +{ + const uint EXISTING_BIT_COUNT = 12; + const bitstream_word EXISTING_BUFFER = 0xfff; + + bitstream* s = ((struct setupVars *)*state)->b; + s->buffer = EXISTING_BUFFER; + s->bits = EXISTING_BIT_COUNT; + size_t prevStreamSize = stream_size(s); + + stream_pad(s, wsize - EXISTING_BIT_COUNT); + + assert_int_equal(stream_size(s), prevStreamSize + sizeof(bitstream_word)); + stream_rewind(s); + assert_int_equal(stream_read_word(s), EXISTING_BUFFER); +} + +static void +when_ReadTwoWords_expect_ReturnConsecutiveWordsInOrder(void **state) +{ + bitstream* s = ((struct setupVars *)*state)->b; + stream_write_word(s, WORD1); + stream_write_word(s, WORD2); + stream_rewind(s); + + assert_int_equal(stream_read_word(s), WORD1); + assert_int_equal(stream_read_word(s), WORD2); +} + +static void +when_ReadWord_expect_WordReturned(void **state) +{ + bitstream* s = ((struct setupVars *)*state)->b; + stream_write_word(s, WORD1); + stream_rewind(s); + + assert_int_equal(stream_read_word(s), WORD1); +} + +static void +given_BitstreamWithOneWrittenWordRewound_when_WriteWord_expect_NewerWordOverwrites(void **state) +{ + bitstream* s = ((struct setupVars *)*state)->b; + stream_write_word(s, WORD1); + + stream_rewind(s); + stream_write_word(s, WORD2); + + assert_int_equal(*s->begin, WORD2); +} + +static void +when_WriteTwoWords_expect_WordsWrittenToStreamConsecutively(void **state) +{ + bitstream* s = ((struct setupVars *)*state)->b; + + stream_write_word(s, WORD1); + stream_write_word(s, WORD2); + + assert_int_equal(stream_size(s), sizeof(bitstream_word) * 2); + 
assert_int_equal(*s->begin, WORD1); + assert_int_equal(*(s->begin + 1), WORD2); +} + +static void +given_RewoundBitstream_when_WriteWord_expect_WordWrittenAtStreamBegin(void **state) +{ + bitstream* s = ((struct setupVars *)*state)->b; + size_t prevStreamSize = stream_size(s); + + stream_write_word(s, WORD1); + + assert_int_equal(stream_size(s), prevStreamSize + sizeof(bitstream_word)); + assert_int_equal(*s->begin, WORD1); +} + +static void +when_BitstreamOpened_expect_ProperLengthAndBoundaries(void **state) +{ + const int NUM_WORDS = 4; + + size_t bufferLenBytes = sizeof(bitstream_word) * NUM_WORDS; + void* buffer = malloc(bufferLenBytes); + bitstream* s = stream_open(buffer, bufferLenBytes); + + void* streamBegin = stream_data(s); + void* computedStreamEnd = (bitstream_word*)streamBegin + NUM_WORDS; + + assert_ptr_equal(streamBegin, buffer); + assert_ptr_equal(s->end, computedStreamEnd); + assert_int_equal(stream_capacity(s), bufferLenBytes); + + stream_close(s); + free(buffer); +} + +static void +when_Alignment_expect_MatchingStreamWordBits(void **state) +{ + size_t alignment = stream_alignment(); + assert_int_equal(alignment, stream_word_bits); +} + +int main() +{ + const struct CMUnitTest tests[] = { + cmocka_unit_test(when_Alignment_expect_MatchingStreamWordBits), + cmocka_unit_test(when_BitstreamOpened_expect_ProperLengthAndBoundaries), + cmocka_unit_test_setup_teardown(given_RewoundBitstream_when_WriteWord_expect_WordWrittenAtStreamBegin, setup, teardown), + cmocka_unit_test_setup_teardown(when_WriteTwoWords_expect_WordsWrittenToStreamConsecutively, setup, teardown), + cmocka_unit_test_setup_teardown(given_BitstreamWithOneWrittenWordRewound_when_WriteWord_expect_NewerWordOverwrites, setup, teardown), + cmocka_unit_test_setup_teardown(when_ReadWord_expect_WordReturned, setup, teardown), + cmocka_unit_test_setup_teardown(when_ReadTwoWords_expect_ReturnConsecutiveWordsInOrder, setup, teardown), + 
cmocka_unit_test_setup_teardown(given_StartedBuffer_when_StreamPad_expect_PaddedWordWritten, setup, teardown), + cmocka_unit_test_setup_teardown(given_StartedBuffer_when_StreamPadOverflowsBuffer_expect_ProperWordsWritten, setup, teardown), + cmocka_unit_test_setup_teardown(when_WriteBit_expect_BitWrittenToBufferFromLSB, setup, teardown), + cmocka_unit_test_setup_teardown(given_BitstreamBufferOneBitFromFull_when_WriteBit_expect_BitWrittenToBufferWrittenToStreamAndBufferReset, setup, teardown), + cmocka_unit_test_setup_teardown(given_BitstreamWithBitInBuffer_when_ReadBit_expect_OneBitReadFromLSB, setup, teardown), + cmocka_unit_test_setup_teardown(given_BitstreamWithEmptyBuffer_when_ReadBit_expect_LoadNextWordToBuffer, setup, teardown), + cmocka_unit_test_setup_teardown(when_WriteZeroBits_expect_NOP, setup, teardown), + cmocka_unit_test_setup_teardown(when_WriteBits_expect_BitsWrittenToBufferFromLSB, setup, teardown), + cmocka_unit_test_setup_teardown(when_WriteBitsFillsBufferExactly_expect_WordWrittenToStream, setup, teardown), + cmocka_unit_test_setup_teardown(when_WriteBitsOverflowsBuffer_expect_OverflowWrittenToNewBuffer, setup, teardown), + cmocka_unit_test_setup_teardown(when_ReadZeroBits_expect_NOP, setup, teardown), + cmocka_unit_test_setup_teardown(when_ReadBits_expect_BitsReadInOrderLSB, setup, teardown), + cmocka_unit_test_setup_teardown(given_BitstreamBufferEmptyWithNextWordAvailable_when_ReadBitsWsize_expect_EntireNextWordReturned, setup, teardown), + cmocka_unit_test_setup_teardown(when_ReadBitsSpreadsAcrossTwoWords_expect_BitsCombinedFromBothWords, setup, teardown), + cmocka_unit_test_setup_teardown(when_Wtell_expect_ReturnsWrittenBitCount, setup, teardown), + cmocka_unit_test_setup_teardown(when_Rtell_expect_ReturnsReadBitCount, setup, teardown), + cmocka_unit_test_setup_teardown(when_WseekToMultipleOfWsize_expect_PtrAlignedBufferEmpty, setup, teardown), + cmocka_unit_test_setup_teardown(when_WseekToNonMultipleOfWsize_expect_MaskedWordLoadedToBuffer, 
setup, teardown), + cmocka_unit_test_setup_teardown(when_RseekToMultipleOfWsize_expect_PtrAlignedBufferEmpty, setup, teardown), + cmocka_unit_test_setup_teardown(when_RseekToNonMultipleOfWsize_expect_MaskedWordLoadedToBuffer, setup, teardown), + cmocka_unit_test_setup_teardown(when_SkipZeroBits_expect_NOP, setup, teardown), + cmocka_unit_test_setup_teardown(when_SkipWithinBuffer_expect_MaskedBuffer, setup, teardown), + cmocka_unit_test_setup_teardown(when_SkipPastBufferEnd_expect_NewMaskedWordInBuffer, setup, teardown), + cmocka_unit_test_setup_teardown(when_Align_expect_BufferEmptyBitsZero, setup, teardown), + cmocka_unit_test_setup_teardown(given_EmptyBuffer_when_Flush_expect_NOP, setup, teardown), + cmocka_unit_test_setup_teardown(when_Flush_expect_PaddedWordWrittenToStream, setup, teardown), + cmocka_unit_test_setup_teardown(when_StreamCopy_expect_BitsCopiedToDestBitstream, setup, teardown), + }; + + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/src/inline/testBitstreamSmallWsize.c b/tests/src/inline/testBitstreamSmallWsize.c new file mode 100644 index 00000000..23350e55 --- /dev/null +++ b/tests/src/inline/testBitstreamSmallWsize.c @@ -0,0 +1,119 @@ +#include <stdarg.h> +#include <stddef.h> +#include <setjmp.h> +#include <cmocka.h> + +#define BIT_STREAM_WORD_TYPE uint16 + +#include "include/zfp/bitstream.h" +#include "include/zfp/bitstream.inl" + +#define STREAM_WORD_CAPACITY 4 + +struct setupVars { + void* buffer; + bitstream* b; +}; + +static int +setup(void **state) +{ + struct setupVars *s = malloc(sizeof(struct setupVars)); + assert_non_null(s); + + s->buffer = calloc(STREAM_WORD_CAPACITY, sizeof(bitstream_word)); + assert_non_null(s->buffer); + + s->b = stream_open(s->buffer, STREAM_WORD_CAPACITY * sizeof(bitstream_word)); + assert_non_null(s->b); + + *state = s; + + return 0; +} + +static int +teardown(void **state) +{ + struct setupVars *s = *state; + free(s->buffer); +
free(s->b); + free(s); + + return 0; +} + +static void +when_ReadBitsSpreadsAcrossMultipleWords_expect_BitsCombinedFromMultipleWords(void **state) +{ + const uint READ_BIT_COUNT = 48; + const uint PARTIAL_WORD_BIT_COUNT = 8; + const uint NUM_OVERFLOWED_BITS = READ_BIT_COUNT - PARTIAL_WORD_BIT_COUNT; + + const uint64 WRITE_BITS1 = 0x11; + const uint64 WRITE_BITS2 = 0x5555; + const uint64 WRITE_BITS3 = 0x9249; + const uint64 WRITE_BITS4 = 0x1111 + 0x8000; + + bitstream* s = ((struct setupVars *)*state)->b; + stream_write_bits(s, WRITE_BITS1, wsize); + stream_write_bits(s, WRITE_BITS2, wsize); + stream_write_bits(s, WRITE_BITS3, wsize); + stream_write_bits(s, WRITE_BITS4, wsize); + + stream_rewind(s); + s->buffer = stream_read_word(s); + s->bits = PARTIAL_WORD_BIT_COUNT; + + uint64 readBits = stream_read_bits(s, READ_BIT_COUNT); + + assert_int_equal(s->bits, wsize - (NUM_OVERFLOWED_BITS % wsize)); + assert_int_equal(readBits, WRITE_BITS1 + + (WRITE_BITS2 << PARTIAL_WORD_BIT_COUNT) + + (WRITE_BITS3 << (wsize + PARTIAL_WORD_BIT_COUNT)) + + ((WRITE_BITS4 & 0xff) << (2*wsize + PARTIAL_WORD_BIT_COUNT))); + assert_int_equal(s->buffer, (bitstream_word) (WRITE_BITS4 >> (NUM_OVERFLOWED_BITS % wsize))); +} + +// overflow refers to what will land in the buffer +// more significant bits than overflow are returned by stream_write_bits() +static void +when_WriteBitsOverflowsBufferByMultipleWords_expect_WordsWrittenAndRemainingOverflowInBuffer(void **state) +{ + const uint EXISTING_BIT_COUNT = 4; + const uint NUM_BITS_TO_WRITE = 40; + const uint OVERFLOW_BIT_COUNT = (NUM_BITS_TO_WRITE - (wsize - EXISTING_BIT_COUNT)) % wsize; + + const uint64 EXISTING_BUFFER = 0xf; + const uint64 WRITE_WORD1 = 0x5555; + const uint64 WRITE_WORD2 = 0x9249; + const uint64 WRITE_WORD3 = 0x1111 + 0x8000; + + const uint64 BITS_TO_WRITE = WRITE_WORD1 + + (WRITE_WORD2 << wsize) + + (WRITE_WORD3 << (2*wsize)); + + bitstream* s = ((struct setupVars *)*state)->b; + stream_write_bits(s, EXISTING_BUFFER, 
EXISTING_BIT_COUNT); + + uint64 remainingWord = stream_write_bits(s, BITS_TO_WRITE, NUM_BITS_TO_WRITE); + + assert_int_equal(remainingWord, WRITE_WORD3 >> (3*wsize - NUM_BITS_TO_WRITE)); + assert_int_equal(*s->begin, EXISTING_BUFFER + + ((WRITE_WORD1 << EXISTING_BIT_COUNT) & 0xffff)); + assert_int_equal(*(s->begin + 1), (WRITE_WORD1 >> (wsize - EXISTING_BIT_COUNT)) + + ((WRITE_WORD2 << EXISTING_BIT_COUNT) & 0xffff)); + assert_int_equal(s->bits, OVERFLOW_BIT_COUNT); + assert_int_equal(s->buffer, (WRITE_WORD2 >> (wsize - EXISTING_BIT_COUNT)) + + ((WRITE_WORD3 << EXISTING_BIT_COUNT) & 0x0fff)); +} + +int main() +{ + const struct CMUnitTest tests[] = { + cmocka_unit_test_setup_teardown(when_WriteBitsOverflowsBufferByMultipleWords_expect_WordsWrittenAndRemainingOverflowInBuffer, setup, teardown), + cmocka_unit_test_setup_teardown(when_ReadBitsSpreadsAcrossMultipleWords_expect_BitsCombinedFromMultipleWords, setup, teardown), + }; + + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/src/inline/testBitstreamStrided.c b/tests/src/inline/testBitstreamStrided.c new file mode 100644 index 00000000..d628d9eb --- /dev/null +++ b/tests/src/inline/testBitstreamStrided.c @@ -0,0 +1,95 @@ +#include <stdarg.h> +#include <stddef.h> +#include <setjmp.h> +#include <cmocka.h> + +#define BIT_STREAM_STRIDED + +#include "include/zfp/bitstream.h" +#include "include/zfp/bitstream.inl" + +// 4 words per block +#define BLOCK_SIZE 4 +// 16 blocks between consecutive stream-touched blocks +#define DELTA 16 +#define STREAM_BUFFER_LEN 3 +#define STREAM_STRIDED_LEN (STREAM_BUFFER_LEN * BLOCK_SIZE * DELTA) + +struct setupVars { + void* buffer; + bitstream* b; +}; + +static int +setup(void **state) +{ + struct setupVars *s = malloc(sizeof(struct setupVars)); + assert_non_null(s); + + s->buffer = calloc(STREAM_STRIDED_LEN, sizeof(bitstream_word)); + assert_non_null(s->buffer); + + s->b = stream_open(s->buffer, STREAM_STRIDED_LEN * sizeof(bitstream_word)); + assert_non_null(s->b); + + assert_true(stream_set_stride(s->b,
BLOCK_SIZE, DELTA)); + + *state = s; + + return 0; +} + +static int +teardown(void **state) +{ + struct setupVars *s = *state; + free(s->buffer); + free(s->b); + free(s); + + return 0; +} + +static void +given_Strided_when_ReadWordCompletesBlock_expect_PtrAdvancedByStrideLen(void **state) +{ + bitstream* s = ((struct setupVars *)*state)->b; + bitstream_word* prevPtr = s->ptr; + + int i; + for (i = 0; i < BLOCK_SIZE - 1; i++) { + stream_read_word(s); + assert_ptr_equal(s->ptr, prevPtr + 1); + prevPtr = s->ptr; + } + + stream_read_word(s); + assert_ptr_equal(s->ptr, (prevPtr + 1) + DELTA * BLOCK_SIZE); +} + +static void +given_Strided_when_WriteWordCompletesBlock_expect_PtrAdvancedByStrideLen(void **state) +{ + bitstream* s = ((struct setupVars *)*state)->b; + bitstream_word* prevPtr = s->ptr; + + int i; + for (i = 0; i < BLOCK_SIZE - 1; i++) { + stream_write_word(s, 0); + assert_ptr_equal(s->ptr, prevPtr + 1); + prevPtr = s->ptr; + } + + stream_write_word(s, 0); + assert_ptr_equal(s->ptr, (prevPtr + 1) + DELTA * BLOCK_SIZE); +} + +int main() +{ + const struct CMUnitTest tests[] = { + cmocka_unit_test_setup_teardown(given_Strided_when_WriteWordCompletesBlock_expect_PtrAdvancedByStrideLen, setup, teardown), + cmocka_unit_test_setup_teardown(given_Strided_when_ReadWordCompletesBlock_expect_PtrAdvancedByStrideLen, setup, teardown), + }; + + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/src/misc/CMakeLists.txt b/tests/src/misc/CMakeLists.txt new file mode 100644 index 00000000..ce76cf0d --- /dev/null +++ b/tests/src/misc/CMakeLists.txt @@ -0,0 +1,48 @@ +add_executable(testZfpHeader testZfpHeader.c) +target_link_libraries(testZfpHeader cmocka zfp) +add_test(NAME testZfpHeader COMMAND testZfpHeader) + +add_executable(testZfpStream testZfpStream.c) +target_link_libraries(testZfpStream cmocka zfp) +add_test(NAME testZfpStream COMMAND testZfpStream) + +add_executable(testZfpPromote testZfpPromote.c) +target_link_libraries(testZfpPromote cmocka zfp) 
+add_test(NAME testZfpPromote COMMAND testZfpPromote) + +add_executable(testZfpField1f testZfpField1f.c) +target_link_libraries(testZfpField1f cmocka zfp) +add_test(NAME testZfpField1f COMMAND testZfpField1f) + +add_executable(testZfpField2f testZfpField2f.c) +target_link_libraries(testZfpField2f cmocka zfp) +add_test(NAME testZfpField2f COMMAND testZfpField2f) + +add_executable(testZfpField3f testZfpField3f.c) +target_link_libraries(testZfpField3f cmocka zfp) +add_test(NAME testZfpField3f COMMAND testZfpField3f) + +add_executable(testZfpField4f testZfpField4f.c) +target_link_libraries(testZfpField4f cmocka zfp) +add_test(NAME testZfpField4f COMMAND testZfpField4f) + +add_executable(testZfpField1d testZfpField1d.c) +target_link_libraries(testZfpField1d cmocka zfp) +add_test(NAME testZfpField1d COMMAND testZfpField1d) + +add_executable(testZfpField2d testZfpField2d.c) +target_link_libraries(testZfpField2d cmocka zfp) +add_test(NAME testZfpField2d COMMAND testZfpField2d) + +add_executable(testZfpField3d testZfpField3d.c) +target_link_libraries(testZfpField3d cmocka zfp) +add_test(NAME testZfpField3d COMMAND testZfpField3d) + +add_executable(testZfpField4d testZfpField4d.c) +target_link_libraries(testZfpField4d cmocka zfp) +add_test(NAME testZfpField4d COMMAND testZfpField4d) + +if(HAVE_LIBM_MATH) + target_link_libraries(testZfpHeader m) + target_link_libraries(testZfpStream m) +endif() diff --git a/tests/src/misc/testZfpField1d.c b/tests/src/misc/testZfpField1d.c new file mode 100644 index 00000000..745e550f --- /dev/null +++ b/tests/src/misc/testZfpField1d.c @@ -0,0 +1,24 @@ +#include "zfp.h" + +#include +#include +#include +#include +#include + +#include + +#define DIMS 1 +#define ZFP_TYPE zfp_type_double +#define SCALAR double + +#define NX 20 +#define SX 2 + +#include "zfpFieldBase.c" + +#undef DIMS +#undef ZFP_TYPE +#undef SCALAR +#undef NX +#undef SX diff --git a/tests/src/misc/testZfpField1f.c b/tests/src/misc/testZfpField1f.c new file mode 100644 index 
00000000..4a416cdf --- /dev/null +++ b/tests/src/misc/testZfpField1f.c @@ -0,0 +1,24 @@ +#include "zfp.h" + +#include +#include +#include +#include +#include + +#include + +#define DIMS 1 +#define ZFP_TYPE zfp_type_float +#define SCALAR float + +#define NX 20 +#define SX 2 + +#include "zfpFieldBase.c" + +#undef DIMS +#undef ZFP_TYPE +#undef SCALAR +#undef NX +#undef SX diff --git a/tests/src/misc/testZfpField2d.c b/tests/src/misc/testZfpField2d.c new file mode 100644 index 00000000..7c48af60 --- /dev/null +++ b/tests/src/misc/testZfpField2d.c @@ -0,0 +1,28 @@ +#include "zfp.h" + +#include +#include +#include +#include +#include + +#include + +#define DIMS 2 +#define ZFP_TYPE zfp_type_double +#define SCALAR double + +#define NX 20 +#define NY 21 +#define SX 2 +#define SY 3 + +#include "zfpFieldBase.c" + +#undef DIMS +#undef ZFP_TYPE +#undef SCALAR +#undef NX +#undef NY +#undef SX +#undef SY diff --git a/tests/src/misc/testZfpField2f.c b/tests/src/misc/testZfpField2f.c new file mode 100644 index 00000000..ebba9fea --- /dev/null +++ b/tests/src/misc/testZfpField2f.c @@ -0,0 +1,28 @@ +#include "zfp.h" + +#include +#include +#include +#include +#include + +#include + +#define DIMS 2 +#define ZFP_TYPE zfp_type_float +#define SCALAR float + +#define NX 20 +#define NY 21 +#define SX 2 +#define SY 3 + +#include "zfpFieldBase.c" + +#undef DIMS +#undef ZFP_TYPE +#undef SCALAR +#undef NX +#undef NY +#undef SX +#undef SY diff --git a/tests/src/misc/testZfpField3d.c b/tests/src/misc/testZfpField3d.c new file mode 100644 index 00000000..c084276b --- /dev/null +++ b/tests/src/misc/testZfpField3d.c @@ -0,0 +1,32 @@ +#include "zfp.h" + +#include +#include +#include +#include +#include + +#include + +#define DIMS 3 +#define ZFP_TYPE zfp_type_double +#define SCALAR double + +#define NX 20 +#define NY 21 +#define NZ 12 +#define SX 2 +#define SY 3 +#define SZ 4 + +#include "zfpFieldBase.c" + +#undef DIMS +#undef ZFP_TYPE +#undef SCALAR +#undef NX +#undef NY +#undef NZ +#undef SX +#undef 
SY +#undef SZ diff --git a/tests/src/misc/testZfpField3f.c b/tests/src/misc/testZfpField3f.c new file mode 100644 index 00000000..b4fca826 --- /dev/null +++ b/tests/src/misc/testZfpField3f.c @@ -0,0 +1,32 @@ +#include "zfp.h" + +#include +#include +#include +#include +#include + +#include + +#define DIMS 3 +#define ZFP_TYPE zfp_type_float +#define SCALAR float + +#define NX 20 +#define NY 21 +#define NZ 12 +#define SX 2 +#define SY 3 +#define SZ 4 + +#include "zfpFieldBase.c" + +#undef DIMS +#undef ZFP_TYPE +#undef SCALAR +#undef NX +#undef NY +#undef NZ +#undef SX +#undef SY +#undef SZ diff --git a/tests/src/misc/testZfpField4d.c b/tests/src/misc/testZfpField4d.c new file mode 100644 index 00000000..aeffbccb --- /dev/null +++ b/tests/src/misc/testZfpField4d.c @@ -0,0 +1,36 @@ +#include "zfp.h" + +#include +#include +#include +#include +#include + +#include + +#define DIMS 4 +#define ZFP_TYPE zfp_type_double +#define SCALAR double + +#define NX 20 +#define NY 21 +#define NZ 12 +#define NW 6 +#define SX 2 +#define SY 3 +#define SZ 4 +#define SW 2 + +#include "zfpFieldBase.c" + +#undef DIMS +#undef ZFP_TYPE +#undef SCALAR +#undef NX +#undef NY +#undef NZ +#undef NW +#undef SX +#undef SY +#undef SZ +#undef SW diff --git a/tests/src/misc/testZfpField4f.c b/tests/src/misc/testZfpField4f.c new file mode 100644 index 00000000..9fdd6039 --- /dev/null +++ b/tests/src/misc/testZfpField4f.c @@ -0,0 +1,36 @@ +#include "zfp.h" + +#include +#include +#include +#include +#include + +#include + +#define DIMS 4 +#define ZFP_TYPE zfp_type_float +#define SCALAR float + +#define NX 20 +#define NY 21 +#define NZ 12 +#define NW 6 +#define SX 2 +#define SY 3 +#define SZ 4 +#define SW 2 + +#include "zfpFieldBase.c" + +#undef DIMS +#undef ZFP_TYPE +#undef SCALAR +#undef NX +#undef NY +#undef NZ +#undef NW +#undef SX +#undef SY +#undef SZ +#undef SW diff --git a/tests/src/misc/testZfpHeader.c b/tests/src/misc/testZfpHeader.c new file mode 100644 index 00000000..3986a739 --- /dev/null +++ 
b/tests/src/misc/testZfpHeader.c @@ -0,0 +1,497 @@ +#include "src/encode1d.c" +#include "constants/1dDouble.h" + +#include +#include +#include +#include + +#include + +#define FIELD_X_LEN 33 +#define FIELD_Y_LEN 401 + +// custom compression parameters +#define MIN_BITS 11u +#define MAX_BITS 1001u +#define MAX_PREC 52u +#define MIN_EXP (-1000) + +#define PREC 44 +#define ACC 1e-4 + +struct setupVars { + void* buffer; + zfp_stream* stream; + zfp_field* field; +}; + +static int +setup(void **state) +{ + struct setupVars *bundle = malloc(sizeof(struct setupVars)); + assert_non_null(bundle); + + zfp_type type = ZFP_TYPE; + zfp_field* field = zfp_field_2d(NULL, type, FIELD_X_LEN, FIELD_Y_LEN); + + zfp_stream* stream = zfp_stream_open(NULL); + zfp_stream_set_rate(stream, ZFP_RATE_PARAM_BITS, type, DIMS, zfp_false); + + size_t bufsizeBytes = zfp_stream_maximum_size(stream, field); + bundle->buffer = calloc(bufsizeBytes, sizeof(char)); + assert_non_null(bundle->buffer); + + bitstream* s = stream_open(bundle->buffer, bufsizeBytes); + assert_non_null(s); + + zfp_stream_set_bit_stream(stream, s); + zfp_stream_rewind(stream); + + bundle->stream = stream; + bundle->field = field; + + *state = bundle; + + return 0; +} + +static int +teardown(void **state) +{ + struct setupVars *bundle = *state; + stream_close(bundle->stream->stream); + zfp_stream_close(bundle->stream); + zfp_field_free(bundle->field); + free(bundle->buffer); + free(bundle); + + return 0; +} + +static void +when_zfpFieldMetadataCalled_expect_LSB2BitsEncodeScalarType(void **state) +{ + struct setupVars *bundle = *state; + zfp_field* field = bundle->field; + + uint64 metadata = zfp_field_metadata(field); + uint zfpType = (metadata & 0x3u) + 1; + + assert_int_equal(zfpType, ZFP_TYPE); +} + +static void +when_zfpFieldMetadataCalled_expect_LSBBits3To4EncodeDimensionality(void **state) +{ + struct setupVars *bundle = *state; + zfp_field* field = bundle->field; + + uint64 metadata = zfp_field_metadata(field); + uint 
dimensionality = ((metadata >> 2) & 0x3u) + 1; + + // setup uses a 2d field + assert_int_equal(dimensionality, 2); +} + +static void +when_zfpFieldMetadataCalled_expect_LSBBits5To53EncodeArrayDimensions(void **state) +{ + uint MASK_24_BITS = 0xffffffu; + uint64 MASK_48_BITS = 0xffffffffffffu; + + struct setupVars *bundle = *state; + zfp_field* field = bundle->field; + + uint64 metadata = zfp_field_metadata(field); + + // setup uses a 2d field + uint64 metadataEncodedDims = (metadata >> 4) & MASK_48_BITS; + uint nx = (uint)((metadataEncodedDims & MASK_24_BITS) + 1); + metadataEncodedDims >>= 24; + uint ny = (uint)((metadataEncodedDims & MASK_24_BITS) + 1); + + assert_int_equal(nx, FIELD_X_LEN); + assert_int_equal(ny, FIELD_Y_LEN); +} + +static void +when_zfpFieldSetMetadataCalled_expect_scalarTypeSet(void **state) +{ + struct setupVars *bundle = *state; + zfp_field* field = bundle->field; + uint64 metadata = zfp_field_metadata(field); + + // reset field parameter + field->type = zfp_type_none; + + zfp_field_set_metadata(field, metadata); + + assert_int_equal(field->type, ZFP_TYPE); +} + +static void +when_zfpFieldSetMetadataCalled_expect_arrayDimensionsSet(void **state) +{ + struct setupVars *bundle = *state; + zfp_field* field = bundle->field; + uint64 metadata = zfp_field_metadata(field); + + // reset dimension values + zfp_field_set_size_3d(field, 0, 0, 0); + + zfp_field_set_metadata(field, metadata); + + // setup uses a 2d field + assert_int_equal(field->nx, FIELD_X_LEN); + assert_int_equal(field->ny, FIELD_Y_LEN); + assert_int_equal(field->nz, 0); +} + +static void +when_zfpFieldMetadataCalled_onInvalidSize_expect_ZFP_META_NULL(void **state) +{ + struct setupVars *bundle = *state; + zfp_field* field = bundle->field; + uint64 metadata = zfp_field_metadata(field); + + // setup uses a 2d field + field->nx = 1 << 25; + field->ny = 1 << 25; + + uint64 meta = zfp_field_metadata(field); + + assert_int_equal(meta, ZFP_META_NULL); +} + +static void 
+when_zfpFieldSetMetadataCalled_forInvalidMeta_expect_false(void **state)
+{
+  struct setupVars *bundle = *state;
+  zfp_field* field = bundle->field;
+
+  uint64 meta = 1ULL << (ZFP_META_BITS + 1);
+  zfp_bool status = zfp_field_set_metadata(field, meta);
+
+  assert_int_equal(status, zfp_false);
+}
+
+static void
+when_zfpWriteHeaderMagic_expect_numBitsWrittenEqualToZFP_MAGIC_BITS(void **state)
+{
+  struct setupVars *bundle = *state;
+  zfp_stream* stream = bundle->stream;
+
+  assert_int_equal(zfp_write_header(stream, bundle->field, ZFP_HEADER_MAGIC), ZFP_MAGIC_BITS);
+
+  // check bitstream buffer
+  bitstream* s = zfp_stream_bit_stream(stream);
+  assert_int_equal(s->bits, ZFP_MAGIC_BITS);
+}
+
+static void
+when_zfpWriteHeaderMagic_expect_24BitsAreCharsZfpFollowedBy8BitsZfpCodecVersion(void **state)
+{
+  struct setupVars *bundle = *state;
+  zfp_stream* stream = bundle->stream;
+
+  assert_int_equal(zfp_write_header(stream, bundle->field, ZFP_HEADER_MAGIC), ZFP_MAGIC_BITS);
+  zfp_stream_flush(stream);
+
+  zfp_stream_rewind(stream);
+  bitstream* s = zfp_stream_bit_stream(stream);
+  uint64 char1 = stream_read_bits(s, 8);
+  uint64 char2 = stream_read_bits(s, 8);
+  uint64 char3 = stream_read_bits(s, 8);
+  uint64 zfp_codec_version = stream_read_bits(s, 8);
+
+  assert_int_equal(char1, 'z');
+  assert_int_equal(char2, 'f');
+  assert_int_equal(char3, 'p');
+  assert_int_equal(zfp_codec_version, ZFP_CODEC);
+}
+
+static void
+when_zfpWriteHeaderMetadata_expect_numBitsWrittenEqualToZFP_META_BITS(void **state)
+{
+  struct setupVars *bundle = *state;
+  zfp_stream* stream = bundle->stream;
+
+  assert_int_equal(zfp_write_header(stream, bundle->field, ZFP_HEADER_META), ZFP_META_BITS);
+}
+
+static void
+given_fixedRate_when_zfpWriteHeaderMode_expect_12BitsWrittenToBitstream(void **state)
+{
+  struct setupVars *bundle = *state;
+  zfp_stream* stream = bundle->stream;
+  // setup uses fixed rate
+
+  assert_int_equal(zfp_write_header(stream, bundle->field, ZFP_HEADER_MODE), ZFP_MODE_SHORT_BITS);
+}
+
+static void
+given_fixedPrecision_when_zfpWriteHeaderMode_expect_12BitsWrittenToBitstream(void **state)
+{
+  struct setupVars *bundle = *state;
+  zfp_stream* stream = bundle->stream;
+  zfp_stream_set_precision(stream, PREC);
+
+  assert_int_equal(zfp_write_header(stream, bundle->field, ZFP_HEADER_MODE), ZFP_MODE_SHORT_BITS);
+}
+
+static void
+given_fixedAccuracy_when_zfpWriteHeaderMode_expect_12BitsWrittenToBitstream(void **state)
+{
+  struct setupVars *bundle = *state;
+  zfp_stream* stream = bundle->stream;
+  zfp_stream_set_accuracy(stream, ACC);
+
+  assert_int_equal(zfp_write_header(stream, bundle->field, ZFP_HEADER_MODE), ZFP_MODE_SHORT_BITS);
+}
+
+static void
+given_customCompressParamsSet_when_zfpWriteHeaderMode_expect_64BitsWrittenToBitstream(void **state)
+{
+  struct setupVars *bundle = *state;
+  zfp_stream* stream = bundle->stream;
+  assert_int_equal(zfp_stream_set_params(stream, MIN_BITS, MAX_BITS, MAX_PREC, MIN_EXP), 1);
+
+  assert_int_equal(zfp_write_header(stream, bundle->field, ZFP_HEADER_MODE), ZFP_MODE_LONG_BITS);
+}
+
+static void
+setupAndAssertProperNumBitsRead(void **state, uint mask, size_t expectedWrittenBits, size_t expectedReadBits)
+{
+  struct setupVars *bundle = *state;
+  zfp_stream* stream = bundle->stream;
+  assert_int_equal(zfp_write_header(stream, bundle->field, mask), expectedWrittenBits);
+  zfp_stream_flush(stream);
+  zfp_stream_rewind(stream);
+
+  assert_int_equal(zfp_read_header(stream, bundle->field, mask), expectedReadBits);
+
+  // check bitstream buffer
+  bitstream* s = zfp_stream_bit_stream(stream);
+  // use expectedWrittenBits because when zfp_read_header() returns 0, the bitstream is still displaced
+  assert_int_equal(s->bits, wsize - expectedWrittenBits);
+}
+
+static void
+when_zfpReadHeaderMagic_expect_properNumBitsRead(void **state)
+{
+  setupAndAssertProperNumBitsRead(state, ZFP_HEADER_MAGIC, ZFP_MAGIC_BITS, ZFP_MAGIC_BITS);
+}
+
+static void
+given_improperHeader_when_zfpReadHeaderMagic_expect_returnsZero(void **state)
+{
+  struct setupVars *bundle = *state;
+  zfp_stream* stream = bundle->stream;
+  // bitstream is zeros
+
+  assert_int_equal(zfp_read_header(stream, bundle->field, ZFP_HEADER_MAGIC), 0);
+  assert_int_equal(zfp_stream_bit_stream(stream)->bits, 64 - 8);
+}
+
+static void
+when_zfpReadHeaderMetadata_expect_properNumBitsRead(void **state)
+{
+  setupAndAssertProperNumBitsRead(state, ZFP_HEADER_META, ZFP_META_BITS, ZFP_META_BITS);
+}
+
+static void
+given_properHeader_when_zfpReadHeaderMetadata_expect_fieldArrayDimsSet(void **state)
+{
+  struct setupVars *bundle = *state;
+  zfp_stream* stream = bundle->stream;
+  zfp_field* field = bundle->field;
+  size_t nx = field->nx;
+  size_t ny = field->ny;
+  size_t nz = field->nz;
+
+  // write header to bitstream
+  assert_int_equal(zfp_write_header(stream, bundle->field, ZFP_HEADER_META), ZFP_META_BITS);
+  zfp_stream_flush(stream);
+  zfp_stream_rewind(stream);
+
+  // reset field->nx, ny, nz
+  zfp_field_set_size_3d(field, 0, 0, 0);
+
+  assert_int_equal(zfp_read_header(stream, bundle->field, ZFP_HEADER_META), ZFP_META_BITS);
+  assert_int_equal(field->nx, nx);
+  assert_int_equal(field->ny, ny);
+  assert_int_equal(field->nz, nz);
+}
+
+static void
+given_properHeaderFixedRate_when_zfpReadHeaderMode_expect_properNumBitsRead(void **state)
+{
+  setupAndAssertProperNumBitsRead(state, ZFP_HEADER_MODE, ZFP_MODE_SHORT_BITS, ZFP_MODE_SHORT_BITS);
+}
+
+static void
+given_properHeaderFixedPrecision_when_zfpReadHeaderMode_expect_properNumBitsRead(void **state)
+{
+  struct setupVars *bundle = *state;
+  zfp_stream_set_precision(bundle->stream, PREC);
+
+  setupAndAssertProperNumBitsRead(state, ZFP_HEADER_MODE, ZFP_MODE_SHORT_BITS, ZFP_MODE_SHORT_BITS);
+}
+
+static void
+given_properHeaderFixedAccuracy_when_zfpReadHeaderMode_expect_properNumBitsRead(void **state)
+{
+  struct setupVars *bundle = *state;
+  zfp_stream_set_accuracy(bundle->stream, ACC);
+
+  setupAndAssertProperNumBitsRead(state, ZFP_HEADER_MODE, ZFP_MODE_SHORT_BITS, ZFP_MODE_SHORT_BITS);
+}
+
+static void
+given_customCompressParamsSet_when_zfpReadHeaderMode_expect_properNumBitsRead(void **state)
+{
+  struct setupVars *bundle = *state;
+  assert_int_equal(zfp_stream_set_params(bundle->stream, MIN_BITS, MAX_BITS, MAX_PREC, MIN_EXP), 1);
+
+  setupAndAssertProperNumBitsRead(state, ZFP_HEADER_MODE, ZFP_MODE_LONG_BITS, ZFP_MODE_LONG_BITS);
+}
+
+static void
+setInvalidCompressParams(zfp_stream* stream)
+{
+  assert_int_equal(zfp_stream_set_params(stream, MAX_BITS + 1, MAX_BITS, MAX_PREC, MIN_EXP), 0);
+  stream->minbits = MAX_BITS + 1;
+  stream->maxbits = MAX_BITS;
+  stream->maxprec = MAX_PREC;
+  stream->minexp = MIN_EXP;
+}
+
+static void
+given_invalidCompressParamsInHeader_when_zfpReadHeaderMode_expect_properNumBitsRead(void **state)
+{
+  struct setupVars *bundle = *state;
+  setInvalidCompressParams(bundle->stream);
+
+  setupAndAssertProperNumBitsRead(state, ZFP_HEADER_MODE, ZFP_MODE_LONG_BITS, 0);
+}
+
+static void
+assertCompressParamsBehaviorWhenReadHeader(void **state, int expectedWrittenBits, int expectedReadBits)
+{
+  struct setupVars *bundle = *state;
+  zfp_stream* stream = bundle->stream;
+  zfp_field* field = bundle->field;
+
+  uint minBits, maxBits, maxPrec;
+  int minExp;
+  zfp_stream_params(stream, &minBits, &maxBits, &maxPrec, &minExp);
+
+  assert_int_equal(zfp_write_header(stream, field, ZFP_HEADER_MODE), expectedWrittenBits);
+  zfp_stream_flush(stream);
+  zfp_stream_rewind(stream);
+
+  assert_int_equal(zfp_stream_set_params(stream, ZFP_MIN_BITS, ZFP_MAX_BITS, ZFP_MAX_PREC, ZFP_MIN_EXP), 1);
+
+  assert_int_equal(zfp_read_header(stream, field, ZFP_HEADER_MODE), expectedReadBits);
+
+  if (!expectedReadBits) {
+    // expect params were not set
+    assert_int_not_equal(stream->minbits, minBits);
+    assert_int_not_equal(stream->maxbits, maxBits);
+    assert_int_not_equal(stream->maxprec, maxPrec);
+    assert_int_not_equal(stream->minexp, minExp);
+  } else {
+    assert_int_equal(stream->minbits, minBits);
+    assert_int_equal(stream->maxbits, maxBits);
+    assert_int_equal(stream->maxprec, maxPrec);
+    assert_int_equal(stream->minexp, minExp);
+  }
+}
+
+static void
+given_properHeaderFixedRate_when_zfpReadHeaderMode_expect_streamParamsSet(void **state)
+{
+  assertCompressParamsBehaviorWhenReadHeader(state, ZFP_MODE_SHORT_BITS, ZFP_MODE_SHORT_BITS);
+}
+
+static void
+given_properHeaderFixedPrecision_when_zfpReadHeaderMode_expect_streamParamsSet(void **state)
+{
+  struct setupVars *bundle = *state;
+  zfp_stream_set_precision(bundle->stream, PREC);
+
+  assertCompressParamsBehaviorWhenReadHeader(state, ZFP_MODE_SHORT_BITS, ZFP_MODE_SHORT_BITS);
+}
+
+static void
+given_properHeaderFixedAccuracy_when_zfpReadHeaderMode_expect_streamParamsSet(void **state)
+{
+  struct setupVars *bundle = *state;
+  zfp_stream_set_accuracy(bundle->stream, ACC);
+
+  assertCompressParamsBehaviorWhenReadHeader(state, ZFP_MODE_SHORT_BITS, ZFP_MODE_SHORT_BITS);
+}
+
+static void
+given_customCompressParamsAndProperHeader_when_zfpReadHeaderMode_expect_streamParamsSet(void **state)
+{
+  struct setupVars *bundle = *state;
+  assert_int_equal(zfp_stream_set_params(bundle->stream, MIN_BITS, MAX_BITS, MAX_PREC, MIN_EXP), 1);
+
+  assertCompressParamsBehaviorWhenReadHeader(state, ZFP_MODE_LONG_BITS, ZFP_MODE_LONG_BITS);
+}
+
+static void
+given_invalidCompressParamsInHeader_when_zfpReadHeaderMode_expect_streamParamsNotSet(void **state)
+{
+  struct setupVars *bundle = *state;
+  setInvalidCompressParams(bundle->stream);
+
+  assertCompressParamsBehaviorWhenReadHeader(state, ZFP_MODE_LONG_BITS, 0);
+}
+
+int main()
+{
+  const struct CMUnitTest tests[] = {
+    // (non zfp_stream) functions involved in zfp header
+    cmocka_unit_test_setup_teardown(when_zfpFieldMetadataCalled_expect_LSB2BitsEncodeScalarType, setup, teardown),
+    cmocka_unit_test_setup_teardown(when_zfpFieldMetadataCalled_expect_LSBBits3To4EncodeDimensionality, setup, teardown),
+    cmocka_unit_test_setup_teardown(when_zfpFieldMetadataCalled_expect_LSBBits5To53EncodeArrayDimensions, setup, teardown),
+
+    cmocka_unit_test_setup_teardown(when_zfpFieldSetMetadataCalled_expect_scalarTypeSet, setup, teardown),
+    cmocka_unit_test_setup_teardown(when_zfpFieldSetMetadataCalled_expect_arrayDimensionsSet, setup, teardown),
+
+    // write header
+    cmocka_unit_test_setup_teardown(when_zfpWriteHeaderMagic_expect_numBitsWrittenEqualToZFP_MAGIC_BITS, setup, teardown),
+    cmocka_unit_test_setup_teardown(when_zfpFieldMetadataCalled_onInvalidSize_expect_ZFP_META_NULL, setup, teardown),
+    cmocka_unit_test_setup_teardown(when_zfpFieldSetMetadataCalled_forInvalidMeta_expect_false, setup, teardown),
+    cmocka_unit_test_setup_teardown(when_zfpWriteHeaderMagic_expect_24BitsAreCharsZfpFollowedBy8BitsZfpCodecVersion, setup, teardown),
+
+    cmocka_unit_test_setup_teardown(when_zfpWriteHeaderMetadata_expect_numBitsWrittenEqualToZFP_META_BITS, setup, teardown),
+
+    cmocka_unit_test_setup_teardown(given_fixedRate_when_zfpWriteHeaderMode_expect_12BitsWrittenToBitstream, setup, teardown),
+    cmocka_unit_test_setup_teardown(given_fixedPrecision_when_zfpWriteHeaderMode_expect_12BitsWrittenToBitstream, setup, teardown),
+    cmocka_unit_test_setup_teardown(given_fixedAccuracy_when_zfpWriteHeaderMode_expect_12BitsWrittenToBitstream, setup, teardown),
+    cmocka_unit_test_setup_teardown(given_customCompressParamsSet_when_zfpWriteHeaderMode_expect_64BitsWrittenToBitstream, setup, teardown),
+
+    // read header
+    cmocka_unit_test_setup_teardown(when_zfpReadHeaderMagic_expect_properNumBitsRead, setup, teardown),
+    cmocka_unit_test_setup_teardown(given_improperHeader_when_zfpReadHeaderMagic_expect_returnsZero, setup, teardown),
+
+    cmocka_unit_test_setup_teardown(when_zfpReadHeaderMetadata_expect_properNumBitsRead, setup, teardown),
+    cmocka_unit_test_setup_teardown(given_properHeader_when_zfpReadHeaderMetadata_expect_fieldArrayDimsSet, setup, teardown),
+
+    cmocka_unit_test_setup_teardown(given_properHeaderFixedRate_when_zfpReadHeaderMode_expect_properNumBitsRead, setup, teardown),
+    cmocka_unit_test_setup_teardown(given_properHeaderFixedPrecision_when_zfpReadHeaderMode_expect_properNumBitsRead, setup, teardown),
+    cmocka_unit_test_setup_teardown(given_properHeaderFixedAccuracy_when_zfpReadHeaderMode_expect_properNumBitsRead, setup, teardown),
+    cmocka_unit_test_setup_teardown(given_properHeaderFixedRate_when_zfpReadHeaderMode_expect_streamParamsSet, setup, teardown),
+    cmocka_unit_test_setup_teardown(given_properHeaderFixedPrecision_when_zfpReadHeaderMode_expect_streamParamsSet, setup, teardown),
+    cmocka_unit_test_setup_teardown(given_properHeaderFixedAccuracy_when_zfpReadHeaderMode_expect_streamParamsSet, setup, teardown),
+    cmocka_unit_test_setup_teardown(given_customCompressParamsSet_when_zfpReadHeaderMode_expect_properNumBitsRead, setup, teardown),
+    cmocka_unit_test_setup_teardown(given_customCompressParamsAndProperHeader_when_zfpReadHeaderMode_expect_streamParamsSet, setup, teardown),
+    cmocka_unit_test_setup_teardown(given_invalidCompressParamsInHeader_when_zfpReadHeaderMode_expect_properNumBitsRead, setup, teardown),
+    cmocka_unit_test_setup_teardown(given_invalidCompressParamsInHeader_when_zfpReadHeaderMode_expect_streamParamsNotSet, setup, teardown),
+  };
+  return cmocka_run_group_tests(tests, NULL, NULL);
+}
diff --git a/tests/src/misc/testZfpPromote.c b/tests/src/misc/testZfpPromote.c
new file mode 100644
index 00000000..6416abee
--- /dev/null
+++ b/tests/src/misc/testZfpPromote.c
@@ -0,0 +1,120 @@
+#include "zfp.h"
+
+#include <stdarg.h>
+#include <stddef.h>
+#include <setjmp.h>
+#include <cmocka.h>
+
+#include <limits.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+static void
+given_int8_when_promoteToInt32_expect_demoteToInt8Matches(void **state)
+{
+  uint dims = 3;
+  uint sz = 1u << (2 * dims);
+  int8* iblock8 = (int8*)malloc(sizeof(int8)*sz);
+  int8* oblock8 = (int8*)calloc(sz, sizeof(int8));
+  int32* block32 = (int32*)malloc(sizeof(int32)*sz);
+
+  assert_non_null(iblock8);
+  assert_non_null(oblock8);
+  assert_non_null(block32);
+
+  uint i;
+  for (i = 0; i < sz; i++)
+    iblock8[i] = (int8)i;
+
+  zfp_promote_int8_to_int32(block32, iblock8, dims);
+  zfp_demote_int32_to_int8(oblock8, block32, dims);
+
+  for (i = 0; i < sz; i++)
+    assert_int_equal(iblock8[i], oblock8[i]);
+}
+
+static void
+given_uint8_when_promoteToInt32_expect_demoteToUInt8Matches(void **state)
+{
+  uint dims = 3;
+  uint sz = 1u << (2 * dims);
+  uint8* iblock8 = (uint8*)malloc(sizeof(uint8)*sz);
+  uint8* oblock8 = (uint8*)calloc(sz, sizeof(uint8));
+  int32* block32 = (int32*)malloc(sizeof(int32)*sz);
+
+  assert_non_null(iblock8);
+  assert_non_null(oblock8);
+  assert_non_null(block32);
+
+  uint i;
+  for (i = 0; i < sz; i++)
+    iblock8[i] = (uint8)i;
+
+  zfp_promote_uint8_to_int32(block32, iblock8, dims);
+  zfp_demote_int32_to_uint8(oblock8, block32, dims);
+
+  for (i = 0; i < sz; i++)
+    assert_int_equal(iblock8[i], oblock8[i]);
+}
+
+static void
+given_int16_when_promoteToInt32_expect_demoteToInt16Matches(void **state)
+{
+  uint dims = 3;
+  uint sz = 1u << (2 * dims);
+  int16* iblock16 = (int16*)malloc(sizeof(int16)*sz);
+  int16* oblock16 = (int16*)calloc(sz, sizeof(int16));
+  int32* block32 = (int32*)malloc(sizeof(int32)*sz);
+
+  assert_non_null(iblock16);
+  assert_non_null(oblock16);
+  assert_non_null(block32);
+
+  uint i;
+  for (i = 0; i < sz; i++)
+    iblock16[i] = (int16)i;
+
+  zfp_promote_int16_to_int32(block32, iblock16, dims);
+  zfp_demote_int32_to_int16(oblock16, block32, dims);
+
+  for (i = 0; i < sz; i++)
+    assert_int_equal(iblock16[i], oblock16[i]);
+}
+
+static void
+given_uint16_when_promoteToInt32_expect_demoteToUInt16Matches(void **state)
+{
+  uint dims = 3;
+  uint sz = 1u << (2 * dims);
+  uint16* iblock16 = (uint16*)malloc(sizeof(uint16)*sz);
+  uint16* oblock16 = (uint16*)calloc(sz, sizeof(uint16));
+  int32* block32 = (int32*)malloc(sizeof(int32)*sz);
+
+  assert_non_null(iblock16);
+  assert_non_null(oblock16);
+  assert_non_null(block32);
+
+  uint i;
+  for (i = 0; i < sz; i++)
+    iblock16[i] = (uint16)i;
+
+  zfp_promote_uint16_to_int32(block32, iblock16, dims);
+  zfp_demote_int32_to_uint16(oblock16, block32, dims);
+
+  for (i = 0; i < sz; i++)
+    assert_int_equal(iblock16[i], oblock16[i]);
+}
+
+int main()
+{
+  const struct CMUnitTest tests[] = {
+    cmocka_unit_test(given_int8_when_promoteToInt32_expect_demoteToInt8Matches),
+    cmocka_unit_test(given_uint8_when_promoteToInt32_expect_demoteToUInt8Matches),
+    cmocka_unit_test(given_int16_when_promoteToInt32_expect_demoteToInt16Matches),
+    cmocka_unit_test(given_uint16_when_promoteToInt32_expect_demoteToUInt16Matches),
+  };
+
+  return cmocka_run_group_tests(tests, NULL, NULL);
+}
diff --git a/tests/src/misc/testZfpStream.c b/tests/src/misc/testZfpStream.c
new file mode 100644
index 00000000..4f885a2e
--- /dev/null
+++ b/tests/src/misc/testZfpStream.c
@@ -0,0 +1,934 @@
+#include "zfp.h"
+
+#include <stdarg.h>
+#include <stddef.h>
+#include <setjmp.h>
+#include <cmocka.h>
+
+#include <limits.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+// expert mode compression parameters
+#define MIN_BITS 11u
+#define MAX_BITS 1001u
+#define MAX_PREC 52u
+#define MIN_EXP (-1000)
+
+#define MAX_EXP 1023
+
+struct setupVars {
+  zfp_stream* stream;
+};
+
+static int
+setup(void **state)
+{
+  struct setupVars *bundle = malloc(sizeof(struct setupVars));
+  assert_non_null(bundle);
+
+  zfp_stream* stream = zfp_stream_open(NULL);
+  bundle->stream = stream;
+
+  *state = bundle;
+
+  return 0;
+}
+
+static int
+teardown(void **state)
+{
+  struct setupVars *bundle = *state;
+  zfp_stream_close(bundle->stream);
+  free(bundle);
+
+  return 0;
+}
+
+static void
+given_openedZfpStream_when_zfpStreamCompressionMode_expect_returnsExpertEnum(void **state)
+{
+  struct setupVars *bundle = *state;
+
+  // default values imply expert mode
+  assert_int_equal(zfp_stream_compression_mode(bundle->stream), zfp_mode_expert);
+}
+
+static void
+given_zfpStreamSetWithInvalidParams_when_zfpStreamCompressionMode_expect_returnsNullEnum(void **state)
+{
+  struct setupVars *bundle = *state;
+  zfp_stream* stream = bundle->stream;
+  assert_int_equal(zfp_stream_compression_mode(stream), zfp_mode_expert);
+
+  // ensure this config would be rejected by zfp_stream_set_params()
+  assert_int_equal(zfp_stream_set_params(stream, stream->maxbits + 1, stream->maxbits, stream->maxprec, stream->minexp), 0);
+  stream->minbits = stream->maxbits + 1;
+
+  assert_int_equal(zfp_stream_compression_mode(stream), zfp_mode_null);
+}
+
+static void
+setNonExpertMode(zfp_stream* stream)
+{
+  zfp_stream_set_precision(stream, ZFP_MAX_PREC - 2);
+  assert_int_not_equal(zfp_stream_compression_mode(stream), zfp_mode_expert);
+}
+
+static void
+setDefaultCompressionParams(zfp_stream* stream)
+{
+  /* reset to expert mode */
+  assert_int_equal(zfp_stream_set_params(stream, ZFP_MIN_BITS, ZFP_MAX_BITS, ZFP_MAX_PREC, ZFP_MIN_EXP), 1);
+  assert_int_equal(zfp_stream_compression_mode(stream), zfp_mode_expert);
+}
+
+static void
+given_zfpStreamSetWithFixedRate_when_zfpStreamCompressionMode_expect_returnsFixedRateEnum(void **state)
+{
+  struct setupVars *bundle = *state;
+  zfp_stream* stream = bundle->stream;
+
+  zfp_type zfpType;
+  uint dims;
+  int rate;
+  int align;
+  for (zfpType = (zfp_type)1; zfpType <= (zfp_type)4; zfpType++) {
+    for (dims = 1; dims <= 4; dims++) {
+      for (rate = 1; rate <= ((zfpType % 2) ? 32 : 64); rate++) {
+        for (align = 0; align <= 1; align++) {
+          setDefaultCompressionParams(stream);
+
+          /* set fixed-rate, assert fixed-rate identified */
+          zfp_stream_set_rate(stream, rate, zfpType, dims, (zfp_bool)align);
+
+          zfp_mode mode = zfp_stream_compression_mode(stream);
+          if (mode != zfp_mode_fixed_rate) {
+            fail_msg("Setting zfp_stream with zfp_type %u, fixed rate %d, align = %d, in %u dimensions returned zfp_mode enum %u", zfpType, rate, align, dims, mode);
+          }
+        }
+      }
+    }
+  }
+}
+
+static void
+given_zfpStreamSetWithFixedRate_when_zfpStreamRate_expect_returnsSameRate(void **state)
+{
+  struct setupVars *bundle = *state;
+  zfp_stream* stream = bundle->stream;
+
+  zfp_type zfpType;
+  uint dims;
+  double rate, set_rate, actual_rate;
+  int align;
+  int i;
+  for (zfpType = (zfp_type)1; zfpType <= (zfp_type)4; zfpType++) {
+    for (dims = 1; dims <= 4; dims++) {
+      for (i = 1; i <= 4; i++) {
+        rate = (double)zfp_type_size(zfpType) * CHAR_BIT * i / 4;
+        for (align = 0; align <= 1; align++) {
+          setDefaultCompressionParams(stream);
+
+          /* set fixed-rate, assert same rate identified */
+          set_rate = zfp_stream_set_rate(stream, rate, zfpType, dims, (zfp_bool)align);
+          actual_rate = zfp_stream_rate(stream, dims);
+          if (actual_rate != set_rate) {
+            fail_msg("Setting zfp_stream with zfp_type %u, fixed rate %g, obtained rate %g, align = %d, in %u dimensions returned rate %g", zfpType, rate, set_rate, align, dims, actual_rate);
+          }
+        }
+      }
+    }
+  }
+}
+
+static void
+given_zfpStreamSetWithFixedPrecision_when_zfpStreamCompressionMode_expect_returnsFixedPrecisionEnum(void **state)
+{
+  struct setupVars *bundle = *state;
+  zfp_stream* stream = bundle->stream;
+
+  uint prec;
+
+  /* float/int32 technically sees no improvement in compression for prec>32 */
+  /* (prec=ZFP_MAX_PREC handled in next test case) */
+  for (prec = 1; prec < ZFP_MAX_PREC; prec++) {
+    setDefaultCompressionParams(stream);
+
+    /* set fixed-precision, assert fixed-precision identified */
+    zfp_stream_set_precision(stream, prec);
+
+    zfp_mode mode = zfp_stream_compression_mode(stream);
+    if (mode != zfp_mode_fixed_precision) {
+      fail_msg("Setting zfp_stream with fixed precision %u returned zfp_mode enum %u", prec, mode);
+    }
+  }
+}
+
+/* compression params equivalent to default, which are defined as expert mode */
+static void
+given_zfpStreamSetWithMaxPrecision_when_zfpStreamCompressionMode_expect_returnsExpertModeEnum(void **state)
+{
+  struct setupVars *bundle = *state;
+  zfp_stream* stream = bundle->stream;
+  setDefaultCompressionParams(stream);
+
+  zfp_stream_set_precision(stream, ZFP_MAX_PREC);
+  assert_int_equal(zfp_stream_compression_mode(stream), zfp_mode_expert);
+}
+
+static void
+given_zfpStreamSetWithFixedPrecision_when_zfpStreamPrecision_expect_returnsSamePrecision(void **state)
+{
+  struct setupVars *bundle = *state;
+  zfp_stream* stream = bundle->stream;
+
+  uint prec, actual_prec;
+
+  /* float/int32 technically sees no improvement in compression for prec>32 */
+  /* (prec=ZFP_MAX_PREC handled in next test case) */
+  for (prec = 1; prec < ZFP_MAX_PREC; prec++) {
+    setDefaultCompressionParams(stream);
+
+    /* set fixed-precision, assert fixed-precision identified */
+    zfp_stream_set_precision(stream, prec);
+    actual_prec = zfp_stream_precision(stream);
+    if (prec != actual_prec) {
+      fail_msg("Setting zfp_stream with fixed precision %u returned precision %u", prec, actual_prec);
+    }
+  }
+}
+
+static void
+given_zfpStreamSetWithFixedAccuracy_when_zfpStreamCompressionMode_expect_returnsFixedAccuracyEnum(void **state)
+{
+  struct setupVars *bundle = *state;
+  zfp_stream* stream = bundle->stream;
+
+  int accExp;
+  /* using ZFP_MIN_EXP implies expert mode (all default values) */
+  for (accExp = MAX_EXP; (accExp > ZFP_MIN_EXP) && (ldexp(1., accExp) != 0.); accExp--) {
+    setDefaultCompressionParams(stream);
+
+    /* set fixed-accuracy, assert fixed-accuracy identified */
+    zfp_stream_set_accuracy(stream, ldexp(1., accExp));
+
+    zfp_mode mode = zfp_stream_compression_mode(stream);
+    if (mode != zfp_mode_fixed_accuracy) {
+      fail_msg("Setting zfp_stream with fixed accuracy 2^(%d) returned zfp_mode enum %u", accExp, mode);
+    }
+  }
+}
+
+static void
+given_zfpStreamSetWithFixedAccuracy_when_zfpStreamAccuracy_expect_returnsSameAccuracy(void **state)
+{
+  struct setupVars *bundle = *state;
+  zfp_stream* stream = bundle->stream;
+
+  double tol, actual_tol;
+  int accExp;
+  /* using ZFP_MIN_EXP implies expert mode (all default values) */
+  for (accExp = MAX_EXP; (accExp > ZFP_MIN_EXP) && (ldexp(1., accExp) != 0.); accExp--) {
+    setDefaultCompressionParams(stream);
+
+    /* set fixed-accuracy, assert fixed-accuracy identified */
+    tol = ldexp(1., accExp);
+    zfp_stream_set_accuracy(stream, tol);
+    actual_tol = zfp_stream_accuracy(stream);
+
+    if (tol != actual_tol) {
+      fail_msg("Setting zfp_stream with fixed accuracy %g returned accuracy %g", tol, actual_tol);
+    }
+  }
+}
+
+static void
+given_zfpStreamSetWithReversible_when_zfpStreamCompressionMode_expect_returnsReversibleEnum(void **state)
+{
+  struct setupVars *bundle = *state;
+  zfp_stream* stream = bundle->stream;
+
+  setDefaultCompressionParams(stream);
+
+  /* set reversible, assert reversible identified */
+  zfp_stream_set_reversible(stream);
+
+  zfp_mode mode = zfp_stream_compression_mode(stream);
+  if (mode != zfp_mode_reversible) {
+    fail_msg("Setting zfp_stream with reversible returned zfp_mode enum %u", mode);
+  }
+}
+
+static void
+given_zfpStreamSetWithExpertParams_when_zfpStreamCompressionMode_expect_returnsExpertEnum(void **state)
+{
+  struct setupVars *bundle = *state;
+  zfp_stream* stream = bundle->stream;
+
+  setNonExpertMode(stream);
+
+  /* successfully set custom expert params, assert change */
+  assert_int_equal(zfp_stream_set_params(stream, MIN_BITS, MAX_BITS, MAX_PREC, MIN_EXP), 1);
+  assert_int_equal(zfp_stream_compression_mode(stream), zfp_mode_expert);
+}
+
+static void
+given_zfpStreamDefaultModeVal_when_zfpStreamSetMode_expect_returnsExpertMode_and_compressParamsConserved(void **state)
+{
+  struct setupVars *bundle = *state;
+  zfp_stream* stream = bundle->stream;
+
+  /* get mode and compression params */
+  uint64 mode = zfp_stream_mode(stream);
+  uint minbits = stream->minbits;
+  uint maxbits = stream->maxbits;
+  uint maxprec = stream->maxprec;
+  int minexp = stream->minexp;
+
+  setNonExpertMode(stream);
+
+  /* see that mode is updated correctly */
+  assert_int_equal(zfp_stream_set_mode(stream, mode), zfp_mode_expert);
+
+  /* see that compression params conserved */
+  if (stream->minbits != minbits
+      || stream->maxbits != maxbits
+      || stream->maxprec != maxprec
+      || stream->minexp != minexp) {
+    printf("Using default params, zfp_stream_set_mode() incorrectly set compression params when fed zfp_stream_mode() = %"UINT64PRIx"\n", mode);
+    fail_msg("The zfp_stream had (minbits, maxbits, maxprec, minexp) = (%u, %u, %u, %d), but was expected to equal (%u, %u, %u, %d)", stream->minbits, stream->maxbits, stream->maxprec, stream->minexp, minbits, maxbits, maxprec, minexp);
+  }
+}
+
+static void
+given_zfpStreamSetRateModeVal_when_zfpStreamSetMode_expect_returnsFixedRate_and_compressParamsConserved(void **state)
+{
+  struct setupVars *bundle = *state;
+  zfp_stream* stream = bundle->stream;
+
+  zfp_type zfpType;
+  uint dims;
+  int rate;
+  int align;
+  for (zfpType = (zfp_type)1; zfpType <= (zfp_type)4; zfpType++) {
+    for (dims = 1; dims <= 4; dims++) {
+      for (rate = 1; rate <= ((zfpType % 2) ? 32 : 64); rate++) {
+        for (align = 0; align <= 1; align++) {
+          /* set fixed-rate mode */
+          zfp_stream_set_rate(stream, rate, zfpType, dims, (zfp_bool)align);
+          assert_int_equal(zfp_stream_compression_mode(stream), zfp_mode_fixed_rate);
+
+          /* get mode and compression params */
+          uint64 mode = zfp_stream_mode(stream);
+          uint minbits = stream->minbits;
+          uint maxbits = stream->maxbits;
+          uint maxprec = stream->maxprec;
+          int minexp = stream->minexp;
+
+          /* set expert mode */
+          setDefaultCompressionParams(stream);
+
+          /* see that mode is updated correctly */
+          zfp_mode zfpMode = zfp_stream_set_mode(stream, mode);
+          if (zfpMode != zfp_mode_fixed_rate) {
+            fail_msg("Using fixed rate %d, align %d, zfp_type %u, in %u dimensions, zfp_stream_compression_mode() incorrectly returned %u", rate, align, zfpType, dims, zfpMode);
+          }
+
+          /* see that compression params conserved */
+          if (stream->minbits != minbits
+              || stream->maxbits != maxbits
+              || stream->maxprec != maxprec
+              || stream->minexp != minexp) {
+            printf("Using fixed rate %d, align %d, zfp_type %u, in %u dimensions, zfp_stream_set_mode() incorrectly set compression params when fed zfp_stream_mode() = %"UINT64PRIx"\n", rate, align, zfpType, dims, mode);
+            fail_msg("The zfp_stream had (minbits, maxbits, maxprec, minexp) = (%u, %u, %u, %d), but was expected to equal (%u, %u, %u, %d)", stream->minbits, stream->maxbits, stream->maxprec, stream->minexp, minbits, maxbits, maxprec, minexp);
+          }
+        }
+      }
+    }
+  }
+}
+
+static void
+given_zfpStreamSetPrecisionModeVal_when_zfpStreamSetMode_expect_returnsFixedPrecision_and_compressParamsConserved(void **state)
+{
+  struct setupVars *bundle = *state;
+  zfp_stream* stream = bundle->stream;
+
+  uint prec;
+  /* ZFP_MAX_PREC considered expert mode */
+  for (prec = 1; prec < ZFP_MAX_PREC; prec++) {
+    zfp_stream_set_precision(stream, prec);
+    assert_int_equal(zfp_stream_compression_mode(stream), zfp_mode_fixed_precision);
+
+    /* get mode and compression params */
+    uint64 mode = zfp_stream_mode(stream);
+    uint minbits = stream->minbits;
+    uint maxbits = stream->maxbits;
+    uint maxprec = stream->maxprec;
+    int minexp = stream->minexp;
+
+    /* set expert mode */
+    setDefaultCompressionParams(stream);
+
+    /* see that mode is updated correctly */
+    zfp_mode zfpMode = zfp_stream_set_mode(stream, mode);
+    if (zfpMode != zfp_mode_fixed_precision) {
+      fail_msg("Using fixed precision %u, zfp_stream_compression_mode() incorrectly returned %u", prec, zfpMode);
+    }
+
+    /* see that compression params conserved */
+    if (stream->minbits != minbits
+        || stream->maxbits != maxbits
+        || stream->maxprec != maxprec
+        || stream->minexp != minexp) {
+      printf("Using fixed precision %u, zfp_stream_set_mode() incorrectly set compression params when fed zfp_stream_mode() = %"UINT64PRIx"\n", prec, mode);
+      fail_msg("The zfp_stream had (minbits, maxbits, maxprec, minexp) = (%u, %u, %u, %d), but was expected to equal (%u, %u, %u, %d)", stream->minbits, stream->maxbits, stream->maxprec, stream->minexp, minbits, maxbits, maxprec, minexp);
+    }
+  }
+}
+
+/* using precision ZFP_MAX_PREC sets compression params equivalent to default values (expert mode) */
+static void
+given_fixedPrecisionMaxPrecModeVal_when_zfpStreamSetMode_expect_returnsExpert_and_compressParamsConserved(void **state)
+{
+  struct setupVars *bundle = *state;
+  zfp_stream* stream = bundle->stream;
+
+  zfp_stream_set_precision(stream, ZFP_MAX_PREC);
+  assert_int_equal(zfp_stream_compression_mode(stream), zfp_mode_expert);
+  uint64 mode = zfp_stream_mode(stream);
+
+  /* set non-expert mode */
+  zfp_stream_set_precision(stream, ZFP_MAX_PREC - 2);
+  assert_int_not_equal(zfp_stream_compression_mode(stream), zfp_mode_expert);
+
+  /* see that mode is updated correctly */
+  assert_int_equal(zfp_stream_set_mode(stream, mode), zfp_mode_expert);
+
+  /* see that compression params conserved */
+  assert_int_equal(stream->minbits, ZFP_MIN_BITS);
+  assert_int_equal(stream->maxbits, ZFP_MAX_BITS);
+  assert_int_equal(stream->maxprec, ZFP_MAX_PREC);
+  assert_int_equal(stream->minexp, ZFP_MIN_EXP);
+}
+
+static void
+given_zfpStreamSetAccuracyModeVal_when_zfpStreamSetMode_expect_returnsFixedAccuracy_and_compressParamsConserved(void **state)
+{
+  struct setupVars *bundle = *state;
+  zfp_stream* stream = bundle->stream;
+
+  int accExp;
+  for (accExp = MAX_EXP; (accExp > ZFP_MIN_EXP) && (ldexp(1., accExp) != 0.); accExp--) {
+    zfp_stream_set_accuracy(stream, ldexp(1., accExp));
+    assert_int_equal(zfp_stream_compression_mode(stream), zfp_mode_fixed_accuracy);
+
+    /* get mode and compression params */
+    uint64 mode = zfp_stream_mode(stream);
+    uint minbits = stream->minbits;
+    uint maxbits = stream->maxbits;
+    uint maxprec = stream->maxprec;
+    int minexp = stream->minexp;
+
+    /* set expert mode */
+    setDefaultCompressionParams(stream);
+
+    /* see that mode is updated correctly */
+    zfp_mode zfpMode = zfp_stream_set_mode(stream, mode);
+    if (zfpMode != zfp_mode_fixed_accuracy) {
+      fail_msg("Using fixed accuracy 2^(%d), zfp_stream_compression_mode() incorrectly returned %u", accExp, zfpMode);
+    }
+
+    /* see that compression params conserved */
+    if (stream->minbits != minbits
+        || stream->maxbits != maxbits
+        || stream->maxprec != maxprec
+        || stream->minexp != minexp) {
+      printf("Using fixed accuracy 2^(%d), zfp_stream_set_mode() incorrectly set compression params when fed zfp_stream_mode() = %"UINT64PRIx"\n", accExp, mode);
+      fail_msg("The zfp_stream had (minbits, maxbits, maxprec, minexp) = (%u, %u, %u, %d), but was expected to equal (%u, %u, %u, %d)", stream->minbits, stream->maxbits, stream->maxprec, stream->minexp, minbits, maxbits, maxprec, minexp);
+    }
+  }
+}
+
+static void
+given_zfpStreamSetReversibleModeVal_when_zfpStreamSetMode_expect_returnsReversible_and_compressParamsConserved(void **state)
+{
+  struct setupVars *bundle = *state;
+  zfp_stream* stream = bundle->stream;
+
+  zfp_stream_set_reversible(stream);
+  assert_int_equal(zfp_stream_compression_mode(stream), zfp_mode_reversible);
+
+  /* get mode and compression params */
+  uint64 mode = zfp_stream_mode(stream);
+  uint minbits = stream->minbits;
+  uint maxbits = stream->maxbits;
+  uint maxprec = stream->maxprec;
+  int minexp = stream->minexp;
+
+  /* set expert mode */
+  setDefaultCompressionParams(stream);
+
+  /* see that mode is updated correctly */
+  zfp_mode zfpMode = zfp_stream_set_mode(stream, mode);
+  if (zfpMode != zfp_mode_reversible) {
+    fail_msg("Using reversible mode, zfp_stream_compression_mode() incorrectly returned %u", zfpMode);
+  }
+
+  /* see that compression params conserved */
+  if (stream->minbits != minbits
+      || stream->maxbits != maxbits
+      || stream->maxprec != maxprec
+      || stream->minexp != minexp) {
+    printf("Using reversible mode, zfp_stream_set_mode() incorrectly set compression params when fed zfp_stream_mode() = %"UINT64PRIx"\n", mode);
+    fail_msg("The zfp_stream had (minbits, maxbits, maxprec, minexp) = (%u, %u, %u, %d), but was expected to equal (%u, %u, %u, %d)", stream->minbits, stream->maxbits, stream->maxprec, stream->minexp, minbits, maxbits, maxprec, minexp);
+  }
+}
+
+static void
+assertCompressParamsBehaviorThroughSetMode(void **state, zfp_mode expectedMode)
+{
+  struct setupVars *bundle = *state;
+  zfp_stream* stream = bundle->stream;
+
+  // grab existing values
+  uint minBits = stream->minbits;
+  uint maxBits = stream->maxbits;
+  uint maxPrec = stream->maxprec;
+  int minExp = stream->minexp;
+
+  uint64 mode = zfp_stream_mode(stream);
+
+  // reset params
+  assert_int_equal(zfp_stream_set_params(stream, ZFP_MIN_BITS, ZFP_MAX_BITS, ZFP_MAX_PREC, ZFP_MIN_EXP), 1);
+  assert_int_equal(zfp_stream_set_mode(stream, mode), expectedMode);
+
+  if (expectedMode == zfp_mode_null) {
+    assert_int_not_equal(stream->minbits, minBits);
+    assert_int_not_equal(stream->maxbits, maxBits);
+    assert_int_not_equal(stream->maxprec, maxPrec);
+    assert_int_not_equal(stream->minexp, minExp);
+  } else {
+    assert_int_equal(stream->minbits, minBits);
+    assert_int_equal(stream->maxbits, maxBits);
+    assert_int_equal(stream->maxprec, maxPrec);
+    assert_int_equal(stream->minexp, minExp);
+  }
+}
+
+static void
+given_customCompressParamsModeVal_when_zfpStreamSetMode_expect_returnsExpert_and_compressParamsConserved(void **state)
+{
+  struct setupVars *bundle = *state;
+  assert_int_equal(zfp_stream_set_params(bundle->stream, MIN_BITS, MAX_BITS, MAX_PREC, MIN_EXP), 1);
+
+  assertCompressParamsBehaviorThroughSetMode(state, zfp_mode_expert);
+}
+
+static void
+given_invalidCompressParamsModeVal_when_zfpStreamSetMode_expect_returnsNullMode_and_paramsNotSet(void **state)
+{
+  struct setupVars *bundle = *state;
+  zfp_stream* stream = bundle->stream;
+
+  /* set invalid compress params */
+  assert_int_equal(zfp_stream_set_params(stream, MAX_BITS + 1, MAX_BITS, MAX_PREC, MIN_EXP), 0);
+  stream->minbits = MAX_BITS + 1;
+  stream->maxbits = MAX_BITS;
+  stream->maxprec = MAX_PREC;
+  stream->minexp = MIN_EXP;
+
+  assertCompressParamsBehaviorThroughSetMode(state, zfp_mode_null);
+}
+
+static void
+testStreamAlignSizeMatches(void **state, int dim, zfp_type type)
+{
+  struct setupVars *bundle = *state;
+  zfp_stream* stream = bundle->stream;
+  zfp_field* field;
+
+  size_t arrsize = 4 << 2*(dim-1);
+  size_t dimsize = 4;
+  size_t flushsize;
+  size_t alignsize;
+
+  if (type == zfp_type_float)
+  {
+    float* array;
+    float* block = (float*)calloc(arrsize, sizeof(float));
+
+    if (dim == 1)
+    {
+      array = (float*)calloc(dimsize, sizeof(float));
+      field = zfp_field_1d(array, type, dimsize);
+    }
+    else if (dim == 2)
+    {
+      array = (float*)calloc(dimsize*dimsize, sizeof(float));
+      field = zfp_field_2d(array, type, dimsize, dimsize);
+    }
+    else if (dim == 3)
+    {
+      array = (float*)calloc(dimsize*dimsize*dimsize, sizeof(float));
+      field = zfp_field_3d(array, type, dimsize, dimsize, dimsize);
+    }
+    else if (dim == 4)
+    {
+      array = (float*)calloc(dimsize*dimsize*dimsize*dimsize, sizeof(float));
+      field = zfp_field_4d(array, type, dimsize, dimsize, dimsize, dimsize);
+    }
+
+    size_t bufsize = zfp_stream_maximum_size(stream, field);
+    void* buffer = malloc(bufsize);
+    bitstream* s = stream_open(buffer, bufsize);
+    zfp_stream_set_bit_stream(stream, s);
+    zfp_stream_rewind(stream);
+
+    if (dim == 1)
+    {
+      zfp_encode_block_float_1(stream, block);
+      flushsize = zfp_stream_flush(stream);
+      zfp_stream_rewind(stream);
+      zfp_decode_block_float_1(stream, block);
+      alignsize = zfp_stream_align(stream);
+    }
+    else if (dim == 2)
+    {
+      zfp_encode_block_float_2(stream, block);
+      flushsize = zfp_stream_flush(stream);
+      zfp_stream_rewind(stream);
+      zfp_decode_block_float_2(stream, block);
+      alignsize = zfp_stream_align(stream);
+    }
+    else if (dim == 3)
+    {
+      zfp_encode_block_float_3(stream, block);
+      flushsize = zfp_stream_flush(stream);
+      zfp_stream_rewind(stream);
+      zfp_decode_block_float_3(stream, block);
+      alignsize = zfp_stream_align(stream);
+    }
+    else if (dim == 4)
+    {
+      zfp_encode_block_float_4(stream, block);
+      flushsize = zfp_stream_flush(stream);
+      zfp_stream_rewind(stream);
+      zfp_decode_block_float_4(stream, block);
+      alignsize = zfp_stream_align(stream);
+    }
+
+    free(array);
+    free(block);
+  }
+  else if (type == zfp_type_double)
+  {
+    double* array;
+    double* block = (double*)calloc(arrsize, sizeof(double));
+
+    if (dim == 1)
+    {
+      array = (double*)calloc(dimsize, sizeof(double));
+      field = zfp_field_1d(array, type, dimsize);
+    }
+    else if (dim == 2)
+    {
+      array = (double*)calloc(dimsize*dimsize, sizeof(double));
+      field = zfp_field_2d(array, type, dimsize, dimsize);
+    }
+    else if (dim == 3)
+    {
+      array = (double*)calloc(dimsize*dimsize*dimsize, sizeof(double));
+      field = zfp_field_3d(array, type, dimsize, dimsize, dimsize);
+    }
+    else if (dim == 4)
+    {
+      array = (double*)calloc(dimsize*dimsize*dimsize*dimsize, sizeof(double));
+      field = zfp_field_4d(array, type, dimsize, dimsize, dimsize, dimsize);
+    }
+
+    size_t bufsize = zfp_stream_maximum_size(stream, field);
+    void*
buffer = malloc(bufsize); + bitstream* s = stream_open(buffer, bufsize); + zfp_stream_set_bit_stream(stream, s); + zfp_stream_rewind(stream); + + if (dim == 1) + { + zfp_encode_block_double_1(stream, block); + flushsize = zfp_stream_flush(stream); + zfp_stream_rewind(stream); + zfp_decode_block_double_1(stream, block); + alignsize = zfp_stream_align(stream); + } + else if (dim == 2) + { + zfp_encode_block_double_2(stream, block); + flushsize = zfp_stream_flush(stream); + zfp_stream_rewind(stream); + zfp_decode_block_double_2(stream, block); + alignsize = zfp_stream_align(stream); + } + else if (dim == 3) + { + zfp_encode_block_double_3(stream, block); + flushsize = zfp_stream_flush(stream); + zfp_stream_rewind(stream); + zfp_decode_block_double_3(stream, block); + alignsize = zfp_stream_align(stream); + } + else if (dim == 4) + { + zfp_encode_block_double_4(stream, block); + flushsize = zfp_stream_flush(stream); + zfp_stream_rewind(stream); + zfp_decode_block_double_4(stream, block); + alignsize = zfp_stream_align(stream); + } + + free(array); + free(block); + } + + assert_true(flushsize > 0); + assert_true(flushsize == alignsize); +} + +static void +given_block1f_when_StreamFlush_expect_StreamAlignSizeMatches(void **state) +{ + testStreamAlignSizeMatches(state, 1, zfp_type_float); +} + +static void +given_block2f_when_StreamFlush_expect_StreamAlignSizeMatches(void **state) +{ + testStreamAlignSizeMatches(state, 2, zfp_type_float); +} + +static void +given_block3f_when_StreamFlush_expect_StreamAlignSizeMatches(void **state) +{ + testStreamAlignSizeMatches(state, 3, zfp_type_float); +} + +static void +given_block4f_when_StreamFlush_expect_StreamAlignSizeMatches(void **state) +{ + testStreamAlignSizeMatches(state, 4, zfp_type_float); +} + +static void +given_block1d_when_StreamFlush_expect_StreamAlignSizeMatches(void **state) +{ + testStreamAlignSizeMatches(state, 1, zfp_type_double); +} + +static void 
+given_block2d_when_StreamFlush_expect_StreamAlignSizeMatches(void **state) +{ + testStreamAlignSizeMatches(state, 2, zfp_type_double); +} + +static void +given_block3d_when_StreamFlush_expect_StreamAlignSizeMatches(void **state) +{ + testStreamAlignSizeMatches(state, 3, zfp_type_double); +} + +static void +given_block4d_when_StreamFlush_expect_StreamAlignSizeMatches(void **state) +{ + testStreamAlignSizeMatches(state, 4, zfp_type_double); +} + +static void +testStreamCompressedSizeIncreasedCorrectly(void **state, int dim, zfp_type type) +{ + struct setupVars *bundle = *state; + zfp_stream* stream = bundle->stream; + zfp_field* field; + + /* use fixed rate mode to simplify size calculation */ + double rate = zfp_stream_set_rate(stream, 64, type, dim, 0); + + size_t blocksize = 4 << 2*(dim-1); + size_t dimsize = 4; + size_t startsize; + size_t endsize; + + if (type == zfp_type_float) + { + float* array = (float*)calloc(blocksize, sizeof(float)); + float* block = (float*)calloc(blocksize, sizeof(float)); + + if (dim == 1) + field = zfp_field_1d(array, type, dimsize); + else if (dim == 2) + field = zfp_field_2d(array, type, dimsize, dimsize); + else if (dim == 3) + field = zfp_field_3d(array, type, dimsize, dimsize, dimsize); + else if (dim == 4) + field = zfp_field_4d(array, type, dimsize, dimsize, dimsize, dimsize); + + size_t bufsize = zfp_stream_maximum_size(stream, field); + void* buffer = malloc(bufsize); + bitstream* s = stream_open(buffer, bufsize); + zfp_stream_set_bit_stream(stream, s); + zfp_stream_rewind(stream); + startsize = zfp_stream_compressed_size(stream); + + if (dim == 1) + zfp_encode_block_float_1(stream, block); + else if (dim == 2) + zfp_encode_block_float_2(stream, block); + else if (dim == 3) + zfp_encode_block_float_3(stream, block); + else if (dim == 4) + zfp_encode_block_float_4(stream, block); + + endsize = zfp_stream_compressed_size(stream); + free(array); + free(block); + } + else if (type == zfp_type_double) + { + double* array = 
(double*)calloc(blocksize, sizeof(double)); + double* block = (double*)calloc(blocksize, sizeof(double)); + + if (dim == 1) + field = zfp_field_1d(array, type, dimsize); + else if (dim == 2) + field = zfp_field_2d(array, type, dimsize, dimsize); + else if (dim == 3) + field = zfp_field_3d(array, type, dimsize, dimsize, dimsize); + else if (dim == 4) + field = zfp_field_4d(array, type, dimsize, dimsize, dimsize, dimsize); + + size_t bufsize = zfp_stream_maximum_size(stream, field); + void* buffer = malloc(bufsize); + bitstream* s = stream_open(buffer, bufsize); + zfp_stream_set_bit_stream(stream, s); + zfp_stream_rewind(stream); + startsize = zfp_stream_compressed_size(stream); + + if (dim == 1) + zfp_encode_block_double_1(stream, block); + else if (dim == 2) + zfp_encode_block_double_2(stream, block); + else if (dim == 3) + zfp_encode_block_double_3(stream, block); + else if (dim == 4) + zfp_encode_block_double_4(stream, block); + + endsize = zfp_stream_compressed_size(stream); + free(array); + free(block); + } + + assert_true(endsize > 0); + assert_true(endsize == startsize + blocksize * (size_t)(rate/8)); +} + +static void +given_block1f_when_WriteBlock_expect_StreamCompressedSizeIncreasedCorrectly(void **state) +{ + testStreamCompressedSizeIncreasedCorrectly(state, 1, zfp_type_float); +} + +static void +given_block2f_when_WriteBlock_expect_StreamCompressedSizeIncreasedCorrectly(void **state) +{ + testStreamCompressedSizeIncreasedCorrectly(state, 2, zfp_type_float); +} + +static void +given_block3f_when_WriteBlock_expect_StreamCompressedSizeIncreasedCorrectly(void **state) +{ + testStreamCompressedSizeIncreasedCorrectly(state, 3, zfp_type_float); +} + +static void +given_block4f_when_WriteBlock_expect_StreamCompressedSizeIncreasedCorrectly(void **state) +{ + testStreamCompressedSizeIncreasedCorrectly(state, 4, zfp_type_float); +} + +static void +given_block1d_when_WriteBlock_expect_StreamCompressedSizeIncreasedCorrectly(void **state) +{ + 
testStreamCompressedSizeIncreasedCorrectly(state, 1, zfp_type_double); +} + +static void +given_block2d_when_WriteBlock_expect_StreamCompressedSizeIncreasedCorrectly(void **state) +{ + testStreamCompressedSizeIncreasedCorrectly(state, 2, zfp_type_double); +} + +static void +given_block3d_when_WriteBlock_expect_StreamCompressedSizeIncreasedCorrectly(void **state) +{ + testStreamCompressedSizeIncreasedCorrectly(state, 3, zfp_type_double); +} + +static void +given_block4d_when_WriteBlock_expect_StreamCompressedSizeIncreasedCorrectly(void **state) +{ + testStreamCompressedSizeIncreasedCorrectly(state, 4, zfp_type_double); +} + +int main() +{ + const struct CMUnitTest tests[] = { + /* test zfp_stream_compression_mode() */ + cmocka_unit_test_setup_teardown(given_openedZfpStream_when_zfpStreamCompressionMode_expect_returnsExpertEnum, setup, teardown), + cmocka_unit_test_setup_teardown(given_zfpStreamSetWithInvalidParams_when_zfpStreamCompressionMode_expect_returnsNullEnum, setup, teardown), + cmocka_unit_test_setup_teardown(given_zfpStreamSetWithFixedRate_when_zfpStreamCompressionMode_expect_returnsFixedRateEnum, setup, teardown), + cmocka_unit_test_setup_teardown(given_zfpStreamSetWithFixedRate_when_zfpStreamRate_expect_returnsSameRate, setup, teardown), + cmocka_unit_test_setup_teardown(given_zfpStreamSetWithFixedPrecision_when_zfpStreamCompressionMode_expect_returnsFixedPrecisionEnum, setup, teardown), + cmocka_unit_test_setup_teardown(given_zfpStreamSetWithMaxPrecision_when_zfpStreamCompressionMode_expect_returnsExpertModeEnum, setup, teardown), + cmocka_unit_test_setup_teardown(given_zfpStreamSetWithFixedPrecision_when_zfpStreamPrecision_expect_returnsSamePrecision, setup, teardown), + cmocka_unit_test_setup_teardown(given_zfpStreamSetWithFixedAccuracy_when_zfpStreamCompressionMode_expect_returnsFixedAccuracyEnum, setup, teardown), + cmocka_unit_test_setup_teardown(given_zfpStreamSetWithFixedAccuracy_when_zfpStreamAccuracy_expect_returnsSameAccuracy, setup, 
teardown), + cmocka_unit_test_setup_teardown(given_zfpStreamSetWithReversible_when_zfpStreamCompressionMode_expect_returnsReversibleEnum, setup, teardown), + cmocka_unit_test_setup_teardown(given_zfpStreamSetWithExpertParams_when_zfpStreamCompressionMode_expect_returnsExpertEnum, setup, teardown), + + /* test zfp_stream_set_mode() */ + cmocka_unit_test_setup_teardown(given_zfpStreamDefaultModeVal_when_zfpStreamSetMode_expect_returnsExpertMode_and_compressParamsConserved, setup, teardown), + + cmocka_unit_test_setup_teardown(given_zfpStreamSetRateModeVal_when_zfpStreamSetMode_expect_returnsFixedRate_and_compressParamsConserved, setup, teardown), + cmocka_unit_test_setup_teardown(given_zfpStreamSetPrecisionModeVal_when_zfpStreamSetMode_expect_returnsFixedPrecision_and_compressParamsConserved, setup, teardown), + cmocka_unit_test_setup_teardown(given_fixedPrecisionMaxPrecModeVal_when_zfpStreamSetMode_expect_returnsExpert_and_compressParamsConserved, setup, teardown), + cmocka_unit_test_setup_teardown(given_zfpStreamSetAccuracyModeVal_when_zfpStreamSetMode_expect_returnsFixedAccuracy_and_compressParamsConserved, setup, teardown), + cmocka_unit_test_setup_teardown(given_zfpStreamSetReversibleModeVal_when_zfpStreamSetMode_expect_returnsReversible_and_compressParamsConserved, setup, teardown), + cmocka_unit_test_setup_teardown(given_customCompressParamsModeVal_when_zfpStreamSetMode_expect_returnsExpert_and_compressParamsConserved, setup, teardown), + cmocka_unit_test_setup_teardown(given_invalidCompressParamsModeVal_when_zfpStreamSetMode_expect_returnsNullMode_and_paramsNotSet, setup, teardown), + + /* test other zfp_stream_align() */ + cmocka_unit_test_setup_teardown(given_block1f_when_StreamFlush_expect_StreamAlignSizeMatches, setup, teardown), + cmocka_unit_test_setup_teardown(given_block2f_when_StreamFlush_expect_StreamAlignSizeMatches, setup, teardown), + cmocka_unit_test_setup_teardown(given_block3f_when_StreamFlush_expect_StreamAlignSizeMatches, setup, teardown), + 
cmocka_unit_test_setup_teardown(given_block4f_when_StreamFlush_expect_StreamAlignSizeMatches, setup, teardown), + cmocka_unit_test_setup_teardown(given_block1d_when_StreamFlush_expect_StreamAlignSizeMatches, setup, teardown), + cmocka_unit_test_setup_teardown(given_block2d_when_StreamFlush_expect_StreamAlignSizeMatches, setup, teardown), + cmocka_unit_test_setup_teardown(given_block3d_when_StreamFlush_expect_StreamAlignSizeMatches, setup, teardown), + cmocka_unit_test_setup_teardown(given_block4d_when_StreamFlush_expect_StreamAlignSizeMatches, setup, teardown), + + /* test zfp_stream_compressed_size() */ + cmocka_unit_test_setup_teardown(given_block1f_when_WriteBlock_expect_StreamCompressedSizeIncreasedCorrectly, setup, teardown), + cmocka_unit_test_setup_teardown(given_block2f_when_WriteBlock_expect_StreamCompressedSizeIncreasedCorrectly, setup, teardown), + cmocka_unit_test_setup_teardown(given_block3f_when_WriteBlock_expect_StreamCompressedSizeIncreasedCorrectly, setup, teardown), + cmocka_unit_test_setup_teardown(given_block4f_when_WriteBlock_expect_StreamCompressedSizeIncreasedCorrectly, setup, teardown), + cmocka_unit_test_setup_teardown(given_block1d_when_WriteBlock_expect_StreamCompressedSizeIncreasedCorrectly, setup, teardown), + cmocka_unit_test_setup_teardown(given_block2d_when_WriteBlock_expect_StreamCompressedSizeIncreasedCorrectly, setup, teardown), + cmocka_unit_test_setup_teardown(given_block3d_when_WriteBlock_expect_StreamCompressedSizeIncreasedCorrectly, setup, teardown), + cmocka_unit_test_setup_teardown(given_block4d_when_WriteBlock_expect_StreamCompressedSizeIncreasedCorrectly, setup, teardown), + }; + + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/src/misc/zfpFieldBase.c b/tests/src/misc/zfpFieldBase.c new file mode 100644 index 00000000..fa2ddc20 --- /dev/null +++ b/tests/src/misc/zfpFieldBase.c @@ -0,0 +1,256 @@ +struct setupVars { + zfp_field* field; + SCALAR* data; +}; + +static int +setupBasic(void **state) +{ + 
struct setupVars *bundle = malloc(sizeof(struct setupVars)); + assert_non_null(bundle); + +#if DIMS == 1 + zfp_field* field = zfp_field_1d(NULL, ZFP_TYPE, NX); +#elif DIMS == 2 + zfp_field* field = zfp_field_2d(NULL, ZFP_TYPE, NX, NY); +#elif DIMS == 3 + zfp_field* field = zfp_field_3d(NULL, ZFP_TYPE, NX, NY, NZ); +#elif DIMS == 4 + zfp_field* field = zfp_field_4d(NULL, ZFP_TYPE, NX, NY, NZ, NW); +#endif + + bundle->field = field; + bundle->data = NULL; + + *state = bundle; + + return 0; +} + +static int +setupContiguous(void **state) +{ + struct setupVars *bundle = malloc(sizeof(struct setupVars)); + assert_non_null(bundle); + +#if DIMS == 1 + zfp_field* field = zfp_field_1d(NULL, ZFP_TYPE, NX); + SCALAR* data = (SCALAR*)malloc(sizeof(SCALAR)*NX); +#elif DIMS == 2 + zfp_field* field = zfp_field_2d(NULL, ZFP_TYPE, NX, NY); + SCALAR* data = (SCALAR*)malloc(sizeof(SCALAR)*NX*NY); +#elif DIMS == 3 + zfp_field* field = zfp_field_3d(NULL, ZFP_TYPE, NX, NY, NZ); + SCALAR* data = (SCALAR*)malloc(sizeof(SCALAR)*NX*NY*NZ); +#elif DIMS == 4 + zfp_field* field = zfp_field_4d(NULL, ZFP_TYPE, NX, NY, NZ, NW); + SCALAR* data = (SCALAR*)malloc(sizeof(SCALAR)*NX*NY*NZ*NW); +#endif + assert_non_null(data); + + zfp_field_set_pointer(field, data); + bundle->field = field; + bundle->data = data; + + *state = bundle; + + return 0; +} + +static int +setupStrided(void **state) +{ + struct setupVars *bundle = malloc(sizeof(struct setupVars)); + assert_non_null(bundle); + +#if DIMS == 1 + zfp_field* field = zfp_field_1d(NULL, ZFP_TYPE, NX); + zfp_field_set_stride_1d(field, SX); + SCALAR* data = (SCALAR*)malloc(sizeof(SCALAR) * ((SX*(NX-1)) + 1)); +#elif DIMS == 2 + zfp_field* field = zfp_field_2d(NULL, ZFP_TYPE, NX, NY); + zfp_field_set_stride_2d(field, SX, SY); + SCALAR* data = (SCALAR*)malloc(sizeof(SCALAR) * ((SX*(NX-1)) + (SY*(NY-1)) + 1)); +#elif DIMS == 3 + zfp_field* field = zfp_field_3d(NULL, ZFP_TYPE, NX, NY, NZ); + zfp_field_set_stride_3d(field, SX, SY, SZ); + SCALAR* data = 
(SCALAR*)malloc(sizeof(SCALAR) * ((SX*(NX-1)) + (SY*(NY-1)) + (SZ*(NZ-1)) + 1)); +#elif DIMS == 4 + zfp_field* field = zfp_field_4d(NULL, ZFP_TYPE, NX, NY, NZ, NW); + zfp_field_set_stride_4d(field, SX, SY, SZ, SW); + SCALAR* data = (SCALAR*)malloc(sizeof(SCALAR) * ((SX*(NX-1)) + (SY*(NY-1)) + (SZ*(NZ-1)) + (SW*(NW-1)) + 1)); +#endif + assert_non_null(data); + + zfp_field_set_pointer(field, data); + bundle->field = field; + bundle->data = data; + + *state = bundle; + + return 0; +} + +static int +setupNegativeStrided(void **state) +{ + struct setupVars *bundle = malloc(sizeof(struct setupVars)); + assert_non_null(bundle); + +#if DIMS == 1 + zfp_field* field = zfp_field_1d(NULL, ZFP_TYPE, NX); + zfp_field_set_stride_1d(field, -SX); + SCALAR* data = (SCALAR*)malloc(sizeof(SCALAR) * ((SX*(NX-1)) + 1)); +#elif DIMS == 2 + zfp_field* field = zfp_field_2d(NULL, ZFP_TYPE, NX, NY); + zfp_field_set_stride_2d(field, -SX, -SY); + SCALAR* data = (SCALAR*)malloc(sizeof(SCALAR) * ((SX*(NX-1)) + (SY*(NY-1)) + 1)); +#elif DIMS == 3 + zfp_field* field = zfp_field_3d(NULL, ZFP_TYPE, NX, NY, NZ); + zfp_field_set_stride_3d(field, -SX, -SY, -SZ); + SCALAR* data = (SCALAR*)malloc(sizeof(SCALAR) * ((SX*(NX-1)) + (SY*(NY-1)) + (SZ*(NZ-1)) + 1)); +#elif DIMS == 4 + zfp_field* field = zfp_field_4d(NULL, ZFP_TYPE, NX, NY, NZ, NW); + zfp_field_set_stride_4d(field, -SX, -SY, -SZ, -SW); + SCALAR* data = (SCALAR*)malloc(sizeof(SCALAR) * ((SX*(NX-1)) + (SY*(NY-1)) + (SZ*(NZ-1)) + (SW*(NW-1)) + 1)); +#endif + assert_non_null(data); + + zfp_field_set_pointer(field, data); + bundle->field = field; + bundle->data = data; + + *state = bundle; + + return 0; +} + +static int +teardown(void **state) +{ + struct setupVars *bundle = *state; + + zfp_field_free(bundle->field); + + if (bundle->data != NULL) + free(bundle->data); + + free(bundle); + + return 0; +} + +static void +given_contiguousData_isContiguousReturnsTrue(void **state) +{ + struct setupVars *bundle = *state; + zfp_field* field = 
bundle->field; + + assert_true(zfp_field_is_contiguous(field)); +} + +static void +given_noncontiguousData_isContiguousReturnsFalse(void **state) +{ + struct setupVars *bundle = *state; + zfp_field* field = bundle->field; + + assert_false(zfp_field_is_contiguous(field)); +} + +static void +when_noFieldData_fieldBeginReturnsNull(void **state) +{ + struct setupVars *bundle = *state; + zfp_field* field = bundle->field; + + assert_null(zfp_field_begin(field)); +} + +static void +when_contiguousData_fieldBeginsAtDataPointer(void **state) +{ + struct setupVars *bundle = *state; + zfp_field* field = bundle->field; + + assert_true(zfp_field_begin(field) == zfp_field_pointer(field)); +} + +static void +when_noncontiguousDataWithNegativeStride_fieldBeginsAtCorrectLocation(void **state) +{ + struct setupVars *bundle = *state; + zfp_field* field = bundle->field; + +#if DIMS == 1 + ptrdiff_t min = ((int)-SX * (ptrdiff_t)(NX - 1)); +#elif DIMS == 2 + ptrdiff_t min = ((int)-SX * (ptrdiff_t)(NX - 1)) + ((int)-SY * (ptrdiff_t)(NY - 1)); +#elif DIMS == 3 + ptrdiff_t min = ((int)-SX * (ptrdiff_t)(NX - 1)) + ((int)-SY * (ptrdiff_t)(NY - 1)) + ((int)-SZ * (ptrdiff_t)(NZ - 1)); +#elif DIMS == 4 + ptrdiff_t min = ((int)-SX * (ptrdiff_t)(NX - 1)) + ((int)-SY * (ptrdiff_t)(NY - 1)) + ((int)-SZ * (ptrdiff_t)(NZ - 1)) + ((int)-SW * (ptrdiff_t)(NW - 1)); +#endif + void* begin = (void*)((uchar*)field->data + min * (ptrdiff_t)zfp_type_size(field->type)); + assert_true(zfp_field_begin(field) == begin); +} + +static void +given_field_precisionCorrect(void **state) +{ + struct setupVars *bundle = *state; + zfp_field* field = bundle->field; + + assert_true(zfp_field_precision(field) == sizeof(SCALAR) * CHAR_BIT); +} + +static void +given_contiguousData_fieldSizeBytesCorrect(void **state) +{ + struct setupVars *bundle = *state; + zfp_field* field = bundle->field; + +#if DIMS == 1 + assert_true(zfp_field_size_bytes(field) == NX * sizeof(SCALAR)); +#elif DIMS == 2 + 
assert_true(zfp_field_size_bytes(field) == NX * NY * sizeof(SCALAR)); +#elif DIMS == 3 + assert_true(zfp_field_size_bytes(field) == NX * NY * NZ * sizeof(SCALAR)); +#elif DIMS == 4 + assert_true(zfp_field_size_bytes(field) == NX * NY * NZ * NW * sizeof(SCALAR)); +#endif +} + +static void +given_noncontiguousData_fieldSizeBytesCorrect(void **state) +{ + struct setupVars *bundle = *state; + zfp_field* field = bundle->field; + +#if DIMS == 1 + assert_true(zfp_field_size_bytes(field) == ((SX*(NX-1) + 1) * sizeof(SCALAR))); +#elif DIMS == 2 + assert_true(zfp_field_size_bytes(field) == ((SX*(NX-1) + SY*(NY-1) + 1) * sizeof(SCALAR))); +#elif DIMS == 3 + assert_true(zfp_field_size_bytes(field) == ((SX*(NX-1) + SY*(NY-1) + SZ*(NZ-1) + 1) * sizeof(SCALAR))); +#elif DIMS == 4 + assert_true(zfp_field_size_bytes(field) == ((SX*(NX-1) + SY*(NY-1) + SZ*(NZ-1) + SW*(NW-1) + 1) * sizeof(SCALAR))); +#endif +} + + + +int main() +{ + const struct CMUnitTest tests[] = { + cmocka_unit_test_setup_teardown(given_contiguousData_isContiguousReturnsTrue, setupContiguous, teardown), + cmocka_unit_test_setup_teardown(given_noncontiguousData_isContiguousReturnsFalse, setupStrided, teardown), + cmocka_unit_test_setup_teardown(when_noFieldData_fieldBeginReturnsNull, setupBasic, teardown), + cmocka_unit_test_setup_teardown(when_contiguousData_fieldBeginsAtDataPointer, setupContiguous, teardown), + cmocka_unit_test_setup_teardown(when_noncontiguousDataWithNegativeStride_fieldBeginsAtCorrectLocation, setupNegativeStrided, teardown), + cmocka_unit_test_setup_teardown(given_field_precisionCorrect, setupBasic, teardown), + cmocka_unit_test_setup_teardown(given_contiguousData_fieldSizeBytesCorrect, setupContiguous, teardown), + cmocka_unit_test_setup_teardown(given_noncontiguousData_fieldSizeBytesCorrect, setupStrided, teardown), + }; + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/tests/testviews.cpp b/tests/testviews.cpp new file mode 100644 index 00000000..fc2b1721 --- /dev/null 
+++ b/tests/testviews.cpp @@ -0,0 +1,241 @@ +#include <cmath> +#include <cstdlib> +#include <iostream> +#include <sstream> +#include "zfp/array2.hpp" +#include "zfp/array3.hpp" +#ifdef _OPENMP +#include <omp.h> +#endif + +#define EPSILON 1e-3 + +// random integer in {begin, ..., end} +static size_t +rand(size_t begin, size_t end) +{ + return begin + size_t(rand()) % (end - begin + 1); +} + +// ensure f and g are sufficiently close +static void +verify(double f, double g) +{ + if (std::fabs(f - g) > EPSILON) { +#ifdef _OPENMP + #pragma omp critical +#endif + std::cerr << "error: " << f << " != " << g << std::endl; + exit(EXIT_FAILURE); + } +} + +static int +usage() +{ + std::cerr << "Usage: testviews [nx ny nz [x0 y0 z0 mx my mz]]" << std::endl; + return EXIT_FAILURE; +} + +int main(int argc, char* argv[]) +{ + size_t nx = 8; + size_t ny = 48; + size_t nz = 32; + size_t x0, y0, z0; + size_t mx, my, mz; + double rate = 16; + + // parse command-line arguments + switch (argc) { + case 10: + if ((std::istringstream(argv[4]) >> x0).fail() || + (std::istringstream(argv[5]) >> y0).fail() || + (std::istringstream(argv[6]) >> z0).fail() || + (std::istringstream(argv[7]) >> mx).fail() || !mx || + (std::istringstream(argv[8]) >> my).fail() || !my || + (std::istringstream(argv[9]) >> mz).fail() || !mz) + return usage(); + // FALLTHROUGH + case 4: + if ((std::istringstream(argv[1]) >> nx).fail() || !nx || + (std::istringstream(argv[2]) >> ny).fail() || !ny || + (std::istringstream(argv[3]) >> nz).fail() || !nz) + return usage(); + // FALLTHROUGH + case 1: + break; + default: + return usage(); + } + + if (argc < 10) { + // generate random view + x0 = rand(0, nx - 1); + y0 = rand(0, ny - 1); + z0 = rand(0, nz - 1); + mx = rand(1, nx - x0); + my = rand(1, ny - y0); + mz = rand(1, nz - z0); + } + + // validate arguments + if (x0 + mx > nx || y0 + my > ny || z0 + mz > nz) { + std::cerr << "invalid view parameters" << std::endl; + return EXIT_FAILURE; + } + + std::cout << "a(" << nx << ", " << ny << ", " << nz << ")" << std::endl;
+ std::cout << "v(" << mx << ", " << my << ", " << mz << ") + (" << x0 << ", " << y0 << ", " << z0 << ")" << std::endl; + + // initialize 3D array to linear function + zfp::array3<double> a(nx, ny, nz, rate); + for (size_t z = 0; z < nz; z++) + for (size_t y = 0; y < ny; y++) + for (size_t x = 0; x < nx; x++) + a(x, y, z) = static_cast<double>(x + nx * (y + ny * z)); + + // rectangular view into a + std::cout << std::endl << "3D view" << std::endl; + zfp::array3<double>::view v(&a, x0, y0, z0, mx, my, mz); + for (size_t z = 0; z < v.size_z(); z++) + for (size_t y = 0; y < v.size_y(); y++) + for (size_t x = 0; x < v.size_x(); x++) { + std::cout << x << " " << y << " " << z << ": " << a(x0 + x, y0 + y, z0 + z) << " " << v(x, y, z) << std::endl; + verify(a(x0 + x, y0 + y, z0 + z), v(x, y, z)); + } + + // flat view of all of a + std::cout << std::endl << "3D flat view" << std::endl; + zfp::array3<double>::flat_view fv(&a); + for (size_t z = 0; z < fv.size_z(); z++) + for (size_t y = 0; y < fv.size_y(); y++) + for (size_t x = 0; x < fv.size_x(); x++) { + std::cout << x << " " << y << " " << z << ": " << a(x, y, z) << " " << fv[fv.index(x, y, z)] << std::endl; + verify(a(x, y, z), fv[fv.index(x, y, z)]); + } + + // nested view of all of a + std::cout << std::endl << "3D nested view" << std::endl; + zfp::array3<double>::nested_view nv(&a); + for (size_t z = 0; z < nv.size_z(); z++) + for (size_t y = 0; y < nv.size_y(); y++) + for (size_t x = 0; x < nv.size_x(); x++) { + std::cout << x << " " << y << " " << z << ": " << a(x, y, z) << " " << nv[z][y][x] << std::endl; + verify(a(x, y, z), nv[z][y][x]); + } + + // pointers and iterators into a via view v + std::cout << std::endl << "3D view pointers and iterators" << std::endl; + zfp::array3<double>::view::const_reference vr = v(0, 0, 0); + zfp::array3<double>::view::const_pointer p = &vr; + p = &v(0, 0, 0); + for (zfp::array3<double>::view::const_iterator it = v.begin(); it != v.end(); it++) { + size_t x = it.i(); + size_t y = it.j(); + size_t z = it.k(); +std::cout << x << " " << y << " " << z << std::endl; +std::cout << mx << " " << my << " " << std::endl; + verify(*it, p[x + mx * (y + my * z)]); + } + + // pointers and iterators into a via flat view fv + std::cout << std::endl << "3D flat view pointers and iterators" << std::endl; + zfp::array3<double>::flat_view::const_reference fvr = fv[0]; + zfp::array3<double>::flat_view::const_pointer fp = &fvr; + fp = &fv(0, 0, 0); + for (zfp::array3<double>::flat_view::const_iterator it = fv.begin(); it != fv.end(); it++) { + size_t x = it.i(); + size_t y = it.j(); + size_t z = it.k(); + verify(*it, fp[x + nx * (y + ny * z)]); + } + + // 2D slice of a + std::cout << std::endl << "2D slice" << std::endl; + size_t z = rand(0, nv.size_z() - 1); + zfp::array3<double>::nested_view2 slice2(nv[z]); + for (size_t y = 0; y < slice2.size_y(); y++) + for (size_t x = 0; x < slice2.size_x(); x++) { + std::cout << x << " " << y << " " << z << ": " << a(x, y, z) << " " << slice2[y][x] << std::endl; + verify(a(x, y, z), slice2[y][x]); + } + + // 2D array constructed from 2D slice (exercises deep copy via iterator) + std::cout << std::endl << "2D array from 2D slice" << std::endl; + zfp::array2<double> b(slice2); + for (size_t y = 0; y < b.size_y(); y++) + for (size_t x = 0; x < b.size_x(); x++) { + std::cout << x << " " << y << ": " << b(x, y) << " " << slice2[y][x] << std::endl; + verify(b(x, y), slice2[y][x]); + } + + // 1D slice of a + std::cout << std::endl << "1D slice" << std::endl; + size_t y = rand(0, slice2.size_y() - 1); + zfp::array3<double>::nested_view1 slice1 = slice2[y]; + for (size_t x = 0; x < slice1.size_x(); x++) { + std::cout << x << " " << y << " " << z << ": " << a(x, y, z) << " " << slice1[x] << std::endl; + verify(a(x, y, z), slice1[x]); + } + + // 2D array constructed from 2D slice of 3D array (exercises deep copy via iterator) + std::cout << std::endl << "2D array from 2D slice of 3D array" << std::endl; + zfp::array2<double> c(slice2); + for (size_t y = 0; y < c.size_y(); y++) + for (size_t x = 0; x < c.size_x(); x++) { + std::cout << x << " " << y
<< ": " << c(x, y) << " " << slice2[y][x] << std::endl; + verify(c(x, y), slice2[y][x]); + } + + // 2D thread-safe read-only view of c + std::cout << std::endl << "2D private read-only view" << std::endl; + zfp::array2<double>::private_const_view d(&c); + for (size_t y = 0; y < c.size_y(); y++) + for (size_t x = 0; x < c.size_x(); x++) { + std::cout << x << " " << y << ": " << c(x, y) << " " << d(x, y) << std::endl; + verify(c(x, y), d(x, y)); + } + +#ifdef _OPENMP + std::cout << std::endl << "multithreaded 2D private read-only views" << std::endl; + // copy c for verification; direct accesses to c are not thread-safe + double* data = new double[c.size()]; + c.get(data); + #pragma omp parallel + { + // make a thread-local view into c + zfp::array2<double>::private_const_view d(&c); + for (size_t y = 0; y < d.size_y(); y++) + for (size_t x = 0; x < d.size_x(); x++) { + double val = data[x + nx * y]; + if (omp_get_thread_num() == 0) + std::cout << x << " " << y << ": " << val << " " << d(x, y) << std::endl; + verify(val, d(x, y)); + } + } + + std::cout << std::endl << "multithreaded 2D private read-write views" << std::endl; + #pragma omp parallel + { + // partition c into disjoint views + zfp::array2<double>::private_view d(&c); + d.partition(omp_get_thread_num(), omp_get_num_threads()); + for (size_t j = 0; j < d.size_y(); j++) + for (size_t i = 0; i < d.size_x(); i++) { + d(i, j) += 1; + size_t x = d.global_x(i); + size_t y = d.global_y(j); + double val = data[x + nx * y] + 1; + if (omp_get_thread_num() == 0) + std::cout << x << " " << y << ": " << val << " " << d(i, j) << std::endl; + verify(val, d(i, j)); + } + } + delete[] data; +#endif + + std::cout << std::endl << "all tests passed" << std::endl; + + return 0; +} diff --git a/tests/testzfp.cpp b/tests/testzfp.cpp index 7469358f..82b4074e 100644 --- a/tests/testzfp.cpp +++ b/tests/testzfp.cpp @@ -11,9 +11,10 @@ #include #include #include "zfp.h" -#include "zfparray1.h" -#include "zfparray2.h" -#include "zfparray3.h" +#include 
"zfp/array1.hpp" +#include "zfp/array2.hpp" +#include "zfp/array3.hpp" +#include "zfp/array4.hpp" enum ArraySize { Small = 0, // 2^12 = 4096 scalars (2^12 = (2^6)^2 = (2^4)^3 = (2^3)^4) @@ -48,7 +49,7 @@ refine1d(int* g, const int* f, size_t m) for (size_t x = 0; x < n; x++) { int s = 0; - for (int i = 0; i < 4; i++) { + for (size_t i = 0; i < 4; i++) { size_t xx = x & 1u ? (x / 2 + i - 1 + m) % m : x / 2; s += weight[i] * f[xx]; } @@ -66,9 +67,9 @@ refine2d(int* g, const int* f, size_t m) for (size_t y = 0; y < n; y++) for (size_t x = 0; x < n; x++) { int s = 0; - for (int j = 0; j < 4; j++) { + for (size_t j = 0; j < 4; j++) { size_t yy = y & 1u ? (y / 2 + j - 1 + m) % m : y / 2; - for (int i = 0; i < 4; i++) { + for (size_t i = 0; i < 4; i++) { size_t xx = x & 1u ? (x / 2 + i - 1 + m) % m : x / 2; s += weight[i] * weight[j] * f[xx + m * yy]; } @@ -88,11 +89,11 @@ refine3d(int* g, const int* f, size_t m) for (size_t y = 0; y < n; y++) for (size_t x = 0; x < n; x++) { int s = 0; - for (int k = 0; k < 4; k++) { + for (size_t k = 0; k < 4; k++) { size_t zz = z & 1u ? (z / 2 + k - 1 + m) % m : z / 2; - for (int j = 0; j < 4; j++) { + for (size_t j = 0; j < 4; j++) { size_t yy = y & 1u ? (y / 2 + j - 1 + m) % m : y / 2; - for (int i = 0; i < 4; i++) { + for (size_t i = 0; i < 4; i++) { size_t xx = x & 1u ? (x / 2 + i - 1 + m) % m : x / 2; s += weight[i] * weight[j] * weight[k] * f[xx + m * (yy + m * zz)]; } @@ -114,13 +115,13 @@ refine4d(int* g, const int* f, size_t m) for (size_t y = 0; y < n; y++) for (size_t x = 0; x < n; x++) { int s = 0; - for (int l = 0; l < 4; l++) { + for (size_t l = 0; l < 4; l++) { size_t ww = w & 1u ? (w / 2 + l - 1 + m) % m : w / 2; - for (int k = 0; k < 4; k++) { + for (size_t k = 0; k < 4; k++) { size_t zz = z & 1u ? (z / 2 + k - 1 + m) % m : z / 2; - for (int j = 0; j < 4; j++) { + for (size_t j = 0; j < 4; j++) { size_t yy = y & 1u ? 
(y / 2 + j - 1 + m) % m : y / 2; - for (int i = 0; i < 4; i++) { + for (size_t i = 0; i < 4; i++) { size_t xx = x & 1u ? (x / 2 + i - 1 + m) % m : x / 2; s += weight[i] * weight[j] * weight[k] * weight[l] * f[xx + m * (yy + m * (zz + m * ww))]; } @@ -270,7 +271,7 @@ template inline void initialize(Scalar* p, uint dims, ArraySize array_size) { - size_t size = 1ul << ((array_size == Small ? 12 : 24) / dims); + size_t size = size_t(1) << ((array_size == Small ? 12 : 24) / dims); switch (dims) { default: @@ -317,7 +318,7 @@ test_rate(zfp_stream* stream, const zfp_field* input, double rate, Scalar tolera zfp_type type = zfp_field_type(input); // allocate memory for compressed data - rate = zfp_stream_set_rate(stream, rate, type, dims, 0); + rate = zfp_stream_set_rate(stream, rate, type, dims, zfp_false); size_t bufsize = zfp_stream_maximum_size(stream, input); uchar* buffer = new uchar[bufsize]; bitstream* s = stream_open(buffer, bufsize); @@ -630,6 +631,38 @@ update_array3(zfp::array3& a) a(0, 0, 0) = std::max(a(0, 0, 0), a(i, j, k)); } +// perform 4D differencing +template +inline void +update_array4(zfp::array4& a) +{ + for (uint l = 0; l < a.size_w(); l++) + for (uint k = 0; k < a.size_z(); k++) + for (uint j = 0; j < a.size_y(); j++) + for (uint i = 0; i < a.size_x() - 1; i++) + a(i, j, k, l) -= a(i + 1, j, k, l); + for (uint l = 0; l < a.size_w(); l++) + for (uint k = 0; k < a.size_z(); k++) + for (uint j = 0; j < a.size_y() - 1; j++) + for (uint i = 0; i < a.size_x(); i++) + a(i, j, k, l) -= a(i, j + 1, k, l); + for (uint l = 0; l < a.size_w(); l++) + for (uint k = 0; k < a.size_z() - 1; k++) + for (uint j = 0; j < a.size_y(); j++) + for (uint i = 0; i < a.size_x(); i++) + a(i, j, k, l) -= a(i, j, k + 1, l); + for (uint l = 0; l < a.size_w() - 1; l++) + for (uint k = 0; k < a.size_z(); k++) + for (uint j = 0; j < a.size_y(); j++) + for (uint i = 0; i < a.size_x(); i++) + a(i, j, k, l) -= a(i, j, k, l + 1); + for (uint l = 0; l < a.size_w() - 1; l++) + for (uint k 
= 0; k < a.size_z() - 1; k++) + for (uint j = 0; j < a.size_y() - 1; j++) + for (uint i = 0; i < a.size_x() - 1; i++) + a(0, 0, 0, 0) = std::max(a(0, 0, 0, 0), a(i, j, k, l)); +} + template <typename Array> inline void update_array(Array& a); @@ -657,6 +690,14 @@ template <> inline void update_array(zfp::array3<double>& a) { update_array3(a); } +template <> +inline void +update_array(zfp::array4<float>& a) { update_array4(a); } + +template <> +inline void +update_array(zfp::array4<double>& a) { update_array4(a); } + // test random-accessible array primitive template <typename Scalar> inline uint @@ -716,9 +757,9 @@ test(uint dims, ArraySize array_size) Scalar* f = new Scalar[n]; // determine array size - uint nx, ny, nz ,nw; + uint nx, ny, nz, nw; zfp_field* field = zfp_field_alloc(); - zfp_field_set_type(field, zfp::codec<Scalar>::type); + zfp_field_set_type(field, zfp::internal::trait<Scalar>::type); zfp_field_set_pointer(field, f); switch (dims) { case 1: @@ -749,7 +790,7 @@ test(uint dims, ArraySize array_size) std::cout << "testing " << dims << "D array of " << (t == 0 ? 
"floats" : "doubles") << std::endl; // test data integrity - uint32 checksum[2][2][4] = { + uint32 checksum[2][2][4] = { // [size][type][dims] // small {{ 0x54174c44u, 0x86609589u, 0xfc0a6a76u, 0xa3481e00u }, { 0x7d257bb6u, 0x294bb210u, 0x68614d26u, 0xf6bd3a21u }}, @@ -767,7 +808,7 @@ test(uint dims, ArraySize array_size) // test fixed rate for (uint rate = 2u >> t, i = 0; rate <= 32 * (t + 1); rate *= 4, i++) { // expected max errors - double emax[2][2][4][4] = { + double emax[2][2][4][4] = { // [size][type][dims][rate] // small { { @@ -808,7 +849,7 @@ test(uint dims, ArraySize array_size) // test fixed precision for (uint prec = 4u << t, i = 0; i < 3; prec *= 2, i++) { // expected compressed sizes - size_t bytes[2][2][4][3] = { + size_t bytes[2][2][4][3] = { // [size][type][dims][prec] // small { { @@ -847,7 +888,7 @@ test(uint dims, ArraySize array_size) for (uint i = 0; i < 3; i++) { Scalar tol[] = { Scalar(1e-3), 2 * std::numeric_limits::epsilon(), 0 }; // expected compressed sizes - size_t bytes[2][2][4][3] = { + size_t bytes[2][2][4][3] = { // [size][type][dims][tol] // small { { @@ -885,7 +926,7 @@ test(uint dims, ArraySize array_size) // test reversible { // expected compressed sizes - size_t bytes[2][2][4] = { + size_t bytes[2][2][4] = { // [size][type][dims] // small { { @@ -921,28 +962,28 @@ test(uint dims, ArraySize array_size) } // test compressed array support - double emax[2][2][3] = { + double emax[2][2][4] = { // [size][type][dims] (construct test) // small { - {4.578e-05, 7.630e-06, 3.148e-05}, - {1.832e-04, 8.584e-06, 3.338e-05}, + {4.578e-05, 7.630e-06, 3.148e-05, 3.598e-03}, + {1.832e-04, 8.584e-06, 3.338e-05, 3.312e-03}, }, // large { - {0.000e+00, 0.000e+00, 0.000e+00}, - {2.289e-05, 0.000e+00, 0.000e+00}, + {0.000e+00, 0.000e+00, 0.000e+00, 1.193e-07}, + {2.289e-05, 0.000e+00, 0.000e+00, 8.801e-08}, } }; - double dfmax[2][2][3] = { + double dfmax[2][2][4] = { // [size][type][dims] (update test) // small { - {2.155e-02, 3.755e-01, 
1.846e+00}, - {2.155e-02, 3.755e-01, 1.846e+00}, + {2.155e-02, 3.755e-01, 1.846e+00, 4.843e+01}, + {2.155e-02, 3.755e-01, 1.846e+00, 4.844e+01}, }, // large { - {2.441e-04, 4.883e-04, 1.221e-03}, - {2.670e-04, 4.883e-04, 1.221e-03}, + {2.441e-04, 4.883e-04, 1.222e-03, 2.567e-02}, + {2.670e-04, 4.883e-04, 1.222e-03, 2.567e-02}, } }; double rate = 16; @@ -962,7 +1003,10 @@ test(uint dims, ArraySize array_size) failures += test_array(a, f, n, static_cast(emax[array_size][t][dims - 1]), static_cast(dfmax[array_size][t][dims - 1])); } break; - case 4: // 4D arrays not yet supported + case 4: { + zfp::array4 a(nx, ny, nz, nw, rate, f); + failures += test_array(a, f, n, static_cast(emax[array_size][t][dims - 1]), static_cast(dfmax[array_size][t][dims - 1])); + } break; } @@ -979,6 +1023,7 @@ inline uint common_tests() { uint failures = 0; + uint warnings = 0; // test library version if (zfp_codec_version != ZFP_CODEC || zfp_library_version != ZFP_VERSION) { std::cout << "library header and binary version mismatch" << std::endl; @@ -1021,6 +1066,21 @@ common_tests() std::cout << "regression testing requires BIT_STREAM_WORD_TYPE=uint64" << std::endl; failures++; } + // warn if non-default compiler options are used +#if ZFP_ROUNDING_MODE != 0 + std::cout << "warning: selected ZFP_ROUNDING_MODE may break tests" << std::endl; + warnings++; +#ifdef ZFP_WITH_TIGHT_ERROR + std::cout << "warning: ZFP_WITH_TIGHT_ERROR option may break tests" << std::endl; + warnings++; +#endif +#endif +#ifdef ZFP_WITH_DAZ + std::cout << "warning: ZFP_WITH_DAZ option may break tests" << std::endl; + warnings++; +#endif + if (failures || warnings) + std::cout << std::endl; return failures; } diff --git a/tests/utils/CMakeLists.txt b/tests/utils/CMakeLists.txt new file mode 100644 index 00000000..170b34ff --- /dev/null +++ b/tests/utils/CMakeLists.txt @@ -0,0 +1,43 @@ +# libraries used by tests + +# seeded random number generators +add_library(rand32Lib rand32.c rand32.h) + +add_library(rand64Lib 
rand64.c rand64.h) + +# hashing +add_library(zfpHashLib zfpHash.c zfpHash.h) +if(CMAKE_C_COMPILER_ID STREQUAL "GNU") + target_compile_options(zfpHashLib + PUBLIC $<$:-Wno-pointer-sign>) +endif() + +# fixed point +add_library(fixedpoint96Lib fixedpoint96.c fixedpoint96.h) + +# [seeded] random correlated array generator +add_library(genSmoothRandNumsLib genSmoothRandNums.c genSmoothRandNums.h) +target_link_libraries(genSmoothRandNumsLib PRIVATE rand64Lib fixedpoint96Lib) + +# strided array operations +add_library(stridedOperationsLib stridedOperations.c stridedOperations.h) +target_link_libraries(stridedOperationsLib PRIVATE zfp) + +# compute zfp compression parameters +add_library(zfpCompressionParamsLib zfpCompressionParams.c zfpCompressionParams.h) + +# timer +add_library(zfpTimerLib zfpTimer.c zfpTimer.h) + +# checksums API +add_library(zfpChecksumsLib zfpChecksums.c zfpChecksums.h) +if(PRINT_CHECKSUMS) + target_compile_definitions(zfpChecksumsLib PUBLIC PRINT_CHECKSUMS) +endif() + +if(HAVE_LIBM_MATH) + target_link_libraries(rand32Lib PRIVATE m) + target_link_libraries(rand64Lib PRIVATE m) + target_link_libraries(genSmoothRandNumsLib PRIVATE m) + target_link_libraries(zfpCompressionParamsLib PRIVATE m) +endif() diff --git a/tests/utils/fixedpoint96.c b/tests/utils/fixedpoint96.c new file mode 100644 index 00000000..ef390971 --- /dev/null +++ b/tests/utils/fixedpoint96.c @@ -0,0 +1,296 @@ +#include <limits.h> +#include "fixedpoint96.h" + +void +initFixedPt(int64 i, uint32 f, fixedPt* result) +{ + result->i = i; + result->f = f; +} + +// logical shift +static void +shiftRightSigned(int64 input, uint shiftAmount, int64* result) +{ + if (input < 0) { + *result = ~(~input >> shiftAmount); + } else { + *result = input >> shiftAmount; + } +} + +// split 64 bit unsigned into two 32 bit unsigned parts +// both parts live in the lowest 32 bits of the uint64 +static void +splitUnsigned(uint64 input, uint64* upper, uint64* lower) +{ + *upper = input >> 32; + *lower = input - (*upper << 
32); +} + +// split 64 bit signed into two 32 bit parts +// both parts live in the lowest 32 bits of the 64 bit int +// upper keeps the sign +// lower is unsigned +static void +splitSigned(int64 input, int64* upper, uint64* lower) +{ + shiftRightSigned(input, 32, upper); + *lower = (uint64)(input - (*upper << 32)); +} + +static void +addFractional(uint32 a, uint32 b, uint32* result, uint32* carry) +{ + uint64 a64 = (uint64)a; + uint64 b64 = (uint64)b; + + uint64 carry64, result64; + splitUnsigned(a64 + b64, &carry64, &result64); + + // carry is 0 or 1 + *carry = (uint32)carry64; + *result = (uint32)result64; +} + +// returns 1 if sum overflows +static int +addSignedIntegers(int64 a, int64 b, int64* result) +{ + if (b >= 0 && a > LLONG_MAX - b) { + return 1; + } else if (b < 0 && a < LLONG_MIN - b) { + return 1; + } + + *result = a + b; + + return 0; +} + +int +roundFixedPt(fixedPt* fp, int64* result) +{ + return addSignedIntegers(fp->i, (int64)(fp->f >= 0x80000000), result); +} + +// returns 0 if successful, 1 otherwise +int +add(fixedPt* a, fixedPt* b, fixedPt* result) +{ + uint32 carry; + addFractional(a->f, b->f, &result->f, &carry); + + // detect overflow while trying each combination: 3 terms, 2 operations + int64 val; + + // (a + carry) + b + if (addSignedIntegers(a->i, (int64)carry, &val) == 0) { + if (addSignedIntegers(val, b->i, &result->i) == 0) { + return 0; + } + } + + // a + (carry + b) + if (addSignedIntegers((int64)carry, b->i, &val) == 0) { + if (addSignedIntegers(a->i, val, &result->i) == 0) { + return 0; + } + } + + // (a + b) + carry + if (addSignedIntegers(a->i, b->i, &val) == 0) { + if (addSignedIntegers(val, (int64)carry, &result->i) == 0) { + return 0; + } + } + + // unavoidable overflow + return 1; +} + +// always successful +// subtract borrow from a's next MSB [integer] part +static void +subtractFractional(uint32 a, uint32 b, uint32* result, int64* borrow) +{ + *result = a - b; + + *borrow = (a < b) ? 
1 : 0; +} + +// returns 1 if subtraction goes out of range +static int +subtractSignedIntegers(int64 a, int64 b, int64* result) +{ + if (b < 0 && a > LLONG_MAX + b) { + return 1; + } else if (b >= 0 && a < LLONG_MIN + b) { + return 1; + } + + *result = a - b; + + return 0; +} + +// returns 1 if result would go out of range +int +subtract(fixedPt* a, fixedPt* b, fixedPt* result) +{ + int64 borrow; + subtractFractional(a->f, b->f, &result->f, &borrow); + + // detect overflow while trying each combination: 3 terms, 2 operations + int64 val; + + // (a - borrow) - b + if (subtractSignedIntegers(a->i, borrow, &val) == 0) { + if (subtractSignedIntegers(val, b->i, &result->i) == 0) { + return 0; + } + } + + // a - (borrow + b) + if (addSignedIntegers(borrow, b->i, &val) == 0) { + if (subtractSignedIntegers(a->i, val, &result->i) == 0) { + return 0; + } + } + + // (a - b) - borrow + if (subtractSignedIntegers(a->i, b->i, &val) == 0) { + if (subtractSignedIntegers(val, borrow, &result->i) == 0) { + return 0; + } + } + + // unavoidable overflow + return 1; +} + +// returns 1 if integer part overflows +// fractional part is truncated +int +multiply(fixedPt* a, fixedPt* b, fixedPt* result) +{ + // split everything into 32 bit values, stored in 64 bit types + // that way, multiplying 2 32 bit values will fit in 64 bits + // also, uint64 to int64 casts will be safe + uint64 af, bf, rf; + af = (uint64)a->f; + bf = (uint64)b->f; + + uint64 ai0, bi0, ri0; + int64 ai1, bi1, ri1; + splitSigned(a->i, &ai1, &ai0); + splitSigned(b->i, &bi1, &bi0); + + // actual values: + // a = (2^32)*ai1 + ai0 + (2^-32)*af + // b = (2^32)*bi1 + bi0 + (2^-32)*bf + // + // r = a*b = + // A (2^64) * ai1*bi1 + // B + (2^32) * (ai1*bi0 + ai0*bi1) + // C + (ai0*bi0 + ai1*bf + af*bi1) + // D + (2^-32) * (ai0*bf + af*bi0) + // E + (2^-64) * af*bf + // + // + // (MSB) (LSB) + // -----fixedPt----- + // a*b= _____|_____|_ri1_|_ri0_|_rf__|_____ + // -----A----- + // -----B----- + // -----C----- + // -----D----- + 
// -----E----- + // perform sum from LSB to MSB + // - store 32 bit result + // - carry overflow to next, more significant 32 bit chunk + + // naming + // highA : 32 MSB of A stored in 32 LSB of highA + // lowA : 32 LSB of A stored in 32 LSB of lowA + + // (2^-64) * E + uint64 E = af * bf; + uint64 highE = E >> 32; + // omit lowE (truncated) + + // D = (2^-32) * (D1 + D2) + uint64 highD1, lowD1, highD2, lowD2; + splitUnsigned(ai0 * bf, &highD1, &lowD1); + splitUnsigned(af * bi0, &highD2, &lowD2); + + // highD -> result LSB integer part + // lowD -> result fractional part + uint64 highD, uCarryD; + splitUnsigned(lowD1 + lowD2 + highE, &uCarryD, &rf); + splitUnsigned(highD1 + highD2 + uCarryD, &uCarryD, &highD); + + // C = C1 + C2 + C3 + uint64 highC1; + int64 highC2, highC3; + uint64 lowC1, lowC2, lowC3; + // C1 is unsigned (uint32 * uint32 only fits in uint64) + // uint32 * int32 fits in int64 + splitUnsigned(ai0 * bi0, &highC1, &lowC1); + splitSigned(ai1 * (int64)bf, &highC2, &lowC2); + splitSigned((int64)af * bi1, &highC3, &lowC3); + + // highC -> MSB integer part + // lowC -> LSB integer part + int64 sCarryC; + uint64 highC; + splitSigned((int64)lowC1 + (int64)lowC2 + (int64)lowC3 + (int64)highD, &sCarryC, &ri0); + splitSigned((int64)highC1 + highC2 + highC3 + (int64)uCarryD + sCarryC, &sCarryC, &highC); + + // B = (2^32) * (B1 + B2) + int64 highB1, highB2; + uint64 lowB1, lowB2; + splitSigned(ai1 * (int64)bi0, &highB1, &lowB1); + splitSigned((int64)ai0 * bi1, &highB2, &lowB2); + + // lowB -> MSB integer part + // highB -> more significant 32 bits than we can hold in fixedPt + uint64 ri1Unsigned; + int64 sCarryB; + splitSigned((int64)lowB1 + (int64)lowB2 + (int64)highC, &sCarryB, &ri1Unsigned); + int64 highB = highB1 + highB2 + sCarryC + sCarryB; + + // MSB of overall product keeps sign + int64 A = ai1 * bi1 + highB; + + // check ri1Unsigned with A's sign is in range of int64 + ri1Unsigned <<= 32; + uint64 leftmostBitSetVal = (uint64)1 << 63; + if (A == -1) { + 
// result < 0 + + if (ri1Unsigned <= LLONG_MAX) { + return 1; + } + + // cast ri1Unsigned safely (set MSB to 1) + ri1 = (int64)(ri1Unsigned - leftmostBitSetVal) - leftmostBitSetVal; + + } else if (A == 0){ + // result >= 0 + + if (ri1Unsigned > LLONG_MAX) { + return 1; + } + + ri1 = (int64)ri1Unsigned; + + } else { + return 1; + } + + result->f = (uint32)rf; + result->i = ri1 + ri0; + + return 0; +} diff --git a/tests/utils/fixedpoint96.h b/tests/utils/fixedpoint96.h new file mode 100644 index 00000000..b48ebdab --- /dev/null +++ b/tests/utils/fixedpoint96.h @@ -0,0 +1,33 @@ +#ifndef FIXEDPT_H +#define FIXEDPT_H + +#include "include/zfp/internal/zfp/types.h" + +typedef struct { + // the number represented = i + (2^-32)*f + // integer part + int64 i; + // fractional part + uint32 f; +} fixedPt; + +void +initFixedPt(int64 i, uint32 f, fixedPt* result); + +// functions with int return type: +// return 0 if successful +// return 1 if errored + +int +roundFixedPt(fixedPt* fp, int64* result); + +int +add(fixedPt* a, fixedPt* b, fixedPt* result); + +int +subtract(fixedPt* a, fixedPt* b, fixedPt* result); + +int +multiply(fixedPt* a, fixedPt* b, fixedPt* result); + +#endif diff --git a/tests/utils/genChecksums.sh b/tests/utils/genChecksums.sh new file mode 100755 index 00000000..24f95ec3 --- /dev/null +++ b/tests/utils/genChecksums.sh @@ -0,0 +1,51 @@ +#!/usr/bin/env bash +set -ex + +MAX_DIM=4 +SCALAR_TYPES=( "Float" "Double" "Int32" "Int64" ) + +mkdir -p checksumGenBuild +cd checksumGenBuild + +cmake ../../.. -DZFP_WITH_OPENMP=OFF -DPRINT_CHECKSUMS=1 +cmake --build . 
-- -j + +for DIM in $(seq 1 $MAX_DIM); +do + DIM_STR="${DIM}d" + + for SCALAR_STR in "${SCALAR_TYPES[@]}" + do + + TEST_OUTPUT_FILE="test_output" + TEMP_FILE="temp" + TEMP_CHECKSUMS_FILE="temp_checksums" + OUTPUT_FILE="${DIM_STR}${SCALAR_STR}.h" + + ctest -V -R "testZfpEncodeBlock${DIM_STR}${SCALAR_STR}" -O $TEMP_FILE + cat "$TEMP_FILE" >> "$TEST_OUTPUT_FILE" + + ctest -V -R "testZfpEncodeBlockStrided${DIM_STR}${SCALAR_STR}" -O $TEMP_FILE + cat "$TEMP_FILE" >> "$TEST_OUTPUT_FILE" + + ctest -V -R "testZfpDecodeBlock${DIM_STR}${SCALAR_STR}" -O $TEMP_FILE + cat "$TEMP_FILE" >> "$TEST_OUTPUT_FILE" + + ctest -V -R "testZfpDecodeBlockStrided${DIM_STR}${SCALAR_STR}" -O $TEMP_FILE + cat "$TEMP_FILE" >> "$TEST_OUTPUT_FILE" + + ctest -V -R "testZfpSerial${DIM_STR}${SCALAR_STR}" -O $TEMP_FILE + cat "$TEMP_FILE" >> "$TEST_OUTPUT_FILE" + + grep -o '{UINT64C(0x.*' $TEST_OUTPUT_FILE > $TEMP_CHECKSUMS_FILE + NUM_CHECKSUMS=$(wc -l < "$TEMP_CHECKSUMS_FILE") + + # create valid .h file + + echo "static const checksum_tuples _${DIM_STR}${SCALAR_STR}Checksums[${NUM_CHECKSUMS}] = {" > $OUTPUT_FILE + cat $TEMP_CHECKSUMS_FILE >> $OUTPUT_FILE + echo "};" >> $OUTPUT_FILE + + rm $TEST_OUTPUT_FILE $TEMP_FILE $TEMP_CHECKSUMS_FILE + done +done diff --git a/tests/utils/genSmoothRandNums.c b/tests/utils/genSmoothRandNums.c new file mode 100644 index 00000000..20c2ba47 --- /dev/null +++ b/tests/utils/genSmoothRandNums.c @@ -0,0 +1,918 @@ +#include <math.h> +#include <stdlib.h> +#include <string.h> + +#include "genSmoothRandNums.h" +#include "fixedpoint96.h" +#include "rand64.h" + +#define FLOAT_MANTISSA_BITS 23 +#define DOUBLE_MANTISSA_BITS 52 + +size_t +intPow(size_t base, int exponent) +{ + size_t result = 1; + + int i; + for (i = 0; i < exponent; i++) { + result *= base; + } + + return result; +} + +static size_t +computeOffset(size_t l, size_t k, size_t j, size_t i, size_t sideLen, int numDims) +{ + size_t result = 0; + + switch (numDims) { + case 4: + result += l * sideLen * sideLen * sideLen; + + case 3: + result += k * 
sideLen * sideLen; + + case 2: + result += j * sideLen; + + case 1: + result += i; + } + + return result; +} + +static void +generateWeights(fixedPt* f, fixedPt weights[4]) +{ + fixedPt oneHalf = {0, (uint32)0x80000000}; + fixedPt one = {1, 0}; + fixedPt nine = {9, 0}; + fixedPt oneSixteenth = {0, (uint32)0x10000000}; + + weights[0] = (fixedPt){0, nextRand32()}; + subtract(weights, &oneHalf, weights); + multiply(weights, f, weights); + subtract(weights, &one, weights); + multiply(weights, &oneSixteenth, weights); + + weights[1] = (fixedPt){0, nextRand32()}; + subtract(weights+1, &oneHalf, weights+1); + multiply(weights+1, f, weights+1); + add(weights+1, &nine, weights+1); + multiply(weights+1, &oneSixteenth, weights+1); + + weights[2] = (fixedPt){0, nextRand32()}; + subtract(weights+2, &oneHalf, weights+2); + multiply(weights+2, f, weights+2); + add(weights+2, &nine, weights+2); + multiply(weights+2, &oneSixteenth, weights+2); + + weights[3] = (fixedPt){1, 0}; + subtract(weights+3, weights, weights+3); + subtract(weights+3, weights+1, weights+3); + subtract(weights+3, weights+2, weights+3); +} + +static void +computeTensorProductDouble(fixedPt* initialVec, size_t initialVecLen, int numDims, fixedPt** outputArrPtr) +{ + size_t i, j, k, l, index; + + size_t outputArrLen = intPow(initialVecLen, numDims); + + *outputArrPtr = malloc(outputArrLen * sizeof(fixedPt)); + + switch(numDims) { + case 1: + for (i = 0; i < initialVecLen; i++) { + (*outputArrPtr)[i] = initialVec[i]; + } + + break; + + case 2: + for (j = 0; j < initialVecLen; j++) { + for (i = 0; i < initialVecLen; i++) { + index = computeOffset(0, 0, j, i, initialVecLen, 2); + + fixedPt* fp = (*outputArrPtr) + index; + *fp = initialVec[i]; + multiply(fp, initialVec + j, fp); + } + } + + break; + + case 3: + for (k = 0; k < initialVecLen; k++) { + for (j = 0; j < initialVecLen; j++) { + for (i = 0; i < initialVecLen; i++) { + index = computeOffset(0, k, j, i, initialVecLen, 3); + + fixedPt* fp = (*outputArrPtr) + 
index; + *fp = initialVec[i]; + multiply(fp, initialVec + j, fp); + multiply(fp, initialVec + k, fp); + } + } + } + + break; + + case 4: + for (l = 0; l < initialVecLen; l++) { + for (k = 0; k < initialVecLen; k++) { + for (j = 0; j < initialVecLen; j++) { + for (i = 0; i < initialVecLen; i++) { + index = computeOffset(l, k, j, i, initialVecLen, 4); + + fixedPt* fp = (*outputArrPtr) + index; + *fp = initialVec[i]; + multiply(fp, initialVec + j, fp); + multiply(fp, initialVec + k, fp); + multiply(fp, initialVec + l, fp); + } + } + } + } + + break; + } +} + +// returns the length of the resulting array +static size_t +computeTensorProduct(int64* initialVec, size_t initialVecLen, int numDims, int64** outputArrPtr) +{ + size_t i, j, k, l, index; + + size_t outputArrLen = intPow(initialVecLen, numDims); + *outputArrPtr = malloc(outputArrLen * sizeof(int64)); + + switch(numDims) { + case 1: + for (i = 0; i < initialVecLen; i++) { + (*outputArrPtr)[i] = initialVec[i]; + } + + break; + + case 2: + for (j = 0; j < initialVecLen; j++) { + for (i = 0; i < initialVecLen; i++) { + index = computeOffset(0, 0, j, i, initialVecLen, 2); + (*outputArrPtr)[index] = initialVec[i] * initialVec[j]; + } + } + + break; + + case 3: + for (k = 0; k < initialVecLen; k++) { + for (j = 0; j < initialVecLen; j++) { + for (i = 0; i < initialVecLen; i++) { + index = computeOffset(0, k, j, i, initialVecLen, 3); + (*outputArrPtr)[index] = initialVec[i] * initialVec[j] * initialVec[k]; + } + } + } + + break; + + case 4: + for (l = 0; l < initialVecLen; l++) { + for (k = 0; k < initialVecLen; k++) { + for (j = 0; j < initialVecLen; j++) { + for (i = 0; i < initialVecLen; i++) { + index = computeOffset(l, k, j, i, initialVecLen, 4); + (*outputArrPtr)[index] = initialVec[i] * initialVec[j] * initialVec[k] * initialVec[l]; + } + } + } + } + + break; + } + + return outputArrLen; +} + +static void +generateGridWeights(fixedPt* f, fixedPt** gridWeights) +{ + fixedPt fourWeights[4]; + generateWeights(f, 
fourWeights); + + computeTensorProductDouble(fourWeights, 4, 2, gridWeights); +} + +static void +generateCubeWeights(fixedPt* f, fixedPt** cubeWeights) +{ + fixedPt fourWeights[4]; + generateWeights(f, fourWeights); + + computeTensorProductDouble(fourWeights, 4, 3, cubeWeights); +} + +static void +generateHyperCubeWeights(fixedPt* f, fixedPt** hyperCubeWeights) +{ + fixedPt fourWeights[4]; + generateWeights(f, fourWeights); + + computeTensorProductDouble(fourWeights, 4, 4, hyperCubeWeights); +} + +// displace val by 2*(distance out of bounds), only if out of bounds +static int64 +knockBack(int64 val, uint64 amplitude) +{ + int64 maxBound = (int64)amplitude; + int64 minBound = -maxBound; + if (val > maxBound) { + val -= 2 * (val - maxBound); + } else if (val < minBound) { + val += 2 * (minBound - val); + } + + return val; +} + +// uses 4 points: a dot b +// a[] is strided +static void +dotProd1d(int64* a, size_t stride, fixedPt b[4], uint64 amplitude, int64* result) +{ + fixedPt acc = {0, 0}; + + int i; + for (i = 0; i < 4; i++) { + fixedPt val = {a[i*stride], 0}; + + multiply(&val, b + i, &val); + add(&acc, &val, &acc); + } + + roundFixedPt(&acc, result); +} + +// uses 4x4 points: a dot b +// a[] is strided: strideI < strideJ +static void +dotProd2d(int64* a, size_t strideI, size_t strideJ, fixedPt b[16], uint64 amplitude, int64* result) +{ + fixedPt acc = {0, 0}; + + int i, j; + for (j = 0; j < 4; j++) { + for (i = 0; i < 4; i++) { + size_t aOffset = j*strideJ + i*strideI; + fixedPt val = {a[aOffset], 0}; + + size_t bOffset = computeOffset(0, 0, j, i, 4, 2); + multiply(&val, b + bOffset, &val); + add(&acc, &val, &acc); + } + } + + roundFixedPt(&acc, result); +} + +// uses 4x4x4 points: a dot b +// a[] is strided: strideI < strideJ < strideK +static void +dotProd3d(int64* a, size_t strideI, size_t strideJ, size_t strideK, fixedPt b[64], uint64 amplitude, int64* result) +{ + fixedPt acc = {0, 0}; + + int i, j, k; + for (k = 0; k < 4; k++) { + for (j = 0; j < 4; j++) 
{ + for (i = 0; i < 4; i++) { + size_t aOffset = k*strideK + j*strideJ + i*strideI; + fixedPt val = {a[aOffset], 0}; + + size_t bOffset = computeOffset(0, k, j, i, 4, 3); + multiply(&val, b + bOffset, &val); + add(&acc, &val, &acc); + } + } + } + + roundFixedPt(&acc, result); +} + +// uses 4x4x4x4 points: a dot b +// a[] is strided: strideI < strideJ < strideK < strideL +static void +dotProd4d(int64* a, size_t strideI, size_t strideJ, size_t strideK, size_t strideL, fixedPt b[256], uint64 amplitude, int64* result) +{ + fixedPt acc = {0, 0}; + + int i, j, k, l; + for (l = 0; l < 4; l++) { + for (k = 0; k < 4; k++) { + for (j = 0; j < 4; j++) { + for (i = 0; i < 4; i++) { + size_t aOffset = l*strideL + k*strideK + j*strideJ + i*strideI; + fixedPt val = {a[aOffset], 0}; + + size_t bOffset = computeOffset(l, k, j, i, 4, 4); + multiply(&val, b + bOffset, &val); + add(&acc, &val, &acc); + } + } + } + } + + roundFixedPt(&acc, result); +} + +// uses 4 points +static void +edgeWeightedSum(int64* data, size_t stride, fixedPt* f, uint64 amplitude, int64* result) +{ + fixedPt weights[4]; + generateWeights(f, weights); + + int64 val; + dotProd1d(data, stride, weights, amplitude, &val); + + *result = knockBack(val, amplitude); +} + +// uses 4x4 points +static void +faceWeightedSum(int64* data, size_t strideI, size_t strideJ, fixedPt* f, uint64 amplitude, int64* result) +{ + fixedPt* weights; + generateGridWeights(f, &weights); + + int64 val; + dotProd2d(data, strideI, strideJ, weights, amplitude, &val); + free(weights); + + *result = knockBack(val, amplitude); +} + +// uses 4x4x4 points +static void +cubeWeightedSum(int64* data, size_t strideI, size_t strideJ, size_t strideK, fixedPt* f, uint64 amplitude, int64* result) +{ + fixedPt* weights; + generateCubeWeights(f, &weights); + + int64 val; + dotProd3d(data, strideI, strideJ, strideK, weights, amplitude, &val); + free(weights); + + *result = knockBack(val, amplitude); +} + +// uses 4x4x4x4 points +static void 
+hyperCubeWeightedSum(int64* data, size_t strideI, size_t strideJ, size_t strideK, size_t strideL, fixedPt* f, uint64 amplitude, int64* result) +{ + fixedPt* weights; + generateHyperCubeWeights(f, &weights); + + int64 val; + dotProd4d(data, strideI, strideJ, strideK, strideL, weights, amplitude, &val); + free(weights); + + *result = knockBack(val, amplitude); +} + +// resulting array: [0 (inputArr) 0] +// size n -> (n+2) +static void +createPadded1dArray(int64* inputArr, size_t inputSideLen, int64* paddedArr) +{ + memcpy(paddedArr + 1, inputArr, inputSideLen * sizeof(int64)); + + paddedArr[0] = 0; + paddedArr[inputSideLen + 1] = 0; +} + +// resulting array's outermost rows and columns are zero +// size m*n -> (m+2)*(n+2) +static void +createPadded2dArray(int64* inputArr, size_t inputSideLen, int64* paddedArr) +{ + size_t paddedSideLen = inputSideLen + 2; + + size_t i, j; + for (j = 0; j < paddedSideLen; j++) { + for (i = 0; i < paddedSideLen; i++) { + int64 val; + if (j == 0 || j == (paddedSideLen-1) + || i == 0 || i == (paddedSideLen-1)) { + val = 0; + } else { + size_t inputIndex = computeOffset(0, 0, j-1, i-1, inputSideLen, 2); + val = inputArr[inputIndex]; + } + + size_t paddedIndex = computeOffset(0, 0, j, i, paddedSideLen, 2); + paddedArr[paddedIndex] = val; + } + } +} + +// resulting array's outermost entries are zero +// size m*n*p -> (m+2)*(n+2)*(p+2) +static void +createPadded3dArray(int64* inputArr, size_t inputSideLen, int64* paddedArr) +{ + size_t paddedSideLen = inputSideLen + 2; + + size_t i, j, k; + for (k = 0; k < paddedSideLen; k++) { + for (j = 0; j < paddedSideLen; j++) { + for (i = 0; i < paddedSideLen; i++) { + int64 val; + if (k == 0 || k == (paddedSideLen-1) + || j == 0 || j == (paddedSideLen-1) + || i == 0 || i == (paddedSideLen-1)) { + val = 0; + } else { + size_t inputIndex = computeOffset(0, k-1, j-1, i-1, inputSideLen, 3); + val = inputArr[inputIndex]; + } + + size_t paddedIndex = computeOffset(0, k, j, i, paddedSideLen, 3); + 
paddedArr[paddedIndex] = val; + } + } + } +} + +// resulting array's outermost entries are zero +// size m*n*p*q -> (m+2)*(n+2)*(p+2)*(q+2) +static void +createPadded4dArray(int64* inputArr, size_t inputSideLen, int64* paddedArr) +{ + size_t paddedSideLen = inputSideLen + 2; + + size_t i, j, k, l; + for (l = 0; l < paddedSideLen; l++) { + for (k = 0; k < paddedSideLen; k++) { + for (j = 0; j < paddedSideLen; j++) { + for (i = 0; i < paddedSideLen; i++) { + int64 val; + if (l == 0 || l == (paddedSideLen-1) + || k == 0 || k == (paddedSideLen-1) + || j == 0 || j == (paddedSideLen-1) + || i == 0 || i == (paddedSideLen-1)) { + val = 0; + } else { + size_t inputIndex = computeOffset(l-1, k-1, j-1, i-1, inputSideLen, 4); + val = inputArr[inputIndex]; + } + + size_t paddedIndex = computeOffset(l, k, j, i, paddedSideLen, 4); + paddedArr[paddedIndex] = val; + } + } + } + } +} + +// Generate a larger array containing all the original array's points +// plus entries in between adjacent points from the original array +// +// These new entries are computed as weighted sums from +// its local neighborhood , plus some random noise +static void +produceLargerNoisedArray(int64* inputArr, size_t inputSideLen, int numDims, uint64 amplitude, fixedPt* f, int64* outputArr) +{ + // pad (border/enclose) inputArr with zeros + size_t paddedSideLen = inputSideLen + 2; + size_t paddedTotalLen = intPow(paddedSideLen, numDims); + int64* paddedInputArr = malloc(paddedTotalLen * sizeof(int64)); + + size_t outputSideLen = 2*inputSideLen - 1; + size_t maxI = outputSideLen, maxJ = 1, maxK = 1, maxL = 1; + switch (numDims) { + case 1: + createPadded1dArray(inputArr, inputSideLen, paddedInputArr); + break; + + case 2: + createPadded2dArray(inputArr, inputSideLen, paddedInputArr); + maxJ = outputSideLen; + break; + + case 3: + createPadded3dArray(inputArr, inputSideLen, paddedInputArr); + maxJ = outputSideLen; + maxK = outputSideLen; + break; + + case 4: + createPadded4dArray(inputArr, inputSideLen, 
paddedInputArr); + maxJ = outputSideLen; + maxK = outputSideLen; + maxL = outputSideLen; + break; + } + + size_t outI, outJ, outK, outL; + for (outL = 0; outL < maxL; outL++) { + size_t inL = outL / 2; + + for (outK = 0; outK < maxK; outK++) { + size_t inK = outK / 2; + + for (outJ = 0; outJ < maxJ; outJ++) { + size_t inJ = outJ / 2; + + for (outI = 0; outI < maxI; outI++) { + size_t inI = outI / 2; + + int64* firstElementPtr = paddedInputArr; + size_t stride; + int64 val; + + + if (outL % 2 == 0) { + if (outK % 2 == 0) { + if (outJ % 2 == 0) { + if (outI % 2 == 0) { + // (0000) vertex + size_t inputIndex = computeOffset(inL, inK, inJ, inI, inputSideLen, numDims); + + val = inputArr[inputIndex]; + } else { + // (0001) edge centered point (i-direction) + firstElementPtr += computeOffset(inL+1, inK+1, inJ+1, inI, paddedSideLen, numDims); + + edgeWeightedSum(firstElementPtr, 1, f, amplitude, &val); + } + + } else { + if (outI % 2 == 0) { + // (0010) edge centered point (j-direction) + firstElementPtr += computeOffset(inL+1, inK+1, inJ, inI+1, paddedSideLen, numDims); + stride = paddedSideLen; + + edgeWeightedSum(firstElementPtr, stride, f, amplitude, &val); + } else { + // (0011) face centered point (ij plane) + firstElementPtr += computeOffset(inL+1, inK+1, inJ, inI, paddedSideLen, numDims); + size_t secondStride = paddedSideLen; + + faceWeightedSum(firstElementPtr, 1, secondStride, f, amplitude, &val); + } + } + } else { + if (outJ % 2 == 0) { + if (outI % 2 == 0) { + // (0100) edge centered point (k-direction) + firstElementPtr += computeOffset(inL+1, inK, inJ+1, inI+1, paddedSideLen, numDims); + stride = intPow(paddedSideLen, 2); + + edgeWeightedSum(firstElementPtr, stride, f, amplitude, &val); + } else { + // (0101) face centered point (ik plane) + firstElementPtr += computeOffset(inL+1, inK, inJ+1, inI, paddedSideLen, numDims); + size_t secondStride = intPow(paddedSideLen, 2); + + faceWeightedSum(firstElementPtr, 1, secondStride, f, amplitude, &val); + } + + } 
else { + if (outI % 2 == 0) { + // (0110) face centered point (jk plane) + firstElementPtr += computeOffset(inL+1, inK, inJ, inI+1, paddedSideLen, numDims); + stride = paddedSideLen; + size_t secondStride = intPow(paddedSideLen, 2); + + faceWeightedSum(firstElementPtr, stride, secondStride, f, amplitude, &val); + } else { + // (0111) cube centered point (ijk) + firstElementPtr += computeOffset(inL+1, inK, inJ, inI, paddedSideLen, numDims); + size_t secondStride = paddedSideLen; + size_t thirdStride = intPow(paddedSideLen, 2); + + cubeWeightedSum(firstElementPtr, 1, secondStride, thirdStride, f, amplitude, &val); + } + } + + } + + } else { + if (outK % 2 == 0) { + if (outJ % 2 == 0) { + if (outI % 2 == 0) { + // (1000) edge centered point (l-direction) + firstElementPtr += computeOffset(inL, inK+1, inJ+1, inI+1, paddedSideLen, numDims); + stride = intPow(paddedSideLen, 3); + + edgeWeightedSum(firstElementPtr, stride, f, amplitude, &val); + } else { + // (1001) face centered point (il plane) + firstElementPtr += computeOffset(inL, inK+1, inJ+1, inI, paddedSideLen, numDims); + stride = 1; + size_t secondStride = intPow(paddedSideLen, 3); + + faceWeightedSum(firstElementPtr, stride, secondStride, f, amplitude, &val); + } + + } else { + if (outI % 2 == 0) { + // (1010) face centered point (jl plane) + firstElementPtr += computeOffset(inL, inK+1, inJ, inI+1, paddedSideLen, numDims); + stride = paddedSideLen; + size_t secondStride = intPow(paddedSideLen, 3); + + faceWeightedSum(firstElementPtr, stride, secondStride, f, amplitude, &val); + } else { + // (1011) cube centered point (ijl) + firstElementPtr += computeOffset(inL, inK+1, inJ, inI, paddedSideLen, numDims); + size_t secondStride = paddedSideLen; + size_t thirdStride = intPow(paddedSideLen, 3); + + cubeWeightedSum(firstElementPtr, 1, secondStride, thirdStride, f, amplitude, &val); + } + } + } else { + if (outJ % 2 == 0) { + if (outI % 2 == 0) { + // (1100) face centered point (kl plane) + firstElementPtr += 
computeOffset(inL, inK, inJ+1, inI+1, paddedSideLen, numDims); + stride = intPow(paddedSideLen, 2); + size_t secondStride = intPow(paddedSideLen, 3); + + faceWeightedSum(firstElementPtr, stride, secondStride, f, amplitude, &val); + } else { + // (1101) cube centered point (ikl) + firstElementPtr += computeOffset(inL, inK, inJ+1, inI, paddedSideLen, numDims); + size_t secondStride = intPow(paddedSideLen, 2); + size_t thirdStride = intPow(paddedSideLen, 3); + + cubeWeightedSum(firstElementPtr, 1, secondStride, thirdStride, f, amplitude, &val); + } + + } else { + if (outI % 2 == 0) { + // (1110) cube centered point (jkl) + firstElementPtr += computeOffset(inL, inK, inJ, inI+1, paddedSideLen, numDims); + stride = paddedSideLen; + size_t secondStride = intPow(paddedSideLen, 2); + size_t thirdStride = intPow(paddedSideLen, 3); + + cubeWeightedSum(firstElementPtr, stride, secondStride, thirdStride, f, amplitude, &val); + } else { + // (1111) hyper-cube centered point (ijkl) + firstElementPtr += computeOffset(inL, inK, inJ, inI, paddedSideLen, numDims); + size_t secondStride = paddedSideLen; + size_t thirdStride = intPow(paddedSideLen, 2); + size_t fourthStride = intPow(paddedSideLen, 3); + + hyperCubeWeightedSum(firstElementPtr, 1, secondStride, thirdStride, fourthStride, f, amplitude, &val); + } + } + + } + + } + + size_t outputIndex = computeOffset(outL, outK, outJ, outI, outputSideLen, numDims); + outputArr[outputIndex] = val; + + } + } + + } + } + + free(paddedInputArr); +} + +// if vals are outside [-amplitude, amplitude], then set them to the boundary value +// *this function should do nothing* +static void +clampValsIntoRange(int64* arr, size_t n, uint64 amplitude) +{ + int64 maxBound = (int64)amplitude; + int64 minBound = -maxBound; + size_t i; + for (i = 0; i < n; i++) { + if (arr[i] < minBound) { + arr[i] = minBound; + } else if (arr[i] > maxBound) { + arr[i] = maxBound; + } + } +} + +static void +copyArraySubset(int64* inputArr, size_t inputSideLen, int 
numDims, int64* outputArr, size_t outputSideLen) +{ + size_t i, j, k, l; + switch(numDims) { + case 1: + memcpy(outputArr, inputArr, outputSideLen * sizeof(int64)); + break; + + case 2: + for (j = 0; j < outputSideLen; j++) { + for (i = 0; i < outputSideLen; i++) { + size_t outputIndex = computeOffset(0, 0, j, i, outputSideLen, 2); + size_t inputIndex = computeOffset(0, 0, j, i, inputSideLen, 2); + outputArr[outputIndex] = inputArr[inputIndex]; + } + } + + break; + + case 3: + for (k = 0; k < outputSideLen; k++) { + for (j = 0; j < outputSideLen; j++) { + for (i = 0; i < outputSideLen; i++) { + size_t outputIndex = computeOffset(0, k, j, i, outputSideLen, 3); + size_t inputIndex = computeOffset(0, k, j, i, inputSideLen, 3); + outputArr[outputIndex] = inputArr[inputIndex]; + } + } + } + + break; + + case 4: + for (l = 0; l < outputSideLen; l++) { + for (k = 0; k < outputSideLen; k++) { + for (j = 0; j < outputSideLen; j++) { + for (i = 0; i < outputSideLen; i++) { + size_t outputIndex = computeOffset(l, k, j, i, outputSideLen, 4); + size_t inputIndex = computeOffset(l, k, j, i, inputSideLen, 4); + outputArr[outputIndex] = inputArr[inputIndex]; + } + } + } + } + + break; + } +} + +// this will destroy (free) inputArr +static void +generateNRandInts(int64* inputArr, size_t inputSideLen, size_t minTotalElements, int numDims, uint64 amplitude, int64** outputArrPtr, size_t* outputSideLen, size_t* outputTotalLen) +{ + // parameters used for random noise + fixedPt f = {7, 0}; + fixedPt scaleFVal = {0, 0xaaaaaaaa}; + + int64* currArr = inputArr; + size_t currSideLen = inputSideLen; + size_t currTotalLen = intPow(inputSideLen, numDims); + + int64* nextArr = NULL; + size_t nextSideLen = 0, nextTotalLen = 0; + + while(currTotalLen < minTotalElements) { + nextSideLen = 2*currSideLen - 1; + nextTotalLen = intPow(nextSideLen, numDims); + + nextArr = malloc(nextTotalLen * sizeof(int64)); + + produceLargerNoisedArray(currArr, currSideLen, numDims, amplitude, &f, nextArr); + + 
free(currArr); + currArr = nextArr; + currSideLen = nextSideLen; + currTotalLen = nextTotalLen; + + // reduce random noise multiplier + multiply(&f, &scaleFVal, &f); + } + + // for safety (expected nop) + clampValsIntoRange(nextArr, nextTotalLen, amplitude); + + // initialize output data + *outputSideLen = nextSideLen; + *outputTotalLen = nextTotalLen; + *outputArrPtr = malloc(*outputTotalLen * sizeof(int64)); + + // store output data + copyArraySubset(nextArr, nextSideLen, numDims, *outputArrPtr, *outputSideLen); + + free(nextArr); +} + +static void +cast64ArrayTo32(int64* inputArr, size_t arrLen, int32* outputArr) +{ + size_t i; + for (i = 0; i < arrLen; i++) { + outputArr[i] = (int32)inputArr[i]; + } +} + +static void +convertIntArrToFloatArr(int64* inputArr, size_t arrLen, float* outputArr) +{ + size_t i; + for (i = 0; i < arrLen; i++) { + outputArr[i] = ldexpf((float)inputArr[i], -12); + } +} + +static void +convertIntArrToDoubleArr(int64* inputArr, size_t arrLen, double* outputArr) +{ + size_t i; + for (i = 0; i < arrLen; i++) { + outputArr[i] = ldexp((double)inputArr[i], -26); + } +} + +// generate array that will be initially fed into generateNRandInts() +static void +generateInitialArray(int64* initialVec, size_t initialVecLen, int numDims, uint64 amplitude, int64** outputArrPtr) +{ + size_t totalLen = computeTensorProduct(initialVec, initialVecLen, numDims, outputArrPtr); + + // compute signed amplitudes + int64 positiveAmp = (int64)amplitude; + int64 negativeAmp = -positiveAmp; + + // set non-zero values to signed amplitude + size_t i; + for (i = 0; i < totalLen; i++) { + if ((*outputArrPtr)[i] > 0) { + (*outputArrPtr)[i] = positiveAmp; + } else if ((*outputArrPtr)[i] < 0) { + (*outputArrPtr)[i] = negativeAmp; + } + } +} + +void +generateSmoothRandInts64(size_t minTotalElements, int numDims, int amplitudeExp, int64** outputArrPtr, size_t* outputSideLen, size_t* outputTotalLen) +{ + uint64 amplitude = ((uint64)1 << amplitudeExp) - 1; + + // initial vector 
for tensor product (will be scaled to amplitude) + size_t initialSideLen = 5; + int64* initialVec = malloc(initialSideLen * sizeof(int64)); + initialVec[0] = 0; + initialVec[1] = 1; + initialVec[2] = 0; + initialVec[3] = -1; + initialVec[4] = 0; + + // initial array (tensor product of initial vector, also scaled to amplitude) + int64* inputArr; + generateInitialArray(initialVec, initialSideLen, numDims, amplitude, &inputArr); + free(initialVec); + + resetRandGen(); + + // generate data (always done with int64) + // inputArr is free'd inside function + generateNRandInts(inputArr, initialSideLen, minTotalElements, numDims, amplitude, outputArrPtr, outputSideLen, outputTotalLen); +} + +void +generateSmoothRandInts32(size_t minTotalElements, int numDims, int amplitudeExp, int32** outputArr32Ptr, size_t* outputSideLen, size_t* outputTotalLen) +{ + int64* randArr64; + generateSmoothRandInts64(minTotalElements, numDims, amplitudeExp, &randArr64, outputSideLen, outputTotalLen); + + *outputArr32Ptr = calloc(*outputTotalLen, sizeof(int32)); + cast64ArrayTo32(randArr64, *outputTotalLen, *outputArr32Ptr); + + free(randArr64); +} + +void +generateSmoothRandFloats(size_t minTotalElements, int numDims, float** outputArrPtr, size_t* outputSideLen, size_t* outputTotalLen) +{ + int64* intArr; + generateSmoothRandInts64(minTotalElements, numDims, FLOAT_MANTISSA_BITS, &intArr, outputSideLen, outputTotalLen); + + *outputArrPtr = calloc(*outputTotalLen, sizeof(float)); + convertIntArrToFloatArr(intArr, *outputTotalLen, *outputArrPtr); + + free(intArr); +} + +void +generateSmoothRandDoubles(size_t minTotalElements, int numDims, double** outputArrPtr, size_t* outputSideLen, size_t* outputTotalLen) +{ + int64* intArr; + generateSmoothRandInts64(minTotalElements, numDims, DOUBLE_MANTISSA_BITS, &intArr, outputSideLen, outputTotalLen); + + *outputArrPtr = calloc(*outputTotalLen, sizeof(double)); + convertIntArrToDoubleArr(intArr, *outputTotalLen, *outputArrPtr); + + free(intArr); +} diff 
--git a/tests/utils/genSmoothRandNums.h b/tests/utils/genSmoothRandNums.h
new file mode 100644
index 00000000..9d43c72f
--- /dev/null
+++ b/tests/utils/genSmoothRandNums.h
@@ -0,0 +1,33 @@
+#ifndef GEN_SMOOTH_RAND_INTS_H
+#define GEN_SMOOTH_RAND_INTS_H
+
+#include "zfp/internal/zfp/types.h"
+
+// used to compute (square) array sizes
+size_t
+intPow(size_t base, int exponent);
+
+// a double pointer is passed because memory allocation
+// is taken care of within the functions
+
+// generate randomly correlated integers in range:
+// [-(2^amplitudeExp - 1), 2^amplitudeExp - 1] (64 bit)
+void
+generateSmoothRandInts64(size_t minTotalElements, int numDims, int amplitudeExp, int64** outputArr, size_t* outputSideLen, size_t* outputTotalLen);
+
+// generate randomly correlated integers in range:
+// [-(2^amplitudeExp - 1), 2^amplitudeExp - 1] (32 bit)
+void
+generateSmoothRandInts32(size_t minTotalElements, int numDims, int amplitudeExp, int32** outputArr32Ptr, size_t* outputSideLen, size_t* outputTotalLen);
+
+// generate randomly correlated floats in range:
+// [-(2^11), 2^11 - 2^(-12)]
+void
+generateSmoothRandFloats(size_t minTotalElements, int numDims, float** outputArrPtr, size_t* outputSideLen, size_t* outputTotalLen);
+
+// generate randomly correlated doubles in range:
+// [-(2^26), 2^26 - 2^(-26)]
+void
+generateSmoothRandDoubles(size_t minTotalElements, int numDims, double** outputArrPtr, size_t* outputSideLen, size_t* outputTotalLen);
+
+#endif
diff --git a/tests/utils/rand32.c b/tests/utils/rand32.c
new file mode 100644
index 00000000..61697395
--- /dev/null
+++ b/tests/utils/rand32.c
@@ -0,0 +1,43 @@
+#include
+#include "rand32.h"
+
+#define SEED 5
+
+// POSIX rand48
+#define MULTIPLIER 0x5deece66d
+#define INCREMENT 0xb
+#define MODULO ((uint64)1 << 48)
+#define MASK_31 (0x7fffffffu)
+
+static uint64 X;
+
+void
+resetRandGen()
+{
+  X = SEED;
+}
+
+// returns integer [0, 2^31 - 1]
+uint32
+nextUnsignedRand()
+{
+  X = (MULTIPLIER*X + INCREMENT) % MODULO;
+  return (uint32)((X >> 16) & MASK_31);
+}
+
+// returns integer [-(2^30), 2^30 - 1]
+int32
+nextSignedRandInt()
+{
+  return (int32)nextUnsignedRand() - 0x40000000;
+}
+
+// returns float [-(2^11), 2^11 - 2^(-12)]
+float
+nextSignedRandFlPt()
+{
+  // 23 bit signed number
+  uint32 uVal = (nextUnsignedRand() >> 7) & 0x00ffffff;
+  int32 sVal = (int32)uVal - 0x800000;
+  return ldexpf((float)sVal, -12);
+}
diff --git a/tests/utils/rand32.h b/tests/utils/rand32.h
new file mode 100644
index 00000000..a47328db
--- /dev/null
+++ b/tests/utils/rand32.h
@@ -0,0 +1,22 @@
+#ifndef RAND_32_H
+#define RAND_32_H
+
+#include "include/zfp/internal/zfp/types.h"
+
+// reset seed
+void
+resetRandGen();
+
+// returns integer [0, 2^31 - 1]
+uint32
+nextUnsignedRand();
+
+// returns integer [-(2^30), 2^30 - 1]
+int32
+nextSignedRandInt();
+
+// returns float [-(2^11), 2^11 - 2^(-12)]
+float
+nextSignedRandFlPt();
+
+#endif
diff --git a/tests/utils/rand64.c b/tests/utils/rand64.c
new file mode 100644
index 00000000..afa5c4c7
--- /dev/null
+++ b/tests/utils/rand64.c
@@ -0,0 +1,52 @@
+#include
+#include "rand64.h"
+
+#define SEED 5
+
+// https://nuclear.llnl.gov/CNP/rng/rngman/node4.html
+#define MULTIPLIER (2862933555777941757uLL)
+#define INCREMENT (3037000493uLL)
+
+#define MAX_RAND_63 (0x7fffffffffffffffuLL)
+
+static uint64 X;
+
+void
+resetRandGen()
+{
+  X = SEED;
+}
+
+// returns integer [0, 2^63 - 1]
+uint64
+nextUnsignedRand()
+{
+  // (mod 2^64)
+  X = MULTIPLIER*X + INCREMENT;
+  return (uint64)(X & MAX_RAND_63);
+}
+
+// returns integer [-(2^62), 2^62 - 1]
+int64
+nextSignedRandInt()
+{
+  uint64 uDisplace = (uint64)1 << 62;
+  return (int64)nextUnsignedRand() - (int64)uDisplace;
+}
+
+// returns double [-(2^26), 2^26 - 2^(-26)]
+double
+nextSignedRandFlPt()
+{
+  // 52 bit signed number
+  uint64 uVal = (nextUnsignedRand() >> 11) & 0x1fffffffffffff;
+  int64 sVal = (int64)uVal - 0x10000000000000;
+  return ldexp((double)sVal, -26);
+}
+
+// returns integer [0, 2^32 - 1]
+uint32
+nextRand32()
+{
+  return (uint32)(nextUnsignedRand() >> 31);
+}
diff --git a/tests/utils/rand64.h b/tests/utils/rand64.h
new file mode 100644
index 00000000..4c4ec161
--- /dev/null
+++ b/tests/utils/rand64.h
@@ -0,0 +1,26 @@
+#ifndef RAND_64_H
+#define RAND_64_H
+
+#include "include/zfp/internal/zfp/types.h"
+
+// reset seed
+void
+resetRandGen();
+
+// returns integer [0, 2^63 - 1]
+uint64
+nextUnsignedRand();
+
+// returns integer [-(2^62), 2^62 - 1]
+int64
+nextSignedRandInt();
+
+// returns double [-(2^26), 2^26 - 2^(-26)]
+double
+nextSignedRandFlPt();
+
+// returns integer [0, 2^32 - 1]
+uint32
+nextRand32();
+
+#endif
diff --git a/tests/utils/stridedOperations.c b/tests/utils/stridedOperations.c
new file mode 100644
index 00000000..3b7868a6
--- /dev/null
+++ b/tests/utils/stridedOperations.c
@@ -0,0 +1,133 @@
+#include
+#include "stridedOperations.h"
+
+// reversed array ([inputArrLen - 1], [inputArrLen - 2], ..., [1], [0])
+void
+reverseArray(void* inputArr, void* outputArr, size_t inputArrLen, zfp_type zfpType)
+{
+  const size_t elementSizeBytes = zfp_type_size(zfpType);
+
+  // move ptr to last element
+  inputArr = (char *)inputArr + elementSizeBytes * (inputArrLen - 1);
+
+  size_t i;
+  for (i = 0; i < inputArrLen; i++) {
+    memcpy(outputArr, inputArr, elementSizeBytes);
+
+    outputArr = (char *)outputArr + elementSizeBytes;
+    inputArr = (char *)inputArr - elementSizeBytes;
+  }
+}
+
+// interleaved array ([0], [0], [1], [1], [2], ...)
+void +interleaveArray(void* inputArr, void* outputArr, size_t inputArrLen, zfp_type zfpType) +{ + const size_t elementSizeBytes = zfp_type_size(zfpType); + + size_t i; + for (i = 0; i < inputArrLen; i++) { + memcpy(outputArr, inputArr, elementSizeBytes); + memcpy((char *)outputArr + elementSizeBytes, inputArr, elementSizeBytes); + + inputArr = (char *)inputArr + elementSizeBytes; + outputArr = (char *)outputArr + 2 * elementSizeBytes; + } +} + +int +permuteSquareArray(void* inputArr, void* outputArr, size_t sideLen, int dims, zfp_type zfpType) +{ + const size_t elementSizeBytes = zfp_type_size(zfpType); + + size_t i, j, k, l; + + switch(dims) { + case 4: + // permute ijkl lkji + for (l = 0; l < sideLen; l++) { + for (k = 0; k < sideLen; k++) { + for (j = 0; j < sideLen; j++) { + for (i = 0; i < sideLen; i++) { + size_t index = l*sideLen*sideLen*sideLen + k*sideLen*sideLen + j*sideLen + i; + size_t transposedIndex = i*sideLen*sideLen*sideLen + j*sideLen*sideLen + k*sideLen + l; + memcpy((char *)outputArr + elementSizeBytes * index, (char *)inputArr + elementSizeBytes * transposedIndex, elementSizeBytes); + } + } + } + } + break; + + case 3: + // permute ijk to kji + for (k = 0; k < sideLen; k++) { + for (j = 0; j < sideLen; j++) { + for (i = 0; i < sideLen; i++) { + size_t index = k*sideLen*sideLen + j*sideLen + i; + size_t transposedIndex = i*sideLen*sideLen + j*sideLen + k; + memcpy((char *)outputArr + elementSizeBytes * index, (char *)inputArr + elementSizeBytes * transposedIndex, elementSizeBytes); + } + } + } + break; + + case 2: + // permute ij to ji + for (j = 0; j < sideLen; j++) { + for (i = 0; i < sideLen; i++) { + size_t index = j*sideLen + i; + size_t transposedIndex = i*sideLen + j; + memcpy((char *)outputArr + elementSizeBytes * index, (char *)inputArr + elementSizeBytes * transposedIndex, elementSizeBytes); + } + } + break; + + // considered an error if requested to permute a 1 dimensional array + case 1: + default: + return 1; + } + + return 0; +} + 
+static void +completeStrides(int dims, size_t n[4], ptrdiff_t s[4]) +{ + int i; + for (i = 1; i < dims; i++) { + s[i] = s[i-1] * (ptrdiff_t)n[i-1]; + } +} + +void +getReversedStrides(int dims, size_t n[4], ptrdiff_t s[4]) +{ + s[0] = -1; + completeStrides(dims, n, s); +} + +void +getInterleavedStrides(int dims, size_t n[4], ptrdiff_t s[4]) +{ + s[0] = 2; + completeStrides(dims, n, s); +} + +void +getPermutedStrides(int dims, size_t n[4], ptrdiff_t s[4]) +{ + if (dims == 4) { + s[0] = (ptrdiff_t)(n[0] * n[1] * n[2]); + s[1] = (ptrdiff_t)(n[0] * n[1]); + s[2] = (ptrdiff_t)n[0]; + s[3] = 1; + } else if (dims == 3) { + s[0] = (ptrdiff_t)(n[0] * n[1]); + s[1] = (ptrdiff_t)n[0]; + s[2] = 1; + } else if (dims == 2) { + s[0] = (ptrdiff_t)n[0]; + s[1] = 1; + } +} diff --git a/tests/utils/stridedOperations.h b/tests/utils/stridedOperations.h new file mode 100644 index 00000000..7e997b33 --- /dev/null +++ b/tests/utils/stridedOperations.h @@ -0,0 +1,37 @@ +#ifndef STRIDED_OPERATIONS_H +#define STRIDED_OPERATIONS_H + +#include +#include "zfp.h" + +typedef enum { + AS_IS = 0, + PERMUTED = 1, + INTERLEAVED = 2, + REVERSED = 3, +} stride_config; + +// reversed array ([inputLen - 1], [inputLen - 2], ..., [1], [0]) +void +reverseArray(void* inputArr, void* outputArr, size_t inputArrLen, zfp_type zfpType); + +// interleaved array ([0], [0], [1], [1], [2], ...) +void +interleaveArray(void* inputArr, void* outputArr, size_t inputArrLen, zfp_type zfpType); + +// ijkl -> lkji, or for lower dims (ex. 
ij -> ji) +// returns 0 on success, 1 on failure +// (defined to fail if dims == 1) +int +permuteSquareArray(void* inputArr, void* outputArr, size_t sideLen, int dims, zfp_type zfpType); + +void +getReversedStrides(int dims, size_t n[4], ptrdiff_t s[4]); + +void +getInterleavedStrides(int dims, size_t n[4], ptrdiff_t s[4]); + +void +getPermutedStrides(int dims, size_t n[4], ptrdiff_t s[4]); + +#endif diff --git a/tests/utils/testMacros.h b/tests/utils/testMacros.h new file mode 100644 index 00000000..4791512d --- /dev/null +++ b/tests/utils/testMacros.h @@ -0,0 +1,39 @@ +// generate test function names containing macros +#define _catFuncStr2(x, y) x ## y +#define _catFunc2(x, y) _catFuncStr2(x, y) + +#define _catFuncStr3(x, y, z) x ## y ## z +#define _catFunc3(x, y, z) _catFuncStr3(x, y, z) + +#define _cat_cmocka_unit_test(x) cmocka_unit_test(x) +#define _cmocka_unit_test(x) _cat_cmocka_unit_test(x) + +#define _cat_cmocka_unit_test_setup_teardown(x, y, z) cmocka_unit_test_setup_teardown(x, y, z) +#define _cmocka_unit_test_setup_teardown(x, y, z) _cat_cmocka_unit_test_setup_teardown(x, y, z) + +#ifdef PRINT_CHECKSUMS + #include + #include "zfpChecksums.h" + + // for both, x is freshly computed checksum from current compression-lib implementation + // where-as y is the stored constant checksum + + // a triplet (key1, key2, value) is printed + // key1: identifies what kind of compression occurred, on what input, etc + // key2: identifies array dimensions + // value: checksum + // (macro substitutes "printf() && 0" because we want conditional to fail after executing printf) + #define ASSERT_EQ_CHECKSUM(dims, zfpType, computedChecksum, key1, key2) printf("{UINT64C(0x%" PRIx64 "), UINT64C(0x%" PRIx64 "), UINT64C(0x%" PRIx64 ")},\n", key1, key2, computedChecksum) + #define COMPARE_NEQ_CHECKSUM(dims, zfpType, computedChecksum, key1, key2) printf("{UINT64C(0x%" PRIx64 "), UINT64C(0x%" PRIx64 "), UINT64C(0x%" PRIx64 ")},\n", key1, key2, computedChecksum) && 0 +#else + 
#define ASSERT_EQ_CHECKSUM(dims, zfpType, computedChecksum, key1, key2) assert_int_equal(computedChecksum, getChecksumByKey(dims, zfpType, key1, key2)) + #define COMPARE_NEQ_CHECKSUM(dims, zfpType, computedChecksum, key1, key2) (computedChecksum != getChecksumByKey(dims, zfpType, key1, key2)) +#endif + +// for condensing repeat tests across different dimensionalities into singular tests +#define _repeat_arg(x, n) _repeatN(x, n) +#define _repeatN(x, n) _repeat ## n ( x ) +#define _repeat1(x) x +#define _repeat2(x) x, x +#define _repeat3(x) x, x, x +#define _repeat4(x) x, x, x, x diff --git a/tests/utils/zfpChecksums.c b/tests/utils/zfpChecksums.c new file mode 100644 index 00000000..f9b1f737 --- /dev/null +++ b/tests/utils/zfpChecksums.c @@ -0,0 +1,171 @@ +#include "zfp/internal/zfp/types.h" +#include "zfpChecksums.h" + +#define NUM_INT_CHECKSUMS 19 +#define NUM_FL_PT_CHECKSUMS 35 + +#define FAILED_CHECKSUM (UINT64C(0xffffffffffffffff)) + +#ifndef PRINT_CHECKSUMS + +// raw checksums as static arrays +#include "constants/checksums/1dDouble.h" +#include "constants/checksums/1dFloat.h" +#include "constants/checksums/1dInt32.h" +#include "constants/checksums/1dInt64.h" + +#include "constants/checksums/2dDouble.h" +#include "constants/checksums/2dFloat.h" +#include "constants/checksums/2dInt32.h" +#include "constants/checksums/2dInt64.h" + +#include "constants/checksums/3dDouble.h" +#include "constants/checksums/3dFloat.h" +#include "constants/checksums/3dInt32.h" +#include "constants/checksums/3dInt64.h" + +#include "constants/checksums/4dDouble.h" +#include "constants/checksums/4dFloat.h" +#include "constants/checksums/4dInt32.h" +#include "constants/checksums/4dInt64.h" + +// [dimensionality][zfp_type] +static const checksum_tuples* checksums[4][4] = { + { + _1dInt32Checksums, + _1dInt64Checksums, + _1dFloatChecksums, + _1dDoubleChecksums, + }, + { + _2dInt32Checksums, + _2dInt64Checksums, + _2dFloatChecksums, + _2dDoubleChecksums, + }, + { + _3dInt32Checksums, + 
_3dInt64Checksums, + _3dFloatChecksums, + _3dDoubleChecksums, + }, + { + _4dInt32Checksums, + _4dInt64Checksums, + _4dFloatChecksums, + _4dDoubleChecksums, + }, +}; + +static const checksum_tuples* +getChecksumPtr(int dims, zfp_type type) +{ + return checksums[dims - 1][type - zfp_type_int32]; +} + +#endif + +void +computeKeyOriginalInput(test_type tt, size_t n[4], uint64* key1, uint64* key2) +{ + computeKey(tt, ORIGINAL_INPUT, n, zfp_mode_null, 0, key1, key2); +} + +void +computeKey(test_type tt, subject sjt, size_t n[4], zfp_mode mode, int miscParam, uint64* key1, uint64* key2) +{ + uint64 result = 0; + + // block-level test (low-level api: full/partial block) vs calling zfp_compress/decompress(), 2 bits + result += (uint64)tt; + + // subject is 2 bits (3 possible values) + // when subject is ORIGINAL_ARRAY, no compression applied, zeroes passed in for mode, miscParam + result <<= 2; + result += (uint64)sjt; + + // mode is 3 bits + // passing zfp_mode_null implies no compression applied + result <<= 3; + result += mode; + + // miscParam is either specialValueIndex (for block tests), or compressParamNum (for endtoend tests) + // reserve 4 bits + // specialValueIndex is in [0, 9] inclusive (testing 10 different special values) + // compressParamNum is in [0, 2] inclusive (testing 3 compression parameters, per fixed-* mode) + result <<= 4; + result += miscParam; + + *key1 = result; + + // key2 stores dimensions only (64 bits total, like zfp_field_metadata() + result = 0; + + uint dims = n[1] ? n[2] ? n[3] ? 
4 : 3 : 2 : 1; + switch (dims) { + case 1: + result += n[0] - 1; + break; + + case 2: + result += n[0] - 1; + result <<= 24; + result += n[1] - 1; + break; + + case 3: + result += n[0] - 1; + result <<= 16; + result += n[1] - 1; + result <<= 16; + result += n[2] - 1; + break; + + case 4: + result += n[0] - 1; + result <<= 12; + result += n[1] - 1; + result <<= 12; + result += n[2] - 1; + result <<= 12; + result += n[3] - 1; + break; + } + + *key2 = result; +} + +uint64 +getChecksumByKey(int dims, zfp_type type, uint64 key1, uint64 key2) +{ +#ifndef PRINT_CHECKSUMS + const checksum_tuples* keyChecksumsArr = getChecksumPtr(dims, type); + + size_t arrLen; + switch (type) { + case zfp_type_int32: + case zfp_type_int64: + arrLen = NUM_INT_CHECKSUMS; + break; + + case zfp_type_float: + case zfp_type_double: + arrLen = NUM_FL_PT_CHECKSUMS; + break; + + default: + return FAILED_CHECKSUM; + } + + size_t i; + for (i = 0; i < arrLen; i++) { + if (keyChecksumsArr[i].key1 == key1 && keyChecksumsArr[i].key2 == key2) { + return keyChecksumsArr[i].checksum; + } + } + + return FAILED_CHECKSUM; +#else + return FAILED_CHECKSUM; +#endif +} diff --git a/tests/utils/zfpChecksums.h b/tests/utils/zfpChecksums.h new file mode 100644 index 00000000..e3df1077 --- /dev/null +++ b/tests/utils/zfpChecksums.h @@ -0,0 +1,35 @@ +#ifndef ZFP_CHECKSUMS_H +#define ZFP_CHECKSUMS_H + +#include "zfp.h" + +typedef enum { + BLOCK_FULL_TEST = 0, + BLOCK_PARTIAL_TEST = 1, + ARRAY_TEST = 2, +} test_type; + +typedef enum { + ORIGINAL_INPUT = 0, + COMPRESSED_BITSTREAM = 1, + DECOMPRESSED_ARRAY = 2, +} subject; + +// key1 holds data about test type +// key2 holds dimension lengths +typedef struct { + uint64 key1; + uint64 key2; + uint64 checksum; +} checksum_tuples; + +void +computeKeyOriginalInput(test_type tt, size_t n[4], uint64* key1, uint64* key2); + +void +computeKey(test_type tt, subject sjt, size_t n[4], zfp_mode mode, int miscParam, uint64* key1, uint64* key2); + +uint64 +getChecksumByKey(int dims, 
zfp_type type, uint64 key1, uint64 key2); + +#endif diff --git a/tests/utils/zfpCompressionParams.c b/tests/utils/zfpCompressionParams.c new file mode 100644 index 00000000..073b5a28 --- /dev/null +++ b/tests/utils/zfpCompressionParams.c @@ -0,0 +1,20 @@ +#include +#include "utils/zfpCompressionParams.h" + +int +computeFixedPrecisionParam(int param) +{ + return 1u << (param + 3); +} + +size_t +computeFixedRateParam(int param) +{ + return (size_t)(1u << (param + 3)); +} + +double +computeFixedAccuracyParam(int param) +{ + return ldexp(1.0, -(1u << param)); +} diff --git a/tests/utils/zfpCompressionParams.h b/tests/utils/zfpCompressionParams.h new file mode 100644 index 00000000..42cead7e --- /dev/null +++ b/tests/utils/zfpCompressionParams.h @@ -0,0 +1,15 @@ +#ifndef ZFP_COMPRESSION_PARAMS_H +#define ZFP_COMPRESSION_PARAMS_H + +#include + +int +computeFixedPrecisionParam(int param); + +size_t +computeFixedRateParam(int param); + +double +computeFixedAccuracyParam(int param); + +#endif diff --git a/tests/utils/zfpHash.c b/tests/utils/zfpHash.c new file mode 100644 index 00000000..2e670637 --- /dev/null +++ b/tests/utils/zfpHash.c @@ -0,0 +1,126 @@ +#include "zfpHash.h" + +#define MASK_32 (0xffffffff) + +// Jenkins one-at-a-time hash; see http://www.burtleburtle.net/bob/hash/doobs.html + +static void +hashValue(uint32 val, uint32* h) +{ + *h += val; + *h += *h << 10; + *h ^= *h >> 6; +} + +static uint32 +hashFinish(uint32 h) +{ + h += h << 3; + h ^= h >> 11; + h += h << 15; + + return h; +} + +static void +hashValue64(uint64 val, uint32* h1, uint32* h2) +{ + uint32 val1 = (uint32)(val & MASK_32); + hashValue(val1, h1); + + uint32 val2 = (uint32)((val >> 32) & MASK_32); + hashValue(val2, h2); +} + +uint64 +hashBitstream(uint64* ptrStart, size_t bufsizeBytes) +{ + size_t nx = bufsizeBytes / sizeof(uint64); + + uint32 h1 = 0; + uint32 h2 = 0; + + for (; nx--; ptrStart++) { + hashValue64(*ptrStart, &h1, &h2); + } + + uint64 result1 = (uint64)hashFinish(h1); + uint64 
result2 = (uint64)hashFinish(h2); + + return result1 + (result2 << 32); +} + +// hash 32-bit valued arrays (int32, float) + +uint32 +hashArray32(const uint32* arr, size_t nx, ptrdiff_t sx) +{ + uint32 h = 0; + + for (; nx--; arr += sx) { + hashValue(*arr, &h); + } + + return hashFinish(h); +} + +// unused n[] entries are 0 +uint32 +hashStridedArray32(const uint32* arr, size_t n[4], ptrdiff_t s[4]) +{ + uint32 h = 0; + + size_t i, j, k, l; + for (l = 0; l < (n[3] ? n[3] : 1); arr += (s[3] - (ptrdiff_t)n[2]*s[2]), l++) { + for (k = 0; k < (n[2] ? n[2] : 1); arr += (s[2] - (ptrdiff_t)n[1]*s[1]), k++) { + for (j = 0; j < (n[1] ? n[1] : 1); arr += (s[1] - (ptrdiff_t)n[0]*s[0]), j++) { + for (i = 0; i < (n[0] ? n[0] : 1); arr += s[0], i++) { + hashValue(*arr, &h); + } + } + } + } + + return hashFinish(h); +} + +// hash 64-bit valued arrays (int64, double) + +uint64 +hashArray64(const uint64* arr, size_t nx, ptrdiff_t sx) +{ + uint32 h1 = 0; + uint32 h2 = 0; + + for (; nx--; arr += sx) { + hashValue64(*arr, &h1, &h2); + } + + uint64 result1 = (uint64)hashFinish(h1); + uint64 result2 = (uint64)hashFinish(h2); + + return result1 + (result2 << 32); +} + +// unused n[] entries are 0 +uint64 +hashStridedArray64(const uint64* arr, size_t n[4], ptrdiff_t s[4]) +{ + uint32 h1 = 0; + uint32 h2 = 0; + + size_t i, j, k, l; + for (l = 0; l < (n[3] ? n[3] : 1); arr += (s[3] - (ptrdiff_t)n[2]*s[2]), l++) { + for (k = 0; k < (n[2] ? n[2] : 1); arr += (s[2] - (ptrdiff_t)n[1]*s[1]), k++) { + for (j = 0; j < (n[1] ? n[1] : 1); arr += (s[1] - (ptrdiff_t)n[0]*s[0]), j++) { + for (i = 0; i < (n[0] ? 
n[0] : 1); arr += s[0], i++) { + hashValue64(*arr, &h1, &h2); + } + } + } + } + uint64 result1 = (uint64)hashFinish(h1); + uint64 result2 = (uint64)hashFinish(h2); + + return result1 + (result2 << 32); +} diff --git a/tests/utils/zfpHash.h b/tests/utils/zfpHash.h new file mode 100644 index 00000000..5718050f --- /dev/null +++ b/tests/utils/zfpHash.h @@ -0,0 +1,26 @@ +#ifndef ZFP_HASH_H +#define ZFP_HASH_H + +#include +#include "include/zfp/internal/zfp/types.h" + +uint64 +hashBitstream(uint64* ptrStart, size_t bufsizeBytes); + +// hash 32-bit valued arrays (int32, float) + +uint32 +hashArray32(const uint32* arr, size_t nx, ptrdiff_t sx); + +uint32 +hashStridedArray32(const uint32* arr, size_t n[4], ptrdiff_t s[4]); + +// hash 64-bit valued arrays (int64, double) + +uint64 +hashArray64(const uint64* arr, size_t nx, ptrdiff_t sx); + +uint64 +hashStridedArray64(const uint64* arr, size_t n[4], ptrdiff_t s[4]); + +#endif diff --git a/tests/utils/zfpTimer.c b/tests/utils/zfpTimer.c new file mode 100644 index 00000000..1acfc596 --- /dev/null +++ b/tests/utils/zfpTimer.c @@ -0,0 +1,56 @@ +#include "zfpTimer.h" +#include + +struct zfp_timer { +#if defined(__unix__) || defined(_WIN32) + clock_t timeStart, timeEnd; +#elif defined(__MACH__) + uint64_t timeStart, timeEnd; +#endif +}; + +zfp_timer* +zfp_timer_alloc() +{ + return malloc(sizeof(zfp_timer)); +} + +void +zfp_timer_free(zfp_timer* timer) { + free(timer); +} + +int +zfp_timer_start(zfp_timer* timer) +{ +#if defined(__unix__) || defined(_WIN32) + timer->timeStart = clock(); +#elif defined(__MACH__) + timer->timeStart = mach_absolute_time(); +#else + return 1; +#endif + return 0; +} + +double +zfp_timer_stop(zfp_timer* timer) +{ + double time; + + // stop timer, compute elapsed time +#if defined(__unix__) || defined(_WIN32) + timer->timeEnd = clock(); + time = (double)((timer->timeEnd) - (timer->timeStart)) / CLOCKS_PER_SEC; +#elif defined(__MACH__) + timer->timeEnd = mach_absolute_time(); + + mach_timebase_info_data_t 
tb = {0}; + mach_timebase_info(&tb); + double timebase = tb.numer / tb.denom; + time = ((timer->timeEnd) - (timer->timeStart)) * timebase * (1E-9); +#endif + + return time; +} + diff --git a/tests/utils/zfpTimer.h b/tests/utils/zfpTimer.h new file mode 100644 index 00000000..71c425ff --- /dev/null +++ b/tests/utils/zfpTimer.h @@ -0,0 +1,24 @@ +#ifndef ZFP_TIMER_H +#define ZFP_TIMER_H + +#if defined(__unix__) || defined(_WIN32) + #include <time.h> +#elif defined(__MACH__) + #include <mach/mach_time.h> +#endif + +typedef struct zfp_timer zfp_timer; + +zfp_timer* +zfp_timer_alloc(); + +void +zfp_timer_free(zfp_timer* timer); + +int +zfp_timer_start(zfp_timer* timer); + +double +zfp_timer_stop(zfp_timer* timer); + +#endif diff --git a/travis.sh b/travis.sh deleted file mode 100755 index e73383cd..00000000 --- a/travis.sh +++ /dev/null @@ -1,60 +0,0 @@ -#!/usr/bin/env sh -set -e - -# pass additional args in $1 (starting with whitespace character) -run_all () { - run_all_cmd="ctest -V -C Debug -DC_STANDARD=${C_STANDARD:-99} -DCXX_STANDARD=${CXX_STANDARD:-98} -S \"$TRAVIS_BUILD_DIR/cmake/travis.cmake\"" - eval "${run_all_cmd}$1" -} - -mkdir build -cd build - -# technically, flags are passed on to cmake/* and actually set there -BUILD_FLAGS="" - -if [ -n "${COVERAGE}" ]; then - # build (linux) - - BUILD_FLAGS="$BUILD_FLAGS -DBUILD_UTILITIES=ON" - BUILD_FLAGS="$BUILD_FLAGS -DBUILD_EXAMPLES=ON" - BUILD_FLAGS="$BUILD_FLAGS -DBUILD_CFP=ON" - BUILD_FLAGS="$BUILD_FLAGS -DBUILD_ZFPY=ON" - BUILD_FLAGS="$BUILD_FLAGS -DBUILD_ZFORP=ON" - BUILD_FLAGS="$BUILD_FLAGS -DZFP_WITH_ALIGNED_ALLOC=ON" - BUILD_FLAGS="$BUILD_FLAGS -DBUILD_OPENMP=ON" - BUILD_FLAGS="$BUILD_FLAGS -DBUILD_CUDA=OFF" - BUILD_FLAGS="$BUILD_FLAGS -DWITH_COVERAGE=ON" - - run_all "$BUILD_FLAGS" -else - # build/test without OpenMP, with CFP (and custom namespace), with zfPy, with Fortran (linux only) - if [[ "$OSTYPE" == "darwin"* ]]; then - BUILD_ZFORP=OFF - else - BUILD_ZFORP=ON - fi - - BUILD_FLAGS="$BUILD_FLAGS -DBUILD_UTILITIES=ON" -
BUILD_FLAGS="$BUILD_FLAGS -DBUILD_EXAMPLES=ON" - BUILD_FLAGS="$BUILD_FLAGS -DBUILD_CFP=ON" - BUILD_FLAGS="$BUILD_FLAGS -DCFP_NAMESPACE=cfp2" - BUILD_FLAGS="$BUILD_FLAGS -DBUILD_ZFPY=ON" - BUILD_FLAGS="$BUILD_FLAGS -DBUILD_ZFORP=$BUILD_ZFORP" - BUILD_FLAGS="$BUILD_FLAGS -DZFP_WITH_ALIGNED_ALLOC=ON" - BUILD_FLAGS="$BUILD_FLAGS -DBUILD_OPENMP=OFF" - BUILD_FLAGS="$BUILD_FLAGS -DBUILD_CUDA=OFF" - run_all "$BUILD_FLAGS" - - rm -rf ./* ; - - # if OpenMP available, start a 2nd build with it - if cmake ../tests/ci-utils/ ; then - rm -rf ./* ; - - # build/test with OpenMP - BUILD_FLAGS="" - BUILD_FLAGS="$BUILD_FLAGS -DBUILD_OPENMP=ON" - run_all "$BUILD_FLAGS" - fi -fi diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt index a960d2c9..1aa7a930 100644 --- a/utils/CMakeLists.txt +++ b/utils/CMakeLists.txt @@ -1,6 +1,16 @@ add_executable(zfpcmd zfp.c) -set_property(TARGET zfpcmd PROPERTY OUTPUT_NAME zfp) + +# protect against LNK1114: cannot overwrite the original file 'lib/Release/zfp.lib'; error code 32; +# rationale: linker can't handle the case of an executable file having the same name as a library file +if(NOT MSVC) + set_property(TARGET zfpcmd PROPERTY OUTPUT_NAME zfp) +endif() target_link_libraries(zfpcmd zfp) if(HAVE_LIBM_MATH) target_link_libraries(zfpcmd m) endif() + +if(BUILD_UTILITIES) + install(TARGETS zfpcmd + DESTINATION "${CMAKE_INSTALL_BINDIR}") +endif() diff --git a/utils/Makefile b/utils/Makefile index 50a40ce9..dc7ef3e9 100644 --- a/utils/Makefile +++ b/utils/Makefile @@ -1,12 +1,14 @@ include ../Config TARGET = ../bin/zfp +INCS = -I../include +LIBS = -L../lib -lzfp $(LDFLAGS) -lm all: $(TARGET) $(TARGET): zfp.c ../lib/$(LIBZFP) mkdir -p ../bin - $(CC) $(CFLAGS) zfp.c -L../lib -lzfp -lm -o $(TARGET) + $(CC) $(CFLAGS) $(INCS) zfp.c $(LIBS) -o $(TARGET) clean: rm -f $(TARGET) fields.o diff --git a/utils/zfp.c b/utils/zfp.c index 97a621f5..1984532f 100644 --- a/utils/zfp.c +++ b/utils/zfp.c @@ -5,7 +5,7 @@ #include #include #include "zfp.h" -#include 
"zfp/macros.h" +#include "zfp/internal/zfp/macros.h" /* File I/O is done using the following combinations of i, o, s, and z: @@ -80,7 +80,7 @@ print_error(const void* fin, const void* fout, zfp_type type, size_t n) } static void -usage() +usage(void) { fprintf(stderr, "%s\n", zfp_version_string); fprintf(stderr, "Usage: zfp \n"); @@ -137,10 +137,10 @@ int main(int argc, char* argv[]) zfp_type type = zfp_type_none; size_t typesize = 0; uint dims = 0; - uint nx = 0; - uint ny = 0; - uint nz = 0; - uint nw = 0; + size_t nx = 0; + size_t ny = 0; + size_t nz = 0; + size_t nw = 0; size_t count = 0; double rate = 0; uint precision = 0; @@ -149,9 +149,9 @@ int main(int argc, char* argv[]) uint maxbits = ZFP_MAX_BITS; uint maxprec = ZFP_MAX_PREC; int minexp = ZFP_MIN_EXP; - int header = 0; - int quiet = 0; - int stats = 0; + zfp_bool header = zfp_false; + zfp_bool quiet = zfp_false; + zfp_bool stats = zfp_false; char* inpath = 0; char* zfppath = 0; char* outpath = 0; @@ -181,31 +181,31 @@ int main(int argc, char* argv[]) usage(); switch (argv[i][1]) { case '1': - if (++i == argc || sscanf(argv[i], "%u", &nx) != 1) + if (++i == argc || sscanf(argv[i], "%zu", &nx) != 1) usage(); ny = nz = nw = 1; dims = 1; break; case '2': - if (++i == argc || sscanf(argv[i], "%u", &nx) != 1 || - ++i == argc || sscanf(argv[i], "%u", &ny) != 1) + if (++i == argc || sscanf(argv[i], "%zu", &nx) != 1 || + ++i == argc || sscanf(argv[i], "%zu", &ny) != 1) usage(); nz = nw = 1; dims = 2; break; case '3': - if (++i == argc || sscanf(argv[i], "%u", &nx) != 1 || - ++i == argc || sscanf(argv[i], "%u", &ny) != 1 || - ++i == argc || sscanf(argv[i], "%u", &nz) != 1) + if (++i == argc || sscanf(argv[i], "%zu", &nx) != 1 || + ++i == argc || sscanf(argv[i], "%zu", &ny) != 1 || + ++i == argc || sscanf(argv[i], "%zu", &nz) != 1) usage(); nw = 1; dims = 3; break; case '4': - if (++i == argc || sscanf(argv[i], "%u", &nx) != 1 || - ++i == argc || sscanf(argv[i], "%u", &ny) != 1 || - ++i == argc || sscanf(argv[i], 
"%u", &nz) != 1 || - ++i == argc || sscanf(argv[i], "%u", &nw) != 1) + if (++i == argc || sscanf(argv[i], "%zu", &nx) != 1 || + ++i == argc || sscanf(argv[i], "%zu", &ny) != 1 || + ++i == argc || sscanf(argv[i], "%zu", &nz) != 1 || + ++i == argc || sscanf(argv[i], "%zu", &nw) != 1) usage(); dims = 4; break; @@ -229,7 +229,7 @@ int main(int argc, char* argv[]) type = zfp_type_float; break; case 'h': - header = 1; + header = zfp_true; break; case 'i': if (++i == argc) @@ -247,7 +247,7 @@ int main(int argc, char* argv[]) mode = 'p'; break; case 'q': - quiet = 1; + quiet = zfp_true; break; case 'r': if (++i == argc || sscanf(argv[i], "%lf", &rate) != 1) @@ -258,7 +258,7 @@ int main(int argc, char* argv[]) mode = 'R'; break; case 's': - stats = 1; + stats = zfp_true; break; case 't': if (++i == argc) @@ -307,7 +307,7 @@ int main(int argc, char* argv[]) } typesize = zfp_type_size(type); - count = (size_t)nx * (size_t)ny * (size_t)nz * (size_t)nw; + count = nx * ny * nz * nw; /* make sure one of the array dimensions is not zero */ if (!count && dims) { @@ -456,7 +456,7 @@ int main(int argc, char* argv[]) zfp_stream_set_precision(zfp, precision); break; case 'r': - zfp_stream_set_rate(zfp, rate, type, dims, 0); + zfp_stream_set_rate(zfp, rate, type, dims, zfp_false); break; case 'c': if (!maxbits) @@ -556,22 +556,16 @@ int main(int argc, char* argv[]) return EXIT_FAILURE; } type = field->type; - switch (type) { - case zfp_type_float: - typesize = sizeof(float); - break; - case zfp_type_double: - typesize = sizeof(double); - break; - default: - fprintf(stderr, "unsupported type\n"); - return EXIT_FAILURE; + typesize = zfp_type_size(type); + if (!typesize) { + fprintf(stderr, "unsupported type\n"); + return EXIT_FAILURE; } nx = MAX(field->nx, 1u); ny = MAX(field->ny, 1u); nz = MAX(field->nz, 1u); nw = MAX(field->nw, 1u); - count = (size_t)nx * (size_t)ny * (size_t)nz * (size_t)nw; + count = nx * ny * nz * nw; } /* allocate memory for decompressed data */ @@ -616,7 +610,7 @@ 
int main(int argc, char* argv[]) /* print compression and error statistics */ if (!quiet) { const char* type_name[] = { "int32", "int64", "float", "double" }; - fprintf(stderr, "type=%s nx=%u ny=%u nz=%u nw=%u", type_name[type - zfp_type_int32], nx, ny, nz, nw); + fprintf(stderr, "type=%s nx=%zu ny=%zu nz=%zu nw=%zu", type_name[type - zfp_type_int32], nx, ny, nz, nw); fprintf(stderr, " raw=%lu zfp=%lu ratio=%.3g rate=%.4g", (unsigned long)rawsize, (unsigned long)zfpsize, (double)rawsize / zfpsize, CHAR_BIT * (double)zfpsize / count); if (stats) print_error(fi, fo, type, count); diff --git a/zfp-config-version.cmake.in b/zfp-config-version.cmake.in index 4a77db0a..44932702 100644 --- a/zfp-config-version.cmake.in +++ b/zfp-config-version.cmake.in @@ -1,6 +1,8 @@ set(PACKAGE_VERSION_MAJOR @ZFP_VERSION_MAJOR@) set(PACKAGE_VERSION_MINOR @ZFP_VERSION_MINOR@) set(PACKAGE_VERSION_PATCH @ZFP_VERSION_PATCH@) +set(PACKAGE_VERSION_TWEAK @ZFP_VERSION_TWEAK@) + set(PACKAGE_VERSION @ZFP_VERSION@) # Check whether the requested PACKAGE_FIND_VERSION is compatible diff --git a/zfp-config.cmake.in b/zfp-config.cmake.in index 642f6178..c9809654 100644 --- a/zfp-config.cmake.in +++ b/zfp-config.cmake.in @@ -3,10 +3,19 @@ # It defines the following variables # ZFP_INCLUDE_DIRS - include directories for zfp # ZFP_LIBRARIES - libraries to link against +# ZFP_WITH_OPENMP - indicates if the zfp library has been built with OpenMP support +# ZFP_WITH_CUDA - indicates if the zfp library has been built with CUDA support +# ZFP_CFP_ENABLED - indicates if the cfp library has been built # # And the following imported targets: # zfp::zfp # +# If cfp is enabled the following variables are also defined +# CFP_INCLUDE_DIRS - include directories for cfp +# CFP_LIBRARIES - libraries to link against (cfp only) +# +# As well as the following imported targets: +# zfp::cfp include("${CMAKE_CURRENT_LIST_DIR}/zfp-config-version.cmake") @@ -16,9 +25,20 @@
find_package_handle_standard_args(${CMAKE_FIND_PACKAGE_NAME} CONFIG_MODE) if(NOT TARGET zfp::zfp) include("${CMAKE_CURRENT_LIST_DIR}/zfp-targets.cmake") + set(ZFP_LIBRARIES "zfp::zfp") + get_target_property(ZFP_INCLUDE_DIRS zfp::zfp INTERFACE_INCLUDE_DIRECTORIES) +endif() + +set(ZFP_CFP_ENABLED @BUILD_CFP@) +if(ZFP_CFP_ENABLED AND NOT TARGET zfp::cfp) + include("${CMAKE_CURRENT_LIST_DIR}/cfp-targets.cmake") + set(CFP_LIBRARIES "zfp::cfp") + get_target_property(CFP_INCLUDE_DIRS zfp::cfp INTERFACE_INCLUDE_DIRECTORIES) +endif() + +set(ZFP_WITH_OPENMP @ZFP_WITH_OPENMP@) +if(ZFP_WITH_OPENMP) + find_package(OpenMP REQUIRED COMPONENTS C) endif() -set(ZFP_LIBRARIES zfp::zfp) -set(ZFP_INCLUDE_DIRS - $ -) +set(ZFP_WITH_CUDA @ZFP_WITH_CUDA@) From 56ee236a64b580a823d0fcac54605907a73b18fd Mon Sep 17 00:00:00 2001 From: Thomas VINCENT Date: Fri, 19 Jan 2024 09:29:52 +0100 Subject: [PATCH 3/3] update information of embedded versions --- doc/information.rst | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/doc/information.rst b/doc/information.rst index 7ab33edf..9d263bc0 100644 --- a/doc/information.rst +++ b/doc/information.rst @@ -71,12 +71,12 @@ HDF5 compression filters and compression libraries sources were obtained from: (commit `d48f960 `_) using LZ4. * `bitshuffle plugin `_ (v0.5.1) using LZ4 and ZStd. -* bzip2 plugin (from `PyTables `_ v3.8.0) +* bzip2 plugin (from `PyTables `_ v3.9.2) using `BZip2 `_ (v1.0.8). * `hdf5-blosc plugin `_ (v1.0.0) using `c-blosc `_ (v1.21.5), LZ4, Snappy, ZLib and ZStd. -* hdf5-blosc2 plugin (from `PyTables `_ v3.9.2.dev0, commit `3ba4e78 `_) - using `c-blosc2 `_ (v2.11.1), LZ4, ZLib and ZStd. +* hdf5-blosc2 plugin (from `PyTables `_ v3.9.2) + using `c-blosc2 `_ (v2.12.0), LZ4, ZLib and ZStd. * `FCIDECOMP plugin `_ (v1.0.2) using `CharLS `_ (1.x branch, commit `25160a4 `_). 
@@ -87,15 +87,15 @@ HDF5 compression filters and compression libraries sources were obtained from: (commit `4bbe9df7e4bcb `_) using `SZ3 `_ and ZStd. * `HDF5-ZFP plugin `_ (v1.1.1) - using `zfp `_ (v1.0.0). + using `zfp `_ (v1.0.1). * `HDF5Plugin-Zstandard `_ (commit `d5afdb5 `_) using ZStd. Sources of compression libraries shared accross multiple filters were obtained from: -* `LZ4 v1.9.4 `_ +* `LZ4 v1.9.4 `_ * `Snappy v1.1.10 `_ -* `ZStd v1.5.5 `_ +* `ZStd v1.5.5 `_ * `ZLib v1.2.13 `_ When compiled with Intel IPP, the LZ4 compression library is replaced with `LZ4 v1.9.3 `_ patched with a patch from Intel IPP 2021.7.0.