From c3cc3a26721486345f87de5d64d6f4cafadbfe42 Mon Sep 17 00:00:00 2001
From: Greg Eisenhauer <eisen@cc.gatech.edu>
Date: Wed, 30 Oct 2024 07:20:12 -0400
Subject: [PATCH] Start developer_docs directory with some content (#4385)

---
 developer_docs/bp5format.md                | 310 +++++++++++++++++++++
 developer_docs/bp5reader.md                | 307 ++++++++++++++++++++
 source/adios2/toolkit/format/bp5/BP5Base.h | 269 +-----------------
 3 files changed, 622 insertions(+), 264 deletions(-)
 create mode 100644 developer_docs/bp5format.md
 create mode 100644 developer_docs/bp5reader.md

diff --git a/developer_docs/bp5format.md b/developer_docs/bp5format.md
new file mode 100644
index 0000000000..9cfce39b8e
--- /dev/null
+++ b/developer_docs/bp5format.md
@@ -0,0 +1,310 @@
+# BP5 Metadata Marshaling, writer-side focus
+
+BP5 Metadata Marshalling is based upon FFS, which provides the ability
+to serialize a C-style pointer-based data structure (starting with a
+base struct) and to deserialize it in-place on the receiving side.
+This is what we'll do to encode BP5 Metadata, create a custom C-style
+struct on the writer side and then use FFS to make that same struct
+available to the reader.
+
+Normally, in order to use FFS, an application must fully describe
+the base structure using an FMFieldList, where each element
+describes a field in the structure, including the field's name,
+basic type (integer, float, etc.), size and offset from the start
+of the structure.  In "normal" scenarios, like in SST, this is
+straightforward because we're describing a structure that exists
+at compile-time and all of those things are compile-time static.
+However, ADIOS metadata represents information about variables
+that we don't know about until run-time, so if we're going to use
+FFS here, things have to be a bit more dynamic.  In particular,
+we'll represent ADIOS metadata with a "virtual" structure, one
+whose description we'll construct on the fly and which will only
+ever exist virtually, making up offsets as we go.  We just have to
+be careful about keeping things aligned appropriately because we
+want this to land on the receiver and be appropriately aligned
+there.  (Normally the compiler takes care of this, but this
+virtual structure is never seen by a compiler, so we're doing it.)
+The field name that we specify to FFS is also important because we
+use it to communicate a lot of information between writer and
+reader.  While it always contains the variable name, it also
+encodes the variable type (local or global, atomic or array,
+compressed, derived, etc.).  Because the variable name only
+appears in the metametadata (ffs format), this is a great place to
+put more static information about the variable, specifically
+anything that is fixed after definition and doesn't change on a
+per-timestep basis.  More on names later.
+
+To accomplish managing the structure on the writer side, we
+principally track two things, the FMFieldList that represents the
+description of the virtual struct, and a malloc'd region where we
+build the virtual struct itself.  While the description is
+interpreted by FFS, the most important thing for BP5 to remember
+is this field's offset because that's where the (meta)data will
+go.  When we Marshal a simple atomic value (local or global), we
+calculate an appropriately aligned new offset in the buffer, add
+to the FMFieldList (maintained in Info.MetaFields on the writer)
+and copy the data into the virtual field at that offset in the
+buffer.  On future timesteps, the field already exists, so we just
+use the offset and copy the data into the buffer.  Arrays are a
+bit more complex, but let's start with the simple case.  FFS
+supports substructures, I.E. fields which themselves are a
+structure and we use that feature for all array representations.
+There are several things that may change on a per-timestep basis
+for arrays, including Shape, Count and Offset values (which are
+themselves arrays), and we also need to track the location of the
+related data block (offset in this rank's data segment).  Except
+for Shape (which we assume is set for at least this timestep), all
+of these things are per-block.
+
+Back to FFS capabilities for a moment.  FFS's pointer-based
+structures include dynamically-sized arrays, and the size of those
+arrays must be specified by an integer-typed field in that
+structure.  There are three different array lengths required here.
+Shape is of length Dims (how many dimensions the array has),
+DataBlockLocation is of length BlockCount (how many blocks were
+written on this rank), and for Count and Offsets we must have
+those per-block, so the length is Dims*BlockCount.  To satisfy
+FFS's constraints, that means we must have integer fields
+representing all three lengths in the array metadata struct, and
+we need pointers to the dynamic arrays representing Shape, Count,
+Offsets, and DataBlockLocation.  These are the BASE_FIELDS below
+and the FFS FMField entries are BASE_FIELD_ENTRIES in BP5Base.cpp.
+```
+#define BASE_FIELDS                                                                                \
+    size_t Dims;               /* How many dimensions does this array have */                      \
+    size_t BlockCount;         /* How many blocks are written   */                                 \
+    size_t DBCount;            /* Dimens * BlockCount   */                                         \
+    size_t *Shape;             /* Global dimensionality  [Dims] NULL for local */                  \
+    size_t *Count;             /* Per-block Counts    [DBCount] */                                 \
+    size_t *Offsets;           /* Per-block Offsets   [DBCount] NULL for local */                  \
+    size_t *DataBlockLocation; /* Per-block Offset in PG [BlockCount] */
+```
+```
+#define BASE_FIELD_ENTRIES                                                                         \
+    {"Dims", "integer", sizeof(size_t), FMOffset(BP5Base::MetaArrayRec *, Dims)},                  \
+        {"BlockCount", "integer", sizeof(size_t), FMOffset(BP5Base::MetaArrayRec *, BlockCount)},  \
+        {"DBCount", "integer", sizeof(size_t), FMOffset(BP5Base::MetaArrayRec *, DBCount)},        \
+        {"Shape", "integer[Dims]", sizeof(size_t), FMOffset(BP5Base::MetaArrayRec *, Shape)},      \
+        {"Count", "integer[DBCount]", sizeof(size_t), FMOffset(BP5Base::MetaArrayRec *, Count)},   \
+        {"Offset", "integer[DBCount]", sizeof(size_t),                                             \
+         FMOffset(BP5Base::MetaArrayRec *, Offsets)},                                              \
+        {"DataBlockLocation", "integer[BlockCount]", sizeof(size_t),                               \
+         FMOffset(BP5Base::MetaArrayRec *, DataBlockLocation)},
+```
+While more complex array metadata entries are necessary, these
+must be the first fields in those structures.  While there can't
+be a static struct declaration for all of the metadata, there is a
+static declaration for the array metadata substructure,
+`MetaArrayRec` below.
+```
+    typedef struct _MetaArrayRec
+    {
+        BASE_FIELDS
+    } MetaArrayRec;
+```
+Mostly you'll see this used like this:
+```
+MetaArrayRec *MetaEntry = (MetaArrayRec *)((char *)(MetadataBuf) +  Rec->MetaOffset);
+```
+This gives us a nice way of accessing the key fields in an array's
+metadata entry.
+
+So, what about more complex arrays?  All of our compression
+operators require the length of the compressed field as input to
+the uncompress operator.  Generally we don't include data block
+length as part of metadata because it's easily calculated from the
+Count values and the length of the data type, but in order to
+support compression we have to communicate it from the writer to
+the reader so we can uncompress.  Therefore every field with an
+operator has as its next field (after BASE_FIELDS) DataBlockSize.
+Like DataBlockLocation, this is per block (and so its FFS
+description also uses BlockCount).  This arrangement is
+represented by the `struct MetaArrayRecOperator` below.  Note that
+BP5 does not itself use the DataBlockSize in the metadata.  The
+size of the compressed data is returned from the compression
+operator, and is used by BP5 to copy that data into the data
+block, but after that it is only passed to the Uncompress operator
+on the receiving side, so operators like MGard may choose to use
+this differently.
+```
+    typedef struct _MetaArrayRecOperator
+    {
+        BASE_FIELDS
+        size_t *DataBlockSize; // Per-block Lengths [BlockCount]
+    } MetaArrayRecOperator;
+```
+The last case is arrays that also have Min/Max stats associated
+with them.  Since this can be combined with operators, that gives
+us two more possible structs for array metadata, a plain array
+with Min/Max or an array with an operator and Min/Max, these are
+represented by the structs `MetaArrayRecMM` and
+`MetaArrayRecOperatorMM` below.  Note that MinMax in that struct is
+a `char*`, but obviously the data type of Min/Max depends upon the
+element type of the array.  How does that work?  The actual size
+in bytes of the MinMax array is `BlockCount * sizeof(array element) * 2`, but in order to avoid introducing yet another integer-typed
+size value into the structure we've gone to some effort in order
+to leverage the existing BlockCount value.  In particular, there
+are a number of FMField lists for the MM and OperatorMM arrays,
+each giving FFS a different element size for the MinMax Array.
+ADIOS types of size 1 use `MetaArrayRecMM1List`, those of size 2 use
+`MetaArrayRecMM2List`, etc., up to `MetaArrayRecMM16List`, which would
+be used by long double.  Note that BP5 doesn't define or support
+MinMax for string, complex, or structure types.
+```
+    typedef struct _MetaArrayRecMM
+    {
+        BASE_FIELDS
+        char *MinMax; // char[TYPESIZE][BlockCount]  varies by type
+    } MetaArrayRecMM;
+
+    typedef struct _MetaArrayRecOperatorMM
+    {
+        BASE_FIELDS
+        size_t *DataBlockSize; // Per-block Lengths [BlockCount]
+        char *MinMax;          // char[TYPESIZE][BlockCount]  varies by type
+    } MetaArrayRecOperatorMM;
+```
+For each of the array variations above, when we add the field
+associated with that array to the metadata field list, we specify
+the appropriate FieldList in the FFS "field_type" value, and
+allocate space for the relevant structure in the virtual metadata
+struct we're building. (Example MetaArrayRecOperatorMM8List below.)
+```
+static FMField MetaArrayRecOperatorMM8List[] = {
+    BASE_FIELD_ENTRIES
+    {"DataBlockSize", "integer[BlockCount]", sizeof(size_t),
+                       FMOffset(BP5Base::MetaArrayRecOperator *, DataBlockSize)},
+    {"MinMax", "char[16][BlockCount]", 1, FMOffset(BP5Base::MetaArrayRecOperatorMM *, MinMax)},
+    {NULL, NULL, 0, 0}};
+```
+We mentioned field names above, we actually encode a lot of
+information into the FFS field names, including the variable name,
+shape, element_size, ADIOS type, any operator that might be
+applied, the name of the substructure (if the array is a struct
+type), and even the expression that is to be used for derived
+variables.  These are all encoded in different ways, for example
+the basic shape of the variable is encoded in the three letter
+prefix of the FFS fieldname: GlobalValue = "BPg", GlobalArray =
+"BPG", JoinedArray = "BPJ", LocalValue = "BPl", LocalArray = "BPL".
+The details of the encoding are buried in the logic, but the important
+bit is knowing that there's a lot of information there and some of
+it (like the expression) is base64 encoded to avoid having special
+characters in the FFS field name.  From the BP5 point of view,
+anything that can be encoded in the field name is a good thing
+because it travels in the metametadata, not the metadata, so it
+only gets moved around if the field set changes.
+
+Speaking of changes, there are some details that are omitted above
+to get the main points across, but let's talk about other details.
+First, when you put a first block of an array, we fill out the
+Dims field, init BlockCount to 1, DBCount (the `Dims*BlockCount`
+value) to Dims and then we malloc memory to hold a copy of the
+Shape, Count and Offset values.  (We need to copy these anyway as
+part of serialization as they must be captured at the time of Put,
+so we can't, say, just reference the values in the VariableBase
+class.)  For LocalArrays, the Shape value stays at a NULL pointer,
+as does the Offsets (Start) value.  If after the first there's another Put()
+on that variable, we add 1 to BlockCount, increment DBCount by
+Dims, and realloc() the Count and Offset arrays so that we can add
+the new Count and Offset values after the ones that are already
+there.  This means that the Count values for block 1 start at
+`Count[Dims]`, for block 2 they start at `Count[2*Dims]`, etc.  At the
+end of the timestep after using FFSencode() to serialize the
+metadata, `FMfree_var_rec_elements()` is used to free() all these
+subarrays that we've malloc'd.  It understands the structure of
+our entire Metadata structure, walks the field list and
+deallocates appropriately.  Once this has been done, we can
+memset() the whole metadata structure back to zeros and we're
+ready to start again.  (All pointers NULL and counts are zero.)
+
+When we do start again with the next timestep, we don't start from
+scratch with a new Fieldlist and virtual structure, but instead
+try to reuse the old one.  The anticipation is that step-based HPC
+applications are highly regular and the set of variables that are
+output on step N+1 are likely the same as what they output for
+step N.  So when we get a Put() for a variable, we look up its
+entry in internal bookkeeping and if it has an entry in the
+structure we reuse it, putting the appropriate data in the virtual
+structure as described above.  This is fine if we write the exact
+same set of variables in subsequent steps, but what if we don't?
+Well, if we write a new variable, then the procedure above
+happens, but we also take steps to make sure that we generate new
+MetaMetaData (I.E. re-register the format with FFS).  We do this
+by setting the Info.MetaFormat value to NULL.
+
+Handling a non-written variable is done differently. We don't
+really want to bear the cost of new MetaMetaData frequently
+(because MetaMetaData can be big), so instead we're willing to
+bear the costs of not using some of the data in the virtual
+structure.  So if the app Puts an atomic variable on timestep N,
+but skips it on N+1, we essentially leave that fraction of the
+metadata buffer unused in N+1.  It's transmitted or stored, but it
+doesn't contain anything useful.  But the reader still needs to
+know that it wasn't written, so BP5 metadata carries with it a
+bitmap showing if a variable that is part of the metadata has
+actually been written and is valid.  This bitmap, contained in the
+BitField[BitFieldCount] fields in the MetadataFieldList is the
+ultimate authority as to what has been written.  Variables are
+assigned an index in order when they are first entered into
+metadata and if the bit at that index isn't set, that variable
+wasn't written on that timestep.
+
+Now, this does bring up a vulnerability with BP5.  If an application
+were to write a lot of variables on one step and then never use them
+again, we might end up with a big metadata block that mostly carried
+unused (junk) bytes.  We have not yet run into this in a real
+application, so it isn't specifically handled.  In an ideal world, one
+would look at the "occupancy rate" of metadata in EndStep() and make
+a decision that for either this timestep or the next, we'd start from
+scratch with an empty field list.  There's a tradeoff here.  Do this
+too often and we've got big MetaMetadata costs, do it too little and
+our metadata has a lot of useless bytes.  Future work.  Note that this
+is mostly a writer-side thing to fix/optimize.  The reader will
+appropriately handle new metadata, including new metametadata.
+
+The stuff above applies to ADIOS variables, but attributes are always
+handled separately.  In the initial FFS-marshalling implementation,
+Attributes, while separate, were handled very similarly to variables.
+That is, there was a field list and virtual structure maintained where
+we entered attributes much like Global and local values are described
+above.  There was a metametadata generated for it and it was moved
+around like other metametadata blocks.  This old way of doing things
+is still present in the code and gets used if `MarshalAttribute()` is
+called by the engine.  Engines that use this marshall all attributes
+in `Endstep()`, calling MarshalAttribute for all attributes and only
+doing this when some attribute has changed.  The resulting Attribute
+data always contains **all** the current attribute values, a situation
+that works out well for engines like SST where readers might join
+after timestep 0.  The SST writer can save the most recent Attribute
+data block and provide it to a newly-joined reader so that it has all
+available attributes.
+
+However, this encoding mechanism has some significant disadvantages
+under almost all situations.  This separation of metametadata and
+metadata was designed for Variables, where the set of variables was
+likely to be reused without changes repeatedly.  However, attributes
+aren't like that, particularly in the original situation where
+attributes once set can never change.  Then we're only doing this when
+we add an attribute, we're always generating new MetaMetadata whenever
+we have a change, and MetaMetadata + Metadata size is always going to
+be bigger than some simpler encoding mechanism.  So, BP5 file engine
+now does things differently.  It calls OnetimeMarshalAttribute() which
+uses a simpler FFS representation for attributes with the attribute
+"name" being part of the data, not part of the metametadata as it is
+with variables.  This means that the metametadata never changes, so we
+don't have the same issues as with the prior approach.  That
+metametadata struct (BP5AttrStruct) describes a relatively simple
+structure with two lists, one for attributes of any non-string type,
+and the other a list of string and array-of-string attributes.
+Generally we only want attributes to appear here when they change, so
+the BP5Writer calls OnetimeMarshalAttribute whenever it gets the
+NotifyEngineAttribute call (whenever an attribute changes).  However
+it also gets called in BeginStep if that step is the first ever
+called, because some attributes may have been defined before the
+engine was ever created.  In BP5 file, attribute blocks then only
+ever contain an attribute once, unless the attribute changes in which
+case it will appear again.  This is not such a good situation for SST
+because of the late-coming-reader issue, so that still uses the old
+marshaling mechanism.
+
+
diff --git a/developer_docs/bp5reader.md b/developer_docs/bp5reader.md
new file mode 100644
index 0000000000..ca64f074f4
--- /dev/null
+++ b/developer_docs/bp5reader.md
@@ -0,0 +1,307 @@
+# BP5 Metadata handling, reader-side focus
+
+This document is to be read in the context of [BP5 Metadata
+Marshalling](bp5format.md), which covers metadata creation in BP5.
+
+BP5 Metadata overall setup includes MetaMetaData (which is just an FFS
+Format Block, essentially a marshalled version of the Metadata
+FieldList created in [BP5 Metadata Marshalling](bp5format.md)) and
+Metadata block itself (I.E. the result of FFSEncoding the virtual
+structure created in [BP5 Metadata Marshalling](bp5format.md)).
+
+First, some FFS basics.  You'll notice that InstallMetaMetaData in
+BP5Deserializer.cpp mostly just consists of some copying and a call to
+load_external_format_FMcontext().  This just loads the format
+information (I.E. the marshalled version of the Metadata FieldList)
+into FFS.  This is a necessary first step for deserializing metadata,
+however MetaMetaData is also used for Attributes (at least for the original
+version of attribute encoding where they were done with a custom
+structure (fields named for the attributes) rather than a generic one
+(attribute names in data)).  Because of this we don't process
+MetaMetaData on installation, but wait to see how it is used.
+
+Next, let's look at the start of InstallMetaData.  This basically takes
+in an encoded metadata block and does everything necessary to setup
+newly read variables, etc.  The first part of this is "re-inflating"
+the virtual metadata structure from its encoded form to something just
+like it was on the writer, a C-style structure with pointers.  Note
+that while these are all valid pointers, this is not a classic C
+structure where each pointed-to entity is separately malloc'd.  That
+would be terribly inefficient.  Instead FFS keeps this as a single
+data block but with internal pointers.  Trying to free() them
+individually would not go well.  Note that this is true whether we can
+decode an incoming block *in-place* or not.
+
+That may require some explanation.  FFS' goal is to efficiently move
+pointer-based structures from one memory space to another.  In order
+to do that, it doesn't do the classic thing, copying each field
+individually into the encode buffer.  Instead it copies the base
+structure into the encode buffer, followed by the things pointed to by
+fields in the base structure, then recursively down the data structure
+until everything is in the buffer.  As this happens, pointers in
+copied structures are turned into the integer offset of the pointed-to
+copy, and all copied structures are appropriately aligned within the
+encode buffer so that hopefully when they "land" in the receiving
+memory space they'll have an appropriate alignment on that processor
+too.  However, this isn't always possible.  For example when
+transferring from a 32-bit machine to a 64-bit, lots of things change
+including the size of pointers and the required alignment of data
+types.  FFS was designed for this situation, but heterogeneity isn't
+what it used to be and a lot of that code hasn't been seriously
+exercised in some time, which is why the FMlocalize_structs() call in
+InstallMetaData() is commented out.  Normally that call would take the
+FMformatList from the encoding host, "localize" it to be suitable for
+the decoding host, and then FFS would take care of the unpleasant
+details.  However at present the world is pretty uniformly 64-bit
+little-endian and none of this should be necessary.  The
+localize_structs is commented out because for some reason that I
+couldn't quite work out, FFS still thought it was necessary and that
+change was the easiest way to avoid the problem.  Should we support
+32-bit architectures or this code survive to run on 128-bit
+architectures, things will have to change.  As it is, we should always
+be following the `FFSdecode_in_place_possible() == TRUE` code path.
+
+Something useful to note: setting the environment variable
+"BP5DumpMetadata" will cause the output of the raw incoming metadata
+by the Deserializer.  This might be a little ugly, but it can be useful.  For example, this is the portion of output for the 'c32' variable in staging_common/TestCommonWrite:
+```
+BPG_8_12_c32 = 
+  BPG_8_12_c32 = 
+Dims = 1 ,BlockCount = 1 ,DBCount = 1 ,Shape = 0x11e817388 10 ,Count = 0x11e817390 10 ,Offset = 0x11e817398 0 ,DataBlockLocation = 0x11e8173a0 272 ,MinMax = NULL,  ,
+,
+```
+You see the field name with the "BPG" prefix indicating a global
+array, element size of 8, ADIOS type of 12 (maps to complex float),
+and the actual variable name at the end.  Dims, BlockCount and DBCount
+are all 1.  The Shape Count and Offset are arrays, so they are
+represented by their base address (after decoding) followed by their
+elements.  DataBlockLocation is similar, showing the datablock at
+offset 272.  Finally there is no MinMax for complex, so that pointer
+is NULL.
+
+Let's step back a bit.  When using BP5, we expect the engine to provide
+the Deserializer with all of the MetaMetaData, and then the Metadata
+block from each rank.  For the BP5 file reader in random access mode,
+we also expect to be given the Metadata blocks for every step.  We
+don't need all the MetaMetaData up front _per se_, but we have to have
+it _before_ any MetaData block that it was associated with.  The
+FFSdecode_* calls in InstallMetaData() produce what are essentially
+copies of the metadata structure that was created in the writer, and
+because we don't do the sort of metadata "merging" that BP3/4 did on
+the writer side, we'll have a copy of the metadata from **EACH**
+writer, and in BP5 file reader random access mode, also for each step.
+**These C-style pointer-based data structures are the core of
+in-memory BP5 metadata.** We don't do aggregation, turn variable count
+arrays into std::vector-based structures or really anything like
+that. Instead most everything in BP5Deserializer is just support for
+accessing those data structures as they are.  NOTE: Remember that the
+entry for an array variable in each of these blocks is a
+`MetaArrayRec` as described in [BP5 Metadata
+Marshalling](bp5format.md).  That's a C structure with pointers to
+the Shape, Count, Offsets, etc for all the blocks that were written on
+that rank on that step.  The problem?  Each rank may have different
+metadata structure and therefore the MetaArrayRec structure for
+Variable X may live at a different offset in each MetaData block.  So
+coming up with the right offset to find a variable's data given rank
+and step is key to making this work.
+
+The principal data structure that the BP5 deserializer maintains is
+the BP5VarRec.  This is BP5's internal per-variable record and it
+matches one-on-one with a Variable class object in the IO, except that
+the BP5VarRec is persistent for the life of the BP5Deserializer
+object, where the Variable object may be deleted and recreated on
+every timestep in streaming mode.  Note that BP5 tries to be much more
+careful than other engines about storing engine-specific information
+in the shared IO and variable objects (I.E. it doesn't do it).  In
+order to maintain this separation, the deserializer maintains two maps
+with which it associates IO Variable objects with their persistent
+BP5VarRec entries, `VarByName` and `VarByKey`, which are indexed by
+the variable name and by the Variable instance address, respectively.
+One of these calls is often the first call upon entry to the
+deserializer's public methods.  Additionally, each BP5VarRec has a
+VarNum field.  These numbers are assigned sequentially starting with 0
+for the first Variable encountered when processing metadata.  The
+VarNum is an important value used for indexing into various arrays.
+
+The details of the BP5VarRec entries are in BP5Deserializer.h.  There
+are too many entries to go through individually, but most are obvious
+from code context, so here we'll focus on the creation and indexing
+mechanisms that drive metadata use.  BP5VarRec entries are created
+during parsing of MetaMetaData entries (FFS Formats), which happens
+the first time we encounter a MetaData entry that was encoded with
+that MetaMetaData (FFS Format).  This happens in the BuildControl()
+routine which creates a ControlInfo struct for the MetaMetaData.  The
+ControlInfo struct looks like this:
+```
+    struct ControlInfo
+    {
+        FMFormat Format;
+        int ControlCount;
+        struct ControlInfo *Next;
+        std::vector<size_t> *MetaFieldOffset;
+        std::vector<size_t> *CIVarIndex;
+        struct ControlStruct Controls[1];
+    };
+```
+
+The Format field is essentially the MetaMetaID and is what this is
+indexed by.  I.E. when we get a new MetaData block, we determine its
+Format and look up the ControlInfo struct, which tells us everything
+we need to know about the MetaData block without parsing it.  The
+ControlCount is how many Variables are represented in this block and
+the MetaFieldOffset gives us the starting offset of each one in the
+MetaData block.  Recall from [BP5 Metadata Marshalling](bp5format.md),
+that that's either the offset of the atomic value, or for arrays the
+offset of the MetaArrayRec structure.  So, MetaFieldOffset[i] is the
+offset of the i'th variable in this block.  But that `i` index is of
+the variables that are actually in this block, and it may not
+correspond to the VarNum of that variable (which as per above is
+assigned the first time we see a Variable), so the CIVarIndex maps
+from the VarNum index to the i'th entry in this block.
+
+The Controls array is the per-variable entry in the ControlInfo struct
+and it contains info directly parsed from the FMFieldList for this
+entry plus a pointer to the VarRec that this is associated with:
+```
+    struct ControlStruct
+    {
+        int FieldOffset;
+        BP5VarRec *VarRec;
+        ShapeID OrigShapeID;
+        DataType Type;
+        int ElementSize;
+    };
+```
+Please forgive the C-style structs and code.  Much of BP5 code was
+derived from the C-based FFS marshaling method in SST.  Not everything
+was converted to a more C++ style.
+
+Now, let's first talk about the simple situation, non-random access
+mode (I.E. step mode).  In this situation if we have N ranks in the
+MPI cohort, we expect to have InstallMetaData() called for each one,
+and the BP5Deserializer keeps a simple vector m_MetadataBaseAddrs
+indexed by rank number.  Each VarRec also has a
+PerWriterMetaFieldOffset array (filled in as we did each Install()), so
+the address of the metadata for a particular variable from a
+particular rank is basically
+`m_MetadataBaseAddrs[Rank]+VarRec->PerWriterMetaFieldOffset[Rank]`.
+You'll see this code in BP5Deserializer::GetMetadataBase(), with the
+added protection that if `VarRec->PerWriterMetaFieldOffset[Rank] ==
+0`, that WriterRank didn't write that variable on that timestep.
+
+Random access mode, where we have the metadata for a bunch of steps in
+memory at the same time, is vastly more complex.  We didn't want to
+mess up the speed and simplicity of the step-based mode, so this code
+is split out in a separate `if` in most places, but let's step through
+that branch in GetMetadataBase() because it hits on important points
+in the BP5Deserializer code.  The first few lines of this branch are:
+```
+        if (Step >= m_ControlArray.size() || WriterRank >= m_ControlArray[Step].size())
+        {
+            return NULL; // we don't have this rank in this step
+        }
+```
+
+These are bounds checks.  Like several other data structures in
+BP5Deserializer, m_ControlArray is a vector of vectors.  The first
+"dimension" here is the step, so the first predicate of this if checks
+to see if the requested Step is larger than the size of m_ControlArray
+which has entries for each step for which we have metadata.  If it is
+larger, we've got no metadata and return NULL.  The second predicate
+is maybe a little less obvious.  It turns out that the number of
+writer ranks contributing to a BP5 file is not necessarily constant.
+It is constant for a single write session, but you can close a BP5
+file and reopen it in append mode with a different number of writers.
+So the second "dimension" of the m_ControlArray is the number of
+writer ranks that was in use for that step.  If we're asking for the
+metadata for a writer rank that is larger than what was used for that
+step, we don't have it and return NULL.
+
+OK, the next bit:
+```
+        ControlInfo *CI = m_ControlArray[Step][WriterRank]; // writer control array
+        if (((*CI->MetaFieldOffset).size() <= VarRec->VarNum) ||
+            ((*CI->MetaFieldOffset)[VarRec->VarNum] == 0))
+        {
+            // Var does not appear in this record
+            return NULL;
+        }
+```
+
+`CI` here is the ControlInfo block for this WriterRank on this Step.
+Like all FMFormats, it's really a template and lots of metadata blocks
+likely have the same template, so this pointer is not unique, but it
+is the template that applies to the metadata block for this Rank and
+Step.  But we have a couple more checks.  MetaFieldOffset is indexed
+by VarNum, and its size corresponds to the highest VarNum we had seen
+at the time that this CI was produced (I.E. the corresponding
+MetaMetaData was parsed).  If the VarNum we're interested in is beyond
+the end of the MetaFieldOffset array, that Var was unknown when this was
+parsed, therefore it's not in this CI.  On the other hand, if the
+VarNum was known, but simply didn't appear in this CI, the
+MetaFieldOffset is 0, and we also don't have metadata here.  (Note
+that there are headers like the BitField that appear first in
+metadata, so a zero offset is never valid for a Var field.)
+
+OK, we've gotten to the point where we have a CI for this metadata
+block and the template contains the variable we're interested in,
+there's one more check:
+```
+        size_t CI_VarIndex = (*CI->CIVarIndex)[VarRec->VarNum];
+        BP5MetadataInfoStruct *BaseData =
+            (BP5MetadataInfoStruct *)(*MetadataBaseArray[Step])[WriterRank];
+        if (!BP5BitfieldTest(BaseData, (int)CI_VarIndex))
+        {
+            // Var appears in CI, but wasn't written on this step
+            return NULL;
+        }
+```
+
+MetadataBaseArray, like m_ControlArray, is a vector of vectors, and it
+contains a pointer to the metadata block for this rank/step (I.E. the
+virtual structure that we build in [BP5 Metadata
+Marshalling](bp5format.md)).  We need to check to see if this variable,
+while described in the MetaMetaData, was actually written on this
+step, and to do that we have to check the bitfield.  Because the
+bitfield is indexed not by VarNum but by the index of the Variable in
+that block, we first have to look up that index using the CIVarIndex
+vector in the CI.  This is indexed by VarNum and maps it back to
+CI_VarIndex.  Given that and the address of the metadata block, we use
+BP5BitfieldTest to see if the variable was actually written on this
+step and return NULL if not.
+
+Finally, we're done with checks and mapping.  The address of whatever
+metadata is associated with this variable on this step and rank is the
+base address of the metadata block plus the MetadataFieldOffset:
+```
+        size_t MetadataFieldOffset = (*CI->MetaFieldOffset)[VarRec->VarNum];
+        writer_meta_base = (MetaArrayRec *)(((char *)(*MetadataBaseArray[Step])[WriterRank]) +
+                                            MetadataFieldOffset);
+```
+
+GetMetadataBase() is the workhorse of BP5 reader-side metadata.  The
+ReadRandomAccess code path may seem like a lot, but mostly the most
+complex operations there are indexes into arrays and adding offsets.
+It's got a lot of checks, but it runs pretty quick.
+
+Most of the rest of BP5Deserializer is pretty straightforward if you
+understand how GetMetadataBase() works, but there is one more
+complexity that is somewhat the bane of BP5.  Every time we mention
+"Step" in random access mode above, we mean an absolute step number.
+That is, we start with 0 at writer's first Begin/EndStep and increment
+by one on every subsequent Begin/EndStep (handling appending
+appropriately by starting with the number of steps already in the
+file).  However, many (all?)  things in ADIOS random access mode API
+have traditionally been in terms of "relative" steps.  Relative steps
+don't increment if the variable isn't written on that step.  So if you
+write 10 steps into a file, but only write variable X on the even
+absolute steps (0,2,4,6,8), then BP metadata must show 5 steps for
+that variable and they should be steps 0-4 (_FOR THAT VARIABLE_).  So
+if the user asks, for example, for the Shape of that variable on Step
+4, we must internally map that RelativeStep specification to an
+AbsoluteStep before applying the logic above.  We've tried to use the
+variable name RelStep when dealing with a relative step spec, but
+there are probably places that have been missed.  (Hopefully there's no
+logic that has been missed too.)
+
+# BP5 Read logic
\ No newline at end of file
diff --git a/source/adios2/toolkit/format/bp5/BP5Base.h b/source/adios2/toolkit/format/bp5/BP5Base.h
index d19be1df73..d4c290885c 100644
--- a/source/adios2/toolkit/format/bp5/BP5Base.h
+++ b/source/adios2/toolkit/format/bp5/BP5Base.h
@@ -19,264 +19,6 @@
 #pragma warning(disable : 4250)
 #endif
 
-/*
- *  BP5 Metadata Marshalling is based upon FFS, which provides the
- *  ability to serialize a C-style pointer-based data structure
- *  (starting with a base struct) and to deserialize it in-place on
- *  the receiving side.
- *
- *  Normally, in order to use FFS, an application must fully describe
- *  the base structure using an FMFieldList, where each element
- *  describes a field in the structure, including the field's name,
- *  basic type (integer, float, etc.), size and offset from the start
- *  of the structure.  In "normal" scenarios, like in SST this is
- *  straightforward because we're describing a structure that exists
- *  at compile-time and all of those things are compile-time static.
- *  However, ADIOS metadata represents information about variables
- *  that we don't know about until run-time, so if we're going to use
- *  FFS here, things have to be a bit more dynamic.  In particular,
- *  we'll represent ADIOS metadata with a "virtual" structure, one
- *  whose description we'll construct on the fly and which will only
- *  ever exist virtually, making up offsets as we go.  We just have to
- *  be careful about keeping things aligned appropriately because we
- *  want this to land on the receiver and be appropriately aligned
- *  there.  (Normally the compiler takes care of this, but this
- *  virtual structure is never seen by a compiler, so we're doing it.)
- *  The field name that we specify to FFS is also important because we
- *  use it to communicate a lot of information between writer and
- *  reader.  While it always contains the variable name, it also
- *  encodes the variable type (local or global, atomic or array,
- *  compressed, derived, etc.).  Because the variable name only
- *  appears in the metametadata (ffs format), this is a great place to
- *  put more static information about the variable, specifically
- *  anything that is fixed after definition and doesn't change on a
- *  per-timestep basis.  More on names later.
- *
- *  To accomplish managing the structure on the writer side, we
- *  principally track two things, the FMFieldList that represents the
- *  description of the virtual struct, and a malloc'd region where we
- *  build the virtual struct itself.  While the description is
- *  interpreted by FFS, the most important thing for BP5 to remember
- *  is this field's offset because that's where the (meta)data will
- *  go.  When we Marshal a simple atomic value (local or global), we
- *  calculate an appropriately aligned new offset in the buffer, add
- *  to the FMFieldList (maintained in Info.MetaFields on the writer)
- *  and copy the data into the virtual field at that offset in the
- *  buffer.  On future timesteps, the field already exists, so we just
- *  use the offset and copy the data into the buffer.  Arrays are a
- *  bit more complex, but lets start with the simple case.  FFS
- *  supports substructures, I.E. fields which themselves are a
- *  structure and we use that feature for all array representations.
- *  There are several things that may change on a per-timestep basis
- *  for arrays, including Shape, Count and Offset values (which are
- *  themselves arrays), and we also need to track the location of the
- *  related data block (offset in this rank's data segment).  Except
- *  for Shape (which we assume is set for at least this timestep), all
- *  of these things are per-block.
- *
- *  Back to FFS capabilities for a moment.  FFS's pointer-based
- *  structures include dynamically-sized arrays, and the size of those
- *  arrays must be specified by an integer-typed field in that
- *  structure.  There are three different array lengths required here.
- *  Shape is of length Dims (how many dimensions the array has),
- *  DataBlockLocation is of length BlockCount (how many blocks were
- *  written on this rank), and for Count and Offsets we must have
- *  those per-block, so the length is Dims*BlockCount.  To satisfy
- *  FFS's constraints, that means we must have integer fields
- *  representing all three lengths in the array metadata struct, and
- *  we need pointers to the dynamic arrays representing Shape, Count,
- *  Offsets, and DataBlockLocation.  These are the BASE_FIELDS below
- *  and the FFS FMField entries are BASE_FIELD_ENTRIES in BP5Base.cpp.
- *  While more complex arrays metadata entries are necessary, these
- *  must be the first fields in those structures.  While there can't
- *  be a static struct declaration for all of the metadata, there is a
- *  static declaration for the array metadata substructure,
- *  MetaArrayRec below.  Mostly you'll see this used like this:
- *
- *  MetaArrayRec *MetaEntry = (MetaArrayRec *)((char *)(MetadataBuf) +  Rec->MetaOffset);
- *
- *  This gives us a nice way of accessing the key fields in an array's
- *  metadata entry.
- *
- *  So, what about more complex arrays?  All of our compression
- *  operators require the length of the encrypted field as input to
- *  the uncompress operator.  Generally we don't include data block
- *  length as part of metadata because it's easily calculated from the
- *  Count values and the length of the data type, but in order to
- *  support compression we have to communicate it from the writer to
- *  the reader so we can uncompress.  Therefore every field with an
- *  operator has as its next field (after BASE_FIELDS) DataBlockSize.
- *  Like DataBlockLocation, this is per block (and so it's FFS
- *  description also uses BlockCount).  This arrangement is
- *  represented by the struct MetaArrayRecOperator below.  Note that
- *  BP5 does not itself use the DataBlockSize in the metadata.  The
- *  size of the compressed data is returned from the compression
- *  operator, and is used by BP5 to copy that data into the data
- *  block, but after that it is only passed to the Uncompress operator
- *  on the receiving side, so operators like MGard may choose to use
- *  this differently.
- *
- *  The last case is arrays that also have Min/Max stats associated
- *  with them.  Since this can be combined with operators, that gives
- *  us two more possible structs for array metadata, a plain array
- *  with Min/Max or an array with an operator and Min/Max, these are
- *  represented by the structs MetaArrayRecMM and
- *  MetaArrayRecOperatorMM below.  Note that MinMax in that struct is
- *  a char*, but obviously the data type of Min/Max depends upon the
- *  element type of the array.  How does that work?  The actual size
- *  in bytes of the MinMax array is BlockCount * sizeof(array element)
- *  * 2, but in order to avoid introducing yet another integer-typed
- *  size value into the structure we've gone to some effort in order
- *  to leverage the existing BlockCount value.  In particular, there
- *  are a number of FMField lists for The MM and OperatorMM arrays,
- *  each giving FFS a different element size for the MinMax Array.
- *  ADIOS types of size 1 use MetarrayRecMM1List, those of size 2 use
- *  MetaArrayRecMM2List, etc., up to MetaArrayRecMM16List, which would
- *  be used by long double.  Note that BP5 doesn't define or support
- *  MinMax for string, complex, or structure types.
- *
- *  For each of the array variations above, when we add the field
- *  associated with that array to the metadata field list, we specify
- *  the appropriate FieldList in the FFS "field_type" value, and
- *  allocate space for the relevant structure in the virtual metadata
- *  struct we're building.
- *
- *  We mentioned field names above, we actually encode a lot of
- *  information into the FFS field names, including the variable name,
- *  shape, element_size, ADIOS type, any operator that might be
- *  applied, the name of the substructure (if the array is a struct
- *  type), and even the expression that is to be used for derived
- *  variables.  These are all encoded in different ways, for example
- *  the basic shape of the variable is encoded in the three letter
- *  prefix of the FFS fieldname: GlobalValue: = "BPg", GlobalArray =
- *  "BPG"JoinedArray = "BPJ", LocalValue = "BPl", LocalArray = "BPL".
- *  The details of the encoding are buried in the logic, but important
- *  bit is knowing that there's a lot of information there and some of
- *  it (like the expression) is base64 encoded to avoid having special
- *  characters in the FFS field name.  From the BP5 point of view,
- *  anything that can be encoded in the field name is a good thing
- *  because it travels in the metametadata, not the metadata, so it
- *  only gets moved around if the field set changes.
- *
- *  Speaking of changes, there are some details that are omitted above
- *  to get the main points across, but lets talk about other details.
- *  First, when you put a first block of an array, we fill out the
- *  Dims field, init BlockCount to 1, DBCount (the Dims*BlockCount
- *  value) to Dims and then we malloc memory to hold a copy of the
- *  Shape, Count and Offset values.  (We need to copy these anyway as
- *  part of serialization as they must be captured at the time of Put,
- *  so we can't, say, just reference the values in the VariableBase
- *  class.)  For LocalArrays, the Shape value stays at a NULL pointer,
- *  as does the Start value.  If after the first there's another Put()
- *  on that variable, we add 1 to BlockCount, increment DBCount by
- *  Dims, and realloc() the Count and Offset arrays so that we can add
- *  the new Count and Offset values after the ones that are already
- *  there.  This means that the Count values for block 1 start at
- *  Count[Dims], for block 2 they start at Count[2*Dims], etc.  At the
- *  end of the timestep after using FFSencode() to serialize the
- *  metadata, FMfree_var_rec_elements() is used to free() all these
- *  subarrays that we've malloc'd.  It understands the structure of
- *  our entire Metadata structure, walks the field list and
- *  deallocates appropriately.  Once this has been done, we can
- *  memset() the whole metadata structure back to zeros and we're
- *  ready to start again.  (All pointers NULL and counts are zero.)
- *
- *  When we do start again with the next timestep, we don't start from
- *  scratch with a new Fieldlist and virtual structure, but instead
- *  try to reuse the old one.  The anticipation is that step-based HPC
- *  applications are highly regular and the set of variables that are
- *  output on step N+1 are likely the same as what they output for
- *  step N.  So when we get a Put() for a variable, we look up it's
- *  entry in internal bookkeeping and if it has an entry in the
- *  structure we reuse it, putting the appropriate data in the virtual
- *  structure as described above.  This is fine if we write the exact
- *  same set of variables in subsequent steps, but what if we don't?
- *  Well, if we write a new variable, then the procedure above
- *  happens, but we also take steps to make sure that we generate new
- *  MetaMetaData (I.E. re-register the format with FFS).  We do this
- *  by setting the Info.MetaFormat value to NULL.
- *
- *  Handling a non-written variable is done differently. We don't
- *  really want to bear the cost of new MetaMetaData frequently
- *  (because MetaMetaData can be big), so instead we're willing to
- *  bear the costs of not using some of the data in the virtual
- *  structure.  So if the app Puts an atomic variable on timestep N,
- *  but skips it on N+1, we essentially leave that fraction of the
- *  metadata buffer unused in N+1.  It's transmitted or stored, but it
- *  doesn't contain anything useful.  But the reader still needs to
- *  know that it wasn't written, so BP5 metadata carries with it a
- *  bitmap showing if a variable that is part of the metadata has
- *  actually been written and is valid.  This bitmap, contained in the
- *  BitField[BitFieldCount] fields in the MetadataFieldList is the
- *  ultimate authority as to what has been written.  Variables are
- *  assigned an index in order when they are first entered into
- *  metadata and if the bit at that index isn't set, that variable
- *  wasn't written on that timestep.
- *
- *  Now, this does bring up a vulnerability with BP5.  If an
- *  application were to write a lot of variables on one step and then
- *  never use them again, we might end up with a big metadata block
- *  that mostly carried unused (junk) bytes.  We have not yet run into
- *  this in a real application, so it isn't specifically handled.  In
- *  an ideal world, one would look at the "occcupancy rate" of
- *  metadata in EndStep() and make a decision that for either this
- *  timestep or the next, we'd start from scratch with an empty field
- *  list.  There's a tradeoff here.  Do this too often and we've got
- *  big MetaMetadata costs, do it too little and our metadata has a
- *  lot of useless bytes.  Future work.  Note that this is mostly a
- *  writer-side thing to fix/optimize.  The reader will appropriately
- *  handle new metadata, including new metametadata.
- *
- *  The stuff above applies to ADIOS variables, but attributes are
- *  always handled separately.  In the initial FFS-marshalling
- *  implementation, Attributes, while separate, were handled very
- *  similarly to variables.  That is, there was a field list and
- *  virtual structure maintained where we entered attributes much like
- *  Global and local values are described above.  There was a
- *  metametadata generated it it and it was moved around like other
- *  metametadata blocks.  This old way of doing things is still
- *  present in the code and gets used if MarshalAttribute is called by
- *  the engine.  Engines that use this marshall all attributes in
- *  Endstep(), calling MarshalAttribute for all attributes and only
- *  doing this when some attribute has changed.  The resulting
- *  Attribute data always contains *all* the current attribute values,
- *  a situation that works out well for engines like SST where readers
- *  might join after timestep 0.  The SST writer can save the most
- *  recent Attribute data block and provide it to a newly-joined
- *  reader so that it has all available attributes.
- *
- *  However, this encoding mechanism has some significant
- *  disadvantages under almost all situations.  This separation of
- *  metametadata and metadata was designed for Variables, where the
- *  set of variables was likely to be reused without changes
- *  repeatedly.  However, attributes aren't like that, particularly in
- *  the original situation where attributes once set can never change.
- *  Then we're only doing this when we add an attribute, we're always
- *  generating new MetaMetadata whenever we have a change, and
- *  MetaMetadata + Metadata size is always going to be bigger than
- *  some simpler encoding mechanism.  So, BP5 file engine now does
- *  things differently.  It calls OnetimeMarshalAttribute() which uses
- *  a simpler FFS representation for attributes with the attribute
- *  "name" being part of the data, not part of the metametadata as it
- *  is with variables.  This means that the metametadata never
- *  changes, so we don't have the same issues as with the prior
- *  approach.  That metametadata struct (BP5AttrStruct) describes a
- *  relatively simple structure with two lists, one for attributes of
- *  any non-string type, and the other a list of string and
- *  array-of-string attributes.  Generally we only want attributes to
- *  appear here when they change, so the BP5Writer calls
- *  OnetimeMarshlAttribute whenever it gets the NotifyEngineAttribute
- *  call (whenever an attribute changes).  However it also gets called
- *  in BeginStep if that step is the first every called, because some
- *  attributes may have been defined before the engine was ever
- *  created.  In BP5 file, attribute blocks then only every contain an
- *  attribute once, unless the attribute changes in which case it will
- *  appear again.  This is not such a good situation for SST because
- *  of the late-coming-reader issue, so that still uses the old
- *  marshaling mechanism.
- *
- */
-
 namespace adios2
 {
 namespace format
@@ -297,12 +39,11 @@ class BP5Base
 
 #define BASE_FIELDS                                                                                \
     size_t Dims;               /* How many dimensions does this array have */                      \
-    size_t BlockCount;         /* How many blocks are written	*/                                   \
-    size_t DBCount;            /* Dimens * BlockCount	*/                                           \
-    size_t *Shape;             /* Global dimensionality  [Dims]	NULL for local */                  \
-    size_t *Count;             /* Per-block Counts	  [DBCount] */                                  \
-    size_t *Offsets;           /* Per-block Offsets	  [DBCount]	NULL for local                     \
-                                */                                                                 \
+    size_t BlockCount;         /* How many blocks are written   */                                 \
+    size_t DBCount;            /* Dimens * BlockCount   */                                         \
+    size_t *Shape;             /* Global dimensionality  [Dims] NULL for local */                  \
+    size_t *Count;             /* Per-block Counts    [DBCount] */                                 \
+    size_t *Offsets;           /* Per-block Offsets   [DBCount] NULL for local */                  \
     size_t *DataBlockLocation; /* Per-block Offset in PG [BlockCount] */
 
     typedef struct _MetaArrayRec