diff --git a/lib/gis/lz4.c b/lib/gis/lz4.c
index 1575be2c481..9b29b51218a 100644
--- a/lib/gis/lz4.c
+++ b/lib/gis/lz4.c
@@ -1,6 +1,6 @@
 /*
    LZ4 - Fast LZ compression algorithm
-   Copyright (C) 2011-2017, Yann Collet.
+   Copyright (C) 2011-2023, Yann Collet.
 
    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 
@@ -8,9 +8,9 @@
    modification, are permitted provided that the following conditions are
    met:
 
-   * Redistributions of source code must retain the above copyright
+       * Redistributions of source code must retain the above copyright
    notice, this list of conditions and the following disclaimer.
-   * Redistributions in binary form must reproduce the above
+       * Redistributions in binary form must reproduce the above
    copyright notice, this list of conditions and the following disclaimer
    in the documentation and/or other materials provided with the
    distribution.
@@ -28,17 +28,18 @@
    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
    You can contact the author at :
-   - LZ4 homepage : http://www.lz4.org
-   - LZ4 source repository : https://github.com/lz4/lz4
- */
+    - LZ4 homepage : http://www.lz4.org
+    - LZ4 source repository : https://github.com/lz4/lz4
+*/
 
 /*-************************************
  *  Tuning parameters
  **************************************/
 /*
  * LZ4_HEAPMODE :
- * Select how default compression functions will allocate memory for their hash
- * table, in memory stack (0:default, fastest), or in memory heap (1:requires
+ * Select how stateless compression functions like `LZ4_compress_default()`
+ * allocate memory for their hash table,
+ * in memory stack (0:default, fastest), or in memory heap (1:requires
  * malloc()).
  */
 #ifndef LZ4_HEAPMODE
@@ -46,10 +47,16 @@
 #endif
 
 /*
- * ACCELERATION_DEFAULT :
+ * LZ4_ACCELERATION_DEFAULT :
  * Select "acceleration" for LZ4_compress_fast() when parameter value <= 0
  */
-#define ACCELERATION_DEFAULT 1
+#define LZ4_ACCELERATION_DEFAULT 1
+/*
+ * LZ4_ACCELERATION_MAX :
+ * Any "acceleration" value higher than this threshold
+ * gets treated as LZ4_ACCELERATION_MAX instead (fix #876)
+ */
+#define LZ4_ACCELERATION_MAX     65537
 
 /*-************************************
  *  CPU Feature Detection
@@ -58,20 +65,16 @@
  * By default, access to unaligned memory is controlled by `memcpy()`, which is
  * safe and portable. Unfortunately, on some target/compiler combinations, the
  * generated assembly is sub-optimal. The below switch allow to select different
- * access method for improved performance.
- * Method 0 (default) : use `memcpy()`. Safe and portable.
- * Method 1 : `__packed` statement. It depends on compiler extension
- *          (ie, not portable).
- *          This method is safe if your compiler supports it, and *generally*
- *          as fast or faster than `memcpy`.
- * Method 2 : direct access. This method is portable but violate C standard.
- *            It can generate buggy code on targets which assembly generation
- *            depends on alignment. But in some circumstances, it's the only
- *            known way to get the most performance (ie GCC + ARMv6)
- * See
+ * access method for improved performance. Method 0 (default) : use `memcpy()`.
+ * Safe and portable. Method 1 : `__packed` statement. It depends on compiler
+ * extension (ie, not portable). This method is safe if your compiler supports
+ * it, and *generally* as fast or faster than `memcpy`. Method 2 : direct
+ * access. This method is portable but violates the C standard. It can generate
+ * buggy code on targets whose assembly generation depends on alignment. But in
+ * some circumstances, it's the only known way to get the most performance (ie
+ * GCC + ARMv6). See
  * https://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html
- * for details.
- * Prefer these methods in priority order (0 > 1 > 2)
+ * for details. Prefer these methods in priority order (0 > 1 > 2)
  */
 #ifndef LZ4_FORCE_MEMORY_ACCESS /* can be defined externally */
 #if defined(__GNUC__) &&                                     \
@@ -79,7 +82,8 @@
      defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \
      defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__))
 #define LZ4_FORCE_MEMORY_ACCESS 2
-#elif (defined(__INTEL_COMPILER) && !defined(_WIN32)) || defined(__GNUC__)
+#elif (defined(__INTEL_COMPILER) && !defined(_WIN32)) || defined(__GNUC__) || \
+    defined(_MSC_VER)
 #define LZ4_FORCE_MEMORY_ACCESS 1
 #endif
 #endif
@@ -92,36 +96,58 @@
 #if defined(_MSC_VER) &&                                                    \
     defined(_WIN32_WCE) /* Visual Studio for WinCE doesn't support Hardware \
                            bit count */
+#undef LZ4_FORCE_SW_BITCOUNT /* avoid double def */
 #define LZ4_FORCE_SW_BITCOUNT
 #endif
 
 /*-************************************
  *  Dependency
  **************************************/
-#define LZ4_STATIC_LINKING_ONLY
+/*
+ * LZ4_SRC_INCLUDED:
+ * Amalgamation flag, whether lz4.c is included
+ */
+#ifndef LZ4_SRC_INCLUDED
+#define LZ4_SRC_INCLUDED 1
+#endif
+
+#ifndef LZ4_DISABLE_DEPRECATE_WARNINGS
 #define LZ4_DISABLE_DEPRECATE_WARNINGS    /* due to \
                                              LZ4_decompress_safe_withPrefix64k */
+#endif
+
+#ifndef LZ4_STATIC_LINKING_ONLY
+#define LZ4_STATIC_LINKING_ONLY
+#endif
 #include "lz4.h"
 /* see also "memory routines" below */
 
 /*-************************************
  *  Compiler Options
  **************************************/
-#ifdef _MSC_VER /* Visual Studio */
-#include <intrin.h>
+#if defined(_MSC_VER) && (_MSC_VER >= 1400) /* Visual Studio 2005+ */
+#include <intrin.h>                         /* only present in VS2005+ */
 #pragma warning( \
     disable : 4127) /* disable: C4127: conditional expression is constant */
-#pragma warning(disable : 4293) /* disable: C4293: too large shift (32-bits) \
-                                 */
+#pragma warning( \
+    disable : 6237) /* disable: C6237: conditional expression is always 0 */
+#pragma warning(                                                             \
+    disable : 6239) /* disable: C6239: (<non-zero constant> && <expression>) \
+                       always evaluates to the result of <expression> */
+#pragma warning(                                                             \
+    disable : 6240) /* disable: C6240: (<expression> && <non-zero constant>) \
+                       always evaluates to the result of <expression> */
+#pragma warning(disable : 6326) /* disable: C6326: Potential comparison of a \
+                                   constant with another constant */
 #endif                          /* _MSC_VER */
 
 #ifndef LZ4_FORCE_INLINE
-#ifdef _MSC_VER /* Visual Studio */
+#if defined(_MSC_VER) && !defined(__clang__) /* MSVC */
 #define LZ4_FORCE_INLINE static __forceinline
 #else
 #if defined(__cplusplus) || \
     defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
-#ifdef __GNUC__
+#if defined(__GNUC__) || defined(__clang__)
 #define LZ4_FORCE_INLINE static inline __attribute__((always_inline))
 #else
 #define LZ4_FORCE_INLINE static inline
@@ -132,8 +158,8 @@
 #endif /* _MSC_VER */
 #endif /* LZ4_FORCE_INLINE */
 
-/* LZ4_FORCE_O2_GCC_PPC64LE and LZ4_FORCE_O2_INLINE_GCC_PPC64LE
- * Gcc on ppc64le generates an unrolled SIMDized loop for LZ4_wildCopy,
+/* LZ4_FORCE_O2 and LZ4_FORCE_INLINE
+ * gcc on ppc64le generates an unrolled SIMDized loop for LZ4_wildCopy8,
  * together with a simple 8-byte copy loop as a fall-back path.
  * However, this optimization hurts the decompression speed by >30%,
  * because the execution does not go to the optimized loop
@@ -141,18 +167,19 @@
  * before going to the fall-back path become useless overhead.
  * This optimization happens only with the -O3 flag, and -O2 generates
  * a simple 8-byte copy loop.
- * With gcc on ppc64le, all of the LZ4_decompress_* and LZ4_wildCopy
+ * With gcc on ppc64le, all of the LZ4_decompress_* and LZ4_wildCopy8
  * functions are annotated with __attribute__((optimize("O2"))),
- * and also LZ4_wildCopy is forcibly inlined, so that the O2 attribute
- * of LZ4_wildCopy does not affect the compression speed.
+ * and also LZ4_wildCopy8 is forcibly inlined, so that the O2 attribute
+ * of LZ4_wildCopy8 does not affect the compression speed.
  */
-#if defined(__PPC64__) && defined(__LITTLE_ENDIAN__) && defined(__GNUC__)
-#define LZ4_FORCE_O2_GCC_PPC64LE __attribute__((optimize("O2")))
-#define LZ4_FORCE_O2_INLINE_GCC_PPC64LE \
-    __attribute__((optimize("O2"))) LZ4_FORCE_INLINE
+#if defined(__PPC64__) && defined(__LITTLE_ENDIAN__) && defined(__GNUC__) && \
+    !defined(__clang__)
+#define LZ4_FORCE_O2 __attribute__((optimize("O2")))
+#undef LZ4_FORCE_INLINE
+#define LZ4_FORCE_INLINE \
+    static __inline __attribute__((optimize("O2"), always_inline))
 #else
-#define LZ4_FORCE_O2_GCC_PPC64LE
-#define LZ4_FORCE_O2_INLINE_GCC_PPC64LE static
+#define LZ4_FORCE_O2
 #endif
 
 #if (defined(__GNUC__) && (__GNUC__ >= 3)) ||                   \
@@ -170,19 +197,134 @@
 #define unlikely(expr) expect((expr) != 0, 0)
 #endif
 
+/* Should the alignment test prove unreliable, for some reason,
+ * it can be disabled by setting LZ4_ALIGN_TEST to 0 */
+#ifndef LZ4_ALIGN_TEST /* can be externally provided */
+#define LZ4_ALIGN_TEST 1
+#endif
+
 /*-************************************
  *  Memory routines
  **************************************/
+
+/*! LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION :
+ *  Disable relatively high-level LZ4/HC functions that use dynamic memory
+ *  allocation functions (malloc(), calloc(), free()).
+ *
+ *  Note that this is a compile-time switch. And since it disables
+ *  public/stable LZ4 v1 API functions, we don't recommend using this
+ *  symbol to generate a library for distribution.
+ *
+ *  The following public functions are removed when this symbol is defined.
+ *  - lz4   : LZ4_createStream, LZ4_freeStream,
+ *            LZ4_createStreamDecode, LZ4_freeStreamDecode,
+ *            LZ4_create (deprecated)
+ *  - lz4hc : LZ4_createStreamHC, LZ4_freeStreamHC,
+ *            LZ4_createHC (deprecated), LZ4_freeHC  (deprecated)
+ *  - lz4frame, lz4file : All LZ4F_* functions
+ */
+#if defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION)
+#define ALLOC(s)          lz4_error_memory_allocation_is_disabled
+#define ALLOC_AND_ZERO(s) lz4_error_memory_allocation_is_disabled
+#define FREEMEM(p)        lz4_error_memory_allocation_is_disabled
+#elif defined(LZ4_USER_MEMORY_FUNCTIONS)
+/* memory management functions can be customized by user project.
+ * Below functions must exist somewhere in the Project
+ * and be available at link time */
+void *LZ4_malloc(size_t s);
+void *LZ4_calloc(size_t n, size_t s);
+void LZ4_free(void *p);
+#define ALLOC(s)          LZ4_malloc(s)
+#define ALLOC_AND_ZERO(s) LZ4_calloc(1, s)
+#define FREEMEM(p)        LZ4_free(p)
+#else
 #include <stdlib.h> /* malloc, calloc, free */
 #define ALLOC(s)          malloc(s)
 #define ALLOC_AND_ZERO(s) calloc(1, s)
 #define FREEMEM(p)        free(p)
+#endif
+
+#if !LZ4_FREESTANDING
 #include <string.h> /* memset, memcpy */
-#define MEM_INIT(p, v, s) memset((p), (v), (s))
+#endif
+#if !defined(LZ4_memset)
+#define LZ4_memset(p, v, s) memset((p), (v), (s))
+#endif
+#define MEM_INIT(p, v, s) LZ4_memset((p), (v), (s))
+
+/*-************************************
+ *  Common Constants
+ **************************************/
+#define MINMATCH          4
+
+#define WILDCOPYLENGTH    8
+#define LASTLITERALS      5 /* see ../doc/lz4_Block_format.md#parsing-restrictions \
+                             */
+#define MFLIMIT           12 /* see ../doc/lz4_Block_format.md#parsing-restrictions */
+#define MATCH_SAFEGUARD_DISTANCE                                           \
+    ((2 * WILDCOPYLENGTH) -                                                \
+     MINMATCH) /* ensure it's possible to write 2 x wildcopyLength without \
+                  overflowing output buffer */
+#define FASTLOOP_SAFE_DISTANCE 64
+static const int LZ4_minLength = (MFLIMIT + 1);
+
+#define KB                        *(1 << 10)
+#define MB                        *(1 << 20)
+#define GB                        *(1U << 30)
+
+#define LZ4_DISTANCE_ABSOLUTE_MAX 65535
+#if (LZ4_DISTANCE_MAX > \
+     LZ4_DISTANCE_ABSOLUTE_MAX) /* max supported by LZ4 format */
+#error "LZ4_DISTANCE_MAX is too big : must be <= 65535"
+#endif
+
+#define ML_BITS  4
+#define ML_MASK  ((1U << ML_BITS) - 1)
+#define RUN_BITS (8 - ML_BITS)
+#define RUN_MASK ((1U << RUN_BITS) - 1)
+
+/*-************************************
+ *  Error detection
+ **************************************/
+#if defined(LZ4_DEBUG) && (LZ4_DEBUG >= 1)
+#include <assert.h>
+#else
+#ifndef assert
+#define assert(condition) ((void)0)
+#endif
+#endif
+
+#define LZ4_STATIC_ASSERT(c)                           \
+    {                                                  \
+        enum { LZ4_static_assert = 1 / (int)(!!(c)) }; \
+    } /* use after variable declarations */
+
+#if defined(LZ4_DEBUG) && (LZ4_DEBUG >= 2)
+#include <stdio.h>
+static int g_debuglog_enable = 1;
+#define DEBUGLOG(l, ...)                                 \
+    {                                                    \
+        if ((g_debuglog_enable) && (l <= LZ4_DEBUG)) {   \
+            fprintf(stderr, __FILE__ " %i: ", __LINE__); \
+            fprintf(stderr, __VA_ARGS__);                \
+            fprintf(stderr, " \n");                      \
+        }                                                \
+    }
+#else
+#define DEBUGLOG(l, ...) \
+    {                    \
+    } /* disabled */
+#endif
+
+static int LZ4_isAligned(const void *ptr, size_t alignment)
+{
+    return ((size_t)ptr & (alignment - 1)) == 0;
+}
 
 /*-************************************
- *  Basic Types
+ *  Types
  **************************************/
+#include <limits.h>
 #if defined(__cplusplus) || \
     (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
 #include <stdint.h>
@@ -193,6 +335,9 @@ typedef int32_t S32;
 typedef uint64_t U64;
 typedef uintptr_t uptrval;
 #else
+#if UINT_MAX != 4294967295UL
+#error "LZ4 code (when not C++ or C99) assumes that sizeof(int) == 4"
+#endif
 typedef unsigned char BYTE;
 typedef unsigned short U16;
 typedef unsigned int U32;
@@ -207,9 +352,41 @@ typedef U64 reg_t; /* 64-bits in x32 mode */
 typedef size_t reg_t; /* 32-bits in x32 mode */
 #endif
 
+typedef enum {
+    notLimited = 0,
+    limitedOutput = 1,
+    fillOutput = 2
+} limitedOutput_directive;
+
 /*-************************************
  *  Reading and writing into memory
  **************************************/
+
+/**
+ * LZ4 relies on memcpy with a constant size being inlined. In freestanding
+ * environments, the compiler can't assume the implementation of memcpy() is
+ * standard compliant, so it can't apply its specialized memcpy() inlining
+ * logic. When possible, use __builtin_memcpy() to tell the compiler to analyze
+ * memcpy() as if it were standard compliant, so it can inline it in
+ * freestanding environments. This is needed when decompressing the Linux
+ * Kernel, for example.
+ */
+#if !defined(LZ4_memcpy)
+#if defined(__GNUC__) && (__GNUC__ >= 4)
+#define LZ4_memcpy(dst, src, size) __builtin_memcpy(dst, src, size)
+#else
+#define LZ4_memcpy(dst, src, size) memcpy(dst, src, size)
+#endif
+#endif
+
+#if !defined(LZ4_memmove)
+#if defined(__GNUC__) && (__GNUC__ >= 4)
+#define LZ4_memmove __builtin_memmove
+#else
+#define LZ4_memmove memmove
+#endif
+#endif
+
 static unsigned LZ4_isLittleEndian(void)
 {
     const union {
@@ -219,6 +396,13 @@ static unsigned LZ4_isLittleEndian(void)
     return one.c[0];
 }
 
+#if defined(__GNUC__) || defined(__INTEL_COMPILER)
+#define LZ4_PACK(__Declaration__) __Declaration__ __attribute__((__packed__))
+#elif defined(_MSC_VER)
+#define LZ4_PACK(__Declaration__) \
+    __pragma(pack(push, 1)) __Declaration__ __pragma(pack(pop))
+#endif
+
 #if defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS == 2)
 /* lie to the compiler about data alignment; use with caution */
 
@@ -226,12 +410,10 @@ static U16 LZ4_read16(const void *memPtr)
 {
     return *(const U16 *)memPtr;
 }
-
 static U32 LZ4_read32(const void *memPtr)
 {
     return *(const U32 *)memPtr;
 }
-
 static reg_t LZ4_read_ARCH(const void *memPtr)
 {
     return *(const reg_t *)memPtr;
@@ -241,7 +423,6 @@ static void LZ4_write16(void *memPtr, U16 value)
 {
     *(U16 *)memPtr = value;
 }
-
 static void LZ4_write32(void *memPtr, U32 value)
 {
     *(U32 *)memPtr = value;
@@ -252,71 +433,63 @@ static void LZ4_write32(void *memPtr, U32 value)
 /* __pack instructions are safer, but compiler specific, hence potentially
  * problematic for some compilers */
 /* currently only defined for gcc and icc */
-typedef union {
-    U16 u16;
-    U32 u32;
-    reg_t uArch;
-} __attribute__((packed)) unalign;
+LZ4_PACK(typedef struct { U16 u16; }) LZ4_unalign16;
+LZ4_PACK(typedef struct { U32 u32; }) LZ4_unalign32;
+LZ4_PACK(typedef struct { reg_t uArch; }) LZ4_unalignST;
 
 static U16 LZ4_read16(const void *ptr)
 {
-    return ((const unalign *)ptr)->u16;
+    return ((const LZ4_unalign16 *)ptr)->u16;
 }
-
 static U32 LZ4_read32(const void *ptr)
 {
-    return ((const unalign *)ptr)->u32;
+    return ((const LZ4_unalign32 *)ptr)->u32;
 }
-
 static reg_t LZ4_read_ARCH(const void *ptr)
 {
-    return ((const unalign *)ptr)->uArch;
+    return ((const LZ4_unalignST *)ptr)->uArch;
 }
 
 static void LZ4_write16(void *memPtr, U16 value)
 {
-    ((unalign *)memPtr)->u16 = value;
+    ((LZ4_unalign16 *)memPtr)->u16 = value;
 }
-
 static void LZ4_write32(void *memPtr, U32 value)
 {
-    ((unalign *)memPtr)->u32 = value;
+    ((LZ4_unalign32 *)memPtr)->u32 = value;
 }
 
-#else /* safe and portable access through memcpy() */
+#else /* safe and portable access using memcpy() */
 
 static U16 LZ4_read16(const void *memPtr)
 {
     U16 val;
-
-    memcpy(&val, memPtr, sizeof(val));
+    LZ4_memcpy(&val, memPtr, sizeof(val));
     return val;
 }
 
 static U32 LZ4_read32(const void *memPtr)
 {
     U32 val;
-
-    memcpy(&val, memPtr, sizeof(val));
+    LZ4_memcpy(&val, memPtr, sizeof(val));
     return val;
 }
 
 static reg_t LZ4_read_ARCH(const void *memPtr)
 {
     reg_t val;
-
-    memcpy(&val, memPtr, sizeof(val));
+    LZ4_memcpy(&val, memPtr, sizeof(val));
     return val;
 }
 
 static void LZ4_write16(void *memPtr, U16 value)
 {
-    memcpy(memPtr, &value, sizeof(value));
+    LZ4_memcpy(memPtr, &value, sizeof(value));
 }
 
 static void LZ4_write32(void *memPtr, U32 value)
 {
-    memcpy(memPtr, &value, sizeof(value));
+    LZ4_memcpy(memPtr, &value, sizeof(value));
 }
 
 #endif /* LZ4_FORCE_MEMORY_ACCESS */
@@ -328,10 +501,22 @@ static U16 LZ4_readLE16(const void *memPtr)
     }
     else {
         const BYTE *p = (const BYTE *)memPtr;
+        return (U16)((U16)p[0] | (p[1] << 8));
+    }
+}
 
-        return (U16)((U16)p[0] + (p[1] << 8));
+#ifdef LZ4_STATIC_LINKING_ONLY_ENDIANNESS_INDEPENDENT_OUTPUT
+static U32 LZ4_readLE32(const void *memPtr)
+{
+    if (LZ4_isLittleEndian()) {
+        return LZ4_read32(memPtr);
+    }
+    else {
+        const BYTE *p = (const BYTE *)memPtr; /* U32: avoid signed-shift UB */
+        return (U32)p[0] | (p[1] << 8) | (p[2] << 16) | ((U32)p[3] << 24);
     }
 }
+#endif
 
 static void LZ4_writeLE16(void *memPtr, U16 value)
 {
@@ -340,7 +525,6 @@ static void LZ4_writeLE16(void *memPtr, U16 value)
     }
     else {
         BYTE *p = (BYTE *)memPtr;
-
         p[0] = (BYTE)value;
         p[1] = (BYTE)(value >> 8);
     }
@@ -348,74 +532,129 @@ static void LZ4_writeLE16(void *memPtr, U16 value)
 
 /* customized variant of memcpy, which can overwrite up to 8 bytes beyond dstEnd
  */
-LZ4_FORCE_O2_INLINE_GCC_PPC64LE
-void LZ4_wildCopy(void *dstPtr, const void *srcPtr, void *dstEnd)
+LZ4_FORCE_INLINE
+void LZ4_wildCopy8(void *dstPtr, const void *srcPtr, void *dstEnd)
 {
     BYTE *d = (BYTE *)dstPtr;
     const BYTE *s = (const BYTE *)srcPtr;
     BYTE *const e = (BYTE *)dstEnd;
 
     do {
-        memcpy(d, s, 8);
+        LZ4_memcpy(d, s, 8);
         d += 8;
         s += 8;
     } while (d < e);
 }
 
-/*-************************************
- *  Common Constants
- **************************************/
-#define MINMATCH       4
+static const unsigned inc32table[8] = {0, 1, 2, 1, 0, 4, 4, 4};
+static const int dec64table[8] = {0, 0, 0, -1, -4, 1, 2, 3};
 
-#define WILDCOPYLENGTH 8
-#define LASTLITERALS   5
-#define MFLIMIT        (WILDCOPYLENGTH + MINMATCH)
-static const int LZ4_minLength = (MFLIMIT + 1);
+#ifndef LZ4_FAST_DEC_LOOP
+#if defined __i386__ || defined _M_IX86 || defined __x86_64__ || defined _M_X64
+#define LZ4_FAST_DEC_LOOP 1
+#elif defined(__aarch64__) && defined(__APPLE__)
+#define LZ4_FAST_DEC_LOOP 1
+#elif defined(__aarch64__) && !defined(__clang__)
+/* On non-Apple aarch64, we disable this optimization for clang because
+ * on certain mobile chipsets, performance is reduced with clang. For
+ * more information refer to https://github.com/lz4/lz4/pull/707 */
+#define LZ4_FAST_DEC_LOOP 1
+#else
+#define LZ4_FAST_DEC_LOOP 0
+#endif
+#endif
 
-#define KB           *(1 << 10)
-#define MB           *(1 << 20)
-#define GB           *(1U << 30)
+#if LZ4_FAST_DEC_LOOP
+
+LZ4_FORCE_INLINE void LZ4_memcpy_using_offset_base(BYTE *dstPtr,
+                                                   const BYTE *srcPtr,
+                                                   BYTE *dstEnd,
+                                                   const size_t offset)
+{
+    assert(srcPtr + offset == dstPtr);
+    if (offset < 8) {
+        LZ4_write32(dstPtr, 0); /* silence an msan warning when offset==0 */
+        dstPtr[0] = srcPtr[0];
+        dstPtr[1] = srcPtr[1];
+        dstPtr[2] = srcPtr[2];
+        dstPtr[3] = srcPtr[3];
+        srcPtr += inc32table[offset];
+        LZ4_memcpy(dstPtr + 4, srcPtr, 4);
+        srcPtr -= dec64table[offset];
+        dstPtr += 8;
+    }
+    else {
+        LZ4_memcpy(dstPtr, srcPtr, 8);
+        dstPtr += 8;
+        srcPtr += 8;
+    }
 
-#define MAXD_LOG     16
-#define MAX_DISTANCE ((1 << MAXD_LOG) - 1)
+    LZ4_wildCopy8(dstPtr, srcPtr, dstEnd);
+}
 
-#define ML_BITS      4
-#define ML_MASK      ((1U << ML_BITS) - 1)
-#define RUN_BITS     (8 - ML_BITS)
-#define RUN_MASK     ((1U << RUN_BITS) - 1)
+/* customized variant of memcpy, which can overwrite up to 32 bytes beyond
+ * dstEnd. This version copies two times 16 bytes (instead of one time 32 bytes)
+ * because it must be compatible with offsets >= 16. */
+LZ4_FORCE_INLINE void LZ4_wildCopy32(void *dstPtr, const void *srcPtr,
+                                     void *dstEnd)
+{
+    BYTE *d = (BYTE *)dstPtr;
+    const BYTE *s = (const BYTE *)srcPtr;
+    BYTE *const e = (BYTE *)dstEnd;
 
-/*-************************************
- *  Error detection
- **************************************/
-#if defined(LZ4_DEBUG) && (LZ4_DEBUG >= 1)
-#include <assert.h>
-#else
-#ifndef assert
-#define assert(condition) ((void)0)
-#endif
-#endif
+    do {
+        LZ4_memcpy(d, s, 16);
+        LZ4_memcpy(d + 16, s + 16, 16);
+        d += 32;
+        s += 32;
+    } while (d < e);
+}
 
-#define LZ4_STATIC_ASSERT(c)                           \
-    {                                                  \
-        enum { LZ4_static_assert = 1 / (int)(!!(c)) }; \
-    } /* use after variable declarations */
+/* LZ4_memcpy_using_offset() presumes :
+ * - dstEnd >= dstPtr + MINMATCH
+ * - there are at least 12 bytes available to write after dstEnd */
+LZ4_FORCE_INLINE void LZ4_memcpy_using_offset(BYTE *dstPtr, const BYTE *srcPtr,
+                                              BYTE *dstEnd, const size_t offset)
+{
+    BYTE v[8];
 
-#if defined(LZ4_DEBUG) && (LZ4_DEBUG >= 2)
-#include <stdio.h>
-static int g_debuglog_enable = 1;
+    assert(dstEnd >= dstPtr + MINMATCH);
 
-#define DEBUGLOG(l, ...)                               \
-    {                                                  \
-        if ((g_debuglog_enable) && (l <= LZ4_DEBUG)) { \
-            fprintf(stderr, __FILE__ ": ");            \
-            fprintf(stderr, __VA_ARGS__);              \
-            fprintf(stderr, " \n");                    \
-        }                                              \
+    switch (offset) {
+    case 1:
+        MEM_INIT(v, *srcPtr, 8);
+        break;
+    case 2:
+        LZ4_memcpy(v, srcPtr, 2);
+        LZ4_memcpy(&v[2], srcPtr, 2);
+#if defined(_MSC_VER) && (_MSC_VER <= 1937) /* MSVC 2022 ver 17.7 or earlier \
+                                             */
+#pragma warning(push)
+#pragma warning( \
+    disable : 6385) /* warning C6385: Reading invalid data from 'v'. */
+#endif
+        LZ4_memcpy(&v[4], v, 4);
+#if defined(_MSC_VER) && (_MSC_VER <= 1937) /* MSVC 2022 ver 17.7 or earlier \
+                                             */
+#pragma warning(pop)
+#endif
+        break;
+    case 4:
+        LZ4_memcpy(v, srcPtr, 4);
+        LZ4_memcpy(&v[4], srcPtr, 4);
+        break;
+    default:
+        LZ4_memcpy_using_offset_base(dstPtr, srcPtr, dstEnd, offset);
+        return;
     }
-#else
-#define DEBUGLOG(l, ...) \
-    {                    \
-    } /* disabled */
+
+    LZ4_memcpy(dstPtr, v, 8);
+    dstPtr += 8;
+    while (dstPtr < dstEnd) {
+        LZ4_memcpy(dstPtr, v, 8);
+        dstPtr += 8;
+    }
+}
 #endif
 
 /*-************************************
@@ -423,64 +662,89 @@ static int g_debuglog_enable = 1;
  **************************************/
 static unsigned LZ4_NbCommonBytes(reg_t val)
 {
+    assert(val != 0);
     if (LZ4_isLittleEndian()) {
         if (sizeof(val) == 8) {
-#if defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT)
+#if defined(_MSC_VER) && (_MSC_VER >= 1800) &&     \
+    (defined(_M_AMD64) && !defined(_M_ARM64EC)) && \
+    !defined(LZ4_FORCE_SW_BITCOUNT)
+/*-*************************************************************************************************
+ * ARM64EC is a Microsoft-designed ARM64 ABI compatible with AMD64 applications
+ * on ARM64 Windows 11. The ARM64EC ABI does not support AVX/AVX2/AVX512
+ * instructions, nor their relevant intrinsics including _tzcnt_u64. Therefore,
+ * we need to neuter the _tzcnt_u64 code path for ARM64EC.
+ ****************************************************************************************************/
+#if defined(__clang__) && (__clang_major__ < 10)
+            /* Avoid undefined clang-cl intrinsics issue.
+             * See https://github.com/lz4/lz4/pull/1017 for details. */
+            return (unsigned)__builtin_ia32_tzcnt_u64(val) >> 3;
+#else
+            /* x64 CPUS without BMI support interpret `TZCNT` as `REP BSF` */
+            return (unsigned)_tzcnt_u64(val) >> 3;
+#endif
+#elif defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT)
             unsigned long r = 0;
-
             _BitScanForward64(&r, (U64)val);
-            return (int)(r >> 3);
-#elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__ >= 3))) && \
+            return (unsigned)r >> 3;
+#elif (defined(__clang__) ||                                                \
+       (defined(__GNUC__) &&                                                \
+        ((__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
     !defined(LZ4_FORCE_SW_BITCOUNT)
-            return (__builtin_ctzll((U64)val) >> 3);
+            return (unsigned)__builtin_ctzll((U64)val) >> 3;
 #else
-            static const int DeBruijnBytePos[64] = {
-                0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7,
-                0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7,
-                7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6,
-                7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7};
-            return DeBruijnBytePos[((U64)((val & -(long long)val) *
-                                          0x0218A392CDABBD3FULL)) >>
-                                   58];
+            const U64 m = 0x0101010101010101ULL;
+            val ^= val - 1;
+            return (unsigned)(((U64)((val & (m - 1)) * m)) >> 56);
 #endif
         }
-        else { /* 32 bits */
-#if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
+        else /* 32 bits */ {
+#if defined(_MSC_VER) && (_MSC_VER >= 1400) && !defined(LZ4_FORCE_SW_BITCOUNT)
             unsigned long r;
-
             _BitScanForward(&r, (U32)val);
-            return (int)(r >> 3);
-#elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__ >= 3))) && \
-    !defined(LZ4_FORCE_SW_BITCOUNT)
-            return (__builtin_ctz((U32)val) >> 3);
+            return (unsigned)r >> 3;
+#elif (defined(__clang__) ||                                                \
+       (defined(__GNUC__) &&                                                \
+        ((__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
+    !defined(__TINYC__) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            return (unsigned)__builtin_ctz((U32)val) >> 3;
 #else
-            static const int DeBruijnBytePos[32] = {
-                0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1,
-                3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1};
-            return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >>
-                                   27];
+            const U32 m = 0x01010101;
+            return (unsigned)((((val - 1) ^ val) & (m - 1)) * m) >> 24;
 #endif
         }
     }
-    else {                      /* Big Endian CPU */
-        if (sizeof(val) == 8) { /* 64-bits */
-#if defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT)
-            unsigned long r = 0;
-
-            _BitScanReverse64(&r, val);
-            return (unsigned)(r >> 3);
-#elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__ >= 3))) && \
-    !defined(LZ4_FORCE_SW_BITCOUNT)
-            return (__builtin_clzll((U64)val) >> 3);
+    else /* Big Endian CPU */ {
+        if (sizeof(val) == 8) {
+#if (defined(__clang__) ||                                                \
+     (defined(__GNUC__) &&                                                \
+      ((__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
+    !defined(__TINYC__) && !defined(LZ4_FORCE_SW_BITCOUNT)
+            return (unsigned)__builtin_clzll((U64)val) >> 3;
+#else
+#if 1
+            /* this method is probably faster,
+             * but adds a 128 bytes lookup table */
+            static const unsigned char ctz7_tab[128] = {
+                7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1,
+                0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0,
+                1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3,
+                0, 1, 0, 2, 0, 1, 0, 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0,
+                2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1,
+                0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0,
+                1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+            };
+            U64 const mask = 0x0101010101010101ULL;
+            U64 const t = (((val >> 8) - mask) | val) & mask;
+            return ctz7_tab[(t * 0x0080402010080402ULL) >> 57];
 #else
+            /* this method doesn't consume memory space like the previous one,
+             * but it contains several branches,
+             * that may end up slowing execution */
             static const U32 by32 =
-                sizeof(val) *
-                4; /* 32 on 64 bits (goal), 16 on 32 bits.
-                      Just to avoid some static analyzer complaining about shift
-                      by 32 on 32-bits target. Note that this code path is never
-                      triggered in 32-bits mode. */
+                sizeof(val) * 4; /* 32 on 64 bits (goal), 16 on 32 bits.
+Just to avoid some static analyzer complaining about shift by 32 on 32-bits
+target. Note that this code path is never triggered in 32-bits mode. */
             unsigned r;
-
             if (!(val >> by32)) {
                 r = 4;
             }
@@ -497,30 +761,21 @@ static unsigned LZ4_NbCommonBytes(reg_t val)
             }
             r += (!val);
             return r;
+#endif
 #endif
         }
-        else { /* 32 bits */
-#if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
-            unsigned long r = 0;
-
-            _BitScanReverse(&r, (unsigned long)val);
-            return (unsigned)(r >> 3);
-#elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__ >= 3))) && \
+        else /* 32 bits */ {
+#if (defined(__clang__) ||                                                \
+     (defined(__GNUC__) &&                                                \
+      ((__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
     !defined(LZ4_FORCE_SW_BITCOUNT)
-            return (__builtin_clz((U32)val) >> 3);
+            return (unsigned)__builtin_clz((U32)val) >> 3;
 #else
-            unsigned r;
-
-            if (!(val >> 16)) {
-                r = 2;
-                val >>= 8;
-            }
-            else {
-                r = 0;
-                val >>= 24;
-            }
-            r += (!val);
-            return r;
+            val >>= 8;
+            val = ((((val + 0x00FFFF00) | 0x00FFFFFF) + val) |
+                   (val + 0x00FF0000)) >>
+                  24;
+            return (unsigned)val ^ 3;
 #endif
         }
     }
@@ -534,7 +789,6 @@ unsigned LZ4_count(const BYTE *pIn, const BYTE *pMatch, const BYTE *pInLimit)
 
     if (likely(pIn < pInLimit - (STEPSIZE - 1))) {
         reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
-
         if (!diff) {
             pIn += STEPSIZE;
             pMatch += STEPSIZE;
@@ -546,7 +800,6 @@ unsigned LZ4_count(const BYTE *pIn, const BYTE *pMatch, const BYTE *pInLimit)
 
     while (likely(pIn < pInLimit - (STEPSIZE - 1))) {
         reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
-
         if (!diff) {
             pIn += STEPSIZE;
             pMatch += STEPSIZE;
@@ -571,7 +824,6 @@ unsigned LZ4_count(const BYTE *pIn, const BYTE *pMatch, const BYTE *pInLimit)
 }
 
 #ifndef LZ4_COMMONDEFS_ONLY
-
 /*-************************************
  *  Local Constants
  **************************************/
@@ -582,11 +834,6 @@ static const U32 LZ4_skipTrigger = 6; /* Increase this value ==> compression run
 /*-************************************
  *  Local Structures and types
  **************************************/
-typedef enum {
-    notLimited = 0,
-    limitedOutput = 1,
-    fillOutput = 2
-} limitedOutput_directive;
 typedef enum { clearedTable = 0, byPtr, byU32, byU16 } tableType_t;
 
 /**
@@ -602,10 +849,10 @@ typedef enum { clearedTable = 0, byPtr, byU32, byU16 } tableType_t;
  * - usingExtDict  : Like withPrefix64k, but the preceding content is somewhere
  *                   else in memory, starting at ctx->dictionary with length
  *                   ctx->dictSize.
- * - usingDictCtx  : Like usingExtDict, but everything concerning the preceding
- *                   content is in a separate context, pointed to by
- *                   ctx->dictCtx. ctx->dictionary, ctx->dictSize, and table
- *                   entries in the current context that refer to positions
+ * - usingDictCtx  : Everything concerning the preceding content is
+ *                   in a separate context, pointed to by ctx->dictCtx.
+ *                   ctx->dictionary, ctx->dictSize, and table entries
+ *                   in the current context that refer to positions
  *                   preceding the beginning of the current compression are
  *                   ignored. Instead, ctx->dictCtx->dictionary and ctx->dictCtx
  *                   ->dictSize describe the location and size of the preceding
@@ -620,9 +867,6 @@ typedef enum {
 } dict_directive;
 typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive;
 
-typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive;
-typedef enum { full = 0, partial = 1 } earlyEnd_directive;
-
 /*-************************************
  *  Local Utils
  **************************************/
@@ -630,26 +874,43 @@ int LZ4_versionNumber(void)
 {
     return LZ4_VERSION_NUMBER;
 }
-
 const char *LZ4_versionString(void)
 {
     return LZ4_VERSION_STRING;
 }
-
 int LZ4_compressBound(int isize)
 {
     return LZ4_COMPRESSBOUND(isize);
 }
-
 int LZ4_sizeofState(void)
 {
-    return LZ4_STREAMSIZE;
+    return sizeof(LZ4_stream_t);
+}
+
+/*-****************************************
+ *  Internal Definitions, used only in Tests
+ *******************************************/
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+int LZ4_compress_forceExtDict(LZ4_stream_t *LZ4_dict, const char *source,
+                              char *dest, int srcSize);
+
+int LZ4_decompress_safe_forceExtDict(const char *source, char *dest,
+                                     int compressedSize, int maxOutputSize,
+                                     const void *dictStart, size_t dictSize);
+int LZ4_decompress_safe_partial_forceExtDict(
+    const char *source, char *dest, int compressedSize, int targetOutputSize,
+    int dstCapacity, const void *dictStart, size_t dictSize);
+#if defined(__cplusplus)
 }
+#endif
 
 /*-******************************
  *  Compression functions
  ********************************/
-static U32 LZ4_hash4(U32 sequence, tableType_t const tableType)
+LZ4_FORCE_INLINE U32 LZ4_hash4(U32 sequence, tableType_t const tableType)
 {
     if (tableType == byU16)
         return ((sequence * 2654435761U) >>
@@ -658,16 +919,17 @@ static U32 LZ4_hash4(U32 sequence, tableType_t const tableType)
         return ((sequence * 2654435761U) >> ((MINMATCH * 8) - LZ4_HASHLOG));
 }
 
-static U32 LZ4_hash5(U64 sequence, tableType_t const tableType)
+LZ4_FORCE_INLINE U32 LZ4_hash5(U64 sequence, tableType_t const tableType)
 {
-    static const U64 prime5bytes = 889523592379ULL;
-    static const U64 prime8bytes = 11400714785074694791ULL;
     const U32 hashLog = (tableType == byU16) ? LZ4_HASHLOG + 1 : LZ4_HASHLOG;
-
-    if (LZ4_isLittleEndian())
+    if (LZ4_isLittleEndian()) {
+        const U64 prime5bytes = 889523592379ULL;
         return (U32)(((sequence << 24) * prime5bytes) >> (64 - hashLog));
-    else
+    }
+    else {
+        const U64 prime8bytes = 11400714785074694791ULL;
         return (U32)(((sequence >> 24) * prime8bytes) >> (64 - hashLog));
+    }
 }
 
 LZ4_FORCE_INLINE U32 LZ4_hashPosition(const void *const p,
@@ -675,72 +937,81 @@ LZ4_FORCE_INLINE U32 LZ4_hashPosition(const void *const p,
 {
     if ((sizeof(reg_t) == 8) && (tableType != byU16))
         return LZ4_hash5(LZ4_read_ARCH(p), tableType);
+
+#ifdef LZ4_STATIC_LINKING_ONLY_ENDIANNESS_INDEPENDENT_OUTPUT
+    return LZ4_hash4(LZ4_readLE32(p), tableType);
+#else
     return LZ4_hash4(LZ4_read32(p), tableType);
+#endif
 }
 
-static void LZ4_putIndexOnHash(U32 idx, U32 h, void *tableBase,
-                               tableType_t const tableType)
+LZ4_FORCE_INLINE void LZ4_clearHash(U32 h, void *tableBase,
+                                    tableType_t const tableType)
 {
     switch (tableType) {
-    default:           /* fallthrough */
-    case clearedTable: /* fallthrough */
-    case byPtr: {      /* illegal! */
+    default:             /* fallthrough */
+    case clearedTable: { /* illegal! */
         assert(0);
         return;
     }
+    case byPtr: {
+        const BYTE **hashTable = (const BYTE **)tableBase;
+        hashTable[h] = NULL;
+        return;
+    }
     case byU32: {
         U32 *hashTable = (U32 *)tableBase;
-
-        hashTable[h] = idx;
+        hashTable[h] = 0;
         return;
     }
     case byU16: {
         U16 *hashTable = (U16 *)tableBase;
-
-        assert(idx < 65536);
-        hashTable[h] = (U16)idx;
+        hashTable[h] = 0;
         return;
     }
     }
 }
 
-static void LZ4_putPositionOnHash(const BYTE *p, U32 h, void *tableBase,
-                                  tableType_t const tableType,
-                                  const BYTE *srcBase)
+LZ4_FORCE_INLINE void LZ4_putIndexOnHash(U32 idx, U32 h, void *tableBase,
+                                         tableType_t const tableType)
 {
     switch (tableType) {
-    case clearedTable: { /* illegal! */
+    default:           /* fallthrough */
+    case clearedTable: /* fallthrough */
+    case byPtr: {      /* illegal! */
         assert(0);
         return;
     }
-    case byPtr: {
-        const BYTE **hashTable = (const BYTE **)tableBase;
-
-        hashTable[h] = p;
-        return;
-    }
     case byU32: {
         U32 *hashTable = (U32 *)tableBase;
-
-        hashTable[h] = (U32)(p - srcBase);
+        hashTable[h] = idx;
         return;
     }
     case byU16: {
         U16 *hashTable = (U16 *)tableBase;
-
-        hashTable[h] = (U16)(p - srcBase);
+        assert(idx < 65536);
+        hashTable[h] = (U16)idx;
         return;
     }
     }
 }
 
+/* LZ4_putPosition*() : only used in byPtr mode */
+LZ4_FORCE_INLINE void LZ4_putPositionOnHash(const BYTE *p, U32 h,
+                                            void *tableBase,
+                                            tableType_t const tableType)
+{
+    const BYTE **const hashTable = (const BYTE **)tableBase;
+    assert(tableType == byPtr);
+    (void)tableType;
+    hashTable[h] = p;
+}
+
 LZ4_FORCE_INLINE void LZ4_putPosition(const BYTE *p, void *tableBase,
-                                      tableType_t tableType,
-                                      const BYTE *srcBase)
+                                      tableType_t tableType)
 {
     U32 const h = LZ4_hashPosition(p, tableType);
-
-    LZ4_putPositionOnHash(p, h, tableBase, tableType, srcBase);
+    LZ4_putPositionOnHash(p, h, tableBase, tableType);
 }
 
 /* LZ4_getIndexOnHash() :
@@ -749,19 +1020,17 @@ LZ4_FORCE_INLINE void LZ4_putPosition(const BYTE *p, void *tableBase,
  * Assumption 1 : only valid if tableType == byU32 or byU16.
  * Assumption 2 : h is presumed valid (within limits of hash table)
  */
-static U32 LZ4_getIndexOnHash(U32 h, const void *tableBase,
-                              tableType_t tableType)
+LZ4_FORCE_INLINE U32 LZ4_getIndexOnHash(U32 h, const void *tableBase,
+                                        tableType_t tableType)
 {
     LZ4_STATIC_ASSERT(LZ4_MEMORY_USAGE > 2);
     if (tableType == byU32) {
         const U32 *const hashTable = (const U32 *)tableBase;
-
         assert(h < (1U << (LZ4_MEMORY_USAGE - 2)));
         return hashTable[h];
     }
     if (tableType == byU16) {
         const U16 *const hashTable = (const U16 *)tableBase;
-
         assert(h < (1U << (LZ4_MEMORY_USAGE - 1)));
         return hashTable[h];
     }
@@ -770,34 +1039,21 @@ static U32 LZ4_getIndexOnHash(U32 h, const void *tableBase,
 }
 
 static const BYTE *LZ4_getPositionOnHash(U32 h, const void *tableBase,
-                                         tableType_t tableType,
-                                         const BYTE *srcBase)
+                                         tableType_t tableType)
 {
-    if (tableType == byPtr) {
+    assert(tableType == byPtr);
+    (void)tableType;
+    {
         const BYTE *const *hashTable = (const BYTE *const *)tableBase;
-
         return hashTable[h];
     }
-    if (tableType == byU32) {
-        const U32 *const hashTable = (const U32 *)tableBase;
-
-        return hashTable[h] + srcBase;
-    }
-    {
-        const U16 *const hashTable = (const U16 *)tableBase;
-
-        return hashTable[h] + srcBase;
-    } /* default, to ensure a return */
 }
 
-LZ4_FORCE_INLINE const BYTE *LZ4_getPosition(const BYTE *p,
-                                             const void *tableBase,
-                                             tableType_t tableType,
-                                             const BYTE *srcBase)
+LZ4_FORCE_INLINE const BYTE *
+LZ4_getPosition(const BYTE *p, const void *tableBase, tableType_t tableType)
 {
     U32 const h = LZ4_hashPosition(p, tableType);
-
-    return LZ4_getPositionOnHash(h, tableBase, tableType, srcBase);
+    return LZ4_getPositionOnHash(h, tableBase, tableType);
 }
 
 LZ4_FORCE_INLINE void LZ4_prepareTable(LZ4_stream_t_internal *const cctx,
@@ -808,25 +1064,27 @@ LZ4_FORCE_INLINE void LZ4_prepareTable(LZ4_stream_t_internal *const cctx,
      * therefore safe to use no matter what mode we're in. Otherwise, we figure
      * out if it's safe to leave as is or whether it needs to be reset.
      */
-    if (cctx->tableType != clearedTable) {
-        if (cctx->tableType != tableType ||
-            (tableType == byU16 &&
-             cctx->currentOffset + inputSize >= 0xFFFFU) ||
-            (tableType == byU32 && cctx->currentOffset > 1 GB) ||
+    if ((tableType_t)cctx->tableType != clearedTable) {
+        assert(inputSize >= 0);
+        if ((tableType_t)cctx->tableType != tableType ||
+            ((tableType == byU16) &&
+             cctx->currentOffset + (unsigned)inputSize >= 0xFFFFU) ||
+            ((tableType == byU32) && cctx->currentOffset > 1 GB) ||
             tableType == byPtr || inputSize >= 4 KB) {
             DEBUGLOG(4, "LZ4_prepareTable: Resetting table in %p", cctx);
             MEM_INIT(cctx->hashTable, 0, LZ4_HASHTABLESIZE);
             cctx->currentOffset = 0;
-            cctx->tableType = clearedTable;
+            cctx->tableType = (U32)clearedTable;
         }
         else {
             DEBUGLOG(4, "LZ4_prepareTable: Re-use hash table (no reset)");
         }
     }
 
-    /* Adding a gap, so all previous entries are > MAX_DISTANCE back, is faster
-     * than compressing without a gap. However, compressing with
-     * currentOffset == 0 is faster still, so we preserve that case.
+    /* Adding a gap, so all previous entries are > LZ4_DISTANCE_MAX back,
+     * is faster than compressing without a gap.
+     * However, compressing with currentOffset == 0 is faster still,
+     * so we preserve that case.
      */
     if (cctx->currentOffset != 0 && tableType == byU32) {
         DEBUGLOG(5, "LZ4_prepareTable: adding 64KB to currentOffset");
@@ -839,16 +1097,21 @@ LZ4_FORCE_INLINE void LZ4_prepareTable(LZ4_stream_t_internal *const cctx,
     cctx->dictSize = 0;
 }
 
-/** LZ4_compress_generic() :
-    inlined, to ensure branches are decided at compilation time */
-LZ4_FORCE_INLINE int LZ4_compress_generic(
+/** LZ4_compress_generic_validated() :
+ *  inlined, to ensure branches are decided at compilation time.
+ *  The following conditions are presumed already validated:
+ *  - source != NULL
+ *  - inputSize > 0
+ */
+LZ4_FORCE_INLINE int LZ4_compress_generic_validated(
     LZ4_stream_t_internal *const cctx, const char *const source,
     char *const dest, const int inputSize,
-    int *inputConsumed, /* only written when outputLimited == fillOutput */
-    const int maxOutputSize, const limitedOutput_directive outputLimited,
+    int *inputConsumed, /* only written when outputDirective == fillOutput */
+    const int maxOutputSize, const limitedOutput_directive outputDirective,
     const tableType_t tableType, const dict_directive dictDirective,
-    const dictIssue_directive dictIssue, const U32 acceleration)
+    const dictIssue_directive dictIssue, const int acceleration)
 {
+    int result;
     const BYTE *ip = (const BYTE *)source;
 
     U32 const startIndex = cctx->currentOffset;
@@ -864,13 +1127,13 @@ LZ4_FORCE_INLINE int LZ4_compress_generic(
     const U32 dictDelta = (dictDirective == usingDictCtx)
                               ? startIndex - dictCtx->currentOffset
                               : 0; /* make indexes in dictCtx comparable with
-                                      index in current context */
+                                      indexes in current context */
 
     int const maybe_extMem =
         (dictDirective == usingExtDict) || (dictDirective == usingDictCtx);
     U32 const prefixIdxLimit =
         startIndex - dictSize; /* used when dictDirective == dictSmall */
-    const BYTE *const dictEnd = dictionary + dictSize;
+    const BYTE *const dictEnd = dictionary ? dictionary + dictSize : dictionary;
     const BYTE *anchor = (const BYTE *)source;
     const BYTE *const iend = ip + inputSize;
     const BYTE *const mflimitPlusOne = iend - MFLIMIT + 1;
@@ -878,7 +1141,8 @@ LZ4_FORCE_INLINE int LZ4_compress_generic(
 
     /* the dictCtx currentOffset is indexed on the start of the dictionary,
      * while a dictionary in the current context precedes the currentOffset */
-    const BYTE *dictBase = dictDirective == usingDictCtx
+    const BYTE *dictBase = (dictionary == NULL) ? NULL
+                           : (dictDirective == usingDictCtx)
                                ? dictionary + dictSize - dictCtx->currentOffset
                                : dictionary + dictSize - startIndex;
 
@@ -888,18 +1152,20 @@ LZ4_FORCE_INLINE int LZ4_compress_generic(
     U32 offset = 0;
     U32 forwardH;
 
-    DEBUGLOG(5, "LZ4_compress_generic: srcSize=%i, tableType=%u", inputSize,
-             tableType);
-    /* Init conditions */
-    if (outputLimited == fillOutput && maxOutputSize < 1)
-        return 0; /* Impossible to store anything */
-    if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE)
-        return 0; /* Unsupported inputSize, too large (or negative) */
-    if ((tableType == byU16) && (inputSize >= LZ4_64Klimit))
-        return 0; /* Size too large (not within 64K limit) */
+    DEBUGLOG(5, "LZ4_compress_generic_validated: srcSize=%i, tableType=%u",
+             inputSize, tableType);
+    assert(ip != NULL);
+    if (tableType == byU16)
+        assert(inputSize <
+               LZ4_64Klimit); /* Size too large (not within 64K limit) */
     if (tableType == byPtr)
         assert(dictDirective ==
                noDict); /* only supported use case with byPtr */
+    /* If init conditions are not met, we don't have to mark stream
+     * as having dirty context, since no action was taken yet */
+    if (outputDirective == fillOutput && maxOutputSize < 1) {
+        return 0;
+    } /* Impossible to store anything */
     assert(acceleration >= 1);
 
     lowLimit =
@@ -916,31 +1182,38 @@ LZ4_FORCE_INLINE int LZ4_compress_generic(
         cctx->dictSize += (U32)inputSize;
     }
     cctx->currentOffset += (U32)inputSize;
-    cctx->tableType = tableType;
+    cctx->tableType = (U32)tableType;
 
     if (inputSize < LZ4_minLength)
         goto _last_literals; /* Input too small, no compression (all literals)
                               */
 
     /* First Byte */
-    LZ4_putPosition(ip, cctx->hashTable, tableType, base);
-    ip++;
-    forwardH = LZ4_hashPosition(ip, tableType);
-
-    /* Main Loop */
+    {
+        U32 const h = LZ4_hashPosition(ip, tableType);
+        if (tableType == byPtr) {
+            LZ4_putPositionOnHash(ip, h, cctx->hashTable, byPtr);
+        }
+        else {
+            LZ4_putIndexOnHash(startIndex, h, cctx->hashTable, tableType);
+        }
+    }
+    ip++;
+    forwardH = LZ4_hashPosition(ip, tableType);
+
+    /* Main Loop */
     for (;;) {
         const BYTE *match;
         BYTE *token;
+        const BYTE *filledIp;
 
         /* Find a match */
         if (tableType == byPtr) {
             const BYTE *forwardIp = ip;
-            unsigned step = 1;
-            unsigned searchMatchNb = acceleration << LZ4_skipTrigger;
-
+            int step = 1;
+            int searchMatchNb = acceleration << LZ4_skipTrigger;
             do {
                 U32 const h = forwardH;
-
                 ip = forwardIp;
                 forwardIp += step;
                 step = (searchMatchNb++ >> LZ4_skipTrigger);
@@ -949,20 +1222,18 @@ LZ4_FORCE_INLINE int LZ4_compress_generic(
                     goto _last_literals;
                 assert(ip < mflimitPlusOne);
 
-                match =
-                    LZ4_getPositionOnHash(h, cctx->hashTable, tableType, base);
+                match = LZ4_getPositionOnHash(h, cctx->hashTable, tableType);
                 forwardH = LZ4_hashPosition(forwardIp, tableType);
-                LZ4_putPositionOnHash(ip, h, cctx->hashTable, tableType, base);
+                LZ4_putPositionOnHash(ip, h, cctx->hashTable, tableType);
 
-            } while ((match + MAX_DISTANCE < ip) ||
+            } while ((match + LZ4_DISTANCE_MAX < ip) ||
                      (LZ4_read32(match) != LZ4_read32(ip)));
         }
         else { /* byU32, byU16 */
 
             const BYTE *forwardIp = ip;
-            unsigned step = 1;
-            unsigned searchMatchNb = acceleration << LZ4_skipTrigger;
-
+            int step = 1;
+            int searchMatchNb = acceleration << LZ4_skipTrigger;
             do {
                 U32 const h = forwardH;
                 U32 const current = (U32)(forwardIp - base);
@@ -1002,6 +1273,7 @@ LZ4_FORCE_INLINE int LZ4_compress_generic(
                                  "startIndex=%5u",
                                  matchIndex, startIndex);
                         assert(startIndex - matchIndex >= MINMATCH);
+                        assert(dictBase);
                         match = dictBase + matchIndex;
                         lowLimit = dictionary;
                     }
@@ -1016,16 +1288,20 @@ LZ4_FORCE_INLINE int LZ4_compress_generic(
                 forwardH = LZ4_hashPosition(forwardIp, tableType);
                 LZ4_putIndexOnHash(current, h, cctx->hashTable, tableType);
 
-                if ((dictIssue == dictSmall) && (matchIndex < prefixIdxLimit))
-                    continue; /* match outside of valid area */
+                DEBUGLOG(7, "candidate at pos=%u  (offset=%u \n", matchIndex,
+                         current - matchIndex);
+                if ((dictIssue == dictSmall) && (matchIndex < prefixIdxLimit)) {
+                    continue;
+                } /* match outside of valid area */
                 assert(matchIndex < current);
-                if ((tableType != byU16) &&
-                    (matchIndex + MAX_DISTANCE < current))
-                    continue; /* too far */
-                if (tableType == byU16)
-                    assert((current - matchIndex) <=
-                           MAX_DISTANCE); /* too_far presumed impossible with
-                                             byU16 */
+                if (((tableType != byU16) ||
+                     (LZ4_DISTANCE_MAX < LZ4_DISTANCE_ABSOLUTE_MAX)) &&
+                    (matchIndex + LZ4_DISTANCE_MAX < current)) {
+                    continue;
+                } /* too far */
+                assert(
+                    (current - matchIndex) <=
+                    LZ4_DISTANCE_MAX); /* match now expected within distance */
 
                 if (LZ4_read32(match) == LZ4_read32(ip)) {
                     if (maybe_extMem)
@@ -1037,36 +1313,41 @@ LZ4_FORCE_INLINE int LZ4_compress_generic(
         }
 
         /* Catch up */
-        while (((ip > anchor) & (match > lowLimit)) &&
-               (unlikely(ip[-1] == match[-1]))) {
-            ip--;
-            match--;
+        filledIp = ip;
+        assert(ip > anchor); /* this is always true as ip has been advanced
+                                before entering the main loop */
+        if ((match > lowLimit) && unlikely(ip[-1] == match[-1])) {
+            do {
+                ip--;
+                match--;
+            } while (((ip > anchor) & (match > lowLimit)) &&
+                     (unlikely(ip[-1] == match[-1])));
         }
 
         /* Encode Literals */
         {
             unsigned const litLength = (unsigned)(ip - anchor);
-
             token = op++;
-            if ((outputLimited ==
+            if ((outputDirective ==
                  limitedOutput) && /* Check output buffer overflow */
                 (unlikely(op + litLength + (2 + 1 + LASTLITERALS) +
                               (litLength / 255) >
-                          olimit)))
-                return 0;
-            if ((outputLimited == fillOutput) &&
-                (unlikely(
-                    op + (litLength + 240) / 255 /* litlen */ +
-                        litLength /* literals */ + 2 /* offset */ +
-                        1 /* token */ + MFLIMIT - MINMATCH
-                    /* min last literals so last match is <= end - MFLIMIT */
-                    > olimit))) {
+                          olimit))) {
+                return 0; /* cannot compress within `dst` budget. Stored indexes
+                             in hash table are nonetheless fine */
+            }
+            if ((outputDirective == fillOutput) &&
+                (unlikely(op + (litLength + 240) / 255 /* litlen */ +
+                              litLength /* literals */ + 2 /* offset */ +
+                              1 /* token */ + MFLIMIT -
+                              MINMATCH /* min last literals so last match is <=
+                                          end - MFLIMIT */
+                          > olimit))) {
                 op--;
                 goto _last_literals;
             }
             if (litLength >= RUN_MASK) {
-                int len = (int)litLength - RUN_MASK;
-
+                unsigned len = litLength - RUN_MASK;
                 *token = (RUN_MASK << ML_BITS);
                 for (; len >= 255; len -= 255)
                     *op++ = 255;
@@ -1076,7 +1357,7 @@ LZ4_FORCE_INLINE int LZ4_compress_generic(
                 *token = (BYTE)(litLength << ML_BITS);
 
             /* Copy Literals */
-            LZ4_wildCopy(op, anchor, op + litLength);
+            LZ4_wildCopy8(op, anchor, op + litLength);
             op += litLength;
             DEBUGLOG(6, "seq.start:%i, literals=%u, match.start:%i",
                      (int)(anchor - (const BYTE *)source), litLength,
@@ -1095,9 +1376,10 @@ LZ4_FORCE_INLINE int LZ4_compress_generic(
          * higher 4-bits for literal length supposed already written
          */
 
-        if ((outputLimited == fillOutput) &&
-            (op + 2 /* offset */ + 1 /* token */ + MFLIMIT - MINMATCH
-             /* min last literals so last match is <= end - MFLIMIT */
+        if ((outputDirective == fillOutput) &&
+            (op + 2 /* offset */ + 1 /* token */ + MFLIMIT -
+                 MINMATCH /* min last literals so last match is <= end - MFLIMIT
+                           */
              > olimit)) {
             /* the match was too close to the end, rewind and go to last
              * literals */
@@ -1109,14 +1391,14 @@ LZ4_FORCE_INLINE int LZ4_compress_generic(
         if (maybe_extMem) { /* static test */
             DEBUGLOG(6, "             with offset=%u  (ext if > %i)", offset,
                      (int)(ip - (const BYTE *)source));
-            assert(offset <= MAX_DISTANCE && offset > 0);
+            assert(offset <= LZ4_DISTANCE_MAX && offset > 0);
             LZ4_writeLE16(op, (U16)offset);
             op += 2;
         }
         else {
             DEBUGLOG(6, "             with offset=%u  (same segment)",
                      (U32)(ip - match));
-            assert(ip - match <= MAX_DISTANCE);
+            assert(ip - match <= LZ4_DISTANCE_MAX);
             LZ4_writeLE16(op, (U16)(ip - match));
             op += 2;
         }
@@ -1129,12 +1411,11 @@ LZ4_FORCE_INLINE int LZ4_compress_generic(
                  dictDirective == usingDictCtx) &&
                 (lowLimit == dictionary) /* match within extDict */) {
                 const BYTE *limit = ip + (dictEnd - match);
-
                 assert(dictEnd > match);
                 if (limit > matchlimit)
                     limit = matchlimit;
                 matchCode = LZ4_count(ip + MINMATCH, match + MINMATCH, limit);
-                ip += MINMATCH + matchCode;
+                ip += (size_t)matchCode + MINMATCH;
                 if (ip == limit) {
                     unsigned const more =
                         LZ4_count(limit, (const BYTE *)source, matchlimit);
@@ -1148,24 +1429,43 @@ LZ4_FORCE_INLINE int LZ4_compress_generic(
             else {
                 matchCode =
                     LZ4_count(ip + MINMATCH, match + MINMATCH, matchlimit);
-                ip += MINMATCH + matchCode;
+                ip += (size_t)matchCode + MINMATCH;
                 DEBUGLOG(6, "             with matchLength=%u",
                          matchCode + MINMATCH);
             }
 
-            if ((outputLimited) && /* Check output buffer overflow */
-                (unlikely(op + (1 + LASTLITERALS) + (matchCode >> 8) >
+            if ((outputDirective) && /* Check output buffer overflow */
+                (unlikely(op + (1 + LASTLITERALS) + (matchCode + 240) / 255 >
                           olimit))) {
-                if (outputLimited == limitedOutput)
-                    return 0;
-                if (outputLimited == fillOutput) {
+                if (outputDirective == fillOutput) {
                     /* Match description too long : reduce it */
                     U32 newMatchCode =
                         15 /* in token */ -
                         1 /* to avoid needing a zero byte */ +
-                        ((U32)(olimit - op) - 2 - 1 - LASTLITERALS) * 255;
+                        ((U32)(olimit - op) - 1 - LASTLITERALS) * 255;
                     ip -= matchCode - newMatchCode;
+                    assert(newMatchCode < matchCode);
                     matchCode = newMatchCode;
+                    if (unlikely(ip <= filledIp)) {
+                        /* The hash table was already filled up to filledIp, so
+                         * if ip is rewound to filledIp or earlier, the table
+                         * holds positions beyond the current position. This is
+                         * a problem if the hash table gets reused, so these
+                         * positions must be removed from the hash table.
+                         */
+                        const BYTE *ptr;
+                        DEBUGLOG(5, "Clearing %u positions",
+                                 (U32)(filledIp - ip));
+                        for (ptr = ip; ptr <= filledIp; ++ptr) {
+                            U32 const h = LZ4_hashPosition(ptr, tableType);
+                            LZ4_clearHash(h, cctx->hashTable, tableType);
+                        }
+                    }
+                }
+                else {
+                    assert(outputDirective == limitedOutput);
+                    return 0; /* cannot compress within `dst` budget. Stored
+                                 indexes in hash table are nonetheless fine */
                 }
             }
             if (matchCode >= ML_MASK) {
@@ -1183,6 +1483,9 @@ LZ4_FORCE_INLINE int LZ4_compress_generic(
             else
                 *token += (BYTE)(matchCode);
         }
+        /* Ensure we have enough space for the last literals. */
+        assert(
+            !(outputDirective == fillOutput && op + 1 + LASTLITERALS > olimit));
 
         anchor = ip;
 
@@ -1191,14 +1494,23 @@ LZ4_FORCE_INLINE int LZ4_compress_generic(
             break;
 
         /* Fill table */
-        LZ4_putPosition(ip - 2, cctx->hashTable, tableType, base);
+        {
+            U32 const h = LZ4_hashPosition(ip - 2, tableType);
+            if (tableType == byPtr) {
+                LZ4_putPositionOnHash(ip - 2, h, cctx->hashTable, byPtr);
+            }
+            else {
+                U32 const idx = (U32)((ip - 2) - base);
+                LZ4_putIndexOnHash(idx, h, cctx->hashTable, tableType);
+            }
+        }
 
         /* Test next position */
         if (tableType == byPtr) {
 
-            match = LZ4_getPosition(ip, cctx->hashTable, tableType, base);
-            LZ4_putPosition(ip, cctx->hashTable, tableType, base);
-            if ((match + MAX_DISTANCE >= ip) &&
+            match = LZ4_getPosition(ip, cctx->hashTable, tableType);
+            LZ4_putPosition(ip, cctx->hashTable, tableType);
+            if ((match + LZ4_DISTANCE_MAX >= ip) &&
                 (LZ4_read32(match) == LZ4_read32(ip))) {
                 token = op++;
                 *token = 0;
@@ -1214,6 +1526,7 @@ LZ4_FORCE_INLINE int LZ4_compress_generic(
             if (dictDirective == usingDictCtx) {
                 if (matchIndex < startIndex) {
                     /* there was no match, try the dictionary */
+                    assert(tableType == byU32);
                     matchIndex =
                         LZ4_getIndexOnHash(h, dictCtx->hashTable, byU32);
                     match = dictBase + matchIndex;
@@ -1229,6 +1542,7 @@ LZ4_FORCE_INLINE int LZ4_compress_generic(
             }
             else if (dictDirective == usingExtDict) {
                 if (matchIndex < startIndex) {
+                    assert(dictBase);
                     match = dictBase + matchIndex;
                     lowLimit =
                         dictionary; /* required for match length counter */
@@ -1246,9 +1560,10 @@ LZ4_FORCE_INLINE int LZ4_compress_generic(
             assert(matchIndex < current);
             if (((dictIssue == dictSmall) ? (matchIndex >= prefixIdxLimit)
                                           : 1) &&
-                ((tableType == byU16)
+                (((tableType == byU16) &&
+                  (LZ4_DISTANCE_MAX == LZ4_DISTANCE_ABSOLUTE_MAX))
                      ? 1
-                     : (matchIndex + MAX_DISTANCE >= current)) &&
+                     : (matchIndex + LZ4_DISTANCE_MAX >= current)) &&
                 (LZ4_read32(match) == LZ4_read32(ip))) {
                 token = op++;
                 *token = 0;
@@ -1269,20 +1584,24 @@ LZ4_FORCE_INLINE int LZ4_compress_generic(
     /* Encode Last Literals */
     {
         size_t lastRun = (size_t)(iend - anchor);
-
-        if ((outputLimited) && /* Check output buffer overflow */
+        if ((outputDirective) && /* Check output buffer overflow */
             (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) > olimit)) {
-            if (outputLimited == fillOutput) {
+            if (outputDirective == fillOutput) {
                 /* adapt lastRun to fill 'dst' */
-                lastRun = (olimit - op) - 1;
-                lastRun -= (lastRun + 240) / 255;
+                assert(olimit >= op);
+                lastRun = (size_t)(olimit - op) - 1 /*token*/;
+                lastRun -= (lastRun + 256 - RUN_MASK) /
+                           256; /*additional length tokens*/
+            }
+            else {
+                assert(outputDirective == limitedOutput);
+                return 0; /* cannot compress within `dst` budget. Stored indexes
+                             in hash table are nonetheless fine */
             }
-            if (outputLimited == limitedOutput)
-                return 0;
         }
+        DEBUGLOG(6, "Final literal run : %i literals", (int)lastRun);
         if (lastRun >= RUN_MASK) {
             size_t accumulator = lastRun - RUN_MASK;
-
             *op++ = RUN_MASK << ML_BITS;
             for (; accumulator >= 255; accumulator -= 255)
                 *op++ = 255;
@@ -1291,28 +1610,72 @@ LZ4_FORCE_INLINE int LZ4_compress_generic(
         else {
             *op++ = (BYTE)(lastRun << ML_BITS);
         }
-        memcpy(op, anchor, lastRun);
+        LZ4_memcpy(op, anchor, lastRun);
         ip = anchor + lastRun;
         op += lastRun;
     }
 
-    if (outputLimited == fillOutput) {
+    if (outputDirective == fillOutput) {
         *inputConsumed = (int)(((const char *)ip) - source);
     }
+    result = (int)(((char *)op) - dest);
+    assert(result > 0);
     DEBUGLOG(5, "LZ4_compress_generic: compressed %i bytes into %i bytes",
-             inputSize, (int)(((char *)op) - dest));
-    return (int)(((char *)op) - dest);
+             inputSize, result);
+    return result;
+}
+
+/** LZ4_compress_generic() :
+ *  inlined, to ensure branches are decided at compilation time;
+ *  returns 0 on unsupported srcSize, emits a 1-byte block when srcSize == 0,
+ *  and forwards everything else to LZ4_compress_generic_validated() */
+LZ4_FORCE_INLINE int LZ4_compress_generic(
+    LZ4_stream_t_internal *const cctx, const char *const src, char *const dst,
+    const int srcSize,
+    int *inputConsumed, /* only written when outputDirective == fillOutput */
+    const int dstCapacity, const limitedOutput_directive outputDirective,
+    const tableType_t tableType, const dict_directive dictDirective,
+    const dictIssue_directive dictIssue, const int acceleration)
+{
+    DEBUGLOG(5, "LZ4_compress_generic: srcSize=%i, dstCapacity=%i", srcSize,
+             dstCapacity);
+
+    if ((U32)srcSize > (U32)LZ4_MAX_INPUT_SIZE) {
+        return 0;
+    } /* Unsupported srcSize: too large, or negative (huge once cast to U32) */
+    if (srcSize == 0) { /* src == NULL supported if srcSize == 0 */
+        if (outputDirective != notLimited && dstCapacity <= 0)
+            return 0; /* no output, can't write anything */
+        DEBUGLOG(5, "Generating an empty block");
+        assert(outputDirective == notLimited || dstCapacity >= 1);
+        assert(dst != NULL);
+        dst[0] = 0; /* single token announcing zero literals */
+        if (outputDirective == fillOutput) {
+            assert(inputConsumed != NULL);
+            *inputConsumed = 0; /* nothing was read from src */
+        }
+        return 1; /* one byte written into dst */
+    }
+    assert(src != NULL); /* from here on, srcSize > 0 : src must be valid */
+
+    return LZ4_compress_generic_validated(
+        cctx, src, dst, srcSize,
+        inputConsumed, /* only written into if outputDirective == fillOutput */
+        dstCapacity, outputDirective, tableType, dictDirective, dictIssue,
+        acceleration);
 }
 
 int LZ4_compress_fast_extState(void *state, const char *source, char *dest,
                                int inputSize, int maxOutputSize,
                                int acceleration)
 {
-    LZ4_stream_t_internal *ctx = &((LZ4_stream_t *)state)->internal_donotuse;
-
+    LZ4_stream_t_internal *const ctx =
+        &LZ4_initStream(state, sizeof(LZ4_stream_t))->internal_donotuse;
+    assert(ctx != NULL);
     if (acceleration < 1)
-        acceleration = ACCELERATION_DEFAULT;
-    LZ4_resetStream((LZ4_stream_t *)state);
+        acceleration = LZ4_ACCELERATION_DEFAULT;
+    if (acceleration > LZ4_ACCELERATION_MAX)
+        acceleration = LZ4_ACCELERATION_MAX;
     if (maxOutputSize >= LZ4_compressBound(inputSize)) {
         if (inputSize < LZ4_64Klimit) {
             return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, 0,
@@ -1321,7 +1684,7 @@ int LZ4_compress_fast_extState(void *state, const char *source, char *dest,
         }
         else {
             const tableType_t tableType =
-                ((sizeof(void *) == 4) && ((uptrval)source > MAX_DISTANCE))
+                ((sizeof(void *) == 4) && ((uptrval)source > LZ4_DISTANCE_MAX))
                     ? byPtr
                     : byU32;
             return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, 0,
@@ -1331,14 +1694,13 @@ int LZ4_compress_fast_extState(void *state, const char *source, char *dest,
     }
     else {
         if (inputSize < LZ4_64Klimit) {
-            ;
             return LZ4_compress_generic(ctx, source, dest, inputSize, NULL,
                                         maxOutputSize, limitedOutput, byU16,
                                         noDict, noDictIssue, acceleration);
         }
         else {
             const tableType_t tableType =
-                ((sizeof(void *) == 4) && ((uptrval)source > MAX_DISTANCE))
+                ((sizeof(void *) == 4) && ((uptrval)source > LZ4_DISTANCE_MAX))
                     ? byPtr
                     : byU32;
             return LZ4_compress_generic(ctx, source, dest, inputSize, NULL,
@@ -1361,15 +1723,17 @@ int LZ4_compress_fast_extState_fastReset(void *state, const char *src,
                                          char *dst, int srcSize,
                                          int dstCapacity, int acceleration)
 {
-    LZ4_stream_t_internal *ctx = &((LZ4_stream_t *)state)->internal_donotuse;
-
+    LZ4_stream_t_internal *const ctx =
+        &((LZ4_stream_t *)state)->internal_donotuse;
     if (acceleration < 1)
-        acceleration = ACCELERATION_DEFAULT;
+        acceleration = LZ4_ACCELERATION_DEFAULT;
+    if (acceleration > LZ4_ACCELERATION_MAX)
+        acceleration = LZ4_ACCELERATION_MAX;
+    assert(ctx != NULL);
 
     if (dstCapacity >= LZ4_compressBound(srcSize)) {
         if (srcSize < LZ4_64Klimit) {
             const tableType_t tableType = byU16;
-
             LZ4_prepareTable(ctx, srcSize, tableType);
             if (ctx->currentOffset) {
                 return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0,
@@ -1384,7 +1748,7 @@ int LZ4_compress_fast_extState_fastReset(void *state, const char *src,
         }
         else {
             const tableType_t tableType =
-                ((sizeof(void *) == 4) && ((uptrval)src > MAX_DISTANCE))
+                ((sizeof(void *) == 4) && ((uptrval)src > LZ4_DISTANCE_MAX))
                     ? byPtr
                     : byU32;
             LZ4_prepareTable(ctx, srcSize, tableType);
@@ -1396,7 +1760,6 @@ int LZ4_compress_fast_extState_fastReset(void *state, const char *src,
     else {
         if (srcSize < LZ4_64Klimit) {
             const tableType_t tableType = byU16;
-
             LZ4_prepareTable(ctx, srcSize, tableType);
             if (ctx->currentOffset) {
                 return LZ4_compress_generic(
@@ -1411,7 +1774,7 @@ int LZ4_compress_fast_extState_fastReset(void *state, const char *src,
         }
         else {
             const tableType_t tableType =
-                ((sizeof(void *) == 4) && ((uptrval)src > MAX_DISTANCE))
+                ((sizeof(void *) == 4) && ((uptrval)src > LZ4_DISTANCE_MAX))
                     ? byPtr
                     : byU32;
             LZ4_prepareTable(ctx, srcSize, tableType);
@@ -1422,23 +1785,21 @@ int LZ4_compress_fast_extState_fastReset(void *state, const char *src,
     }
 }
 
-int LZ4_compress_fast(const char *source, char *dest, int inputSize,
-                      int maxOutputSize, int acceleration)
+int LZ4_compress_fast(const char *src, char *dest, int srcSize, int dstCapacity,
+                      int acceleration)
 {
     int result;
-
 #if (LZ4_HEAPMODE)
-    LZ4_stream_t *ctxPtr =
-        ALLOC(sizeof(LZ4_stream_t)); /* malloc-calloc always properly aligned */
-
+    LZ4_stream_t *const ctxPtr = (LZ4_stream_t *)ALLOC(
+        sizeof(LZ4_stream_t)); /* malloc-calloc always properly aligned */
     if (ctxPtr == NULL)
         return 0;
 #else
     LZ4_stream_t ctx;
     LZ4_stream_t *const ctxPtr = &ctx;
 #endif
-    result = LZ4_compress_fast_extState(ctxPtr, source, dest, inputSize,
-                                        maxOutputSize, acceleration);
+    result = LZ4_compress_fast_extState(ctxPtr, src, dest, srcSize, dstCapacity,
+                                        acceleration);
 
 #if (LZ4_HEAPMODE)
     FREEMEM(ctxPtr);
@@ -1446,83 +1807,78 @@ int LZ4_compress_fast(const char *source, char *dest, int inputSize,
     return result;
 }
 
-int LZ4_compress_default(const char *source, char *dest, int inputSize,
-                         int maxOutputSize)
-{
-    return LZ4_compress_fast(source, dest, inputSize, maxOutputSize, 1);
-}
-
-/* hidden debug function */
-/* strangely enough, gcc generates faster code when this function is
- * uncommented, even if unused */
-int LZ4_compress_fast_force(const char *source, char *dest, int inputSize,
-                            int maxOutputSize, int acceleration)
+int LZ4_compress_default(const char *src, char *dst, int srcSize,
+                         int dstCapacity)
 {
-    LZ4_stream_t ctx;
-
-    LZ4_resetStream(&ctx);
-
-    if (inputSize < LZ4_64Klimit)
-        return LZ4_compress_generic(&ctx.internal_donotuse, source, dest,
-                                    inputSize, NULL, maxOutputSize,
-                                    limitedOutput, byU16, noDict, noDictIssue,
-                                    acceleration);
-    else
-        return LZ4_compress_generic(
-            &ctx.internal_donotuse, source, dest, inputSize, NULL,
-            maxOutputSize, limitedOutput, sizeof(void *) == 8 ? byU32 : byPtr,
-            noDict, noDictIssue, acceleration);
+    return LZ4_compress_fast(src, dst, srcSize, dstCapacity, 1);
 }
 
 /* Note!: This function leaves the stream in an unclean/broken state!
  * It is not safe to subsequently use the same state with a _fastReset() or
  * _continue() call without resetting it. */
-static int LZ4_compress_destSize_extState(LZ4_stream_t *state, const char *src,
-                                          char *dst, int *srcSizePtr,
-                                          int targetDstSize)
+static int LZ4_compress_destSize_extState_internal(LZ4_stream_t *state,
+                                                   const char *src, char *dst,
+                                                   int *srcSizePtr,
+                                                   int targetDstSize,
+                                                   int acceleration)
 {
-    LZ4_resetStream(state);
+    void *const s = LZ4_initStream(state, sizeof(*state));
+    assert(s != NULL);
+    (void)s;
 
     if (targetDstSize >=
         LZ4_compressBound(
             *srcSizePtr)) { /* compression success is guaranteed */
         return LZ4_compress_fast_extState(state, src, dst, *srcSizePtr,
-                                          targetDstSize, 1);
+                                          targetDstSize, acceleration);
     }
     else {
         if (*srcSizePtr < LZ4_64Klimit) {
-            return LZ4_compress_generic(
-                &state->internal_donotuse, src, dst, *srcSizePtr, srcSizePtr,
-                targetDstSize, fillOutput, byU16, noDict, noDictIssue, 1);
+            return LZ4_compress_generic(&state->internal_donotuse, src, dst,
+                                        *srcSizePtr, srcSizePtr, targetDstSize,
+                                        fillOutput, byU16, noDict, noDictIssue,
+                                        acceleration);
         }
         else {
-            tableType_t const tableType =
-                ((sizeof(void *) == 4) && ((uptrval)src > MAX_DISTANCE))
+            tableType_t const addrMode =
+                ((sizeof(void *) == 4) && ((uptrval)src > LZ4_DISTANCE_MAX))
                     ? byPtr
                     : byU32;
-            return LZ4_compress_generic(
-                &state->internal_donotuse, src, dst, *srcSizePtr, srcSizePtr,
-                targetDstSize, fillOutput, tableType, noDict, noDictIssue, 1);
+            return LZ4_compress_generic(&state->internal_donotuse, src, dst,
+                                        *srcSizePtr, srcSizePtr, targetDstSize,
+                                        fillOutput, addrMode, noDict,
+                                        noDictIssue, acceleration);
         }
     }
 }
 
+int LZ4_compress_destSize_extState(void *state, const char *src, char *dst,
+                                   int *srcSizePtr, int targetDstSize,
+                                   int acceleration)
+{
+    int const r = LZ4_compress_destSize_extState_internal(
+        (LZ4_stream_t *)state, src, dst, srcSizePtr, targetDstSize,
+        acceleration);
+    /* the internal variant leaves the state unclean : re-initialize it */
+    LZ4_initStream(state, sizeof(LZ4_stream_t));
+    return r; /* result of the internal call, passed through unchanged */
+}
+
 int LZ4_compress_destSize(const char *src, char *dst, int *srcSizePtr,
                           int targetDstSize)
 {
 #if (LZ4_HEAPMODE)
-    LZ4_stream_t *ctx = (LZ4_stream_t *)ALLOC(
+    LZ4_stream_t *const ctx = (LZ4_stream_t *)ALLOC(
         sizeof(LZ4_stream_t)); /* malloc-calloc always properly aligned */
-
     if (ctx == NULL)
         return 0;
 #else
     LZ4_stream_t ctxBody;
-    LZ4_stream_t *ctx = &ctxBody;
+    LZ4_stream_t *const ctx = &ctxBody;
 #endif
 
-    int result = LZ4_compress_destSize_extState(ctx, src, dst, srcSizePtr,
-                                                targetDstSize);
+    int result = LZ4_compress_destSize_extState_internal(
+        ctx, src, dst, srcSizePtr, targetDstSize, 1);
 
 #if (LZ4_HEAPMODE)
     FREEMEM(ctx);
@@ -1534,25 +1890,53 @@ int LZ4_compress_destSize(const char *src, char *dst, int *srcSizePtr,
  *  Streaming functions
  ********************************/
 
+#if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION)
 LZ4_stream_t *LZ4_createStream(void)
 {
-    LZ4_stream_t *lz4s = (LZ4_stream_t *)ALLOC(sizeof(LZ4_stream_t));
-
-    LZ4_STATIC_ASSERT(
-        LZ4_STREAMSIZE >=
-        sizeof(LZ4_stream_t_internal)); /* A compilation error here means
-                                           LZ4_STREAMSIZE is not large enough */
+    LZ4_stream_t *const lz4s = (LZ4_stream_t *)ALLOC(sizeof(LZ4_stream_t));
+    LZ4_STATIC_ASSERT(sizeof(LZ4_stream_t) >= sizeof(LZ4_stream_t_internal));
     DEBUGLOG(4, "LZ4_createStream %p", lz4s);
     if (lz4s == NULL)
         return NULL;
-    LZ4_resetStream(lz4s);
+    LZ4_initStream(lz4s, sizeof(*lz4s));
     return lz4s;
 }
+#endif
 
+static size_t LZ4_stream_t_alignment(void)
+{
+#if LZ4_ALIGN_TEST
+    typedef struct {
+        char c;
+        LZ4_stream_t t;
+    } t_a; /* `c` plus the padding before `t` exposes the alignment of `t` */
+    return sizeof(t_a) - sizeof(LZ4_stream_t); /* size overhead of `c` */
+#else
+    return 1; /* effectively disabled */
+#endif
+}
+
+LZ4_stream_t *LZ4_initStream(void *buffer, size_t size)
+{
+    DEBUGLOG(5, "LZ4_initStream");
+    if (buffer == NULL) { /* nothing to initialize */
+        return NULL;
+    }
+    if (size < sizeof(LZ4_stream_t)) { /* buffer cannot host a full stream */
+        return NULL;
+    }
+    if (!LZ4_isAligned(buffer, LZ4_stream_t_alignment())) /* misaligned */
+        return NULL;
+    MEM_INIT(buffer, 0, sizeof(LZ4_stream_t_internal)); /* internal part only */
+    return (LZ4_stream_t *)buffer; /* same memory, now typed as a stream */
+}
+
+/* resetStream is now deprecated,
+ * prefer initStream() which is more general */
 void LZ4_resetStream(LZ4_stream_t *LZ4_stream)
 {
     DEBUGLOG(5, "LZ4_resetStream (ctx:%p)", LZ4_stream);
-    MEM_INIT(LZ4_stream, 0, sizeof(LZ4_stream_t));
+    MEM_INIT(LZ4_stream, 0, sizeof(LZ4_stream_t_internal));
 }
 
 void LZ4_resetStream_fast(LZ4_stream_t *ctx)
@@ -1560,6 +1944,7 @@ void LZ4_resetStream_fast(LZ4_stream_t *ctx)
     LZ4_prepareTable(&(ctx->internal_donotuse), 0, byU32);
 }
 
+#if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION)
 int LZ4_freeStream(LZ4_stream_t *LZ4_stream)
 {
     if (!LZ4_stream)
@@ -1568,15 +1953,18 @@ int LZ4_freeStream(LZ4_stream_t *LZ4_stream)
     FREEMEM(LZ4_stream);
     return (0);
 }
+#endif
 
+typedef enum { _ld_fast, _ld_slow } LoadDict_mode_e;
 #define HASH_UNIT sizeof(reg_t)
-int LZ4_loadDict(LZ4_stream_t *LZ4_dict, const char *dictionary, int dictSize)
+int LZ4_loadDict_internal(LZ4_stream_t *LZ4_dict, const char *dictionary,
+                          int dictSize, LoadDict_mode_e _ld)
 {
-    LZ4_stream_t_internal *dict = &LZ4_dict->internal_donotuse;
+    LZ4_stream_t_internal *const dict = &LZ4_dict->internal_donotuse;
     const tableType_t tableType = byU32;
     const BYTE *p = (const BYTE *)dictionary;
     const BYTE *const dictEnd = p + dictSize;
-    const BYTE *base;
+    U32 idx32;
 
     DEBUGLOG(4, "LZ4_loadDict (%i bytes from %p into %p)", dictSize, dictionary,
              LZ4_dict);
@@ -1593,56 +1981,97 @@ int LZ4_loadDict(LZ4_stream_t *LZ4_dict, const char *dictionary, int dictSize)
      * there are only valid offsets in the window, which allows an optimization
      * in LZ4_compress_fast_continue() where it uses noDictIssue even when the
      * dictionary isn't a full 64k. */
-
-    if ((dictEnd - p) > 64 KB)
-        p = dictEnd - 64 KB;
-    base = dictEnd - 64 KB - dict->currentOffset;
-    dict->dictionary = p;
-    dict->dictSize = (U32)(dictEnd - p);
     dict->currentOffset += 64 KB;
-    dict->tableType = tableType;
 
     if (dictSize < (int)HASH_UNIT) {
         return 0;
     }
 
+    if ((dictEnd - p) > 64 KB)
+        p = dictEnd - 64 KB;
+    dict->dictionary = p;
+    dict->dictSize = (U32)(dictEnd - p);
+    dict->tableType = (U32)tableType;
+    idx32 = dict->currentOffset - dict->dictSize;
+
     while (p <= dictEnd - HASH_UNIT) {
-        LZ4_putPosition(p, dict->hashTable, tableType, base);
+        U32 const h = LZ4_hashPosition(p, tableType);
+        /* Note: overwriting => favors positions near the end of dictionary */
+        LZ4_putIndexOnHash(idx32, h, dict->hashTable, tableType);
         p += 3;
+        idx32 += 3;
+    }
+
+    if (_ld == _ld_slow) {
+        /* Fill hash table with additional references, to improve compression
+         * capability */
+        p = dict->dictionary;
+        idx32 = dict->currentOffset - dict->dictSize;
+        while (p <= dictEnd - HASH_UNIT) {
+            U32 const h = LZ4_hashPosition(p, tableType);
+            U32 const limit = dict->currentOffset - 64 KB;
+            if (LZ4_getIndexOnHash(h, dict->hashTable, tableType) <= limit) {
+                /* Note: not overwriting => favors positions near the
+                 * beginning of dictionary */
+                LZ4_putIndexOnHash(idx32, h, dict->hashTable, tableType);
+            }
+            p++;
+            idx32++;
+        }
     }
 
-    return dict->dictSize;
+    return (int)dict->dictSize;
+}
+
+int LZ4_loadDict(LZ4_stream_t *LZ4_dict, const char *dictionary, int dictSize)
+{
+    return LZ4_loadDict_internal(LZ4_dict, dictionary, dictSize, _ld_fast);
 }
 
-void LZ4_attach_dictionary(LZ4_stream_t *working_stream,
-                           const LZ4_stream_t *dictionary_stream)
+int LZ4_loadDictSlow(LZ4_stream_t *LZ4_dict, const char *dictionary,
+                     int dictSize)
 {
-    if (dictionary_stream != NULL) {
+    return LZ4_loadDict_internal(LZ4_dict, dictionary, dictSize, _ld_slow);
+}
+
+void LZ4_attach_dictionary(LZ4_stream_t *workingStream,
+                           const LZ4_stream_t *dictionaryStream)
+{
+    const LZ4_stream_t_internal *dictCtx =
+        (dictionaryStream == NULL) ? NULL
+                                   : &(dictionaryStream->internal_donotuse);
+
+    DEBUGLOG(4, "LZ4_attach_dictionary (%p, %p, size %u)", workingStream,
+             dictionaryStream, dictCtx != NULL ? dictCtx->dictSize : 0);
+
+    if (dictCtx != NULL) {
         /* If the current offset is zero, we will never look in the
          * external dictionary context, since there is no value a table
          * entry can take that indicate a miss. In that case, we need
          * to bump the offset to something non-zero.
          */
-        if (working_stream->internal_donotuse.currentOffset == 0) {
-            working_stream->internal_donotuse.currentOffset = 64 KB;
+        if (workingStream->internal_donotuse.currentOffset == 0) {
+            workingStream->internal_donotuse.currentOffset = 64 KB;
+        }
+
+        /* Don't actually attach an empty dictionary.
+         */
+        if (dictCtx->dictSize == 0) {
+            dictCtx = NULL;
         }
-        working_stream->internal_donotuse.dictCtx =
-            &(dictionary_stream->internal_donotuse);
-    }
-    else {
-        working_stream->internal_donotuse.dictCtx = NULL;
     }
+    workingStream->internal_donotuse.dictCtx = dictCtx;
 }
 
 static void LZ4_renormDictT(LZ4_stream_t_internal *LZ4_dict, int nextSize)
 {
-    if (LZ4_dict->currentOffset + nextSize >
+    assert(nextSize >= 0);
+    if (LZ4_dict->currentOffset + (unsigned)nextSize >
         0x80000000) { /* potential ptrdiff_t overflow (32-bits mode) */
         /* rescale hash table */
         U32 const delta = LZ4_dict->currentOffset - 64 KB;
         const BYTE *dictEnd = LZ4_dict->dictionary + LZ4_dict->dictSize;
         int i;
-
         DEBUGLOG(4, "LZ4_renormDictT");
         for (i = 0; i < LZ4_HASH_SIZE_U32; i++) {
             if (LZ4_dict->hashTable[i] < delta)
@@ -1662,45 +2091,55 @@ int LZ4_compress_fast_continue(LZ4_stream_t *LZ4_stream, const char *source,
                                int acceleration)
 {
     const tableType_t tableType = byU32;
-    LZ4_stream_t_internal *streamPtr = &LZ4_stream->internal_donotuse;
-    const BYTE *dictEnd = streamPtr->dictionary + streamPtr->dictSize;
+    LZ4_stream_t_internal *const streamPtr = &LZ4_stream->internal_donotuse;
+    const char *dictEnd =
+        streamPtr->dictSize
+            ? (const char *)streamPtr->dictionary + streamPtr->dictSize
+            : NULL;
 
-    DEBUGLOG(5, "LZ4_compress_fast_continue (inputSize=%i)", inputSize);
+    DEBUGLOG(5, "LZ4_compress_fast_continue (inputSize=%i, dictSize=%u)",
+             inputSize, streamPtr->dictSize);
 
-    if (streamPtr->initCheck)
-        return 0; /* Uninitialized structure detected */
-    LZ4_renormDictT(streamPtr, inputSize); /* avoid index overflow */
+    LZ4_renormDictT(streamPtr, inputSize); /* fix index overflow */
     if (acceleration < 1)
-        acceleration = ACCELERATION_DEFAULT;
+        acceleration = LZ4_ACCELERATION_DEFAULT;
+    if (acceleration > LZ4_ACCELERATION_MAX)
+        acceleration = LZ4_ACCELERATION_MAX;
 
     /* invalidate tiny dictionaries */
-    if ((streamPtr->dictSize - 1 < 4) /* intentional underflow */
-        && (dictEnd != (const BYTE *)source)) {
+    if ((streamPtr->dictSize < 4) /* tiny dictionary : not enough for a hash */
+        && (dictEnd != source)    /* not in prefix mode */
+        && (inputSize > 0) /* tolerance : don't lose history, in case next
+                              invocation would use prefix mode */
+        && (streamPtr->dictCtx == NULL) /* not usingDictCtx */
+    ) {
         DEBUGLOG(
             5,
             "LZ4_compress_fast_continue: dictSize(%u) at addr:%p is too small",
             streamPtr->dictSize, streamPtr->dictionary);
+        /* remove dictionary existence from history, to employ faster prefix
+         * mode */
         streamPtr->dictSize = 0;
         streamPtr->dictionary = (const BYTE *)source;
-        dictEnd = (const BYTE *)source;
+        dictEnd = source;
     }
 
     /* Check overlapping input/dictionary space */
     {
-        const BYTE *sourceEnd = (const BYTE *)source + inputSize;
-
-        if ((sourceEnd > streamPtr->dictionary) && (sourceEnd < dictEnd)) {
+        const char *const sourceEnd = source + inputSize;
+        if ((sourceEnd > (const char *)streamPtr->dictionary) &&
+            (sourceEnd < dictEnd)) {
             streamPtr->dictSize = (U32)(dictEnd - sourceEnd);
             if (streamPtr->dictSize > 64 KB)
                 streamPtr->dictSize = 64 KB;
             if (streamPtr->dictSize < 4)
                 streamPtr->dictSize = 0;
-            streamPtr->dictionary = dictEnd - streamPtr->dictSize;
+            streamPtr->dictionary = (const BYTE *)dictEnd - streamPtr->dictSize;
         }
     }
 
     /* prefix mode : source data follows dictionary */
-    if (dictEnd == (const BYTE *)source) {
+    if (dictEnd == source) {
         if ((streamPtr->dictSize < 64 KB) &&
             (streamPtr->dictSize < streamPtr->currentOffset))
             return LZ4_compress_generic(streamPtr, source, dest, inputSize,
@@ -1717,7 +2156,6 @@ int LZ4_compress_fast_continue(LZ4_stream_t *LZ4_stream, const char *source,
     /* external dictionary mode */
     {
         int result;
-
         if (streamPtr->dictCtx) {
             /* We depend here on the fact that dictCtx'es (produced by
              * LZ4_loadDict) guarantee that their tables contain no references
@@ -1730,7 +2168,7 @@ int LZ4_compress_fast_continue(LZ4_stream_t *LZ4_stream, const char *source,
                  * cost to copy the dictionary's tables into the active context,
                  * so that the compression loop is only looking into one table.
                  */
-                memcpy(streamPtr, streamPtr->dictCtx, sizeof(LZ4_stream_t));
+                LZ4_memcpy(streamPtr, streamPtr->dictCtx, sizeof(*streamPtr));
                 result = LZ4_compress_generic(
                     streamPtr, source, dest, inputSize, NULL, maxOutputSize,
                     limitedOutput, tableType, usingExtDict, noDictIssue,
@@ -1743,7 +2181,7 @@ int LZ4_compress_fast_continue(LZ4_stream_t *LZ4_stream, const char *source,
                     acceleration);
             }
         }
-        else {
+        else { /* no dictCtx attached : regular external dictionary */
             if ((streamPtr->dictSize < 64 KB) &&
                 (streamPtr->dictSize < streamPtr->currentOffset)) {
                 result = LZ4_compress_generic(
@@ -1768,7 +2206,7 @@ int LZ4_compress_fast_continue(LZ4_stream_t *LZ4_stream, const char *source,
 int LZ4_compress_forceExtDict(LZ4_stream_t *LZ4_dict, const char *source,
                               char *dest, int srcSize)
 {
-    LZ4_stream_t_internal *streamPtr = &LZ4_dict->internal_donotuse;
+    LZ4_stream_t_internal *const streamPtr = &LZ4_dict->internal_donotuse;
     int result;
 
     LZ4_renormDictT(streamPtr, srcSize);
@@ -1793,22 +2231,33 @@ int LZ4_compress_forceExtDict(LZ4_stream_t *LZ4_dict, const char *source,
 
 /*! LZ4_saveDict() :
  *  If previously compressed data block is not guaranteed to remain available at
- * its memory location, save it into a safer place (char* safeBuffer). Note :
- * you don't need to call LZ4_loadDict() afterwards, dictionary is immediately
- * usable, you can therefore call LZ4_compress_fast_continue(). Return : saved
- * dictionary size in bytes (necessarily <= dictSize), or 0 if error.
+ * its memory location, save it into a safer place (char* safeBuffer). Note : no
+ * need to call LZ4_loadDict() afterwards, dictionary is immediately usable, one
+ * can therefore call LZ4_compress_fast_continue() right after.
+ * @return : saved dictionary size in bytes (necessarily <= dictSize), or 0 if
+ * error.
  */
 int LZ4_saveDict(LZ4_stream_t *LZ4_dict, char *safeBuffer, int dictSize)
 {
     LZ4_stream_t_internal *const dict = &LZ4_dict->internal_donotuse;
-    const BYTE *const previousDictEnd = dict->dictionary + dict->dictSize;
 
-    if ((U32)dictSize > 64 KB)
-        dictSize = 64 KB; /* useless to define a dictionary > 64 KB */
-    if ((U32)dictSize > dict->dictSize)
-        dictSize = dict->dictSize;
+    DEBUGLOG(5, "LZ4_saveDict : dictSize=%i, safeBuffer=%p", dictSize,
+             safeBuffer);
 
-    memmove(safeBuffer, previousDictEnd - dictSize, dictSize);
+    if ((U32)dictSize > 64 KB) {
+        dictSize = 64 KB;
+    } /* useless to define a dictionary > 64 KB */
+    if ((U32)dictSize > dict->dictSize) {
+        dictSize = (int)dict->dictSize;
+    }
+
+    if (safeBuffer == NULL)
+        assert(dictSize == 0);
+    if (dictSize > 0) {
+        const BYTE *const previousDictEnd = dict->dictionary + dict->dictSize;
+        assert(dict->dictionary);
+        LZ4_memmove(safeBuffer, previousDictEnd - dictSize, (size_t)dictSize);
+    }
 
     dict->dictionary = (const BYTE *)safeBuffer;
     dict->dictSize = (U32)dictSize;
@@ -1816,316 +2265,797 @@ int LZ4_saveDict(LZ4_stream_t *LZ4_dict, char *safeBuffer, int dictSize)
     return dictSize;
 }
 
-/*-*****************************
+/*-*******************************
  *  Decompression functions
- *******************************/
+ ********************************/
+
+typedef enum { decode_full_block = 0, partial_decode = 1 } earlyEnd_directive;
+
+#undef MIN
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+
+/* variant for decompress_unsafe()
+ * does not know end of input
+ * presumes input is well formed
+ * note : will consume at least one byte */
+static size_t read_long_length_no_check(const BYTE **pp)
+{
+    size_t b, l = 0;
+    do {
+        b = **pp;
+        (*pp)++;
+        l += b;
+    } while (b == 255);
+    DEBUGLOG(6, "read_long_length_no_check: +length=%zu using %zu input bytes",
+             l, l / 255 + 1)
+    return l;
+}
+
+/* core decoder variant for LZ4_decompress_fast*()
+ * for legacy support only : these entry points are deprecated.
+ * - Presumes input is correctly formed (no defense vs malformed inputs)
+ * - Does not know input size (presume input buffer is "large enough")
+ * - Decompress a full block (only)
+ * @return : nb of bytes read from input, or -1 if an error is detected.
+ * Note : this variant is not optimized for speed, just for maintenance.
+ *        the goal is to remove support of decompress_fast*() variants by v2.0
+ **/
+LZ4_FORCE_INLINE int LZ4_decompress_unsafe_generic(
+    const BYTE *const istart, BYTE *const ostart, int decompressedSize,
+
+    size_t prefixSize,
+    const BYTE *const dictStart, /* only if dict==usingExtDict */
+    const size_t dictSize        /* note: =0 if dictStart==NULL */
+)
+{
+    const BYTE *ip = istart;
+    BYTE *op = (BYTE *)ostart;
+    BYTE *const oend = ostart + decompressedSize;
+    const BYTE *const prefixStart = ostart - prefixSize;
+
+    DEBUGLOG(5, "LZ4_decompress_unsafe_generic");
+    if (dictStart == NULL)
+        assert(dictSize == 0);
+
+    while (1) {
+        /* start new sequence */
+        unsigned token = *ip++;
+
+        /* literals */
+        {
+            size_t ll = token >> ML_BITS;
+            if (ll == 15) {
+                /* long literal length */
+                ll += read_long_length_no_check(&ip);
+            }
+            if ((size_t)(oend - op) < ll)
+                return -1;           /* output buffer overflow */
+            LZ4_memmove(op, ip, ll); /* support in-place decompression */
+            op += ll;
+            ip += ll;
+            if ((size_t)(oend - op) < MFLIMIT) {
+                if (op == oend)
+                    break; /* end of block */
+                DEBUGLOG(
+                    5,
+                    "invalid: literals end at distance %zi from end of block",
+                    oend - op);
+                /* incorrect end of block :
+                 * last match must start at least MFLIMIT==12 bytes before end
+                 * of output block */
+                return -1;
+            }
+        }
+
+        /* match */
+        {
+            size_t ml = token & 15;
+            size_t const offset = LZ4_readLE16(ip);
+            ip += 2;
+
+            if (ml == 15) {
+                /* long match length */
+                ml += read_long_length_no_check(&ip);
+            }
+            ml += MINMATCH;
+
+            if ((size_t)(oend - op) < ml)
+                return -1; /* output buffer overflow */
+
+            {
+                const BYTE *match = op - offset;
+
+                /* out of range */
+                if (offset > (size_t)(op - prefixStart) + dictSize) {
+                    DEBUGLOG(6, "offset out of range");
+                    return -1;
+                }
+
+                /* check special case : extDict */
+                if (offset > (size_t)(op - prefixStart)) {
+                    /* extDict scenario */
+                    const BYTE *const dictEnd = dictStart + dictSize;
+                    const BYTE *extMatch =
+                        dictEnd - (offset - (size_t)(op - prefixStart));
+                    size_t const extml = (size_t)(dictEnd - extMatch);
+                    if (extml > ml) {
+                        /* match entirely within extDict */
+                        LZ4_memmove(op, extMatch, ml);
+                        op += ml;
+                        ml = 0;
+                    }
+                    else {
+                        /* match split between extDict & prefix */
+                        LZ4_memmove(op, extMatch, extml);
+                        op += extml;
+                        ml -= extml;
+                    }
+                    match = prefixStart;
+                }
+
+                /* match copy - slow variant, supporting overlap copy */
+                {
+                    size_t u;
+                    for (u = 0; u < ml; u++) {
+                        op[u] = match[u];
+                    }
+                }
+            }
+            op += ml;
+            if ((size_t)(oend - op) < LASTLITERALS) {
+                DEBUGLOG(
+                    5, "invalid: match ends at distance %zi from end of block",
+                    oend - op);
+                /* incorrect end of block :
+                 * last match must stop at least LASTLITERALS==5 bytes before
+                 * end of output block */
+                return -1;
+            }
+        } /* match */
+    } /* main loop */
+    return (int)(ip - istart);
+}
+
+/* Read the variable-length literal or match length.
+ *
+ * @ip : in/out pointer to the input position; advanced past the bytes read.
+ * @ilimit : position after which, if the length is not yet decoded, the
+ *           input is necessarily corrupted.
+ * @initial_check : if non-zero, check *ip >= ilimit before the first read,
+ *                  and return rvl_error if so.
+ * @return : decoded length, or rvl_error (read limit reached or overflow).
+ **/
+typedef size_t Rvl_t;
+static const Rvl_t rvl_error = (Rvl_t)(-1);
+LZ4_FORCE_INLINE Rvl_t read_variable_length(const BYTE **ip, const BYTE *ilimit,
+                                            int initial_check)
+{
+    Rvl_t s, length = 0;
+    assert(ip != NULL);
+    assert(*ip != NULL);
+    assert(ilimit != NULL);
+    if (initial_check && unlikely((*ip) >= ilimit)) { /* read limit reached */
+        return rvl_error;
+    }
+    s = **ip;
+    (*ip)++;
+    length += s;
+    if (unlikely((*ip) > ilimit)) { /* read limit reached */
+        return rvl_error;
+    }
+    /* accumulator overflow detection (32-bit mode only) */
+    if ((sizeof(length) < 8) && unlikely(length > ((Rvl_t)(-1) / 2))) {
+        return rvl_error;
+    }
+    if (likely(s != 255))
+        return length;
+    do {
+        s = **ip;
+        (*ip)++;
+        length += s;
+        if (unlikely((*ip) > ilimit)) { /* read limit reached */
+            return rvl_error;
+        }
+        /* accumulator overflow detection (32-bit mode only) */
+        if ((sizeof(length) < 8) && unlikely(length > ((Rvl_t)(-1) / 2))) {
+            return rvl_error;
+        }
+    } while (s == 255);
+
+    return length;
+}
+
 /*! LZ4_decompress_generic() :
  *  This generic decompression function covers all use cases.
  *  It shall be instantiated several times, using different sets of directives.
  *  Note that it is important for performance that this function really get
  * inlined, in order to remove useless branches during compilation optimization.
  */
-LZ4_FORCE_O2_GCC_PPC64LE LZ4_FORCE_INLINE int LZ4_decompress_generic(
+LZ4_FORCE_INLINE int LZ4_decompress_generic(
     const char *const src, char *const dst, int srcSize,
-    int outputSize,              /* If endOnInput==endOnInputSize, this value is
-                                    `dstCapacity` */
-    int endOnInput,              /* endOnOutputSize, endOnInputSize */
-    int partialDecoding,         /* full, partial */
-    int targetOutputSize,        /* only used if partialDecoding==partial */
-    int dict,                    /* noDict, withPrefix64k, usingExtDict */
+    int outputSize, /* capacity of the `dst` buffer
+                       (a.k.a. `dstCapacity`) */
+
+    earlyEnd_directive partialDecoding, /* full, partial */
+    dict_directive dict,         /* noDict, withPrefix64k, usingExtDict */
     const BYTE *const lowPrefix, /* always <= dst, == dst when no prefix */
     const BYTE *const dictStart, /* only if dict==usingExtDict */
     const size_t dictSize        /* note : = 0 if noDict */
 )
 {
-    const BYTE *ip = (const BYTE *)src;
-    const BYTE *const iend = ip + srcSize;
-
-    BYTE *op = (BYTE *)dst;
-    BYTE *const oend = op + outputSize;
-    BYTE *cpy;
-    BYTE *oexit = op + targetOutputSize;
-
-    const BYTE *const dictEnd = (const BYTE *)dictStart + dictSize;
-    const unsigned inc32table[8] = {0, 1, 2, 1, 0, 4, 4, 4};
-    const int dec64table[8] = {0, 0, 0, -1, -4, 1, 2, 3};
-
-    const int safeDecode = (endOnInput == endOnInputSize);
-    const int checkOffset = ((safeDecode) && (dictSize < (int)(64 KB)));
-
-    /* Set up the "end" pointers for the shortcut. */
-    const BYTE *const shortiend =
-        iend - (endOnInput ? 14 : 8) /*maxLL */ - 2 /*offset */;
-    const BYTE *const shortoend =
-        oend - (endOnInput ? 14 : 8) /*maxLL */ - 18 /*maxML */;
-
-    DEBUGLOG(5, "LZ4_decompress_generic (srcSize:%i)", srcSize);
-
-    /* Special cases */
-    if ((partialDecoding) && (oexit > oend - MFLIMIT))
-        oexit =
-            oend -
-            MFLIMIT; /* targetOutputSize too high => just decode everything */
-    if ((endOnInput) && (unlikely(outputSize == 0)))
-        return ((srcSize == 1) && (*ip == 0)) ? 0
-                                              : -1; /* Empty output buffer */
-    if ((!endOnInput) && (unlikely(outputSize == 0)))
-        return (*ip == 0 ? 1 : -1);
-    if ((endOnInput) && unlikely(srcSize == 0))
+    if ((src == NULL) || (outputSize < 0)) {
         return -1;
+    }
+
+    {
+        const BYTE *ip = (const BYTE *)src;
+        const BYTE *const iend = ip + srcSize;
+
+        BYTE *op = (BYTE *)dst;
+        BYTE *const oend = op + outputSize;
+        BYTE *cpy;
+
+        const BYTE *const dictEnd =
+            (dictStart == NULL) ? NULL : dictStart + dictSize;
+
+        const int checkOffset = (dictSize < (int)(64 KB));
+
+        /* Set up the "end" pointers for the shortcut. */
+        const BYTE *const shortiend = iend - 14 /*maxLL*/ - 2 /*offset*/;
+        const BYTE *const shortoend = oend - 14 /*maxLL*/ - 18 /*maxML*/;
 
-    /* Main Loop : decode sequences */
-    while (1) {
         const BYTE *match;
         size_t offset;
+        unsigned token;
+        size_t length;
 
-        unsigned const token = *ip++;
-        size_t length = token >> ML_BITS; /* literal length */
+        DEBUGLOG(5, "LZ4_decompress_generic (srcSize:%i, dstSize:%i)", srcSize,
+                 outputSize);
 
-        assert(!endOnInput || ip <= iend); /* ip < iend before the increment */
+        /* Special cases */
+        assert(lowPrefix <= op);
+        if (unlikely(outputSize == 0)) {
+            /* Empty output buffer */
+            if (partialDecoding)
+                return 0;
+            return ((srcSize == 1) && (*ip == 0)) ? 0 : -1;
+        }
+        if (unlikely(srcSize == 0)) {
+            return -1;
+        }
 
-        /* A two-stage shortcut for the most common case:
-         * 1) If the literal length is 0..14, and there is enough space,
-         * enter the shortcut and copy 16 bytes on behalf of the literals
-         * (in the fast mode, only 8 bytes can be safely copied this way).
-         * 2) Further if the match length is 4..18, copy 18 bytes in a similar
-         * manner; but we ensure that there's enough space in the output for
-         * those 18 bytes earlier, upon entering the shortcut (in other words,
-         * there is a combined check for both stages).
-         */
-        if ((endOnInput ? length != RUN_MASK : length <= 8)
-            /* strictly "less than" on input, to re-enter the loop with at least
-               one byte */
-            && likely((endOnInput ? ip < shortiend : 1) & (op <= shortoend))) {
-            /* Copy the literals */
-            memcpy(op, ip, endOnInput ? 16 : 8);
-            op += length;
-            ip += length;
-
-            /* The second stage: prepare for match copying, decode full info.
-             * If it doesn't work out, the info won't be wasted. */
-            length = token & ML_MASK; /* match length */
+        /* LZ4_FAST_DEC_LOOP:
+         * designed for modern OoO performance cpus,
+         * where copying reliably 32-bytes is preferable to an unpredictable
+         * branch. note : fast loop may show a regression for some client arm
+         * chips. */
+#if LZ4_FAST_DEC_LOOP
+        if ((oend - op) < FASTLOOP_SAFE_DISTANCE) {
+            DEBUGLOG(6, "move to safe decode loop");
+            goto safe_decode;
+        }
+
+        /* Fast loop : decode sequences as long as output <
+         * oend-FASTLOOP_SAFE_DISTANCE */
+        DEBUGLOG(6, "using fast decode loop");
+        while (1) {
+            /* Main fastloop assertion: We can always wildcopy
+             * FASTLOOP_SAFE_DISTANCE */
+            assert(oend - op >= FASTLOOP_SAFE_DISTANCE);
+            assert(ip < iend);
+            token = *ip++;
+            length = token >> ML_BITS; /* literal length */
+            DEBUGLOG(7, "blockPos%6u: litLength token = %u",
+                     (unsigned)(op - (BYTE *)dst), (unsigned)length);
+
+            /* decode literal length */
+            if (length == RUN_MASK) {
+                size_t const addl =
+                    read_variable_length(&ip, iend - RUN_MASK, 1);
+                if (addl == rvl_error) {
+                    DEBUGLOG(6, "error reading long literal length");
+                    goto _output_error;
+                }
+                length += addl;
+                if (unlikely((uptrval)(op) + length < (uptrval)(op))) {
+                    goto _output_error;
+                } /* overflow detection */
+                if (unlikely((uptrval)(ip) + length < (uptrval)(ip))) {
+                    goto _output_error;
+                } /* overflow detection */
+
+                /* copy literals */
+                LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH);
+                if ((op + length > oend - 32) || (ip + length > iend - 32)) {
+                    goto safe_literal_copy;
+                }
+                LZ4_wildCopy32(op, ip, op + length);
+                ip += length;
+                op += length;
+            }
+            else if (ip <= iend - (16 + 1 /*max lit + offset + nextToken*/)) {
+                /* We don't need to check oend, since we check it once for each
+                 * loop below */
+                DEBUGLOG(7, "copy %u bytes in a 16-bytes stripe",
+                         (unsigned)length);
+                /* Literal length is <= 14 here, but compilers hopefully
+                 * optimize better when copying a register-sized amount */
+                LZ4_memcpy(op, ip, 16);
+                ip += length;
+                op += length;
+            }
+            else {
+                goto safe_literal_copy;
+            }
+
+            /* get offset */
             offset = LZ4_readLE16(ip);
             ip += 2;
+            DEBUGLOG(6, "blockPos%6u: offset = %u",
+                     (unsigned)(op - (BYTE *)dst), (unsigned)offset);
             match = op - offset;
+            assert(match <= op); /* overflow check */
+
+            /* get matchlength */
+            length = token & ML_MASK;
+            DEBUGLOG(7, "  match length token = %u (len==%u)", (unsigned)length,
+                     (unsigned)length + MINMATCH);
+
+            if (length == ML_MASK) {
+                size_t const addl =
+                    read_variable_length(&ip, iend - LASTLITERALS + 1, 0);
+                if (addl == rvl_error) {
+                    DEBUGLOG(5, "error reading long match length");
+                    goto _output_error;
+                }
+                length += addl;
+                length += MINMATCH;
+                DEBUGLOG(7, "  long match length == %u", (unsigned)length);
+                if (unlikely((uptrval)(op) + length < (uptrval)op)) {
+                    goto _output_error;
+                } /* overflow detection */
+                if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) {
+                    goto safe_match_copy;
+                }
+            }
+            else {
+                length += MINMATCH;
+                if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) {
+                    DEBUGLOG(7, "moving to safe_match_copy (ml==%u)",
+                             (unsigned)length);
+                    goto safe_match_copy;
+                }
 
-            /* Do not deal with overlapping matches. */
-            if ((length != ML_MASK) && (offset >= 8) &&
-                (dict == withPrefix64k || match >= lowPrefix)) {
-                /* Copy the match. */
-                memcpy(op + 0, match + 0, 8);
-                memcpy(op + 8, match + 8, 8);
-                memcpy(op + 16, match + 16, 2);
-                op += length + MINMATCH;
-                /* Both stages worked, load the next token. */
-                continue;
+                /* Fastpath check: skip LZ4_wildCopy32 when true */
+                if ((dict == withPrefix64k) || (match >= lowPrefix)) {
+                    if (offset >= 8) {
+                        assert(match >= lowPrefix);
+                        assert(match <= op);
+                        assert(op + 18 <= oend);
+
+                        LZ4_memcpy(op, match, 8);
+                        LZ4_memcpy(op + 8, match + 8, 8);
+                        LZ4_memcpy(op + 16, match + 16, 2);
+                        op += length;
+                        continue;
+                    }
+                }
             }
 
-            /* The second stage didn't work out, but the info is ready.
-             * Propel it right to the point of match copying. */
-            goto _copy_match;
-        }
+            if (checkOffset && (unlikely(match + dictSize < lowPrefix))) {
+                DEBUGLOG(5, "Error : pos=%zi, offset=%zi => outside buffers",
+                         op - lowPrefix, op - match);
+                goto _output_error;
+            }
+            /* match starting within external dictionary */
+            if ((dict == usingExtDict) && (match < lowPrefix)) {
+                assert(dictEnd != NULL);
+                if (unlikely(op + length > oend - LASTLITERALS)) {
+                    if (partialDecoding) {
+                        DEBUGLOG(7, "partialDecoding: dictionary match, close "
+                                    "to dstEnd");
+                        length = MIN(length, (size_t)(oend - op));
+                    }
+                    else {
+                        DEBUGLOG(6, "end-of-block condition violated")
+                        goto _output_error;
+                    }
+                }
 
-        /* decode literal length */
-        if (length == RUN_MASK) {
-            unsigned s;
+                if (length <= (size_t)(lowPrefix - match)) {
+                    /* match fits entirely within external dictionary : just
+                     * copy */
+                    LZ4_memmove(op, dictEnd - (lowPrefix - match), length);
+                    op += length;
+                }
+                else {
+                    /* match stretches into both external dictionary and current
+                     * block */
+                    size_t const copySize = (size_t)(lowPrefix - match);
+                    size_t const restSize = length - copySize;
+                    LZ4_memcpy(op, dictEnd - copySize, copySize);
+                    op += copySize;
+                    if (restSize >
+                        (size_t)(op - lowPrefix)) { /* overlap copy */
+                        BYTE *const endOfMatch = op + restSize;
+                        const BYTE *copyFrom = lowPrefix;
+                        while (op < endOfMatch) {
+                            *op++ = *copyFrom++;
+                        }
+                    }
+                    else {
+                        LZ4_memcpy(op, lowPrefix, restSize);
+                        op += restSize;
+                    }
+                }
+                continue;
+            }
 
-            if (unlikely(endOnInput ? ip >= iend - RUN_MASK : 0))
-                goto _output_error; /* overflow detection */
-            do {
-                s = *ip++;
-                length += s;
-            } while (likely(endOnInput ? ip < iend - RUN_MASK : 1) &
-                     (s == 255));
-            if ((safeDecode) &&
-                unlikely((uptrval)(op) + length < (uptrval)(op)))
-                goto _output_error; /* overflow detection */
-            if ((safeDecode) &&
-                unlikely((uptrval)(ip) + length < (uptrval)(ip)))
-                goto _output_error; /* overflow detection */
-        }
+            /* copy match within block */
+            cpy = op + length;
 
-        /* copy literals */
-        cpy = op + length;
-        if (((endOnInput) &&
-             ((cpy > (partialDecoding ? oexit : oend - MFLIMIT)) ||
-              (ip + length > iend - (2 + 1 + LASTLITERALS)))) ||
-            ((!endOnInput) && (cpy > oend - WILDCOPYLENGTH))) {
-            if (partialDecoding) {
-                if (cpy > oend)
-                    goto _output_error; /* Error : write attempt beyond end of
-                                           output buffer */
-                if ((endOnInput) && (ip + length > iend))
-                    goto _output_error; /* Error : read attempt beyond end of
-                                           input buffer */
+            assert((op <= oend) && (oend - op >= 32));
+            if (unlikely(offset < 16)) {
+                LZ4_memcpy_using_offset(op, match, cpy, offset);
             }
             else {
-                if ((!endOnInput) && (cpy != oend))
-                    goto _output_error; /* Error : block decoding must stop
-                                           exactly there */
-                if ((endOnInput) && ((ip + length != iend) || (cpy > oend)))
-                    goto _output_error; /* Error : input must be consumed */
+                LZ4_wildCopy32(op, match, cpy);
             }
-            memcpy(op, ip, length);
-            ip += length;
-            op += length;
-            break; /* Necessarily EOF, due to parsing restrictions */
+
+            op = cpy; /* wildcopy correction */
         }
-        LZ4_wildCopy(op, ip, cpy);
-        ip += length;
-        op = cpy;
+    safe_decode:
+#endif
 
-        /* get offset */
-        offset = LZ4_readLE16(ip);
-        ip += 2;
-        match = op - offset;
+        /* Main Loop : decode remaining sequences where output <
+         * FASTLOOP_SAFE_DISTANCE */
+        DEBUGLOG(6, "using safe decode loop");
+        while (1) {
+            assert(ip < iend);
+            token = *ip++;
+            length = token >> ML_BITS; /* literal length */
+            DEBUGLOG(7, "blockPos%6u: litLength token = %u",
+                     (unsigned)(op - (BYTE *)dst), (unsigned)length);
+
+            /* A two-stage shortcut for the most common case:
+             * 1) If the literal length is 0..14, and there is enough space,
+             * enter the shortcut and copy 16 bytes on behalf of the literals
+             * (shortiend/shortoend guarantee room for this 16-byte copy).
+             * 2) Further if the match length is 4..18, copy 18 bytes in a
+             * similar manner; but we ensure that there's enough space in the
+             * output for those 18 bytes earlier, upon entering the shortcut (in
+             * other words, there is a combined check for both stages).
+             */
+            if ((length != RUN_MASK)
+                /* strictly "less than" on input, to re-enter the loop with at
+                   least one byte */
+                && likely((ip < shortiend) & (op <= shortoend))) {
+                /* Copy the literals */
+                LZ4_memcpy(op, ip, 16);
+                op += length;
+                ip += length;
+
+                /* The second stage: prepare for match copying, decode full
+                 * info. If it doesn't work out, the info won't be wasted. */
+                length = token & ML_MASK; /* match length */
+                DEBUGLOG(7, "blockPos%6u: matchLength token = %u (len=%u)",
+                         (unsigned)(op - (BYTE *)dst), (unsigned)length,
+                         (unsigned)length + 4);
+                offset = LZ4_readLE16(ip);
+                ip += 2;
+                match = op - offset;
+                assert(match <= op); /* check overflow */
+
+                /* Do not deal with overlapping matches. */
+                if ((length != ML_MASK) && (offset >= 8) &&
+                    (dict == withPrefix64k || match >= lowPrefix)) {
+                    /* Copy the match. */
+                    LZ4_memcpy(op + 0, match + 0, 8);
+                    LZ4_memcpy(op + 8, match + 8, 8);
+                    LZ4_memcpy(op + 16, match + 16, 2);
+                    op += length + MINMATCH;
+                    /* Both stages worked, load the next token. */
+                    continue;
+                }
 
-        /* get matchlength */
-        length = token & ML_MASK;
+                /* The second stage didn't work out, but the info is ready.
+                 * Propel it right to the point of match copying. */
+                goto _copy_match;
+            }
 
-    _copy_match:
-        if ((checkOffset) && (unlikely(match + dictSize < lowPrefix)))
-            goto _output_error; /* Error : offset outside buffers */
-        LZ4_write32(
-            op,
-            (U32)
-                offset); /* costs ~1%; silence an msan warning when offset==0 */
+            /* decode literal length */
+            if (length == RUN_MASK) {
+                size_t const addl =
+                    read_variable_length(&ip, iend - RUN_MASK, 1);
+                if (addl == rvl_error) {
+                    goto _output_error;
+                }
+                length += addl;
+                if (unlikely((uptrval)(op) + length < (uptrval)(op))) {
+                    goto _output_error;
+                } /* overflow detection */
+                if (unlikely((uptrval)(ip) + length < (uptrval)(ip))) {
+                    goto _output_error;
+                } /* overflow detection */
+            }
 
-        if (length == ML_MASK) {
-            unsigned s;
+#if LZ4_FAST_DEC_LOOP
+        safe_literal_copy:
+#endif
+            /* copy literals */
+            cpy = op + length;
+
+            LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH);
+            if ((cpy > oend - MFLIMIT) ||
+                (ip + length > iend - (2 + 1 + LASTLITERALS))) {
+                /* We've either hit the input parsing restriction or the output
+                 * parsing restriction. In the normal scenario, decoding a full
+                 * block, it must be the last sequence, otherwise it's an error
+                 * (invalid input or dimensions). In partialDecoding scenario,
+                 * it's necessary to ensure there is no buffer overflow.
+                 */
+                if (partialDecoding) {
+                    /* Since we are partial decoding we may be in this block
+                     * because of the output parsing restriction, which is not
+                     * valid since the output buffer is allowed to be
+                     * undersized.
+                     */
+                    DEBUGLOG(7, "partialDecoding: copying literals, close to "
+                                "input or output end")
+                    DEBUGLOG(7, "partialDecoding: literal length = %u",
+                             (unsigned)length);
+                    DEBUGLOG(
+                        7, "partialDecoding: remaining space in dstBuffer : %i",
+                        (int)(oend - op));
+                    DEBUGLOG(
+                        7, "partialDecoding: remaining space in srcBuffer : %i",
+                        (int)(iend - ip));
+                    /* Finishing in the middle of a literals segment,
+                     * due to lack of input.
+                     */
+                    if (ip + length > iend) {
+                        length = (size_t)(iend - ip);
+                        cpy = op + length;
+                    }
+                    /* Finishing in the middle of a literals segment,
+                     * due to lack of output space.
+                     */
+                    if (cpy > oend) {
+                        cpy = oend;
+                        assert(op <= oend);
+                        length = (size_t)(oend - op);
+                    }
+                }
+                else {
+                    /* We must be on the last sequence (or invalid) because of
+                     * the parsing limitations so check that we exactly consume
+                     * the input and don't overrun the output buffer.
+                     */
+                    if ((ip + length != iend) || (cpy > oend)) {
+                        DEBUGLOG(5, "should have been last run of literals")
+                        DEBUGLOG(5, "ip(%p) + length(%i) = %p != iend (%p)", ip,
+                                 (int)length, ip + length, iend);
+                        DEBUGLOG(5, "or cpy(%p) > (oend-MFLIMIT)(%p)", cpy,
+                                 oend - MFLIMIT);
+                        DEBUGLOG(5,
+                                 "after writing %u bytes / %i bytes available",
+                                 (unsigned)(op - (BYTE *)dst), outputSize);
+                        goto _output_error;
+                    }
+                }
+                LZ4_memmove(op, ip,
+                            length); /* supports overlapping memory regions, for
+                                        in-place decompression scenarios */
+                ip += length;
+                op += length;
+                /* Necessarily EOF when !partialDecoding.
+                 * When partialDecoding, it is EOF if we've either
+                 * filled the output buffer or
+                 * can't proceed with reading an offset for following match.
+                 */
+                if (!partialDecoding || (cpy == oend) || (ip >= (iend - 2))) {
+                    break;
+                }
+            }
+            else {
+                LZ4_wildCopy8(op, ip,
+                              cpy); /* can overwrite up to 8 bytes beyond cpy */
+                ip += length;
+                op = cpy;
+            }
 
-            do {
-                s = *ip++;
-                if ((endOnInput) && (ip > iend - LASTLITERALS))
+            /* get offset */
+            offset = LZ4_readLE16(ip);
+            ip += 2;
+            match = op - offset;
+
+            /* get matchlength */
+            length = token & ML_MASK;
+            DEBUGLOG(7, "blockPos%6u: matchLength token = %u",
+                     (unsigned)(op - (BYTE *)dst), (unsigned)length);
+
+        _copy_match:
+            if (length == ML_MASK) {
+                size_t const addl =
+                    read_variable_length(&ip, iend - LASTLITERALS + 1, 0);
+                if (addl == rvl_error) {
                     goto _output_error;
-                length += s;
-            } while (s == 255);
-            if ((safeDecode) && unlikely((uptrval)(op) + length < (uptrval)op))
-                goto _output_error; /* overflow detection */
-        }
-        length += MINMATCH;
+                }
+                length += addl;
+                if (unlikely((uptrval)(op) + length < (uptrval)op))
+                    goto _output_error; /* overflow detection */
+            }
+            length += MINMATCH;
 
-        /* check external dictionary */
-        if ((dict == usingExtDict) && (match < lowPrefix)) {
-            if (unlikely(op + length > oend - LASTLITERALS))
-                goto _output_error; /* doesn't respect parsing restriction */
+#if LZ4_FAST_DEC_LOOP
+        safe_match_copy:
+#endif
+            if ((checkOffset) && (unlikely(match + dictSize < lowPrefix)))
+                goto _output_error; /* Error : offset outside buffers */
+            /* match starting within external dictionary */
+            if ((dict == usingExtDict) && (match < lowPrefix)) {
+                assert(dictEnd != NULL);
+                if (unlikely(op + length > oend - LASTLITERALS)) {
+                    if (partialDecoding)
+                        length = MIN(length, (size_t)(oend - op));
+                    else
+                        goto _output_error; /* doesn't respect parsing
+                                               restriction */
+                }
 
-            if (length <= (size_t)(lowPrefix - match)) {
-                /* match can be copied as a single segment from external
-                 * dictionary */
-                memmove(op, dictEnd - (lowPrefix - match), length);
-                op += length;
+                if (length <= (size_t)(lowPrefix - match)) {
+                    /* match fits entirely within external dictionary : just
+                     * copy */
+                    LZ4_memmove(op, dictEnd - (lowPrefix - match), length);
+                    op += length;
+                }
+                else {
+                    /* match stretches into both external dictionary and current
+                     * block */
+                    size_t const copySize = (size_t)(lowPrefix - match);
+                    size_t const restSize = length - copySize;
+                    LZ4_memcpy(op, dictEnd - copySize, copySize);
+                    op += copySize;
+                    if (restSize >
+                        (size_t)(op - lowPrefix)) { /* overlap copy */
+                        BYTE *const endOfMatch = op + restSize;
+                        const BYTE *copyFrom = lowPrefix;
+                        while (op < endOfMatch)
+                            *op++ = *copyFrom++;
+                    }
+                    else {
+                        LZ4_memcpy(op, lowPrefix, restSize);
+                        op += restSize;
+                    }
+                }
+                continue;
             }
-            else {
-                /* match encompass external dictionary and current block */
-                size_t const copySize = (size_t)(lowPrefix - match);
-                size_t const restSize = length - copySize;
-
-                memcpy(op, dictEnd - copySize, copySize);
-                op += copySize;
-                if (restSize > (size_t)(op - lowPrefix)) { /* overlap copy */
-                    BYTE *const endOfMatch = op + restSize;
-                    const BYTE *copyFrom = lowPrefix;
-
-                    while (op < endOfMatch)
-                        *op++ = *copyFrom++;
+            assert(match >= lowPrefix);
+
+            /* copy match within block */
+            cpy = op + length;
+
+            /* partialDecoding : may end anywhere within the block */
+            assert(op <= oend);
+            if (partialDecoding && (cpy > oend - MATCH_SAFEGUARD_DISTANCE)) {
+                size_t const mlen = MIN(length, (size_t)(oend - op));
+                const BYTE *const matchEnd = match + mlen;
+                BYTE *const copyEnd = op + mlen;
+                if (matchEnd > op) { /* overlap copy */
+                    while (op < copyEnd) {
+                        *op++ = *match++;
+                    }
                 }
                 else {
-                    memcpy(op, lowPrefix, restSize);
-                    op += restSize;
+                    LZ4_memcpy(op, match, mlen);
                 }
+                op = copyEnd;
+                if (op == oend) {
+                    break;
+                }
+                continue;
             }
-            continue;
-        }
 
-        /* copy match within block */
-        cpy = op + length;
-        if (unlikely(offset < 8)) {
-            op[0] = match[0];
-            op[1] = match[1];
-            op[2] = match[2];
-            op[3] = match[3];
-            match += inc32table[offset];
-            memcpy(op + 4, match, 4);
-            match -= dec64table[offset];
-        }
-        else {
-            memcpy(op, match, 8);
-            match += 8;
-        }
-        op += 8;
-
-        if (unlikely(cpy > oend - 12)) {
-            BYTE *const oCopyLimit = oend - (WILDCOPYLENGTH - 1);
-
-            if (cpy > oend - LASTLITERALS)
-                goto _output_error; /* Error : last LASTLITERALS bytes must be
-                                       literals (uncompressed) */
-            if (op < oCopyLimit) {
-                LZ4_wildCopy(op, match, oCopyLimit);
-                match += oCopyLimit - op;
-                op = oCopyLimit;
+            if (unlikely(offset < 8)) {
+                LZ4_write32(op, 0); /* silence msan warning when offset==0 */
+                op[0] = match[0];
+                op[1] = match[1];
+                op[2] = match[2];
+                op[3] = match[3];
+                match += inc32table[offset];
+                LZ4_memcpy(op + 4, match, 4);
+                match -= dec64table[offset];
             }
-            while (op < cpy)
-                *op++ = *match++;
-        }
-        else {
-            memcpy(op, match, 8);
-            if (length > 16)
-                LZ4_wildCopy(op + 8, match + 8, cpy);
+            else {
+                LZ4_memcpy(op, match, 8);
+                match += 8;
+            }
+            op += 8;
+
+            if (unlikely(cpy > oend - MATCH_SAFEGUARD_DISTANCE)) {
+                BYTE *const oCopyLimit = oend - (WILDCOPYLENGTH - 1);
+                if (cpy > oend - LASTLITERALS) {
+                    goto _output_error;
+                } /* Error : last LASTLITERALS bytes must be literals
+                     (uncompressed) */
+                if (op < oCopyLimit) {
+                    LZ4_wildCopy8(op, match, oCopyLimit);
+                    match += oCopyLimit - op;
+                    op = oCopyLimit;
+                }
+                while (op < cpy) {
+                    *op++ = *match++;
+                }
+            }
+            else {
+                LZ4_memcpy(op, match, 8);
+                if (length > 16) {
+                    LZ4_wildCopy8(op + 8, match + 8, cpy);
+                }
+            }
+            op = cpy; /* wildcopy correction */
         }
-        op = cpy; /* correction */
-    }
 
-    /* end of decoding */
-    if (endOnInput)
+        /* end of decoding */
+        DEBUGLOG(5, "decoded %i bytes", (int)(((char *)op) - dst));
         return (int)(((char *)op) - dst); /* Nb of output bytes decoded */
-    else
-        return (int)(((const char *)ip) - src); /* Nb of input bytes read */
 
-    /* Overflow error detected */
-_output_error:
-    return (int)(-(((const char *)ip) - src)) - 1;
+        /* Overflow error detected */
+    _output_error:
+        return (int)(-(((const char *)ip) - src)) - 1;
+    }
 }
 
 /*===== Instantiate the API decoding functions. =====*/
 
-LZ4_FORCE_O2_GCC_PPC64LE
+LZ4_FORCE_O2
 int LZ4_decompress_safe(const char *source, char *dest, int compressedSize,
                         int maxDecompressedSize)
 {
     return LZ4_decompress_generic(source, dest, compressedSize,
-                                  maxDecompressedSize, endOnInputSize, full, 0,
+                                  maxDecompressedSize, decode_full_block,
                                   noDict, (BYTE *)dest, NULL, 0);
 }
 
-LZ4_FORCE_O2_GCC_PPC64LE
-int LZ4_decompress_safe_partial(const char *source, char *dest,
-                                int compressedSize, int targetOutputSize,
-                                int maxDecompressedSize)
+LZ4_FORCE_O2
+int LZ4_decompress_safe_partial(const char *src, char *dst, int compressedSize,
+                                int targetOutputSize, int dstCapacity)
 {
-    return LZ4_decompress_generic(
-        source, dest, compressedSize, maxDecompressedSize, endOnInputSize,
-        partial, targetOutputSize, noDict, (BYTE *)dest, NULL, 0);
+    dstCapacity = MIN(targetOutputSize, dstCapacity);
+    return LZ4_decompress_generic(src, dst, compressedSize, dstCapacity,
+                                  partial_decode, noDict, (BYTE *)dst, NULL, 0);
 }
 
-LZ4_FORCE_O2_GCC_PPC64LE
+LZ4_FORCE_O2
 int LZ4_decompress_fast(const char *source, char *dest, int originalSize)
 {
-    return LZ4_decompress_generic(source, dest, 0, originalSize,
-                                  endOnOutputSize, full, 0, withPrefix64k,
-                                  (BYTE *)dest - 64 KB, NULL, 0);
+    DEBUGLOG(5, "LZ4_decompress_fast");
+    return LZ4_decompress_unsafe_generic((const BYTE *)source, (BYTE *)dest,
+                                         originalSize, 0, NULL, 0);
 }
 
 /*===== Instantiate a few more decoding cases, used more than once. =====*/
 
-LZ4_FORCE_O2_GCC_PPC64LE /* Exported, an obsolete API function. */
+LZ4_FORCE_O2 /* Exported, an obsolete API function. */
     int
     LZ4_decompress_safe_withPrefix64k(const char *source, char *dest,
                                       int compressedSize, int maxOutputSize)
 {
     return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
-                                  endOnInputSize, full, 0, withPrefix64k,
+                                  decode_full_block, withPrefix64k,
+                                  (BYTE *)dest - 64 KB, NULL, 0);
+}
+
+LZ4_FORCE_O2
+static int LZ4_decompress_safe_partial_withPrefix64k(const char *source,
+                                                     char *dest,
+                                                     int compressedSize,
+                                                     int targetOutputSize,
+                                                     int dstCapacity)
+{
+    dstCapacity = MIN(targetOutputSize, dstCapacity);
+    return LZ4_decompress_generic(source, dest, compressedSize, dstCapacity,
+                                  partial_decode, withPrefix64k,
                                   (BYTE *)dest - 64 KB, NULL, 0);
 }
 
@@ -2133,43 +3063,62 @@ LZ4_FORCE_O2_GCC_PPC64LE /* Exported, an obsolete API function. */
 int LZ4_decompress_fast_withPrefix64k(const char *source, char *dest,
                                       int originalSize)
 {
-    /* LZ4_decompress_fast doesn't validate match offsets,
-     * and thus serves well with any prefixed dictionary. */
-    return LZ4_decompress_fast(source, dest, originalSize);
+    return LZ4_decompress_unsafe_generic((const BYTE *)source, (BYTE *)dest,
+                                         originalSize, 64 KB, NULL, 0);
 }
 
-LZ4_FORCE_O2_GCC_PPC64LE
+LZ4_FORCE_O2
 static int LZ4_decompress_safe_withSmallPrefix(const char *source, char *dest,
                                                int compressedSize,
                                                int maxOutputSize,
                                                size_t prefixSize)
 {
     return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
-                                  endOnInputSize, full, 0, noDict,
+                                  decode_full_block, noDict,
                                   (BYTE *)dest - prefixSize, NULL, 0);
 }
 
-LZ4_FORCE_O2_GCC_PPC64LE /* Exported under another name, for tests/fullbench.c
-                          */
-#define LZ4_decompress_safe_extDict LZ4_decompress_safe_forceExtDict
-    int
-    LZ4_decompress_safe_extDict(const char *source, char *dest,
-                                int compressedSize, int maxOutputSize,
-                                const void *dictStart, size_t dictSize)
+LZ4_FORCE_O2
+static int LZ4_decompress_safe_partial_withSmallPrefix(
+    const char *source, char *dest, int compressedSize, int targetOutputSize,
+    int dstCapacity, size_t prefixSize)
+{
+    dstCapacity = MIN(targetOutputSize, dstCapacity);
+    return LZ4_decompress_generic(source, dest, compressedSize, dstCapacity,
+                                  partial_decode, noDict,
+                                  (BYTE *)dest - prefixSize, NULL, 0);
+}
+
+LZ4_FORCE_O2
+int LZ4_decompress_safe_forceExtDict(const char *source, char *dest,
+                                     int compressedSize, int maxOutputSize,
+                                     const void *dictStart, size_t dictSize)
 {
-    return LZ4_decompress_generic(
-        source, dest, compressedSize, maxOutputSize, endOnInputSize, full, 0,
-        usingExtDict, (BYTE *)dest, (const BYTE *)dictStart, dictSize);
+    DEBUGLOG(5, "LZ4_decompress_safe_forceExtDict");
+    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
+                                  decode_full_block, usingExtDict, (BYTE *)dest,
+                                  (const BYTE *)dictStart, dictSize);
+}
+
+LZ4_FORCE_O2
+int LZ4_decompress_safe_partial_forceExtDict(
+    const char *source, char *dest, int compressedSize, int targetOutputSize,
+    int dstCapacity, const void *dictStart, size_t dictSize)
+{
+    dstCapacity = MIN(targetOutputSize, dstCapacity);
+    return LZ4_decompress_generic(source, dest, compressedSize, dstCapacity,
+                                  partial_decode, usingExtDict, (BYTE *)dest,
+                                  (const BYTE *)dictStart, dictSize);
 }
 
-LZ4_FORCE_O2_GCC_PPC64LE
+LZ4_FORCE_O2
 static int LZ4_decompress_fast_extDict(const char *source, char *dest,
                                        int originalSize, const void *dictStart,
                                        size_t dictSize)
 {
-    return LZ4_decompress_generic(
-        source, dest, 0, originalSize, endOnOutputSize, full, 0, usingExtDict,
-        (BYTE *)dest, (const BYTE *)dictStart, dictSize);
+    return LZ4_decompress_unsafe_generic((const BYTE *)source, (BYTE *)dest,
+                                         originalSize, 0,
+                                         (const BYTE *)dictStart, dictSize);
 }
 
 /* The "double dictionary" mode, for use with e.g. ring buffers: the first part
@@ -2183,37 +3132,30 @@ int LZ4_decompress_safe_doubleDict(const char *source, char *dest,
                                    size_t dictSize)
 {
     return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
-                                  endOnInputSize, full, 0, usingExtDict,
+                                  decode_full_block, usingExtDict,
                                   (BYTE *)dest - prefixSize,
                                   (const BYTE *)dictStart, dictSize);
 }
 
-LZ4_FORCE_INLINE
-int LZ4_decompress_fast_doubleDict(const char *source, char *dest,
-                                   int originalSize, size_t prefixSize,
-                                   const void *dictStart, size_t dictSize)
-{
-    return LZ4_decompress_generic(
-        source, dest, 0, originalSize, endOnOutputSize, full, 0, usingExtDict,
-        (BYTE *)dest - prefixSize, (const BYTE *)dictStart, dictSize);
-}
-
 /*===== streaming decompression functions =====*/
 
+#if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION)
 LZ4_streamDecode_t *LZ4_createStreamDecode(void)
 {
-    LZ4_streamDecode_t *lz4s =
-        (LZ4_streamDecode_t *)ALLOC_AND_ZERO(sizeof(LZ4_streamDecode_t));
-    return lz4s;
+    LZ4_STATIC_ASSERT(sizeof(LZ4_streamDecode_t) >=
+                      sizeof(LZ4_streamDecode_t_internal));
+    return (LZ4_streamDecode_t *)ALLOC_AND_ZERO(sizeof(LZ4_streamDecode_t));
 }
 
 int LZ4_freeStreamDecode(LZ4_streamDecode_t *LZ4_stream)
 {
-    if (!LZ4_stream)
-        return 0; /* support free on NULL */
+    if (LZ4_stream == NULL) {
+        return 0;
+    } /* support free on NULL */
     FREEMEM(LZ4_stream);
     return 0;
 }
+#endif
 
 /*! LZ4_setStreamDecode() :
  *  Use this function to instruct where to find the dictionary.
@@ -2225,9 +3167,14 @@ int LZ4_setStreamDecode(LZ4_streamDecode_t *LZ4_streamDecode,
                         const char *dictionary, int dictSize)
 {
     LZ4_streamDecode_t_internal *lz4sd = &LZ4_streamDecode->internal_donotuse;
-
     lz4sd->prefixSize = (size_t)dictSize;
-    lz4sd->prefixEnd = (const BYTE *)dictionary + dictSize;
+    if (dictSize) {
+        assert(dictionary != NULL);
+        lz4sd->prefixEnd = (const BYTE *)dictionary + dictSize;
+    }
+    else {
+        lz4sd->prefixEnd = (const BYTE *)dictionary;
+    }
     lz4sd->externalDict = NULL;
     lz4sd->extDictSize = 0;
     return 1;
@@ -2256,14 +3203,14 @@ int LZ4_decoderRingBufferSize(int maxBlockSize)
 }
 
 /*
- *_continue() :
- These decoding functions allow decompression of multiple blocks in "streaming"
- mode. Previously decoded blocks must still be available at the memory position
- where they were decoded. If it's not possible, save the relevant part of
- decoded data into a safe buffer, and indicate where it stands using
- LZ4_setStreamDecode()
- */
-LZ4_FORCE_O2_GCC_PPC64LE
+*_continue() :
+    These decoding functions allow decompression of multiple blocks in
+    "streaming" mode. Previously decoded blocks must still be available at the
+    memory position where they were decoded. If that is not possible, save the
+    relevant part of decoded data into a safe buffer, and indicate where it
+    stands using LZ4_setStreamDecode().
+*/
+LZ4_FORCE_O2
 int LZ4_decompress_safe_continue(LZ4_streamDecode_t *LZ4_streamDecode,
                                  const char *source, char *dest,
                                  int compressedSize, int maxOutputSize)
@@ -2278,7 +3225,7 @@ int LZ4_decompress_safe_continue(LZ4_streamDecode_t *LZ4_streamDecode,
             LZ4_decompress_safe(source, dest, compressedSize, maxOutputSize);
         if (result <= 0)
             return result;
-        lz4sd->prefixSize = result;
+        lz4sd->prefixSize = (size_t)result;
         lz4sd->prefixEnd = (BYTE *)dest + result;
     }
     else if (lz4sd->prefixEnd == (BYTE *)dest) {
@@ -2295,54 +3242,58 @@ int LZ4_decompress_safe_continue(LZ4_streamDecode_t *LZ4_streamDecode,
                 lz4sd->externalDict, lz4sd->extDictSize);
         if (result <= 0)
             return result;
-        lz4sd->prefixSize += result;
+        lz4sd->prefixSize += (size_t)result;
         lz4sd->prefixEnd += result;
     }
     else {
         /* The buffer wraps around, or they're switching to another buffer. */
         lz4sd->extDictSize = lz4sd->prefixSize;
         lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize;
-        result = LZ4_decompress_safe_extDict(source, dest, compressedSize,
-                                             maxOutputSize, lz4sd->externalDict,
-                                             lz4sd->extDictSize);
+        result = LZ4_decompress_safe_forceExtDict(
+            source, dest, compressedSize, maxOutputSize, lz4sd->externalDict,
+            lz4sd->extDictSize);
         if (result <= 0)
             return result;
-        lz4sd->prefixSize = result;
+        lz4sd->prefixSize = (size_t)result;
         lz4sd->prefixEnd = (BYTE *)dest + result;
     }
 
     return result;
 }
 
-LZ4_FORCE_O2_GCC_PPC64LE
-int LZ4_decompress_fast_continue(LZ4_streamDecode_t *LZ4_streamDecode,
-                                 const char *source, char *dest,
-                                 int originalSize)
+LZ4_FORCE_O2 int
+LZ4_decompress_fast_continue(LZ4_streamDecode_t *LZ4_streamDecode,
+                             const char *source, char *dest, int originalSize)
 {
-    LZ4_streamDecode_t_internal *lz4sd = &LZ4_streamDecode->internal_donotuse;
+    LZ4_streamDecode_t_internal *const lz4sd =
+        (assert(LZ4_streamDecode != NULL),
+         &LZ4_streamDecode->internal_donotuse);
     int result;
 
+    DEBUGLOG(5, "LZ4_decompress_fast_continue (toDecodeSize=%i)", originalSize);
+    assert(originalSize >= 0);
+
     if (lz4sd->prefixSize == 0) {
+        DEBUGLOG(5, "first invocation : no prefix nor extDict");
         assert(lz4sd->extDictSize == 0);
         result = LZ4_decompress_fast(source, dest, originalSize);
         if (result <= 0)
             return result;
-        lz4sd->prefixSize = originalSize;
+        lz4sd->prefixSize = (size_t)originalSize;
         lz4sd->prefixEnd = (BYTE *)dest + originalSize;
     }
     else if (lz4sd->prefixEnd == (BYTE *)dest) {
-        if (lz4sd->prefixSize >= 64 KB - 1 || lz4sd->extDictSize == 0)
-            result = LZ4_decompress_fast(source, dest, originalSize);
-        else
-            result = LZ4_decompress_fast_doubleDict(
-                source, dest, originalSize, lz4sd->prefixSize,
-                lz4sd->externalDict, lz4sd->extDictSize);
+        DEBUGLOG(5, "continue using existing prefix");
+        result = LZ4_decompress_unsafe_generic(
+            (const BYTE *)source, (BYTE *)dest, originalSize, lz4sd->prefixSize,
+            lz4sd->externalDict, lz4sd->extDictSize);
         if (result <= 0)
             return result;
-        lz4sd->prefixSize += originalSize;
+        lz4sd->prefixSize += (size_t)originalSize;
         lz4sd->prefixEnd += originalSize;
     }
     else {
+        DEBUGLOG(5, "prefix becomes extDict");
         lz4sd->extDictSize = lz4sd->prefixSize;
         lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize;
         result = LZ4_decompress_fast_extDict(source, dest, originalSize,
@@ -2350,7 +3301,7 @@ int LZ4_decompress_fast_continue(LZ4_streamDecode_t *LZ4_streamDecode,
                                              lz4sd->extDictSize);
         if (result <= 0)
             return result;
-        lz4sd->prefixSize = originalSize;
+        lz4sd->prefixSize = (size_t)originalSize;
         lz4sd->prefixEnd = (BYTE *)dest + originalSize;
     }
 
@@ -2358,11 +3309,11 @@ int LZ4_decompress_fast_continue(LZ4_streamDecode_t *LZ4_streamDecode,
 }
 
 /*
-   Advanced decoding functions :
-   *_usingDict() :
-   These decoding functions work the same as "_continue" ones,
-   the dictionary must be explicitly provided within parameters
- */
+Advanced decoding functions :
+*_usingDict() :
+    These decoding functions work the same as "_continue" ones,
+    the dictionary must be explicitly provided within parameters
+*/
 
 int LZ4_decompress_safe_usingDict(const char *source, char *dest,
                                   int compressedSize, int maxOutputSize,
@@ -2371,14 +3322,42 @@ int LZ4_decompress_safe_usingDict(const char *source, char *dest,
     if (dictSize == 0)
         return LZ4_decompress_safe(source, dest, compressedSize, maxOutputSize);
     if (dictStart + dictSize == dest) {
-        if (dictSize >= 64 KB - 1)
+        if (dictSize >= 64 KB - 1) {
             return LZ4_decompress_safe_withPrefix64k(
                 source, dest, compressedSize, maxOutputSize);
-        return LZ4_decompress_safe_withSmallPrefix(source, dest, compressedSize,
-                                                   maxOutputSize, dictSize);
+        }
+        assert(dictSize >= 0);
+        return LZ4_decompress_safe_withSmallPrefix(
+            source, dest, compressedSize, maxOutputSize, (size_t)dictSize);
     }
-    return LZ4_decompress_safe_extDict(source, dest, compressedSize,
-                                       maxOutputSize, dictStart, dictSize);
+    assert(dictSize >= 0);
+    return LZ4_decompress_safe_forceExtDict(source, dest, compressedSize,
+                                            maxOutputSize, dictStart,
+                                            (size_t)dictSize);
+}
+
+int LZ4_decompress_safe_partial_usingDict(const char *source, char *dest,
+                                          int compressedSize,
+                                          int targetOutputSize, int dstCapacity,
+                                          const char *dictStart, int dictSize)
+{
+    if (dictSize == 0)
+        return LZ4_decompress_safe_partial(source, dest, compressedSize,
+                                           targetOutputSize, dstCapacity);
+    if (dictStart + dictSize == dest) {
+        if (dictSize >= 64 KB - 1) {
+            return LZ4_decompress_safe_partial_withPrefix64k(
+                source, dest, compressedSize, targetOutputSize, dstCapacity);
+        }
+        assert(dictSize >= 0);
+        return LZ4_decompress_safe_partial_withSmallPrefix(
+            source, dest, compressedSize, targetOutputSize, dstCapacity,
+            (size_t)dictSize);
+    }
+    assert(dictSize >= 0);
+    return LZ4_decompress_safe_partial_forceExtDict(
+        source, dest, compressedSize, targetOutputSize, dstCapacity, dictStart,
+        (size_t)dictSize);
 }
 
 int LZ4_decompress_fast_usingDict(const char *source, char *dest,
@@ -2386,9 +3365,12 @@ int LZ4_decompress_fast_usingDict(const char *source, char *dest,
                                   int dictSize)
 {
     if (dictSize == 0 || dictStart + dictSize == dest)
-        return LZ4_decompress_fast(source, dest, originalSize);
+        return LZ4_decompress_unsafe_generic((const BYTE *)source, (BYTE *)dest,
+                                             originalSize, (size_t)dictSize,
+                                             NULL, 0);
+    assert(dictSize >= 0);
     return LZ4_decompress_fast_extDict(source, dest, originalSize, dictStart,
-                                       dictSize);
+                                       (size_t)dictSize);
 }
 
 /*=*************************************************
@@ -2400,25 +3382,20 @@ int LZ4_compress_limitedOutput(const char *source, char *dest, int inputSize,
 {
     return LZ4_compress_default(source, dest, inputSize, maxOutputSize);
 }
-
-int LZ4_compress(const char *source, char *dest, int inputSize)
+int LZ4_compress(const char *src, char *dest, int srcSize)
 {
-    return LZ4_compress_default(source, dest, inputSize,
-                                LZ4_compressBound(inputSize));
+    return LZ4_compress_default(src, dest, srcSize, LZ4_compressBound(srcSize));
 }
-
 int LZ4_compress_limitedOutput_withState(void *state, const char *src,
                                          char *dst, int srcSize, int dstSize)
 {
     return LZ4_compress_fast_extState(state, src, dst, srcSize, dstSize, 1);
 }
-
 int LZ4_compress_withState(void *state, const char *src, char *dst, int srcSize)
 {
     return LZ4_compress_fast_extState(state, src, dst, srcSize,
                                       LZ4_compressBound(srcSize), 1);
 }
-
 int LZ4_compress_limitedOutput_continue(LZ4_stream_t *LZ4_stream,
                                         const char *src, char *dst, int srcSize,
                                         int dstCapacity)
@@ -2426,7 +3403,6 @@ int LZ4_compress_limitedOutput_continue(LZ4_stream_t *LZ4_stream,
     return LZ4_compress_fast_continue(LZ4_stream, src, dst, srcSize,
                                       dstCapacity, 1);
 }
-
 int LZ4_compress_continue(LZ4_stream_t *LZ4_stream, const char *source,
                           char *dest, int inputSize)
 {
@@ -2435,17 +3411,15 @@ int LZ4_compress_continue(LZ4_stream_t *LZ4_stream, const char *source,
 }
 
 /*
-   These decompression functions are deprecated and should no longer be used.
-   They are only provided here for compatibility with older user programs.
-   - LZ4_uncompress is totally equivalent to LZ4_decompress_fast
-   - LZ4_uncompress_unknownOutputSize is totally equivalent to
-   LZ4_decompress_safe
- */
+These decompression functions are deprecated and should no longer be used.
+They are only provided here for compatibility with older user programs.
+- LZ4_uncompress is totally equivalent to LZ4_decompress_fast
+- LZ4_uncompress_unknownOutputSize is totally equivalent to LZ4_decompress_safe
+*/
 int LZ4_uncompress(const char *source, char *dest, int outputSize)
 {
     return LZ4_decompress_fast(source, dest, outputSize);
 }
-
 int LZ4_uncompress_unknownOutputSize(const char *source, char *dest, int isize,
                                      int maxOutputSize)
 {
@@ -2456,7 +3430,7 @@ int LZ4_uncompress_unknownOutputSize(const char *source, char *dest, int isize,
 
 int LZ4_sizeofStreamState(void)
 {
-    return LZ4_STREAMSIZE;
+    return sizeof(LZ4_stream_t);
 }
 
 int LZ4_resetStreamState(void *state, char *inputBuffer)
@@ -2466,11 +3440,13 @@ int LZ4_resetStreamState(void *state, char *inputBuffer)
     return 0;
 }
 
+#if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION)
 void *LZ4_create(char *inputBuffer)
 {
     (void)inputBuffer;
     return LZ4_createStream();
 }
+#endif
 
 char *LZ4_slideInputBuffer(void *state)
 {
diff --git a/lib/gis/lz4.h b/lib/gis/lz4.h
index 9ab16d35e0e..b689d827112 100644
--- a/lib/gis/lz4.h
+++ b/lib/gis/lz4.h
@@ -1,37 +1,37 @@
 /*
  *  LZ4 - Fast LZ compression algorithm
  *  Header File
- *  Copyright (C) 2011-2017, Yann Collet.
-
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - LZ4 homepage : http://www.lz4.org
- - LZ4 source repository : https://github.com/lz4/lz4
- */
+ *  Copyright (C) 2011-2023, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+    - LZ4 homepage : http://www.lz4.org
+    - LZ4 source repository : https://github.com/lz4/lz4
+*/
 #if defined(__cplusplus)
 extern "C" {
 #endif
@@ -45,27 +45,32 @@ extern "C" {
 /**
   Introduction
 
-  LZ4 is lossless compression algorithm, providing compression speed at 400 MB/s
+  LZ4 is lossless compression algorithm, providing compression speed >500 MB/s
   per core, scalable with multi-cores CPU. It features an extremely fast
   decoder, with speed in multiple GB/s per core, typically reaching RAM speed
   limits on multi-core systems.
 
   The LZ4 compression library provides in-memory compression and decompression
-  functions. Compression can be done in:
+  functions. It gives full buffer control to user. Compression can be done in:
     - a single step (described as Simple Functions)
     - a single step, reusing a context (described in Advanced Functions)
     - unbounded multiple steps (described as Streaming compression)
 
-  lz4.h provides block compression functions. It gives full buffer control to
-  user. Decompressing an lz4-compressed block also requires metadata (such as
-  compressed size). Each application is free to encode such metadata in
-  whichever way it wants.
-
-  An additional format, called LZ4 frame specification
-  (doc/lz4_Frame_format.md), take care of encoding standard metadata alongside
-  LZ4-compressed blocks. If your application requires interoperability, it's
-  recommended to use it. A library is provided to take care of it, see
-  lz4frame.h.
+  lz4.h generates and decodes LZ4-compressed blocks (doc/lz4_Block_format.md).
+  Decompressing such a compressed block requires additional metadata.
+  Exact metadata depends on exact decompression function.
+  For the typical case of LZ4_decompress_safe(),
+  metadata includes block's compressed size, and maximum bound of decompressed
+  size. Each application is free to encode and pass such metadata in whichever
+  way it wants.
+
+  lz4.h only handles blocks; it cannot generate Frames.
+
+  Blocks are different from Frames (doc/lz4_Frame_format.md).
+  Frames bundle both blocks and metadata in a specified manner.
+  Embedding metadata is required for compressed data to be self-contained and
+  portable. Frame format is delivered through a companion API, declared in
+  lz4frame.h. The `lz4` CLI can only manage frames.
 */
 
 /*^***************************************************************
@@ -87,21 +92,49 @@ extern "C" {
 #if defined(LZ4_DLL_EXPORT) && (LZ4_DLL_EXPORT == 1)
 #define LZ4LIB_API __declspec(dllexport) LZ4LIB_VISIBILITY
 #elif defined(LZ4_DLL_IMPORT) && (LZ4_DLL_IMPORT == 1)
-#define LZ4LIB_API                                                         \
-    __declspec(dllimport)                                                  \
-    LZ4LIB_VISIBILITY /* It isn't required but allows generating better    \
-                         code, saving a function pointer load from the IAT \
-                         and an indirect jump. */
+#define LZ4LIB_API                                                             \
+    __declspec(dllimport)                                                      \
+    LZ4LIB_VISIBILITY /* It isn't required but allows to generate better code, \
+                         saving a function pointer load from the IAT and an    \
+                         indirect jump.*/
 #else
 #define LZ4LIB_API LZ4LIB_VISIBILITY
 #endif
 
+/*! LZ4_FREESTANDING :
+ *  When this macro is set to 1, it enables "freestanding mode" that is
+ *  suitable for typical freestanding environment which doesn't support
+ *  standard C library.
+ *
+ *  - LZ4_FREESTANDING is a compile-time switch.
+ *  - It requires the following macros to be defined:
+ *    LZ4_memcpy, LZ4_memmove, LZ4_memset.
+ *  - It only enables LZ4/HC functions which don't use heap.
+ *    All LZ4F_* functions are not supported.
+ *  - See tests/freestanding.c to check its basic setup.
+ */
+#if defined(LZ4_FREESTANDING) && (LZ4_FREESTANDING == 1)
+#define LZ4_HEAPMODE                                      0
+#define LZ4HC_HEAPMODE                                    0
+#define LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION 1
+#if !defined(LZ4_memcpy)
+#error "LZ4_FREESTANDING requires macro 'LZ4_memcpy'."
+#endif
+#if !defined(LZ4_memset)
+#error "LZ4_FREESTANDING requires macro 'LZ4_memset'."
+#endif
+#if !defined(LZ4_memmove)
+#error "LZ4_FREESTANDING requires macro 'LZ4_memmove'."
+#endif
+#elif !defined(LZ4_FREESTANDING)
+#define LZ4_FREESTANDING 0
+#endif
+
 /*------   Version   ------*/
-#define LZ4_VERSION_MAJOR 1 /* for breaking interface changes  */
-#define LZ4_VERSION_MINOR                                                      \
-    8                         /* for new (non-breaking) interface capabilities \
-                               */
-#define LZ4_VERSION_RELEASE 2 /* for tweaks, bug-fixes, or development */
+#define LZ4_VERSION_MAJOR   1  /* for breaking interface changes  */
+#define LZ4_VERSION_MINOR   10 /* for new (non-breaking) interface capabilities \
+                                */
+#define LZ4_VERSION_RELEASE 0  /* for tweaks, bug-fixes, or development */
 
 #define LZ4_VERSION_NUMBER                                     \
     (LZ4_VERSION_MAJOR * 100 * 100 + LZ4_VERSION_MINOR * 100 + \
@@ -110,55 +143,84 @@ extern "C" {
 #define LZ4_LIB_VERSION           LZ4_VERSION_MAJOR.LZ4_VERSION_MINOR.LZ4_VERSION_RELEASE
 #define LZ4_QUOTE(str)            #str
 #define LZ4_EXPAND_AND_QUOTE(str) LZ4_QUOTE(str)
-#define LZ4_VERSION_STRING        LZ4_EXPAND_AND_QUOTE(LZ4_LIB_VERSION)
-
-LZ4LIB_API int LZ4_versionNumber(void);
-/**< library version number; useful to check dll version */
+#define LZ4_VERSION_STRING \
+    LZ4_EXPAND_AND_QUOTE(LZ4_LIB_VERSION) /* requires v1.7.3+ */
 
-LZ4LIB_API const char *LZ4_versionString(
-    void); /**< library version string; unseful to check dll version */
+LZ4LIB_API int
+LZ4_versionNumber(void); /**< library version number; useful to check dll
+                            version; requires v1.3.0+ */
+LZ4LIB_API const char *
+LZ4_versionString(void); /**< library version string; useful to check dll
+                            version; requires v1.7.5+ */
 
 /*-************************************
- *  Tuning parameter
+ *  Tuning memory usage
  **************************************/
 /*!
  * LZ4_MEMORY_USAGE :
+ * Can be selected at compile time, by setting LZ4_MEMORY_USAGE.
  * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 ->
- * 64KB; 20 -> 1MB; etc.) Increasing memory usage improves compression ratio
- * Reduced memory usage may improve speed, thanks to cache effect
- * Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache
+ * 64KB; 20 -> 1MB) Increasing memory usage improves compression ratio,
+ * generally at the cost of speed. Reduced memory usage may improve speed at the
+ * cost of ratio, thanks to better cache locality. Default value is 14, for
+ * 16KB, which nicely fits into most L1 caches.
  */
 #ifndef LZ4_MEMORY_USAGE
-#define LZ4_MEMORY_USAGE 14
+#define LZ4_MEMORY_USAGE LZ4_MEMORY_USAGE_DEFAULT
+#endif
+
+/* These are absolute limits, they should not be changed by users */
+#define LZ4_MEMORY_USAGE_MIN     10
+#define LZ4_MEMORY_USAGE_DEFAULT 14
+#define LZ4_MEMORY_USAGE_MAX     20
+
+#if (LZ4_MEMORY_USAGE < LZ4_MEMORY_USAGE_MIN)
+#error "LZ4_MEMORY_USAGE is too small !"
+#endif
+
+#if (LZ4_MEMORY_USAGE > LZ4_MEMORY_USAGE_MAX)
+#error "LZ4_MEMORY_USAGE is too large !"
 #endif
 
 /*-************************************
  *  Simple Functions
  **************************************/
 /*! LZ4_compress_default() :
-   Compresses 'srcSize' bytes from buffer 'src'
-   into already allocated 'dst' buffer of size 'dstCapacity'.
-   Compression is guaranteed to succeed if 'dstCapacity' >=
-   LZ4_compressBound(srcSize). It also runs faster, so it's a recommended
-   setting. If the function cannot compress 'src' into a more limited 'dst'
-   budget, compression stops *immediately*, and the function result is zero.
-   Note : as a consequence, 'dst' content is not valid.
-   Note 2 : This function is protected against buffer overflow scenarios (never
-   writes outside 'dst' buffer, nor read outside 'source' buffer). srcSize : max
-   supported value is LZ4_MAX_INPUT_SIZE. dstCapacity : size of buffer 'dst'
-   (which must be already allocated) return  : the number of bytes written into
-   buffer 'dst' (necessarily <= dstCapacity) or 0 if compression fails */
+ *  Compresses 'srcSize' bytes from buffer 'src'
+ *  into already allocated 'dst' buffer of size 'dstCapacity'.
+ *  Compression is guaranteed to succeed if 'dstCapacity' >=
+ * LZ4_compressBound(srcSize). It also runs faster, so it's a recommended
+ * setting. If the function cannot compress 'src' into a more limited 'dst'
+ * budget, compression stops *immediately*, and the function result is zero. In
+ * which case, 'dst' content is undefined (invalid). srcSize : max supported
+ * value is LZ4_MAX_INPUT_SIZE. dstCapacity : size of buffer 'dst' (which must
+ * be already allocated)
+ *     @return  : the number of bytes written into buffer 'dst' (necessarily <=
+ * dstCapacity) or 0 if compression fails. Note : This function is protected
+ * against buffer overflow scenarios (never writes outside 'dst' buffer, nor
+ * read outside 'source' buffer).
+ */
 LZ4LIB_API int LZ4_compress_default(const char *src, char *dst, int srcSize,
                                     int dstCapacity);
 
 /*! LZ4_decompress_safe() :
-   compressedSize : is the exact complete size of the compressed block.
-   dstCapacity : is the size of destination buffer, which must be already
-   allocated. return : the number of bytes decompressed into destination buffer
-   (necessarily <= dstCapacity) If destination buffer is not large enough,
-   decoding will stop and output an error code (negative value). If the source
-   stream is detected malformed, the function will stop decoding and return a
-   negative result. This function is protected against malicious data packets.
+ * @compressedSize : is the exact complete size of the compressed block.
+ * @dstCapacity : is the size of destination buffer (which must be already
+ * allocated), presumed an upper bound of decompressed size.
+ * @return : the number of bytes decompressed into destination buffer
+ * (necessarily <= dstCapacity). If destination buffer is not large enough,
+ * decoding will stop and output an error code (negative value). If the source
+ * stream is detected malformed, the function will stop decoding and return a
+ * negative result. Note 1 : This function is protected against malicious data
+ * packets : it will never write outside 'dst' buffer, nor read outside
+ * 'source' buffer, even if the compressed block is maliciously modified to
+ * order the decoder to do these actions. In such case, the decoder stops
+ * immediately, and considers the compressed block malformed. Note 2 :
+ * compressedSize and dstCapacity must be provided to the function, the
+ * compressed block does not contain them. The implementation is free to send /
+ * store / derive this information in whichever way is most beneficial. If there
+ * is a need for a different format which bundles together both compressed data
+ * and its metadata, consider looking at lz4frame.h instead.
  */
 LZ4LIB_API int LZ4_decompress_safe(const char *src, char *dst,
                                    int compressedSize, int dstCapacity);
@@ -172,9 +234,8 @@ LZ4LIB_API int LZ4_decompress_safe(const char *src, char *dst,
          ? 0                                          \
          : (isize) + ((isize) / 255) + 16)
 
-/*!
-   LZ4_compressBound() :
-   Provides the maximum size that LZ4 compression may output in a "worst case"
+/*! LZ4_compressBound() :
+    Provides the maximum size that LZ4 compression may output in a "worst case"
    scenario (input data not compressible) This function is primarily useful for
    memory allocation purposes (destination buffer size). Macro
    LZ4_COMPRESSBOUND() is also provided for compilation-time evaluation (stack
@@ -183,79 +244,95 @@ LZ4LIB_API int LZ4_decompress_safe(const char *src, char *dst,
    supported value is LZ4_MAX_INPUT_SIZE return : maximum output size in a
    "worst case" scenario or 0, if input size is incorrect (too large or
    negative)
- */
+*/
 LZ4LIB_API int LZ4_compressBound(int inputSize);
 
-/*!
-   LZ4_compress_fast() :
-   Same as LZ4_compress_default(), but allows selection of "acceleration"
+/*! LZ4_compress_fast() :
+    Same as LZ4_compress_default(), but allows selection of "acceleration"
    factor. The larger the acceleration value, the faster the algorithm, but also
    the lesser the compression. It's a trade-off. It can be fine tuned, with each
    successive value providing roughly +~3% to speed. An acceleration value of
    "1" is the same as regular LZ4_compress_default() Values <= 0 will be
-   replaced by ACCELERATION_DEFAULT (currently == 1, see lz4.c).
- */
+   replaced by LZ4_ACCELERATION_DEFAULT (currently == 1, see lz4.c). Values >
+   LZ4_ACCELERATION_MAX will be replaced by LZ4_ACCELERATION_MAX (currently ==
+   65537, see lz4.c).
+*/
 LZ4LIB_API int LZ4_compress_fast(const char *src, char *dst, int srcSize,
                                  int dstCapacity, int acceleration);
 
-/*!
-   LZ4_compress_fast_extState() :
-   Same compression function, just using an externally allocated memory space to
-   store compression state. Use LZ4_sizeofState() to know how much memory must
-   be allocated, and allocate it on 8-bytes boundaries (using malloc()
-   typically). Then, provide it as 'void* state' to compression function.
+/*! LZ4_compress_fast_extState() :
+ *  Same as LZ4_compress_fast(), using an externally allocated memory space for
+ * its state. Use LZ4_sizeofState() to know how much memory must be allocated,
+ *  and allocate it on 8-bytes boundaries (using `malloc()` typically).
+ *  Then, provide this buffer as `void* state` to compression function.
  */
 LZ4LIB_API int LZ4_sizeofState(void);
 LZ4LIB_API int LZ4_compress_fast_extState(void *state, const char *src,
                                           char *dst, int srcSize,
                                           int dstCapacity, int acceleration);
 
-/*!
-   LZ4_compress_destSize() :
-   Reverse the logic : compresses as much data as possible from 'src' buffer
-   into already allocated buffer 'dst' of size 'targetDestSize'.
-   This function either compresses the entire 'src' content into 'dst' if it's
-   large enough, or fill 'dst' buffer completely with as much data as possible
-   from 'src'. *srcSizePtr : will be modified to indicate how many bytes where
-   read from 'src' to fill 'dst'. New value is necessarily <= old value. return
-   : Nb bytes written into 'dst' (necessarily <= targetDestSize) or 0 if
-   compression fails
+/*! LZ4_compress_destSize() :
+ *  Reverse the logic : compresses as much data as possible from 'src' buffer
+ *  into already allocated buffer 'dst', of size >= 'dstCapacity'.
+ *  This function either compresses the entire 'src' content into 'dst' if it's
+ * large enough, or fills 'dst' buffer completely with as much data as possible
+ * from 'src'. note: acceleration parameter is fixed to "default".
+ *
+ * *srcSizePtr : in+out parameter. Initially contains size of input.
+ *               Will be modified to indicate how many bytes were read from
+ * 'src' to fill 'dst'. New value is necessarily <= input value.
+ * @return : Nb bytes written into 'dst' (necessarily <= dstCapacity)
+ *           or 0 if compression fails.
+ *
+ * Note : from v1.8.2 to v1.9.1, this function had a bug (fixed in v1.9.2+):
+ *        the produced compressed content could, in specific circumstances,
+ *        require to be decompressed into a destination buffer larger
+ *        by at least 1 byte than the content to decompress.
+ *        If an application uses `LZ4_compress_destSize()`,
+ *        it's highly recommended to update liblz4 to v1.9.2 or better.
+ *        If this can't be done or ensured,
+ *        the receiving decompression function should provide
+ *        a dstCapacity which is > decompressedSize, by at least 1 byte.
+ *        See https://github.com/lz4/lz4/issues/859 for details
  */
 LZ4LIB_API int LZ4_compress_destSize(const char *src, char *dst,
                                      int *srcSizePtr, int targetDstSize);
 
-/*!
-   LZ4_decompress_fast() : **unsafe!**
-   This function is a bit faster than LZ4_decompress_safe(),
-   but it may misbehave on malformed input because it doesn't perform full
-   validation of compressed data. originalSize : is the uncompressed size to
-   regenerate Destination buffer must be already allocated, and its size must be
-   >= 'originalSize' bytes. return : number of bytes read from source buffer (==
-   compressed size). If the source stream is detected malformed, the function
-   stops decoding and return a negative result. note : This function is only
-   usable if the originalSize of uncompressed data is known in advance. The
-   caller should also check that all the compressed input has been consumed
-   properly, i.e. that the return value matches the size of the buffer with
-   compressed input. The function never writes past the output buffer.  However,
-   since it doesn't know its 'src' size, it may read past the intended input.
-   Also, because match offsets are not validated during decoding, reads from
-   'src' may underflow.  Use this function in trusted environment **only**.
- */
-LZ4LIB_API int LZ4_decompress_fast(const char *src, char *dst,
-                                   int originalSize);
-
-/*!
-   LZ4_decompress_safe_partial() :
-   This function decompress a compressed block of size 'srcSize' at position
-   'src' into destination buffer 'dst' of size 'dstCapacity'. The function will
-   decompress a minimum of 'targetOutputSize' bytes, and stop after that.
-   However, it's not accurate, and may write more than 'targetOutputSize' (but
-   always <= dstCapacity).
-   @return : the number of bytes decoded in the destination buffer (necessarily
-   <= dstCapacity) Note : this number can also be < targetOutputSize, if
-   compressed block contains less data. Therefore, always control how many bytes
-   were decoded. If source stream is detected malformed, function returns a
-   negative result. This function is protected against malicious data packets.
+/*! LZ4_decompress_safe_partial() :
+ *  Decompress an LZ4 compressed block, of size 'srcSize' at position 'src',
+ *  into destination buffer 'dst' of size 'dstCapacity'.
+ *  Up to 'targetOutputSize' bytes will be decoded.
+ *  The function stops decoding on reaching this objective.
+ *  This can be useful to boost performance
+ *  whenever only the beginning of a block is required.
+ *
+ * @return : the number of bytes decoded in `dst` (necessarily <=
+ * targetOutputSize). If source stream is detected malformed, function returns a
+ * negative result.
+ *
+ *  Note 1 : @return can be < targetOutputSize, if compressed block contains
+ * less data.
+ *
+ *  Note 2 : targetOutputSize must be <= dstCapacity
+ *
+ *  Note 3 : this function effectively stops decoding on reaching
+ * targetOutputSize, so dstCapacity is kind of redundant. This is because in
+ * older versions of this function, decoding operation would still write
+ * complete sequences. Therefore, there was no guarantee that it would stop
+ * writing at exactly targetOutputSize, it could write more bytes, though only
+ * up to dstCapacity. Some "margin" used to be required for this operation to
+ * work properly. Thankfully, this is no longer necessary. The function
+ * nonetheless keeps the same signature, in an effort to preserve API
+ * compatibility.
+ *
+ *  Note 4 : If srcSize is the exact size of the block,
+ *           then targetOutputSize can be any value,
+ *           including larger than the block's decompressed size.
+ *           The function will, at most, generate block's decompressed size.
+ *
+ *  Note 5 : If srcSize is _larger_ than block's compressed size,
+ *           then targetOutputSize **MUST** be <= block's decompressed size.
+ *           Otherwise, *silent corruption will occur*.
  */
 LZ4LIB_API int LZ4_decompress_safe_partial(const char *src, char *dst,
                                            int srcSize, int targetOutputSize,
@@ -266,45 +343,143 @@ LZ4LIB_API int LZ4_decompress_safe_partial(const char *src, char *dst,
  ***********************************************/
 typedef union LZ4_stream_u LZ4_stream_t; /* incomplete type (defined later) */
 
-/*! LZ4_createStream() and LZ4_freeStream() :
- *  LZ4_createStream() will allocate and initialize an `LZ4_stream_t` structure.
- *  LZ4_freeStream() releases its memory.
- */
+/*!
+ Note about RC_INVOKED
+
+ - RC_INVOKED is predefined symbol of rc.exe (the resource compiler which is
+ part of MSVC/Visual Studio).
+   https://docs.microsoft.com/en-us/windows/win32/menurc/predefined-macros
+
+ - Since rc.exe is a legacy compiler, it truncates long symbols (> 30 chars)
+   and reports warning "RC4011: identifier truncated".
+
+ - To eliminate the warning, we surround long preprocessor symbol with
+   "#if !defined(RC_INVOKED) ... #endif" block that means
+   "skip this block when rc.exe is trying to read it".
+*/
+#if !defined(RC_INVOKED) /* https://docs.microsoft.com/en-us/windows/win32/menurc/predefined-macros \
+                          */
+#if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION)
 LZ4LIB_API LZ4_stream_t *LZ4_createStream(void);
 LZ4LIB_API int LZ4_freeStream(LZ4_stream_t *streamPtr);
+#endif /* !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION) */
+#endif
 
-/*! LZ4_resetStream() :
- *  An LZ4_stream_t structure can be allocated once and re-used multiple times.
- *  Use this function to start compressing a new stream.
+/*! LZ4_resetStream_fast() : v1.9.0+
+ *  Use this to prepare an LZ4_stream_t for a new chain of dependent blocks
+ *  (e.g., LZ4_compress_fast_continue()).
+ *
+ *  An LZ4_stream_t must be initialized once before usage.
+ *  This is automatically done when created by LZ4_createStream().
+ *  However, should the LZ4_stream_t be simply declared on stack (for example),
+ *  it's necessary to initialize it first, using LZ4_initStream().
+ *
+ *  After init, start any new stream with LZ4_resetStream_fast().
+ *  A same LZ4_stream_t can be re-used multiple times consecutively
+ *  and compress multiple streams,
+ *  provided that it starts each new stream with LZ4_resetStream_fast().
+ *
+ *  LZ4_resetStream_fast() is much faster than LZ4_initStream(),
+ *  but is not compatible with memory regions containing garbage data.
+ *
+ *  Note: it's only useful to call LZ4_resetStream_fast()
+ *        in the context of streaming compression.
+ *        The *extState* functions perform their own resets.
+ *        Invoking LZ4_resetStream_fast() before is redundant, and even
+ * counterproductive.
  */
-LZ4LIB_API void LZ4_resetStream(LZ4_stream_t *streamPtr);
+LZ4LIB_API void LZ4_resetStream_fast(LZ4_stream_t *streamPtr);
 
 /*! LZ4_loadDict() :
- *  Use this function to load a static dictionary into LZ4_stream_t.
- *  Any previous data will be forgotten, only 'dictionary' will remain in
- * memory. Loading a size of 0 is allowed, and is the same as reset.
- * @return : dictionary size, in bytes (necessarily <= 64 KB)
+ *  Use this function to reference a static dictionary into LZ4_stream_t.
+ *  The dictionary must remain available during compression.
+ *  LZ4_loadDict() triggers a reset, so any previous data will be forgotten.
+ *  The same dictionary will have to be loaded on decompression side for
+ * successful decoding. Dictionaries are useful for better compression of small
+ * data (KB range). While LZ4 itself accepts any input as dictionary, dictionary
+ * efficiency is also a topic. When in doubt, employ the Zstandard's Dictionary
+ * Builder. Loading a size of 0 is allowed, and is the same as reset.
+ * @return : loaded dictionary size, in bytes (note: only the last 64 KB are
+ * loaded)
  */
 LZ4LIB_API int LZ4_loadDict(LZ4_stream_t *streamPtr, const char *dictionary,
                             int dictSize);
 
+/*! LZ4_loadDictSlow() : v1.10.0+
+ *  Same as LZ4_loadDict(),
+ *  but uses a bit more cpu to reference the dictionary content more thoroughly.
+ *  This is expected to slightly improve compression ratio.
+ *  The extra-cpu cost is likely worth it if the dictionary is re-used across
+ * multiple sessions.
+ * @return : loaded dictionary size, in bytes (note: only the last 64 KB are
+ * loaded)
+ */
+LZ4LIB_API int LZ4_loadDictSlow(LZ4_stream_t *streamPtr, const char *dictionary,
+                                int dictSize);
+
+/*! LZ4_attach_dictionary() : stable since v1.10.0
+ *
+ *  This allows efficient re-use of a static dictionary multiple times.
+ *
+ *  Rather than re-loading the dictionary buffer into a working context before
+ *  each compression, or copying a pre-loaded dictionary's LZ4_stream_t into a
+ *  working LZ4_stream_t, this function introduces a no-copy setup mechanism,
+ *  in which the working stream references @dictionaryStream in-place.
+ *
+ *  Several assumptions are made about the state of @dictionaryStream.
+ *  Currently, only states which have been prepared by LZ4_loadDict() or
+ *  LZ4_loadDictSlow() should be expected to work.
+ *
+ *  Alternatively, the provided @dictionaryStream may be NULL,
+ *  in which case any existing dictionary stream is unset.
+ *
+ *  If a dictionary is provided, it replaces any pre-existing stream history.
+ *  The dictionary contents are the only history that can be referenced and
+ *  logically immediately precede the data compressed in the first subsequent
+ *  compression call.
+ *
+ *  The dictionary will only remain attached to the working stream through the
+ *  first compression call, at the end of which it is cleared.
+ * @dictionaryStream stream (and source buffer) must remain in-place /
+ * accessible / unchanged through the completion of the compression session.
+ *
+ *  Note: there is no equivalent LZ4_attach_*() method on the decompression side
+ *  because there is no initialization cost, hence no need to share the cost
+ * across multiple sessions. To decompress LZ4 blocks using dictionary, attached
+ * or not, just employ the regular LZ4_setStreamDecode() for streaming, or the
+ * stateless LZ4_decompress_safe_usingDict() for one-shot decompression.
+ */
+LZ4LIB_API void LZ4_attach_dictionary(LZ4_stream_t *workingStream,
+                                      const LZ4_stream_t *dictionaryStream);
+
 /*! LZ4_compress_fast_continue() :
  *  Compress 'src' content using data from previously compressed blocks, for
  * better compression ratio. 'dst' buffer must be already allocated. If
  * dstCapacity >= LZ4_compressBound(srcSize), compression is guaranteed to
  * succeed, and runs faster.
  *
- *  Important : The previous 64KB of compressed data is assumed to remain
- * present and unmodified in memory!
- *
- *  Special 1 : When input is a double-buffer, they can have any size, including
- * < 64 KB. Make sure that buffers are separated by at least one byte. This way,
- * each block only depends on previous block. Special 2 : If input buffer is a
- * ring-buffer, it can have any size, including < 64 KB.
- *
  * @return : size of compressed block
  *           or 0 if there is an error (typically, cannot fit into 'dst').
- *  After an error, the stream status is invalid, it can only be reset or freed.
+ *
+ *  Note 1 : Each invocation to LZ4_compress_fast_continue() generates a new
+ * block. Each block has precise boundaries. Each block must be decompressed
+ * separately, calling LZ4_decompress_*() with relevant metadata. It's not
+ * possible to append blocks together and expect a single invocation of
+ * LZ4_decompress_*() to decompress them together.
+ *
+ *  Note 2 : The previous 64KB of source data is __assumed__ to remain present,
+ * unmodified, at same address in memory !
+ *
+ *  Note 3 : When input is structured as a double-buffer, each buffer can have
+ * any size, including < 64 KB. Make sure that buffers are separated, by at
+ * least one byte. This construction ensures that each block only depends on
+ * previous block.
+ *
+ *  Note 4 : If input buffer is a ring-buffer, it can have any size, including <
+ * 64 KB.
+ *
+ *  Note 5 : After an error, the stream status is undefined (invalid), it can
+ * only be reset or freed.
  */
 LZ4LIB_API int LZ4_compress_fast_continue(LZ4_stream_t *streamPtr,
                                           const char *src, char *dst,
@@ -332,13 +507,18 @@ typedef union LZ4_streamDecode_u LZ4_streamDecode_t; /* tracking context */
  *  creation / destruction of streaming decompression tracking context.
  *  A tracking context can be re-used multiple times.
  */
+#if !defined(RC_INVOKED) /* https://docs.microsoft.com/en-us/windows/win32/menurc/predefined-macros \
+                          */
+#if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION)
 LZ4LIB_API LZ4_streamDecode_t *LZ4_createStreamDecode(void);
 LZ4LIB_API int LZ4_freeStreamDecode(LZ4_streamDecode_t *LZ4_stream);
+#endif /* !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION) */
+#endif
 
 /*! LZ4_setStreamDecode() :
  *  An LZ4_streamDecode_t context can be allocated once and re-used multiple
  * times. Use this function to start decompression of a new stream of blocks. A
- * dictionary can optionnally be set. Use NULL or size 0 for a reset order.
+ * dictionary can optionally be set. Use NULL or size 0 for a reset order.
  *  Dictionary is presumed stable : it must remain accessible and unmodified
  * during next decompression.
  * @return : 1 if OK, 0 if error
@@ -346,7 +526,7 @@ LZ4LIB_API int LZ4_freeStreamDecode(LZ4_streamDecode_t *LZ4_stream);
 LZ4LIB_API int LZ4_setStreamDecode(LZ4_streamDecode_t *LZ4_streamDecode,
                                    const char *dictionary, int dictSize);
 
-/*! LZ4_decoderRingBufferSize() : v1.8.2
+/*! LZ4_decoderRingBufferSize() : v1.8.2+
  *  Note : in a ring buffer scenario (optional),
  *  blocks are presumed decompressed next to each other
  *  up to the moment there is not enough remaining space for next block
@@ -358,16 +538,31 @@ LZ4LIB_API int LZ4_setStreamDecode(LZ4_streamDecode_t *LZ4_streamDecode,
  *           or 0 if there is an error (invalid maxBlockSize).
  */
 LZ4LIB_API int LZ4_decoderRingBufferSize(int maxBlockSize);
-#define LZ4_DECODER_RING_BUFFER_SIZE(mbs) \
-    (65536 + 14 + (mbs)) /* for static allocation; mbs presumed valid */
-
-/*! LZ4_decompress_*_continue() :
- *  These decoding functions allow decompression of consecutive blocks in
- * "streaming" mode. A block is an unsplittable entity, it must be presented
- * entirely to a decompression function. Decompression functions only accepts
- * one block at a time. The last 64KB of previously decoded data *must* remain
- * available and unmodified at the memory position where they were decoded. If
- * less than 64KB of data has been decoded, all the data must be present.
+#define LZ4_DECODER_RING_BUFFER_SIZE(maxBlockSize) \
+    (65536 + 14 +                                  \
+     (maxBlockSize)) /* for static allocation; maxBlockSize presumed valid */
+
+/*! LZ4_decompress_safe_continue() :
+ *  This decoding function allows decompression of consecutive blocks in
+ * "streaming" mode. The difference with the usual independent blocks is that
+ *  new blocks are allowed to find references into former blocks.
+ *  A block is an unsplittable entity, and must be presented entirely to the
+ * decompression function. LZ4_decompress_safe_continue() only accepts one block
+ * at a time. It's modeled after `LZ4_decompress_safe()` and behaves similarly.
+ *
+ * @LZ4_streamDecode : decompression state, tracking the position in memory of
+ * past data
+ * @compressedSize : exact complete size of one compressed block.
+ * @dstCapacity : size of destination buffer (which must be already allocated),
+ *                must be an upper bound of decompressed size.
+ * @return : number of bytes decompressed into destination buffer (necessarily
+ * <= dstCapacity) If destination buffer is not large enough, decoding will stop
+ * and output an error code (negative value). If the source stream is detected
+ * malformed, the function will stop decoding and return a negative result.
+ *
+ *  The last 64KB of previously decoded data *must* remain available and
+ * unmodified at the memory position where they were previously decoded. If less
+ * than 64KB of data has been decoded, all the data must be present.
  *
  *  Special : if decompression side sets a ring buffer, it must respect one of
  * the following conditions :
@@ -397,130 +592,185 @@ LZ4LIB_API int
 LZ4_decompress_safe_continue(LZ4_streamDecode_t *LZ4_streamDecode,
                              const char *src, char *dst, int srcSize,
                              int dstCapacity);
-LZ4LIB_API int
-LZ4_decompress_fast_continue(LZ4_streamDecode_t *LZ4_streamDecode,
-                             const char *src, char *dst, int originalSize);
 
-/*! LZ4_decompress_*_usingDict() :
- *  These decoding functions work the same as
+/*! LZ4_decompress_safe_usingDict() :
+ *  Works the same as
  *  a combination of LZ4_setStreamDecode() followed by
- * LZ4_decompress_*_continue() They are stand-alone, and don't need an
- * LZ4_streamDecode_t structure. Dictionary is presumed stable : it must remain
- * accessible and unmodified during next decompression.
+ * LZ4_decompress_safe_continue(). However, it's stateless: it doesn't need any
+ * LZ4_streamDecode_t state. Dictionary is presumed stable : it must remain
+ * accessible and unmodified during decompression. Performance tip :
+ * Decompression speed can be substantially increased when dst == dictStart +
+ * dictSize.
  */
 LZ4LIB_API int LZ4_decompress_safe_usingDict(const char *src, char *dst,
-                                             int srcSize, int dstCapcity,
-                                             const char *dictStart,
-                                             int dictSize);
-LZ4LIB_API int LZ4_decompress_fast_usingDict(const char *src, char *dst,
-                                             int originalSize,
+                                             int srcSize, int dstCapacity,
                                              const char *dictStart,
                                              int dictSize);
 
-/*^**********************************************
+/*! LZ4_decompress_safe_partial_usingDict() :
+ *  Behaves the same as LZ4_decompress_safe_partial()
+ *  with the added ability to specify a memory segment for past data.
+ *  Performance tip : Decompression speed can be substantially increased
+ *                    when dst == dictStart + dictSize.
+ */
+LZ4LIB_API int LZ4_decompress_safe_partial_usingDict(
+    const char *src, char *dst, int compressedSize, int targetOutputSize,
+    int maxOutputSize, const char *dictStart, int dictSize);
+
+#endif /* LZ4_H_2983827168210 */
+
+/*^*************************************
  * !!!!!!   STATIC LINKING ONLY   !!!!!!
- ***********************************************/
+ ***************************************/
 
-/*-************************************
- *  Unstable declarations
- **************************************
- * Declarations in this section should be considered unstable.
- * Use at your own peril, etc., etc.
- * They may be removed in the future.
- * Their signatures may change.
- **************************************/
+/*-****************************************************************************
+ * Experimental section
+ *
+ * Symbols declared in this section must be considered unstable. Their
+ * signatures or semantics may change, or they may be removed altogether in the
+ * future. They are therefore only safe to depend on when the caller is
+ * statically linked against the library.
+ *
+ * To protect against unsafe usage, not only are the declarations guarded,
+ * the definitions are hidden by default
+ * when building LZ4 as a shared/dynamic library.
+ *
+ * In order to access these declarations,
+ * define LZ4_STATIC_LINKING_ONLY in your application
+ * before including LZ4's headers.
+ *
+ * In order to make their implementations accessible dynamically, you must
+ * define LZ4_PUBLISH_STATIC_FUNCTIONS when building the LZ4 library.
+ ******************************************************************************/
 
 #ifdef LZ4_STATIC_LINKING_ONLY
 
-/*! LZ4_resetStream_fast() :
- *  Use this, like LZ4_resetStream(), to prepare a context for a new chain of
- *  calls to a streaming API (e.g., LZ4_compress_fast_continue()).
- *
- *  Note:
- *  Using this in advance of a non- streaming-compression function is redundant,
- *  and potentially bad for performance, since they all perform their own custom
- *  reset internally.
- *
- *  Differences from LZ4_resetStream():
- *  When an LZ4_stream_t is known to be in a internally coherent state,
- *  it can often be prepared for a new compression with almost no work, only
- *  sometimes falling back to the full, expensive reset that is always required
- *  when the stream is in an indeterminate state (i.e., the reset performed by
- *  LZ4_resetStream()).
- *
- *  LZ4_streams are guaranteed to be in a valid state when:
- *  - returned from LZ4_createStream()
- *  - reset by LZ4_resetStream()
- *  - memset(stream, 0, sizeof(LZ4_stream_t)), though this is discouraged
- *  - the stream was in a valid state and was reset by LZ4_resetStream_fast()
- *  - the stream was in a valid state and was then used in any compression call
- *    that returned success
- *  - the stream was in an indeterminate state and was used in a compression
- *    call that fully reset the state (e.g., LZ4_compress_fast_extState()) and
- *    that returned success
- *
- *  When a stream isn't known to be in a valid state, it is not safe to pass to
- *  any fastReset or streaming function. It must first be cleansed by the full
- *  LZ4_resetStream().
- */
-LZ4LIB_API void LZ4_resetStream_fast(LZ4_stream_t *streamPtr);
+#ifndef LZ4_STATIC_3504398509
+#define LZ4_STATIC_3504398509
+
+#ifdef LZ4_PUBLISH_STATIC_FUNCTIONS
+#define LZ4LIB_STATIC_API LZ4LIB_API
+#else
+#define LZ4LIB_STATIC_API
+#endif
 
 /*! LZ4_compress_fast_extState_fastReset() :
  *  A variant of LZ4_compress_fast_extState().
  *
- *  Using this variant avoids an expensive initialization step. It is only safe
- *  to call if the state buffer is known to be correctly initialized already
- *  (see above comment on LZ4_resetStream_fast() for a definition of "correctly
- *  initialized"). From a high level, the difference is that this function
- *  initializes the provided state with a call to something like
- *  LZ4_resetStream_fast() while LZ4_compress_fast_extState() starts with a
- *  call to LZ4_resetStream().
+ *  Using this variant avoids an expensive initialization step.
+ *  It is only safe to call if the state buffer is known to be correctly
+ * initialized already (see above comment on LZ4_resetStream_fast() for a
+ * definition of "correctly initialized"). From a high level, the difference is
+ * that this function initializes the provided state with a call to something
+ * like LZ4_resetStream_fast() while LZ4_compress_fast_extState() starts with a
+ * call to LZ4_resetStream().
  */
-LZ4LIB_API int LZ4_compress_fast_extState_fastReset(void *state,
-                                                    const char *src, char *dst,
-                                                    int srcSize,
-                                                    int dstCapacity,
-                                                    int acceleration);
+LZ4LIB_STATIC_API int
+LZ4_compress_fast_extState_fastReset(void *state, const char *src, char *dst,
+                                     int srcSize, int dstCapacity,
+                                     int acceleration);
+
+/*! LZ4_compress_destSize_extState() : introduced in v1.10.0
+ *  Same as LZ4_compress_destSize(), but using an externally allocated state.
+ *  Also: exposes @acceleration
+ */
+int LZ4_compress_destSize_extState(void *state, const char *src, char *dst,
+                                   int *srcSizePtr, int targetDstSize,
+                                   int acceleration);
 
-/*! LZ4_attach_dictionary() :
- *  This is an experimental API that allows for the efficient use of a
- *  static dictionary many times.
+/*! In-place compression and decompression
  *
- *  Rather than re-loading the dictionary buffer into a working context before
- *  each compression, or copying a pre-loaded dictionary's LZ4_stream_t into a
- *  working LZ4_stream_t, this function introduces a no-copy setup mechanism,
- *  in which the working stream references the dictionary stream in-place.
+ * It's possible to have input and output sharing the same buffer,
+ * for highly constrained memory environments.
+ * In both cases, it requires input to lie at the end of the buffer,
+ * and decompression to start at beginning of the buffer.
+ * Buffer size must feature some margin, hence be larger than final size.
  *
- *  Several assumptions are made about the state of the dictionary stream.
- *  Currently, only streams which have been prepared by LZ4_loadDict() should
- *  be expected to work.
+ * |<------------------------buffer--------------------------------->|
+ *                             |<-----------compressed data--------->|
+ * |<-----------decompressed size------------------>|
+ *                                                  |<----margin---->|
  *
- *  Alternatively, the provided dictionary stream pointer may be NULL, in which
- *  case any existing dictionary stream is unset.
+ * This technique is more useful for decompression,
+ * since decompressed size is typically larger,
+ * and margin is short.
  *
- *  If a dictionary is provided, it replaces any pre-existing stream history.
- *  The dictionary contents are the only history that can be referenced and
- *  logically immediately precede the data compressed in the first subsequent
- *  compression call.
+ * In-place decompression will work inside any buffer
+ * whose size is >= LZ4_DECOMPRESS_INPLACE_BUFFER_SIZE(decompressedSize).
+ * This presumes that decompressedSize > compressedSize.
+ * Otherwise, it means compression actually expanded data,
+ * and it would be more efficient to store such data with a flag indicating it's
+ * not compressed. This can happen when data is not compressible (already
+ * compressed, or encrypted).
  *
- *  The dictionary will only remain attached to the working stream through the
- *  first compression call, at the end of which it is cleared. The dictionary
- *  stream (and source buffer) must remain in-place / accessible / unchanged
- *  through the completion of the first compression call on the stream.
+ * For in-place compression, margin is larger, as it must be able to cope with
+ * both history preservation, requiring input data to remain unmodified up to
+ * LZ4_DISTANCE_MAX, and data expansion, which can happen when input is not
+ * compressible. As a consequence, buffer size requirements are much higher, and
+ * memory savings offered by in-place compression are more limited.
+ *
+ * There are ways to limit this cost for compression :
+ * - Reduce history size, by modifying LZ4_DISTANCE_MAX.
+ *   Note that it is a compile-time constant, so all compressions will apply
+ * this limit. Lower values will reduce compression ratio, except when
+ * input_size < LZ4_DISTANCE_MAX, so it's a reasonable trick when inputs are
+ * known to be small.
+ * - Require the compressor to deliver a "maximum compressed size".
+ *   This is the `dstCapacity` parameter in `LZ4_compress*()`.
+ *   When this size is < LZ4_COMPRESSBOUND(inputSize), then compression can
+ * fail, in which case, the return code will be 0 (zero). The caller must be
+ * ready for these cases to happen, and typically design a backup scheme to send
+ * data uncompressed. The combination of both techniques can significantly
+ * reduce the amount of margin required for in-place compression.
+ *
+ * In-place compression can work in any buffer
+ * whose size is >= (maxCompressedSize)
+ * with maxCompressedSize == LZ4_COMPRESSBOUND(srcSize) for guaranteed
+ * compression success. LZ4_COMPRESS_INPLACE_BUFFER_SIZE() depends on both
+ * maxCompressedSize and LZ4_DISTANCE_MAX, so it's possible to reduce memory
+ * requirements by playing with them.
  */
-LZ4LIB_API void LZ4_attach_dictionary(LZ4_stream_t *working_stream,
-                                      const LZ4_stream_t *dictionary_stream);
 
+#define LZ4_DECOMPRESS_INPLACE_MARGIN(compressedSize) \
+    (((compressedSize) >> 8) + 32)
+#define LZ4_DECOMPRESS_INPLACE_BUFFER_SIZE(decompressedSize)           \
+    ((decompressedSize) +                                              \
+     LZ4_DECOMPRESS_INPLACE_MARGIN(                                    \
+         decompressedSize)) /**< note: presumes that compressedSize <  \
+                               decompressedSize. note2: margin is      \
+                               overestimated a bit, since it could use \
+                               compressedSize instead */
+
+#ifndef LZ4_DISTANCE_MAX       /* history window size; can be user-defined at \
+                                  compile time */
+#define LZ4_DISTANCE_MAX 65535 /* set to maximum value by default */
 #endif
 
-/*-************************************
- *  Private definitions
- **************************************
- * Do not use these definitions.
- * They are exposed to allow static allocation of `LZ4_stream_t` and
- *`LZ4_streamDecode_t`. Using these definitions will expose code to API and/or
+#define LZ4_COMPRESS_INPLACE_MARGIN                                       \
+    (LZ4_DISTANCE_MAX + 32) /* LZ4_DISTANCE_MAX can be safely replaced by \
+                               srcSize when it's smaller */
+#define LZ4_COMPRESS_INPLACE_BUFFER_SIZE(maxCompressedSize)                   \
+    ((maxCompressedSize) +                                                    \
+     LZ4_COMPRESS_INPLACE_MARGIN) /**< maxCompressedSize is generally         \
+                                     LZ4_COMPRESSBOUND(inputSize), but can be \
+                                     set to any lower value, with the risk    \
+                                     that compression can fail (return code   \
+                                     0(zero)) */
+
+#endif /* LZ4_STATIC_3504398509 */
+#endif /* LZ4_STATIC_LINKING_ONLY */
+
+#ifndef LZ4_H_98237428734687
+#define LZ4_H_98237428734687
+
+/*-************************************************************
+ *  Private Definitions
+ **************************************************************
+ * Do not use these definitions directly.
+ * They are only exposed to allow static allocation of `LZ4_stream_t` and
+ *`LZ4_streamDecode_t`. Accessing members will expose user code to API and/or
  *ABI break in future versions of the library.
- **************************************/
+ **************************************************************/
 #define LZ4_HASHLOG       (LZ4_MEMORY_USAGE - 2)
 #define LZ4_HASHTABLESIZE (1 << LZ4_MEMORY_USAGE)
 #define LZ4_HASH_SIZE_U32 \
@@ -529,75 +779,74 @@ LZ4LIB_API void LZ4_attach_dictionary(LZ4_stream_t *working_stream,
 #if defined(__cplusplus) || \
     (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
 #include <stdint.h>
-
-typedef struct LZ4_stream_t_internal LZ4_stream_t_internal;
-struct LZ4_stream_t_internal {
-    uint32_t hashTable[LZ4_HASH_SIZE_U32];
-    uint32_t currentOffset;
-    uint16_t initCheck;
-    uint16_t tableType;
-    const uint8_t *dictionary;
-    const LZ4_stream_t_internal *dictCtx;
-    uint32_t dictSize;
-};
-
-typedef struct {
-    const uint8_t *externalDict;
-    size_t extDictSize;
-    const uint8_t *prefixEnd;
-    size_t prefixSize;
-} LZ4_streamDecode_t_internal;
-
+typedef int8_t LZ4_i8;
+typedef uint8_t LZ4_byte;
+typedef uint16_t LZ4_u16;
+typedef uint32_t LZ4_u32;
 #else
+typedef signed char LZ4_i8;
+typedef unsigned char LZ4_byte;
+typedef unsigned short LZ4_u16;
+typedef unsigned int LZ4_u32;
+#endif
+
+/*! LZ4_stream_t :
+ *  Never ever use below internal definitions directly !
+ *  These definitions are not API/ABI safe, and may change in future versions.
+ *  If you need static allocation, declare or allocate an LZ4_stream_t object.
+ **/
 
 typedef struct LZ4_stream_t_internal LZ4_stream_t_internal;
 struct LZ4_stream_t_internal {
-    unsigned int hashTable[LZ4_HASH_SIZE_U32];
-    unsigned int currentOffset;
-    unsigned short initCheck;
-    unsigned short tableType;
-    const unsigned char *dictionary;
+    LZ4_u32 hashTable[LZ4_HASH_SIZE_U32];
+    const LZ4_byte *dictionary;
     const LZ4_stream_t_internal *dictCtx;
-    unsigned int dictSize;
+    LZ4_u32 currentOffset;
+    LZ4_u32 tableType;
+    LZ4_u32 dictSize;
+    /* Implicit padding to ensure structure is aligned */
 };
 
+#define LZ4_STREAM_MINSIZE         \
+    ((1UL << (LZ4_MEMORY_USAGE)) + \
+     32) /* static size, for inter-version compatibility */
+union LZ4_stream_u {
+    char minStateSize[LZ4_STREAM_MINSIZE];
+    LZ4_stream_t_internal internal_donotuse;
+}; /* previously typedef'd to LZ4_stream_t */
+
+/*! LZ4_initStream() : v1.9.0+
+ *  An LZ4_stream_t structure must be initialized at least once.
+ *  This is automatically done when invoking LZ4_createStream(),
+ *  but it's not when the structure is simply declared on stack (for example).
+ *
+ *  Use LZ4_initStream() to properly initialize a newly declared LZ4_stream_t.
+ *  It can also initialize any arbitrary buffer of sufficient size,
+ *  and will @return a pointer of proper type upon initialization.
+ *
+ *  Note : initialization fails if size and alignment conditions are not
+ *respected. In which case, the function will @return NULL. Note2: An
+ *LZ4_stream_t structure guarantees correct alignment and size. Note3: Before
+ *v1.9.0, use LZ4_resetStream() instead
+ **/
+LZ4LIB_API LZ4_stream_t *LZ4_initStream(void *stateBuffer, size_t size);
+
+/*! LZ4_streamDecode_t :
+ *  Never ever use below internal definitions directly !
+ *  These definitions are not API/ABI safe, and may change in future versions.
+ *  If you need static allocation, declare or allocate an LZ4_streamDecode_t
+ *object.
+ **/
 typedef struct {
-    const unsigned char *externalDict;
+    const LZ4_byte *externalDict;
+    const LZ4_byte *prefixEnd;
     size_t extDictSize;
-    const unsigned char *prefixEnd;
     size_t prefixSize;
 } LZ4_streamDecode_t_internal;
 
-#endif
-
-/*!
- * LZ4_stream_t :
- * information structure to track an LZ4 stream.
- * init this structure before first use.
- * note : only use in association with static linking !
- *        this definition is not API/ABI safe,
- *        it may change in a future version !
- */
-#define LZ4_STREAMSIZE_U64 ((1 << (LZ4_MEMORY_USAGE - 3)) + 4)
-#define LZ4_STREAMSIZE     (LZ4_STREAMSIZE_U64 * sizeof(unsigned long long))
-union LZ4_stream_u {
-    unsigned long long table[LZ4_STREAMSIZE_U64];
-    LZ4_stream_t_internal internal_donotuse;
-}; /* previously typedef'd to LZ4_stream_t */
-
-/*!
- * LZ4_streamDecode_t :
- * information structure to track an LZ4 stream during decompression.
- * init this structure  using LZ4_setStreamDecode (or memset()) before first use
- * note : only use in association with static linking !
- *        this definition is not API/ABI safe,
- *        and may change in a future version !
- */
-#define LZ4_STREAMDECODESIZE_U64 4
-#define LZ4_STREAMDECODESIZE \
-    (LZ4_STREAMDECODESIZE_U64 * sizeof(unsigned long long))
+#define LZ4_STREAMDECODE_MINSIZE 32
 union LZ4_streamDecode_u {
-    unsigned long long table[LZ4_STREAMDECODESIZE_U64];
+    char minStateSize[LZ4_STREAMDECODE_MINSIZE];
     LZ4_streamDecode_t_internal internal_donotuse;
 }; /* previously typedef'd to LZ4_streamDecode_t */
 
@@ -606,62 +855,68 @@ union LZ4_streamDecode_u {
  **************************************/
 
 /*! Deprecation warnings
-   Should deprecation warnings be a problem,
-   it is generally possible to disable them,
-   typically with -Wno-deprecated-declarations for gcc
-   or _CRT_SECURE_NO_WARNINGS in Visual.
-   Otherwise, it's also possible to define LZ4_DISABLE_DEPRECATE_WARNINGS */
+ *
+ *  Deprecated functions make the compiler generate a warning when invoked.
+ *  This is meant to invite users to update their source code.
+ *  Should deprecation warnings be a problem, it is generally possible to
+ * disable them, typically with -Wno-deprecated-declarations for gcc or
+ * _CRT_SECURE_NO_WARNINGS in Visual.
+ *
+ *  Another method is to define LZ4_DISABLE_DEPRECATE_WARNINGS
+ *  before including the header file.
+ */
 #ifdef LZ4_DISABLE_DEPRECATE_WARNINGS
 #define LZ4_DEPRECATED(message) /* disable deprecation warnings */
 #else
-#define LZ4_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
 #if defined(__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
 #define LZ4_DEPRECATED(message) [[deprecated(message)]]
-#elif (LZ4_GCC_VERSION >= 405) || defined(__clang__)
-#define LZ4_DEPRECATED(message) __attribute__((deprecated(message)))
-#elif (LZ4_GCC_VERSION >= 301)
-#define LZ4_DEPRECATED(message) __attribute__((deprecated))
 #elif defined(_MSC_VER)
 #define LZ4_DEPRECATED(message) __declspec(deprecated(message))
+#elif defined(__clang__) || \
+    (defined(__GNUC__) && (__GNUC__ * 10 + __GNUC_MINOR__ >= 45))
+#define LZ4_DEPRECATED(message) __attribute__((deprecated(message)))
+#elif defined(__GNUC__) && (__GNUC__ * 10 + __GNUC_MINOR__ >= 31)
+#define LZ4_DEPRECATED(message) __attribute__((deprecated))
 #else
 #pragma message( \
-    "WARNING: You need to implement LZ4_DEPRECATED for this compiler")
-#define LZ4_DEPRECATED(message)
+    "WARNING: LZ4_DEPRECATED needs custom implementation for this compiler")
+#define LZ4_DEPRECATED(message) /* disabled */
 #endif
 #endif /* LZ4_DISABLE_DEPRECATE_WARNINGS */
 
-/* Obsolete compression functions */
+/*! Obsolete compression functions (since v1.7.3) */
 LZ4_DEPRECATED("use LZ4_compress_default() instead")
-LZ4LIB_API int LZ4_compress(const char *source, char *dest, int sourceSize);
+LZ4LIB_API int LZ4_compress(const char *src, char *dest, int srcSize);
 LZ4_DEPRECATED("use LZ4_compress_default() instead")
-LZ4LIB_API int LZ4_compress_limitedOutput(const char *source, char *dest,
-                                          int sourceSize, int maxOutputSize);
+LZ4LIB_API int LZ4_compress_limitedOutput(const char *src, char *dest,
+                                          int srcSize, int maxOutputSize);
 LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead")
 LZ4LIB_API int LZ4_compress_withState(void *state, const char *source,
                                       char *dest, int inputSize);
 LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead")
 LZ4LIB_API
-int LZ4_compress_limitedOutput_withState(void *state, const char *source,
-                                         char *dest, int inputSize,
-                                         int maxOutputSize);
+    int LZ4_compress_limitedOutput_withState(void *state, const char *source,
+                                             char *dest, int inputSize,
+                                             int maxOutputSize);
 LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead")
 LZ4LIB_API
-int LZ4_compress_continue(LZ4_stream_t *LZ4_streamPtr, const char *source,
-                          char *dest, int inputSize);
+    int LZ4_compress_continue(LZ4_stream_t *LZ4_streamPtr, const char *source,
+                              char *dest, int inputSize);
 LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead")
 LZ4LIB_API
-int LZ4_compress_limitedOutput_continue(LZ4_stream_t *LZ4_streamPtr,
-                                        const char *source, char *dest,
-                                        int inputSize, int maxOutputSize);
+    int LZ4_compress_limitedOutput_continue(LZ4_stream_t *LZ4_streamPtr,
+                                            const char *source, char *dest,
+                                            int inputSize, int maxOutputSize);
 
-/* Obsolete decompression functions */
+/*! Obsolete decompression functions (since v1.8.0) */
 LZ4_DEPRECATED("use LZ4_decompress_fast() instead")
 LZ4LIB_API int LZ4_uncompress(const char *source, char *dest, int outputSize);
 LZ4_DEPRECATED("use LZ4_decompress_safe() instead")
 LZ4LIB_API int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
                                                 int isize, int maxOutputSize);
 
-/* Obsolete streaming functions; degraded functionality; do not use!
+/* Obsolete streaming functions (since v1.7.0)
+ * degraded functionality; do not use!
  *
  * In order to perform streaming compression, these functions depended on data
  * that is no longer tracked in the state. They have been preserved as well as
@@ -679,16 +934,73 @@ LZ4LIB_API int LZ4_resetStreamState(void *state, char *inputBuffer);
 LZ4_DEPRECATED("Use LZ4_saveDict() instead")
 LZ4LIB_API char *LZ4_slideInputBuffer(void *state);
 
-/* Obsolete streaming decoding functions */
+/*! Obsolete streaming decoding functions (since v1.7.0) */
 LZ4_DEPRECATED("use LZ4_decompress_safe_usingDict() instead")
 LZ4LIB_API
-int LZ4_decompress_safe_withPrefix64k(const char *src, char *dst,
-                                      int compressedSize, int maxDstSize);
+    int LZ4_decompress_safe_withPrefix64k(const char *src, char *dst,
+                                          int compressedSize, int maxDstSize);
 LZ4_DEPRECATED("use LZ4_decompress_fast_usingDict() instead")
 LZ4LIB_API int LZ4_decompress_fast_withPrefix64k(const char *src, char *dst,
                                                  int originalSize);
 
-#endif /* LZ4_H_2983827168210 */
+/*! Obsolete LZ4_decompress_fast variants (since v1.9.0) :
+ *  These functions used to be faster than LZ4_decompress_safe(),
+ *  but this is no longer the case. They are now slower.
+ *  This is because LZ4_decompress_fast() doesn't know the input size,
+ *  and therefore must progress more cautiously into the input buffer to not
+ * read beyond the end of block. On top of that `LZ4_decompress_fast()` is not
+ * protected vs malformed or malicious inputs, making it a security liability.
+ *  As a consequence, LZ4_decompress_fast() is strongly discouraged, and
+ * deprecated.
+ *
+ *  The last remaining LZ4_decompress_fast() specificity is that
+ *  it can decompress a block without knowing its compressed size.
+ *  Such functionality can be achieved in a more secure manner
+ *  by employing LZ4_decompress_safe_partial().
+ *
+ *  Parameters:
+ *  originalSize : is the uncompressed size to regenerate.
+ *                 `dst` must be already allocated, its size must be >=
+ * 'originalSize' bytes.
+ * @return : number of bytes read from source buffer (== compressed size).
+ *           The function expects to finish at block's end exactly.
+ *           If the source stream is detected malformed, the function stops
+ * decoding and returns a negative result. note : LZ4_decompress_fast*()
+ * requires originalSize. Thanks to this information, it never writes past the
+ * output buffer. However, since it doesn't know its 'src' size, it may read an
+ * unknown amount of input, past input buffer bounds. Also, since match offsets
+ * are not validated, match reads from 'src' may underflow too. These issues
+ * never happen if input (compressed) data is correct. But they may happen if
+ * input data is invalid (error or intentional tampering). As a consequence, use
+ * these functions in trusted environments with trusted data **only**.
+ */
+LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using "
+               "LZ4_decompress_safe_partial() instead")
+LZ4LIB_API int LZ4_decompress_fast(const char *src, char *dst,
+                                   int originalSize);
+LZ4_DEPRECATED("This function is deprecated and unsafe. Consider migrating "
+               "towards LZ4_decompress_safe_continue() instead. "
+               "Note that the contract will change (requires block's "
+               "compressed size, instead of decompressed size)")
+LZ4LIB_API int
+LZ4_decompress_fast_continue(LZ4_streamDecode_t *LZ4_streamDecode,
+                             const char *src, char *dst, int originalSize);
+LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using "
+               "LZ4_decompress_safe_partial_usingDict() instead")
+LZ4LIB_API int LZ4_decompress_fast_usingDict(const char *src, char *dst,
+                                             int originalSize,
+                                             const char *dictStart,
+                                             int dictSize);
+
+/*! LZ4_resetStream() :
+ *  An LZ4_stream_t structure must be initialized at least once.
+ *  This is done with LZ4_initStream(), or LZ4_resetStream().
+ *  Consider switching to LZ4_initStream(),
+ *  invoking LZ4_resetStream() will trigger deprecation warnings in the future.
+ */
+LZ4LIB_API void LZ4_resetStream(LZ4_stream_t *streamPtr);
+
+#endif /* LZ4_H_98237428734687 */
 
 #if defined(__cplusplus)
 }