From e007e42a23b53e798a212fd254ecf7d2d913b267 Mon Sep 17 00:00:00 2001
From: Randy Jones <randy@madronalabs.com>
Date: Tue, 21 Nov 2023 14:16:08 -0800
Subject: [PATCH] cleanup int -> size_t, fix theoretical UB in PhasorGen

---
 source/DSP/MLDSPBuffer.h     |  4 +--
 source/DSP/MLDSPGens.h       | 51 ++++++++++++++----------------------
 source/DSP/MLDSPMath.h       |  3 ++-
 source/DSP/MLDSPMathSSE.h    | 10 +++++++
 source/DSP/MLDSPOps.h        |  1 +
 source/DSP/MLDSPScalarMath.h |  2 +-
 6 files changed, 36 insertions(+), 35 deletions(-)
diff --git a/source/DSP/MLDSPBuffer.h b/source/DSP/MLDSPBuffer.h
index 505cf802..bbef5111 100644
--- a/source/DSP/MLDSPBuffer.h
+++ b/source/DSP/MLDSPBuffer.h
@@ -105,8 +105,8 @@ class DSPBuffer
   {
     mReadIndex = mWriteIndex = 0;
 
-    int sizeBits = ml::bitsToContain(sizeInSamples);
-    mSize = std::max(1 << sizeBits, kFloatsPerDSPVector);
+    size_t sizeBits = ml::bitsToContain(sizeInSamples);
+    mSize = std::max(1UL << sizeBits, kFloatsPerDSPVector);
 
     try
     {
diff --git a/source/DSP/MLDSPGens.h b/source/DSP/MLDSPGens.h
index 2670aa37..54fa8184 100644
--- a/source/DSP/MLDSPGens.h
+++ b/source/DSP/MLDSPGens.h
@@ -176,23 +176,20 @@ class TestSineGen
 // it outputs a phasor with range from 0--1.
 class PhasorGen
 {
-  int32_t mOmega32{0};
-
- public:
+  uint32_t mOmega32{0};
+  
+public:
   void clear(int32_t omega = 0) { mOmega32 = omega; }
-
+  
+  static constexpr float stepsPerCycle{static_cast<float>(const_math::pow(2., 32))};
+  static constexpr float cyclesPerStep{1.f / stepsPerCycle};
+  
   DSPVector operator()(const DSPVector cyclesPerSample)
   {
-    constexpr float range(1.0f);
-    constexpr float offset(0.5f);
-    constexpr float stepsPerCycle(static_cast<float>(const_math::pow(2., 32)));
-    constexpr float cyclesPerStep(1.f / stepsPerCycle);
-    DSPVector outputScaleV(range * cyclesPerStep);
-
     // calculate int steps per sample
     DSPVector stepsPerSampleV = cyclesPerSample * DSPVector(stepsPerCycle);
     DSPVectorInt intStepsPerSampleV = roundFloatToInt(stepsPerSampleV);
-
+    
     // accumulate 32-bit phase with wrap
     DSPVectorInt omega32V;
     for (int n = 0; n < kIntsPerDSPVector; ++n)
@@ -200,34 +197,29 @@ class PhasorGen
       mOmega32 += intStepsPerSampleV[n];
       omega32V[n] = mOmega32;
     }
-
+    
     // convert counter to float output range
-    DSPVector omegaV = intToFloat(omega32V) * outputScaleV + DSPVector(offset);
-    return omegaV;
+    return unsignedIntToFloat(omega32V) * DSPVector(cyclesPerStep);
   }
 };
 
-
 // OneShotGen, when triggered, makes a single ramp from 0-1 then resets to 0. The speed
 // of the ramp is a signal input, giving a ramp with the same speed as PhasorGen.
 class OneShotGen
 {
-  static constexpr int32_t start = std::numeric_limits<int32_t>::min();
-  int32_t mOmega32{start};
-  int32_t mGate{0};
-  int32_t mOmegaPrev{start};
+  static constexpr uint32_t start = 0;
+  uint32_t mOmega32{start};
+  uint32_t mGate{0};
+  uint32_t mOmegaPrev{start};
   
 public:
   void trigger() { mOmega32 = mOmegaPrev = start; mGate = 1; }
   
+  static constexpr float stepsPerCycle{static_cast<float>(const_math::pow(2., 32))};
+  static constexpr float cyclesPerStep{1.f / stepsPerCycle};
+
   DSPVector operator()(const DSPVector cyclesPerSample)
   {
-    constexpr float range(1.0f);
-    constexpr float offset(0.5f);
-    constexpr float stepsPerCycle(static_cast<float>(const_math::pow(2., 32)));
-    constexpr float cyclesPerStep(1.f / stepsPerCycle);
-    DSPVector outputScaleV(range * cyclesPerStep);
-    
     // calculate int steps per sample
     DSPVector stepsPerSampleV = cyclesPerSample * DSPVector(stepsPerCycle);
     DSPVectorInt intStepsPerSampleV = roundFloatToInt(stepsPerSampleV);
@@ -245,11 +237,8 @@ class OneShotGen
       }
       omega32V[n] = mOmegaPrev = mOmega32;
     }
-    
     // convert counter to float output range
-    DSPVector omegaV = intToFloat(omega32V) * outputScaleV + DSPVector(offset);
-    
-    return omegaV;
+    return unsignedIntToFloat(omega32V) * DSPVector(cyclesPerStep);
   }
 };
 
@@ -346,8 +335,8 @@ class SineGen
 {
   static constexpr int32_t kZeroPhase = -(2 << 29);
   PhasorGen _phasor;
-
- public:
+  
+public:
   void clear() { _phasor.clear(kZeroPhase); }
   DSPVector operator()(const DSPVector freq) { return phasorToSine(_phasor(freq)); }
 };
diff --git a/source/DSP/MLDSPMath.h b/source/DSP/MLDSPMath.h
index 69aa7e51..c2ae3b63 100644
--- a/source/DSP/MLDSPMath.h
+++ b/source/DSP/MLDSPMath.h
@@ -5,7 +5,8 @@
 #pragma once
 
 // Here is the DSP vector size, an important constant.
-constexpr int kFloatsPerDSPVector = 64;
+constexpr size_t kFloatsPerDSPVectorBits = 6;
+constexpr size_t kFloatsPerDSPVector = 1 << kFloatsPerDSPVectorBits;
 
 // Load definitions for low-level SIMD math.
 // These must define SIMDVectorFloat, SIMDVectorInt, their sizes, and a bunch of
diff --git a/source/DSP/MLDSPMathSSE.h b/source/DSP/MLDSPMathSSE.h
index 65af29eb..fbe03e72 100644
--- a/source/DSP/MLDSPMathSSE.h
+++ b/source/DSP/MLDSPMathSSE.h
@@ -123,6 +123,16 @@ inline bool isSIMDAligned(float* p)
 #define vecFloatToIntTruncate _mm_cvttps_epi32
 #define vecIntToFloat _mm_cvtepi32_ps
 
+// Approximate unsigned counterpart of _mm_cvtepi32_ps, which converts signed 32-bit ints only.
+// Bit 0 of each lane is dropped before converting, so odd inputs are treated as the next lower even value.
+inline SIMDVectorFloat vecUnsignedIntToFloat(SIMDVectorInt v)
+{
+  __m128i v_hi = _mm_srli_epi32(v, 1);  // shift each 32-bit lane right by one, filling with zero
+  __m128 v_hi_flt = _mm_cvtepi32_ps(v_hi);  // convert the halved lanes to float
+  return _mm_add_ps(v_hi_flt, v_hi_flt);  // x + x doubles the result back to the input's scale
+}
+
+
 #define vecAddInt _mm_add_epi32
 #define vecSubInt _mm_sub_epi32
 #define vecSet1Int _mm_set1_epi32
diff --git a/source/DSP/MLDSPOps.h b/source/DSP/MLDSPOps.h
index aa28e822..f30e68cd 100644
--- a/source/DSP/MLDSPOps.h
+++ b/source/DSP/MLDSPOps.h
@@ -821,6 +821,7 @@ DEFINE_OP1_F2I(truncateFloatToInt, (VecI2F(vecFloatToIntTruncate(x))));
   }
 
 DEFINE_OP1_I2F(intToFloat, (vecIntToFloat(x)));
+DEFINE_OP1_I2F(unsignedIntToFloat, (vecUnsignedIntToFloat(x)));
 
 // ----------------------------------------------------------------
 // using the conversions above, define fractionalPart
diff --git a/source/DSP/MLDSPScalarMath.h b/source/DSP/MLDSPScalarMath.h
index b718052c..79b6b9d9 100644
--- a/source/DSP/MLDSPScalarMath.h
+++ b/source/DSP/MLDSPScalarMath.h
@@ -30,7 +30,7 @@ constexpr float kMinGain = 0.00001f;  // 10e-5 = -120dB
 typedef float MLSample;
 
 // return the exponent of the smallest power of 2 that is >= x.
-inline int bitsToContain(int x)
+inline size_t bitsToContain(int x)
 {
   int exp;
   for (exp = 0; (1 << exp) < x; exp++)