From b438131507adb3c3be068f9efe08543ee47c735b Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Sat, 5 Aug 2017 13:27:43 -0400
Subject: [PATCH 001/204] Support for ESP32

Credit to Rina Shkrabova for the first cut.
---
 fastled_delay.h                          |   7 +
 led_sysdefs.h                            |   2 +
 platforms.h                              |   2 +
 platforms/esp/32/clockless_block_esp32.h | 168 +++++++++++++++++++++++
 platforms/esp/32/clockless_esp32.h       | 125 +++++++++++++++++
 platforms/esp/32/fastled_esp32.h         |   7 +
 platforms/esp/32/fastpin_esp32.h         |  92 +++++++++++++
 platforms/esp/32/led_sysdefs_esp32.h     |  33 +++++
 8 files changed, 436 insertions(+)
 create mode 100644 platforms/esp/32/clockless_block_esp32.h
 create mode 100644 platforms/esp/32/clockless_esp32.h
 create mode 100644 platforms/esp/32/fastled_esp32.h
 create mode 100644 platforms/esp/32/fastpin_esp32.h
 create mode 100644 platforms/esp/32/led_sysdefs_esp32.h
diff --git a/fastled_delay.h b/fastled_delay.h
index f16d322e2f..cfc7882ff5 100644
--- a/fastled_delay.h
+++ b/fastled_delay.h
@@ -33,6 +33,13 @@ template<int WAIT> class CMinWait {
 ////////////////////////////////////////////////////////////////////////////////////////////
 
 // Default is now just 'nop', with special case for AVR
+
+// ESP32 core has it's own definition of NOP, so undef it first
+#ifdef ESP32
+#undef NOP
+#undef NOP2
+#endif
+
 #if defined(__AVR__)
 #  define NOP __asm__ __volatile__ ("cp r0,r0\n");
 #  define NOP2 __asm__ __volatile__ ("rjmp .+0");
diff --git a/led_sysdefs.h b/led_sysdefs.h
index 57faad2fa0..93d878ac58 100644
--- a/led_sysdefs.h
+++ b/led_sysdefs.h
@@ -25,6 +25,8 @@
 #include "platforms/arm/d21/led_sysdefs_arm_d21.h"
 #elif defined(ESP8266)
 #include "platforms/esp/8266/led_sysdefs_esp8266.h"
+#elif defined(ESP32)
+#include "platforms/esp/32/led_sysdefs_esp32.h"
 #else
 // AVR platforms
 #include "platforms/avr/led_sysdefs_avr.h"
diff --git a/platforms.h b/platforms.h
index 7216de7c48..9fb4fcb940 100644
--- a/platforms.h
+++ b/platforms.h
@@ -25,6 +25,8 @@
 #include "platforms/arm/d21/fastled_arm_d21.h"
 #elif defined(ESP8266)
 #include "platforms/esp/8266/fastled_esp8266.h"
+#elif defined(ESP32)
+#include "platforms/esp/32/fastled_esp32.h"
 #else
 // AVR platforms
 #include "platforms/avr/fastled_avr.h"
diff --git a/platforms/esp/32/clockless_block_esp32.h b/platforms/esp/32/clockless_block_esp32.h
new file mode 100644
index 0000000000..8ab5807af8
--- /dev/null
+++ b/platforms/esp/32/clockless_block_esp32.h
@@ -0,0 +1,168 @@
+#ifndef __INC_CLOCKLESS_BLOCK_ESP8266_H
+#define __INC_CLOCKLESS_BLOCK_ESP8266_H
+
+#define FASTLED_HAS_BLOCKLESS 1
+
+#define PORT_MASK (((1<<LANES)-1) & 0x0000FFFFL)
+#define MIN(X,Y) (((X)<(Y)) ? (X):(Y))
+#define USED_LANES (MIN(LANES,4))
+#define REAL_FIRST_PIN 12
+#define LAST_PIN (12 + USED_LANES - 1)
+
+FASTLED_NAMESPACE_BEGIN
+
+#ifdef FASTLED_DEBUG_COUNT_FRAME_RETRIES
+extern uint32_t _frame_cnt;
+extern uint32_t _retry_cnt;
+#endif
+
+template <uint8_t LANES, int FIRST_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = GRB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 5>
+class InlineBlockClocklessController : public CPixelLEDController<RGB_ORDER, LANES, PORT_MASK> {
+    typedef typename FastPin<FIRST_PIN>::port_ptr_t data_ptr_t;
+    typedef typename FastPin<FIRST_PIN>::port_t data_t;
+
+    data_t mPinMask;
+    data_ptr_t mPort;
+    CMinWait<WAIT_TIME> mWait;
+public:
+    virtual int size() { return CLEDController::size() * LANES; }
+
+    virtual void showPixels(PixelController<RGB_ORDER, LANES, PORT_MASK> & pixels) {
+	// mWait.wait();
+	/*uint32_t clocks = */
+	int cnt=FASTLED_INTERRUPT_RETRY_COUNT;
+	while(!showRGBInternal(pixels) && cnt--) {
+	    ets_intr_unlock();
+#ifdef FASTLED_DEBUG_COUNT_FRAME_RETRIES
+	    _retry_cnt++;
+#endif
+	    delayMicroseconds(WAIT_TIME * 10);
+	    ets_intr_lock();
+	}
+	// #if FASTLED_ALLOW_INTTERUPTS == 0
+	// Adjust the timer
+	// long microsTaken = CLKS_TO_MICROS(clocks);
+	// MS_COUNTER += (1 + (microsTaken / 1000));
+	// #endif
+	
+	// mWait.mark();
+    }
+
+    template<int PIN> static void initPin() {
+	if(PIN >= REAL_FIRST_PIN && PIN <= LAST_PIN) {
+	    _ESPPIN<PIN, 1<<(PIN & 0xFF)>::setOutput();
+	    // FastPin<PIN>::setOutput();
+	}
+    }
+
+    virtual void init() {
+	// Only supportd on pins 12-15
+        // SZG: This probably won't work (check pins definitions in fastpin_esp32)
+	initPin<12>();
+	initPin<13>();
+	initPin<14>();
+	initPin<15>();
+	mPinMask = FastPin<FIRST_PIN>::mask();
+	mPort = FastPin<FIRST_PIN>::port();
+	
+	// Serial.print("Mask is "); Serial.println(PORT_MASK);
+    }
+
+    virtual uint16_t getMaxRefreshRate() const { return 400; }
+    
+    typedef union {
+	uint8_t bytes[8];
+	uint16_t shorts[4];
+	uint32_t raw[2];
+    } Lines;
+
+#define ESP_ADJUST 0 // (2*(F_CPU/24000000))
+#define ESP_ADJUST2 0
+    template<int BITS,int PX> __attribute__ ((always_inline)) inline static void writeBits(register uint32_t & last_mark, register Lines & b, PixelController<RGB_ORDER, LANES, PORT_MASK> &pixels) { // , register uint32_t & b2)  {
+	Lines b2 = b;
+	transpose8x1_noinline(b.bytes,b2.bytes);
+	
+	register uint8_t d = pixels.template getd<PX>(pixels);
+	register uint8_t scale = pixels.template getscale<PX>(pixels);
+	
+	for(register uint32_t i = 0; i < USED_LANES; i++) {
+	    while((__clock_cycles() - last_mark) < (T1+T2+T3));
+	    last_mark = __clock_cycles();
+	    *FastPin<FIRST_PIN>::sport() = PORT_MASK << REAL_FIRST_PIN;
+	    
+	    uint32_t nword = ((uint32_t)(~b2.bytes[7-i]) & PORT_MASK) << REAL_FIRST_PIN;
+	    while((__clock_cycles() - last_mark) < (T1-6));
+	    *FastPin<FIRST_PIN>::cport() = nword;
+	    
+	    while((__clock_cycles() - last_mark) < (T1+T2));
+	    *FastPin<FIRST_PIN>::cport() = PORT_MASK << REAL_FIRST_PIN;
+	    
+	    b.bytes[i] = pixels.template loadAndScale<PX>(pixels,i,d,scale);
+	}
+
+	for(register uint32_t i = USED_LANES; i < 8; i++) {
+	    while((__clock_cycles() - last_mark) < (T1+T2+T3));
+	    last_mark = __clock_cycles();
+	    *FastPin<FIRST_PIN>::sport() = PORT_MASK << REAL_FIRST_PIN;
+	    
+	    uint32_t nword = ((uint32_t)(~b2.bytes[7-i]) & PORT_MASK) << REAL_FIRST_PIN;
+	    while((__clock_cycles() - last_mark) < (T1-6));
+	    *FastPin<FIRST_PIN>::cport() = nword;
+	    
+	    while((__clock_cycles() - last_mark) < (T1+T2));
+	    *FastPin<FIRST_PIN>::cport() = PORT_MASK << REAL_FIRST_PIN;
+	}
+    }
+
+    // This method is made static to force making register Y available to use for data on AVR - if the method is non-static, then
+    // gcc will use register Y for the this pointer.
+    static uint32_t showRGBInternal(PixelController<RGB_ORDER, LANES, PORT_MASK> &allpixels) {
+	
+	// Setup the pixel controller and load/scale the first byte
+	Lines b0;
+	
+	for(int i = 0; i < USED_LANES; i++) {
+	    b0.bytes[i] = allpixels.loadAndScale0(i);
+	}
+	allpixels.preStepFirstByteDithering();
+	
+	ets_intr_lock();
+	uint32_t _start = __clock_cycles();
+	uint32_t last_mark = _start;
+	
+	while(allpixels.has(1)) {
+	    // Write first byte, read next byte
+	    writeBits<8+XTRA0,1>(last_mark, b0, allpixels);
+	    
+	    // Write second byte, read 3rd byte
+	    writeBits<8+XTRA0,2>(last_mark, b0, allpixels);
+	    allpixels.advanceData();
+	    
+	    // Write third byte
+	    writeBits<8+XTRA0,0>(last_mark, b0, allpixels);
+	    
+#if (FASTLED_ALLOW_INTERRUPTS == 1)
+	    ets_intr_unlock();
+#endif
+	    
+	    allpixels.stepDithering();
+	    
+#if (FASTLED_ALLOW_INTERRUPTS == 1)
+	    ets_intr_lock();
+	    // if interrupts took longer than 45µs, punt on the current frame
+	    if((int32_t)(__clock_cycles()-last_mark) > 0) {
+		if((int32_t)(__clock_cycles()-last_mark) > (T1+T2+T3+((WAIT_TIME-INTERRUPT_THRESHOLD)*CLKS_PER_US))) { ets_intr_unlock(); return 0; }
+	    }
+#endif
+	};
+	
+	ets_intr_unlock();
+#ifdef FASTLED_DEBUG_COUNT_FRAME_RETRIES
+	_frame_cnt++;
+#endif
+	return __clock_cycles() - _start;
+    }
+};
+
+FASTLED_NAMESPACE_END
+#endif
diff --git a/platforms/esp/32/clockless_esp32.h b/platforms/esp/32/clockless_esp32.h
new file mode 100644
index 0000000000..0ed9224b0f
--- /dev/null
+++ b/platforms/esp/32/clockless_esp32.h
@@ -0,0 +1,125 @@
+#pragma once
+
+FASTLED_NAMESPACE_BEGIN
+
+#ifdef FASTLED_DEBUG_COUNT_FRAME_RETRIES
+extern uint32_t _frame_cnt;
+extern uint32_t _retry_cnt;
+#endif
+
+// Info on reading cycle counter from https://github.com/kbeckmann/nodemcu-firmware/blob/ws2812-dual/app/modules/ws2812.c
+__attribute__ ((always_inline)) inline static uint32_t __clock_cycles() {
+  uint32_t cyc;
+  __asm__ __volatile__ ("rsr %0,ccount":"=a" (cyc));
+  return cyc;
+}
+
+#define FASTLED_HAS_CLOCKLESS 1
+
+template <int DATA_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = RGB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 5>
+class ClocklessController : public CPixelLEDController<RGB_ORDER> {
+
+    typedef typename FastPin<DATA_PIN>::port_ptr_t data_ptr_t;
+    typedef typename FastPin<DATA_PIN>::port_t data_t;
+
+    data_t mPinMask;
+    data_ptr_t mPort;
+    CMinWait<WAIT_TIME> mWait;
+public:
+    virtual void init() {
+	FastPin<DATA_PIN>::setOutput();
+	mPinMask = FastPin<DATA_PIN>::mask();
+	mPort = FastPin<DATA_PIN>::port();
+    }
+
+    virtual uint16_t getMaxRefreshRate() const { return 400; }
+
+protected:
+
+    virtual void showPixels(PixelController<RGB_ORDER> & pixels) {
+	mWait.wait();
+	int cnt = FASTLED_INTERRUPT_RETRY_COUNT;
+	while((showRGBInternal(pixels)==0) && cnt--) {
+#ifdef FASTLED_DEBUG_COUNT_FRAME_RETRIES
+	    _retry_cnt++;
+#endif
+	    ets_intr_unlock();
+	    // interrupts();
+	    delayMicroseconds(WAIT_TIME);
+	    ets_intr_lock();
+	    // noInterrupts();
+	}
+	// ets_intr_unlock();
+	mWait.mark();
+    }
+
+#define _ESP_ADJ (0)
+#define _ESP_ADJ2 (0)
+
+    template<int BITS> __attribute__ ((always_inline)) inline static void writeBits(register uint32_t & last_mark, register uint32_t b) {
+	b = ~b; b <<= 24;
+	for(register uint32_t i = BITS; i > 0; i--) {
+	    while((__clock_cycles() - last_mark) < (T1+T2+T3));
+	    last_mark = __clock_cycles();
+	    FastPin<DATA_PIN>::hi();
+	    
+	    while((__clock_cycles() - last_mark) < T1);
+	    if(b & 0x80000000L) { FastPin<DATA_PIN>::lo(); }
+	    b <<= 1;
+	    
+	    while((__clock_cycles() - last_mark) < (T1+T2));
+	    FastPin<DATA_PIN>::lo();
+	}
+    }
+
+    // This method is made static to force making register Y available to use for data on AVR - if the method is non-static, then
+    // gcc will use register Y for the this pointer.
+    static uint32_t showRGBInternal(PixelController<RGB_ORDER> pixels) {
+	// Setup the pixel controller and load/scale the first byte
+	pixels.preStepFirstByteDithering();
+	register uint32_t b = pixels.loadAndScale0();
+	pixels.preStepFirstByteDithering();
+	ets_intr_lock();
+	// noInterrupts();
+	uint32_t start = __clock_cycles();
+	uint32_t last_mark = start;
+	while(pixels.has(1)) {
+	    // Write first byte, read next byte
+	    writeBits<8+XTRA0>(last_mark, b);
+	    b = pixels.loadAndScale1();
+	    
+	    // Write second byte, read 3rd byte
+	    writeBits<8+XTRA0>(last_mark, b);
+	    b = pixels.loadAndScale2();
+	    
+	    // Write third byte, read 1st byte of next pixel
+	    writeBits<8+XTRA0>(last_mark, b);
+	    b = pixels.advanceAndLoadAndScale0();
+	    
+#if (FASTLED_ALLOW_INTERRUPTS == 1)
+	    ets_intr_unlock();
+	    // interrupts();
+#endif
+	    
+	    pixels.stepDithering();
+	    
+#if (FASTLED_ALLOW_INTERRUPTS == 1)
+	    ets_intr_lock();
+	    // noInterrupts();
+	    // if interrupts took longer than 45µs, punt on the current frame
+	    if((int32_t)(__clock_cycles()-last_mark) > 0) {
+		if((int32_t)(__clock_cycles()-last_mark) > (T1+T2+T3+((WAIT_TIME-INTERRUPT_THRESHOLD)*CLKS_PER_US))) { sei(); return 0; }
+	    }
+#endif
+	};
+
+	ets_intr_unlock();
+	// interrupts();
+#ifdef FASTLED_DEBUG_COUNT_FRAME_RETRIES
+	_frame_cnt++;
+#endif
+	return __clock_cycles() - start;
+    }
+};
+
+FASTLED_NAMESPACE_END
diff --git a/platforms/esp/32/fastled_esp32.h b/platforms/esp/32/fastled_esp32.h
new file mode 100644
index 0000000000..2dcbe2df40
--- /dev/null
+++ b/platforms/esp/32/fastled_esp32.h
@@ -0,0 +1,7 @@
+#pragma once
+
+#include "bitswap.h"
+#include "fastled_delay.h"
+#include "fastpin_esp32.h"
+#include "clockless_esp32.h"
+// #include "clockless_block_esp32.h"
diff --git a/platforms/esp/32/fastpin_esp32.h b/platforms/esp/32/fastpin_esp32.h
new file mode 100644
index 0000000000..9aa4ebe76a
--- /dev/null
+++ b/platforms/esp/32/fastpin_esp32.h
@@ -0,0 +1,92 @@
+#pragma once
+
+FASTLED_NAMESPACE_BEGIN
+
+struct FASTLED_ESP_IO {
+  volatile uint32_t _GPO;
+  volatile uint32_t _GPOS;
+  volatile uint32_t _GPOC;
+};
+
+#define _GPB0 (*(FASTLED_ESP_IO*)(GPIO_OUT_REG))
+// #define _GPB0 (*(FASTLED_ESP_IO*)(DR_REG_GPIO_BASE))
+// #define _GPB1 (*(FASTLED_ESP_IO*)(0x3ff44010))
+//THERE'S a second register for pins 32-39 (33 for outputs) but let's get one working first
+#define OUTPUT_PIN_LIMIT 31
+
+
+template<uint8_t PIN, uint32_t MASK> class _ESPPIN {
+
+public:
+  typedef volatile uint32_t * port_ptr_t;
+  typedef uint32_t port_t;
+
+  inline static void setOutput() { pinMode(PIN, OUTPUT); }
+  inline static void setInput() { pinMode(PIN, INPUT); }
+
+  inline static void hi() __attribute__ ((always_inline)) { if(PIN < OUTPUT_PIN_LIMIT) { _GPB0._GPOS = MASK; } }
+  // inline static void hi() __attribute__ ((always_inline)) { gpio_set_level((gpio_num_t)PIN, HIGH); }
+
+  inline static void lo() __attribute__ ((always_inline)) { if (PIN < OUTPUT_PIN_LIMIT){ _GPB0._GPOC = MASK; } }
+  // inline static void lo() __attribute__ ((always_inline)) { gpio_set_level((gpio_num_t)PIN, LOW); }
+  inline static void set(register port_t val) __attribute__ ((always_inline)) { if (PIN < OUTPUT_PIN_LIMIT){ _GPB0._GPO = val; }}
+  // inline static void set(register port_t val) __attribute__ ((always_inline)) { gpio_set_level((gpio_num_t)PIN, val); }
+
+  inline static void strobe() __attribute__ ((always_inline)) { toggle(); toggle(); }
+
+  inline static void toggle() __attribute__ ((always_inline)) { if (PIN < OUTPUT_PIN_LIMIT){ _GPB0._GPO = MASK; } }
+
+  inline static void hi(register port_ptr_t port) __attribute__ ((always_inline)) { hi(); }
+  inline static void lo(register port_ptr_t port) __attribute__ ((always_inline)) { lo(); }
+  inline static void fastset(register port_ptr_t port, register port_t val) __attribute__ ((always_inline)) { *port = val; }
+
+  inline static port_t hival() __attribute__ ((always_inline)) { if (PIN<OUTPUT_PIN_LIMIT) { return GPIO_OUT_REG | MASK;    }}
+  inline static port_t loval() __attribute__ ((always_inline)) { if (PIN<OUTPUT_PIN_LIMIT) { return GPIO_OUT_REG & ~MASK;   }}
+  inline static port_ptr_t port() __attribute__ ((always_inline)) { if(PIN<OUTPUT_PIN_LIMIT) { return &_GPB0._GPO;   }}
+  inline static port_ptr_t sport() __attribute__ ((always_inline)) { if (PIN<OUTPUT_PIN_LIMIT) {return &_GPB0._GPOS; }}
+  inline static port_ptr_t cport() __attribute__ ((always_inline)) { if (PIN<OUTPUT_PIN_LIMIT) {return &_GPB0._GPOC; }}
+  inline static port_t mask() __attribute__ ((always_inline)) { return MASK; }
+
+  inline static bool isset() __attribute__ ((always_inline)) { return (0x004 & MASK); }
+};
+
+#define _DEFPIN_ESP32(PIN, REAL_PIN) template<> class FastPin<PIN> : public _ESPPIN<REAL_PIN, (1<<(REAL_PIN & 0xFF))> {};
+
+
+#ifdef FASTLED_ESP32_RAW_PIN_ORDER
+
+_DEFPIN_ESP32(0,0); _DEFPIN_ESP32(1,1); _DEFPIN_ESP32(2,2); 
+_DEFPIN_ESP32(3,3); _DEFPIN_ESP32(4,4); _DEFPIN_ESP32(5,5); 
+
+// -- These are not safe to use:
+// _DEFPIN_ESP32(6,6); _DEFPIN_ESP32(7,7); _DEFPIN_ESP32(8,8); 
+// _DEFPIN_ESP32(9,9); _DEFPIN_ESP32(10,10); _DEFPIN_ESP32(11,11); 
+
+_DEFPIN_ESP32(12,12); _DEFPIN_ESP32(13,13);
+_DEFPIN_ESP32(14,14); _DEFPIN_ESP32(15,15); _DEFPIN_ESP32(16,16);
+_DEFPIN_ESP32(17,17); _DEFPIN_ESP32(18,18); _DEFPIN_ESP32(19,19);
+
+// No pin 20 : _DEFPIN_ESP32(20,20); 
+
+_DEFPIN_ESP32(21,21); _DEFPIN_ESP32(22,22); _DEFPIN_ESP32(23,23); 
+
+// No pin 24 : _DEFPIN_ESP32(24,24); 
+
+_DEFPIN_ESP32(25,25); _DEFPIN_ESP32(26,26); _DEFPIN_ESP32(27,27); 
+
+// No pin 28-31: _DEFPIN_ESP32(28,28); _DEFPIN_ESP32(29,29); _DEFPIN_ESP32(30,30); _DEFPIN_ESP32(31,31);
+
+// Need special handling for pins > 31
+// _DEFPIN_ESP32(32,32); _DEFPIN_ESP32(33,33);
+
+#define PORTA_FIRST_PIN 32
+// The rest of the pins - these are generally not available
+// _DEFPIN_ESP32(11,6);
+// _DEFPIN_ESP32(12,7); _DEFPIN_ESP32(13,8); _DEFPIN_ESP32(14,9); _DEFPIN_ESP32(15,10);
+// _DEFPIN_ESP32(16,11);
+
+#endif
+
+#define HAS_HARDWARE_PIN_SUPPORT
+
+#define FASTLED_NAMESPACE_END
diff --git a/platforms/esp/32/led_sysdefs_esp32.h b/platforms/esp/32/led_sysdefs_esp32.h
new file mode 100644
index 0000000000..68e782398e
--- /dev/null
+++ b/platforms/esp/32/led_sysdefs_esp32.h
@@ -0,0 +1,33 @@
+#pragma once
+
+#ifndef ESP32
+#define ESP32
+#endif
+
+#define FASTLED_ESP32
+
+// Use system millis timer
+#define FASTLED_HAS_MILLIS
+
+typedef volatile uint32_t RoReg;
+typedef volatile uint32_t RwReg;
+typedef unsigned long prog_uint32_t;
+typedef bool boolean;
+
+// Default to NOT using PROGMEM here
+#ifndef FASTLED_USE_PROGMEM
+# define FASTLED_USE_PROGMEM 0
+#endif
+
+#ifndef FASTLED_ALLOW_INTERRUPTS
+# define FASTLED_ALLOW_INTERRUPTS 1
+# define INTERRUPT_THRESHOLD 0
+#endif
+
+#define NEED_CXX_BITS
+
+// These can be overridden
+#   define FASTLED_ESP32_RAW_PIN_ORDER
+
+// #define cli() os_intr_lock();
+// #define sei() os_intr_lock();

From b1ffc707a1a7075840018ba44617be21bbccf568 Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Thu, 28 Sep 2017 22:36:12 -0400
Subject: [PATCH 002/204] Clean up interrupt handling

I think there was actually an error in the interrupt enabling/disabling, but I also cleaned it up so that it is clearer how interrupts are handled.
---
 platforms/esp/32/clockless_esp32.h | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/platforms/esp/32/clockless_esp32.h b/platforms/esp/32/clockless_esp32.h
index 0ed9224b0f..10c0ae0dda 100644
--- a/platforms/esp/32/clockless_esp32.h
+++ b/platforms/esp/32/clockless_esp32.h
@@ -43,13 +43,8 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 #ifdef FASTLED_DEBUG_COUNT_FRAME_RETRIES
 	    _retry_cnt++;
 #endif
-	    ets_intr_unlock();
-	    // interrupts();
 	    delayMicroseconds(WAIT_TIME);
-	    ets_intr_lock();
-	    // noInterrupts();
 	}
-	// ets_intr_unlock();
 	mWait.mark();
     }
 
@@ -79,11 +74,19 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 	pixels.preStepFirstByteDithering();
 	register uint32_t b = pixels.loadAndScale0();
 	pixels.preStepFirstByteDithering();
+
+#if (FASTLED_ALLOW_INTERRUPTS == 0)
 	ets_intr_lock();
-	// noInterrupts();
+#endif
+
 	uint32_t start = __clock_cycles();
 	uint32_t last_mark = start;
 	while(pixels.has(1)) {
+
+#if (FASTLED_ALLOW_INTERRUPTS == 1)
+	    ets_intr_lock();
+#endif
+
 	    // Write first byte, read next byte
 	    writeBits<8+XTRA0>(last_mark, b);
 	    b = pixels.loadAndScale1();
@@ -96,15 +99,10 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 	    writeBits<8+XTRA0>(last_mark, b);
 	    b = pixels.advanceAndLoadAndScale0();
 	    
-#if (FASTLED_ALLOW_INTERRUPTS == 1)
-	    ets_intr_unlock();
-	    // interrupts();
-#endif
-	    
 	    pixels.stepDithering();
 	    
 #if (FASTLED_ALLOW_INTERRUPTS == 1)
-	    ets_intr_lock();
+	    ets_intr_unlock();
 	    // noInterrupts();
 	    // if interrupts took longer than 45µs, punt on the current frame
 	    if((int32_t)(__clock_cycles()-last_mark) > 0) {
@@ -113,8 +111,10 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 #endif
 	};
 
+#if (FASTLED_ALLOW_INTERRUPTS == 0)
 	ets_intr_unlock();
-	// interrupts();
+#endif
+
 #ifdef FASTLED_DEBUG_COUNT_FRAME_RETRIES
 	_frame_cnt++;
 #endif

From b94a63ec95f78d47f13ce7e34bb5f376882eaa8b Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Mon, 2 Oct 2017 21:28:04 -0400
Subject: [PATCH 003/204] Better interrupt handling

---
 platforms/esp/32/clockless_esp32.h   | 23 ++++++++++++-----------
 platforms/esp/32/led_sysdefs_esp32.h |  2 +-
 2 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/platforms/esp/32/clockless_esp32.h b/platforms/esp/32/clockless_esp32.h
index 10c0ae0dda..605ba28530 100644
--- a/platforms/esp/32/clockless_esp32.h
+++ b/platforms/esp/32/clockless_esp32.h
@@ -43,7 +43,9 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 #ifdef FASTLED_DEBUG_COUNT_FRAME_RETRIES
 	    _retry_cnt++;
 #endif
+	    ets_intr_unlock();
 	    delayMicroseconds(WAIT_TIME);
+	    ets_intr_lock();
 	}
 	mWait.mark();
     }
@@ -75,18 +77,12 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 	register uint32_t b = pixels.loadAndScale0();
 	pixels.preStepFirstByteDithering();
 
-#if (FASTLED_ALLOW_INTERRUPTS == 0)
 	ets_intr_lock();
-#endif
 
 	uint32_t start = __clock_cycles();
 	uint32_t last_mark = start;
 	while(pixels.has(1)) {
 
-#if (FASTLED_ALLOW_INTERRUPTS == 1)
-	    ets_intr_lock();
-#endif
-
 	    // Write first byte, read next byte
 	    writeBits<8+XTRA0>(last_mark, b);
 	    b = pixels.loadAndScale1();
@@ -99,25 +95,30 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 	    writeBits<8+XTRA0>(last_mark, b);
 	    b = pixels.advanceAndLoadAndScale0();
 	    
+#if (FASTLED_ALLOW_INTERRUPTS == 1)
+	    ets_intr_unlock();	    
+#endif
+
 	    pixels.stepDithering();
 	    
 #if (FASTLED_ALLOW_INTERRUPTS == 1)
-	    ets_intr_unlock();
-	    // noInterrupts();
+	    ets_intr_lock();
 	    // if interrupts took longer than 45µs, punt on the current frame
 	    if((int32_t)(__clock_cycles()-last_mark) > 0) {
-		if((int32_t)(__clock_cycles()-last_mark) > (T1+T2+T3+((WAIT_TIME-INTERRUPT_THRESHOLD)*CLKS_PER_US))) { sei(); return 0; }
+		if((int32_t)(__clock_cycles()-last_mark) > (T1+T2+T3+((WAIT_TIME-INTERRUPT_THRESHOLD)*CLKS_PER_US))) {
+		    ets_intr_unlock();
+		    return 0; 
+		}
 	    }
 #endif
 	};
 
-#if (FASTLED_ALLOW_INTERRUPTS == 0)
 	ets_intr_unlock();
-#endif
 
 #ifdef FASTLED_DEBUG_COUNT_FRAME_RETRIES
 	_frame_cnt++;
 #endif
+
 	return __clock_cycles() - start;
     }
 };
diff --git a/platforms/esp/32/led_sysdefs_esp32.h b/platforms/esp/32/led_sysdefs_esp32.h
index 68e782398e..88d8e0cc72 100644
--- a/platforms/esp/32/led_sysdefs_esp32.h
+++ b/platforms/esp/32/led_sysdefs_esp32.h
@@ -20,7 +20,7 @@ typedef bool boolean;
 #endif
 
 #ifndef FASTLED_ALLOW_INTERRUPTS
-# define FASTLED_ALLOW_INTERRUPTS 1
+# define FASTLED_ALLOW_INTERRUPTS 0
 # define INTERRUPT_THRESHOLD 0
 #endif
 

From 01bf9cfa4e18a4683f568cba4347ee9e10e8a1c1 Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Wed, 4 Oct 2017 21:05:39 -0400
Subject: [PATCH 004/204] Added RMT version

Not fully portable yet, though. The timing numbers are hard-wired for WS2812, and the RMT channel is also hard-wired.
---
 platforms/esp/32/clockless_esp32.h | 126 ++++++++++++++++++++++++++++-
 1 file changed, 123 insertions(+), 3 deletions(-)

diff --git a/platforms/esp/32/clockless_esp32.h b/platforms/esp/32/clockless_esp32.h
index 605ba28530..3af09c1782 100644
--- a/platforms/esp/32/clockless_esp32.h
+++ b/platforms/esp/32/clockless_esp32.h
@@ -7,6 +7,19 @@ extern uint32_t _frame_cnt;
 extern uint32_t _retry_cnt;
 #endif
 
+#include <driver/rmt.h>
+
+// RMT Clock source is @ 80 MHz. Dividing it by 8 gives us 10 MHz frequency, or 100ns period.
+#define LED_STRIP_RMT_CLK_DIV (8)
+
+/****************************
+        WS2812 Timing
+****************************/
+#define LED_STRIP_RMT_TICKS_BIT_1_HIGH_WS2812 9 // 900ns (900ns +/- 150ns per datasheet)
+#define LED_STRIP_RMT_TICKS_BIT_1_LOW_WS2812  3 // 300ns (350ns +/- 150ns per datasheet)
+#define LED_STRIP_RMT_TICKS_BIT_0_HIGH_WS2812 3 // 300ns (350ns +/- 150ns per datasheet)
+#define LED_STRIP_RMT_TICKS_BIT_0_LOW_WS2812  9 // 900ns (900ns +/- 150ns per datasheet)
+
 // Info on reading cycle counter from https://github.com/kbeckmann/nodemcu-firmware/blob/ws2812-dual/app/modules/ws2812.c
 __attribute__ ((always_inline)) inline static uint32_t __clock_cycles() {
   uint32_t cyc;
@@ -25,15 +38,49 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
     data_t mPinMask;
     data_ptr_t mPort;
     CMinWait<WAIT_TIME> mWait;
+
+    rmt_channel_t LED_RMT_CHANNEL;
+    rmt_config_t mRMT_config;
+    
 public:
     virtual void init() {
+	LED_RMT_CHANNEL = RMT_CHANNEL_0;
+
 	FastPin<DATA_PIN>::setOutput();
 	mPinMask = FastPin<DATA_PIN>::mask();
 	mPort = FastPin<DATA_PIN>::port();
+
+	mRMT_config.rmt_mode = RMT_MODE_TX;
+	mRMT_config.channel = LED_RMT_CHANNEL;
+	mRMT_config.clk_div = LED_STRIP_RMT_CLK_DIV;
+	mRMT_config.gpio_num = (gpio_num_t) DATA_PIN;
+	mRMT_config.mem_block_num = 1;
+
+	mRMT_config.tx_config.loop_en = false;
+	mRMT_config.tx_config.carrier_freq_hz = 100; // Not used, but has to be set to avoid divide by 0 err
+	mRMT_config.tx_config.carrier_duty_percent = 50;
+	mRMT_config.tx_config.carrier_level = RMT_CARRIER_LEVEL_LOW;
+	mRMT_config.tx_config.carrier_en = false;
+	mRMT_config.tx_config.idle_level = RMT_IDLE_LEVEL_LOW;
+	mRMT_config.tx_config.idle_output_en = true;
+
+	esp_err_t cfg_ok = rmt_config(&mRMT_config);
+	if (cfg_ok != ESP_OK) {
+	    Serial.println("RMT config failed");
+	    return;
+	}
+	esp_err_t install_ok = rmt_driver_install(mRMT_config.channel, 0, 0);
+	if (install_ok != ESP_OK) {
+	    Serial.println("RMT driver install failed");
+	    return;
+	}
     }
 
     virtual uint16_t getMaxRefreshRate() const { return 400; }
 
+    // rmt_item32_t * rmt_items = 0;
+    // size_t num_rmt_items;
+
 protected:
 
     virtual void showPixels(PixelController<RGB_ORDER> & pixels) {
@@ -43,9 +90,9 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 #ifdef FASTLED_DEBUG_COUNT_FRAME_RETRIES
 	    _retry_cnt++;
 #endif
-	    ets_intr_unlock();
+	    // ets_intr_unlock();
 	    delayMicroseconds(WAIT_TIME);
-	    ets_intr_lock();
+	    // ets_intr_lock();
 	}
 	mWait.mark();
     }
@@ -53,6 +100,79 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 #define _ESP_ADJ (0)
 #define _ESP_ADJ2 (0)
 
+    __attribute__ ((always_inline)) inline static void convertBit(rmt_item32_t * item, register uint32_t b) {
+	if (b & 0x80000000L) {
+	    item->level0 = 1;
+	    item->duration0 = LED_STRIP_RMT_TICKS_BIT_1_HIGH_WS2812;
+	    item->level1 = 0;
+	    item->duration1 = LED_STRIP_RMT_TICKS_BIT_1_LOW_WS2812;
+	} else {
+	    item->level0 = 1;
+	    item->duration0 = LED_STRIP_RMT_TICKS_BIT_0_HIGH_WS2812;
+	    item->level1 = 0;
+	    item->duration1 = LED_STRIP_RMT_TICKS_BIT_0_LOW_WS2812;
+	}
+    }
+    
+    uint32_t showRGBInternal(PixelController<RGB_ORDER> pixels) {
+	
+	// -- Allocate the RMT buffer (this should really only be done once)
+	int num_rmt_items = (pixels.size() * 3 * 8);
+	rmt_item32_t * rmt_items = (rmt_item32_t*) malloc(sizeof(rmt_item32_t) * num_rmt_items);
+	int cur_item = 0;
+	
+	// Setup the pixel controller and load/scale the first byte
+	pixels.preStepFirstByteDithering();
+	register uint32_t b = pixels.loadAndScale0();
+	pixels.preStepFirstByteDithering();
+
+	uint32_t start = __clock_cycles();
+	while(pixels.has(1)) {
+
+	    // Write first byte, read next byte
+	    b <<= 24;
+	    for (register uint32_t i = 8; i > 0; i--) {
+		convertBit(&rmt_items[cur_item], b);
+		cur_item++;
+		b <<= 1;
+	    }		
+	    b = pixels.loadAndScale1();
+	    
+	    // Write second byte, read 3rd byte
+	    b <<= 24;
+	    for (register uint32_t i = 8; i > 0; i--) {
+		convertBit(&rmt_items[cur_item], b);
+		cur_item++;
+		b <<= 1;
+	    }		
+	    b = pixels.loadAndScale2();
+	    
+	    // Write third byte, read 1st byte of next pixel
+	    b <<= 24;
+	    for (register uint32_t i = 8; i > 0; i--) {
+		convertBit(&rmt_items[cur_item], b);
+		cur_item++;
+		b <<= 1;
+	    }		
+	    b = pixels.advanceAndLoadAndScale0();
+	    
+	    pixels.stepDithering();
+	    
+	};
+
+#ifdef FASTLED_DEBUG_COUNT_FRAME_RETRIES
+	_frame_cnt++;
+#endif
+
+	// -- Now, actually send the bits!
+	rmt_write_items(LED_RMT_CHANNEL, rmt_items, num_rmt_items, true);
+	free(rmt_items);
+	
+	return __clock_cycles() - start;
+    }
+
+    // -------------- OLD VERSION -------------------------------
+
     template<int BITS> __attribute__ ((always_inline)) inline static void writeBits(register uint32_t & last_mark, register uint32_t b) {
 	b = ~b; b <<= 24;
 	for(register uint32_t i = BITS; i > 0; i--) {
@@ -71,7 +191,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 
     // This method is made static to force making register Y available to use for data on AVR - if the method is non-static, then
     // gcc will use register Y for the this pointer.
-    static uint32_t showRGBInternal(PixelController<RGB_ORDER> pixels) {
+    static uint32_t showRGBInternalOLD(PixelController<RGB_ORDER> pixels) {
 	// Setup the pixel controller and load/scale the first byte
 	pixels.preStepFirstByteDithering();
 	register uint32_t b = pixels.loadAndScale0();

From 4c38836f243deaf931a779741f69e766bc822227 Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Wed, 4 Oct 2017 21:52:05 -0400
Subject: [PATCH 005/204] Fixed the timing

Timing is now computed from T1, T2, and T3 instead of being hard-wired.
---
 platforms/esp/32/clockless_esp32.h | 41 +++++++++++++++++-------------
 1 file changed, 23 insertions(+), 18 deletions(-)

diff --git a/platforms/esp/32/clockless_esp32.h b/platforms/esp/32/clockless_esp32.h
index 3af09c1782..475d21d0ea 100644
--- a/platforms/esp/32/clockless_esp32.h
+++ b/platforms/esp/32/clockless_esp32.h
@@ -9,16 +9,13 @@ extern uint32_t _retry_cnt;
 
 #include <driver/rmt.h>
 
-// RMT Clock source is @ 80 MHz. Dividing it by 8 gives us 10 MHz frequency, or 100ns period.
-#define LED_STRIP_RMT_CLK_DIV (8)
+// RMT Clock source is @ 80 MHz. Dividing it by 4 gives us 20 MHz frequency, or 50ns period.
+#define LED_STRIP_RMT_CLK_DIV  4   /* 8 still seems to work, but timings become marginal */
+#define RMT_DURATION_NS       12.5 /* minimum time of a single RMT duration based on clock ns */
 
-/****************************
-        WS2812 Timing
-****************************/
-#define LED_STRIP_RMT_TICKS_BIT_1_HIGH_WS2812 9 // 900ns (900ns +/- 150ns per datasheet)
-#define LED_STRIP_RMT_TICKS_BIT_1_LOW_WS2812  3 // 300ns (350ns +/- 150ns per datasheet)
-#define LED_STRIP_RMT_TICKS_BIT_0_HIGH_WS2812 3 // 300ns (350ns +/- 150ns per datasheet)
-#define LED_STRIP_RMT_TICKS_BIT_0_LOW_WS2812  9 // 900ns (900ns +/- 150ns per datasheet)
+// These macros help us convert from ESP32 clock cycles to RMT "ticks"
+#define PERIOD  50  /* RMT_DURATION_NS * LED_STRIP_RMT_CLK_DIV */
+#define TO_NS(_CLKS) (((((long)(_CLKS)) * 1000 - 999) / F_CPU_MHZ))
 
 // Info on reading cycle counter from https://github.com/kbeckmann/nodemcu-firmware/blob/ws2812-dual/app/modules/ws2812.c
 __attribute__ ((always_inline)) inline static uint32_t __clock_cycles() {
@@ -39,6 +36,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
     data_ptr_t mPort;
     CMinWait<WAIT_TIME> mWait;
 
+    uint16_t T0H, T1H, T0L, T1L;
     rmt_channel_t LED_RMT_CHANNEL;
     rmt_config_t mRMT_config;
     
@@ -50,6 +48,13 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 	mPinMask = FastPin<DATA_PIN>::mask();
 	mPort = FastPin<DATA_PIN>::port();
 
+	// -- Compute the timing values
+	//    We are converting from ESP32 clock cycles (~4ns) to RMT peripheral ticks (12.5ns)
+	T0H = TO_NS(T1) / PERIOD;
+	T1H = TO_NS(T1 + T2) / PERIOD;
+	T0L = TO_NS(T2 + T3) / PERIOD;
+	T1L = TO_NS(T3) / PERIOD;
+
 	mRMT_config.rmt_mode = RMT_MODE_TX;
 	mRMT_config.channel = LED_RMT_CHANNEL;
 	mRMT_config.clk_div = LED_STRIP_RMT_CLK_DIV;
@@ -100,17 +105,17 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 #define _ESP_ADJ (0)
 #define _ESP_ADJ2 (0)
 
-    __attribute__ ((always_inline)) inline static void convertBit(rmt_item32_t * item, register uint32_t b) {
+    __attribute__ ((always_inline)) inline void convertBit(rmt_item32_t * item, register uint32_t b) {
 	if (b & 0x80000000L) {
 	    item->level0 = 1;
-	    item->duration0 = LED_STRIP_RMT_TICKS_BIT_1_HIGH_WS2812;
+	    item->duration0 = T1H; // LED_STRIP_RMT_TICKS_BIT_1_HIGH_WS2812;
 	    item->level1 = 0;
-	    item->duration1 = LED_STRIP_RMT_TICKS_BIT_1_LOW_WS2812;
+	    item->duration1 = T1L; // LED_STRIP_RMT_TICKS_BIT_1_LOW_WS2812;
 	} else {
 	    item->level0 = 1;
-	    item->duration0 = LED_STRIP_RMT_TICKS_BIT_0_HIGH_WS2812;
+	    item->duration0 = T0H; // LED_STRIP_RMT_TICKS_BIT_0_HIGH_WS2812;
 	    item->level1 = 0;
-	    item->duration1 = LED_STRIP_RMT_TICKS_BIT_0_LOW_WS2812;
+	    item->duration1 = T0L; // LED_STRIP_RMT_TICKS_BIT_0_LOW_WS2812;
 	}
     }
     
@@ -119,7 +124,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 	// -- Allocate the RMT buffer (this should really only be done once)
 	int num_rmt_items = (pixels.size() * 3 * 8);
 	rmt_item32_t * rmt_items = (rmt_item32_t*) malloc(sizeof(rmt_item32_t) * num_rmt_items);
-	int cur_item = 0;
+	rmt_item32_t * cur_item = & rmt_items[0];
 	
 	// Setup the pixel controller and load/scale the first byte
 	pixels.preStepFirstByteDithering();
@@ -132,7 +137,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 	    // Write first byte, read next byte
 	    b <<= 24;
 	    for (register uint32_t i = 8; i > 0; i--) {
-		convertBit(&rmt_items[cur_item], b);
+		convertBit(cur_item, b);
 		cur_item++;
 		b <<= 1;
 	    }		
@@ -141,7 +146,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 	    // Write second byte, read 3rd byte
 	    b <<= 24;
 	    for (register uint32_t i = 8; i > 0; i--) {
-		convertBit(&rmt_items[cur_item], b);
+		convertBit(cur_item, b);
 		cur_item++;
 		b <<= 1;
 	    }		
@@ -150,7 +155,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 	    // Write third byte, read 1st byte of next pixel
 	    b <<= 24;
 	    for (register uint32_t i = 8; i > 0; i--) {
-		convertBit(&rmt_items[cur_item], b);
+		convertBit(cur_item, b);
 		cur_item++;
 		b <<= 1;
 	    }		

From d9fdee63901e251feba1b63e2a06185ae4e58f82 Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Thu, 5 Oct 2017 15:37:54 -0400
Subject: [PATCH 006/204] Better buffer management

The RMT signal is sent in 10-pixel chunks, using double-buffering to hide the latency when possible. Also: assign RMT channels sequentially.
---
 platforms/esp/32/clockless_esp32.h | 141 ++++++++++++++++++-----------
 1 file changed, 87 insertions(+), 54 deletions(-)

diff --git a/platforms/esp/32/clockless_esp32.h b/platforms/esp/32/clockless_esp32.h
index 475d21d0ea..a0f7e5b0ec 100644
--- a/platforms/esp/32/clockless_esp32.h
+++ b/platforms/esp/32/clockless_esp32.h
@@ -17,6 +17,11 @@ extern uint32_t _retry_cnt;
 #define PERIOD  50  /* RMT_DURATION_NS * LED_STRIP_RMT_CLK_DIV */
 #define TO_NS(_CLKS) (((((long)(_CLKS)) * 1000 - 999) / F_CPU_MHZ))
 
+#define RMT_MAX_WAIT 100  // Should really figure out how many ticks in 45us
+#define RMT_ITEMS_SIZE (20 * 3 * 8)  // Number of RMT items for 20 pixels -- 1840 bytes
+
+static int Next_RMT_Channel = 0;
+
 // Info on reading cycle counter from https://github.com/kbeckmann/nodemcu-firmware/blob/ws2812-dual/app/modules/ws2812.c
 __attribute__ ((always_inline)) inline static uint32_t __clock_cycles() {
   uint32_t cyc;
@@ -36,13 +41,19 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
     data_ptr_t mPort;
     CMinWait<WAIT_TIME> mWait;
 
-    uint16_t T0H, T1H, T0L, T1L;
-    rmt_channel_t LED_RMT_CHANNEL;
-    rmt_config_t mRMT_config;
-    
+    uint16_t mT0H, mT1H, mT0L, mT1L;
+    rmt_channel_t mLED_RMT_CHANNEL;
+    rmt_config_t mRMT_config;    
+    rmt_item32_t mRMT_items[RMT_ITEMS_SIZE];
+
 public:
     virtual void init() {
-	LED_RMT_CHANNEL = RMT_CHANNEL_0;
+	// -- Assign RMT channels sequentially
+	if (Next_RMT_Channel > 7) {
+	    Serial.println("ERROR: Not enough RMT channels!");
+	}
+	mLED_RMT_CHANNEL = (rmt_channel_t) Next_RMT_Channel;
+	Next_RMT_Channel++;
 
 	FastPin<DATA_PIN>::setOutput();
 	mPinMask = FastPin<DATA_PIN>::mask();
@@ -50,13 +61,14 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 
 	// -- Compute the timing values
 	//    We are converting from ESP32 clock cycles (~4ns) to RMT peripheral ticks (12.5ns)
-	T0H = TO_NS(T1) / PERIOD;
-	T1H = TO_NS(T1 + T2) / PERIOD;
-	T0L = TO_NS(T2 + T3) / PERIOD;
-	T1L = TO_NS(T3) / PERIOD;
+	mT0H = TO_NS(T1) / PERIOD;
+	mT1H = TO_NS(T1 + T2) / PERIOD;
+	mT0L = TO_NS(T2 + T3) / PERIOD;
+	mT1L = TO_NS(T3) / PERIOD;
 
+	// -- Set up the RMT peripheral
 	mRMT_config.rmt_mode = RMT_MODE_TX;
-	mRMT_config.channel = LED_RMT_CHANNEL;
+	mRMT_config.channel = mLED_RMT_CHANNEL;
 	mRMT_config.clk_div = LED_STRIP_RMT_CLK_DIV;
 	mRMT_config.gpio_num = (gpio_num_t) DATA_PIN;
 	mRMT_config.mem_block_num = 1;
@@ -83,9 +95,6 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 
     virtual uint16_t getMaxRefreshRate() const { return 400; }
 
-    // rmt_item32_t * rmt_items = 0;
-    // size_t num_rmt_items;
-
 protected:
 
     virtual void showPixels(PixelController<RGB_ORDER> & pixels) {
@@ -105,73 +114,97 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 #define _ESP_ADJ (0)
 #define _ESP_ADJ2 (0)
 
-    __attribute__ ((always_inline)) inline void convertBit(rmt_item32_t * item, register uint32_t b) {
+    // -- convertBit
+    //    Translate a single bit into an RMT signal entry using the given timing variables
+    __attribute__ ((always_inline)) 
+    inline void convertBit(rmt_item32_t * item, register uint32_t b) 
+    {
 	if (b & 0x80000000L) {
 	    item->level0 = 1;
-	    item->duration0 = T1H; // LED_STRIP_RMT_TICKS_BIT_1_HIGH_WS2812;
+	    item->duration0 = mT1H; // LED_STRIP_RMT_TICKS_BIT_1_HIGH_WS2812;
 	    item->level1 = 0;
-	    item->duration1 = T1L; // LED_STRIP_RMT_TICKS_BIT_1_LOW_WS2812;
+	    item->duration1 = mT1L; // LED_STRIP_RMT_TICKS_BIT_1_LOW_WS2812;
 	} else {
 	    item->level0 = 1;
-	    item->duration0 = T0H; // LED_STRIP_RMT_TICKS_BIT_0_HIGH_WS2812;
+	    item->duration0 = mT0H; // LED_STRIP_RMT_TICKS_BIT_0_HIGH_WS2812;
 	    item->level1 = 0;
-	    item->duration1 = T0L; // LED_STRIP_RMT_TICKS_BIT_0_LOW_WS2812;
+	    item->duration1 = mT0L; // LED_STRIP_RMT_TICKS_BIT_0_LOW_WS2812;
 	}
     }
     
     uint32_t showRGBInternal(PixelController<RGB_ORDER> pixels) {
 	
-	// -- Allocate the RMT buffer (this should really only be done once)
-	int num_rmt_items = (pixels.size() * 3 * 8);
-	rmt_item32_t * rmt_items = (rmt_item32_t*) malloc(sizeof(rmt_item32_t) * num_rmt_items);
-	rmt_item32_t * cur_item = & rmt_items[0];
-	
+	int start_item = 0;
+	rmt_item32_t * cur_item = & mRMT_items[0];
+
 	// Setup the pixel controller and load/scale the first byte
 	pixels.preStepFirstByteDithering();
 	register uint32_t b = pixels.loadAndScale0();
 	pixels.preStepFirstByteDithering();
 
 	uint32_t start = __clock_cycles();
-	while(pixels.has(1)) {
-
-	    // Write first byte, read next byte
-	    b <<= 24;
-	    for (register uint32_t i = 8; i > 0; i--) {
-		convertBit(cur_item, b);
-		cur_item++;
-		b <<= 1;
-	    }		
-	    b = pixels.loadAndScale1();
-	    
-	    // Write second byte, read 3rd byte
-	    b <<= 24;
-	    for (register uint32_t i = 8; i > 0; i--) {
-		convertBit(cur_item, b);
-		cur_item++;
-		b <<= 1;
-	    }		
-	    b = pixels.loadAndScale2();
+	while (pixels.has(1)) {
+
+	    // -- Prepare a chunk of RMT items for no more than 10 pixels
+	    int num_items = 0;
+	    while (pixels.has(1) && (num_items < (RMT_ITEMS_SIZE/2))) {
+
+		// Write first byte, read next byte
+		b <<= 24;
+		for (register uint32_t i = 8; i > 0; i--) {
+		    convertBit(cur_item, b);
+		    cur_item++;
+		    num_items++;
+		    b <<= 1;
+		}
+		b = pixels.loadAndScale1();
 	    
-	    // Write third byte, read 1st byte of next pixel
-	    b <<= 24;
-	    for (register uint32_t i = 8; i > 0; i--) {
-		convertBit(cur_item, b);
-		cur_item++;
-		b <<= 1;
-	    }		
-	    b = pixels.advanceAndLoadAndScale0();
+		// Write second byte, read 3rd byte
+		b <<= 24;
+		for (register uint32_t i = 8; i > 0; i--) {
+		    convertBit(cur_item, b);
+		    cur_item++;
+		    num_items++;
+		    b <<= 1;
+		}
+		b = pixels.loadAndScale2();
 	    
-	    pixels.stepDithering();
+		// Write third byte, read 1st byte of next pixel
+		b <<= 24;
+		for (register uint32_t i = 8; i > 0; i--) {
+		    convertBit(cur_item, b);
+		    cur_item++;
+		    num_items++;
+		    b <<= 1;
+		}
+		b = pixels.advanceAndLoadAndScale0();
 	    
-	};
+		pixels.stepDithering();   
+	    }
+
+	    // -- Wait for the previous chunk of 10 items to finish
+	    rmt_wait_tx_done(mLED_RMT_CHANNEL, RMT_MAX_WAIT);
+
+	    // -- Send the new chunk
+	    rmt_write_items(mLED_RMT_CHANNEL, &mRMT_items[start_item], num_items, false);
+
+	    // -- Shift the window foward to compute the next chunk, wrapping around
+            //    as necessary
+	    start_item += num_items;
+	    if (start_item >= RMT_ITEMS_SIZE) start_item = 0;
+	    cur_item = & mRMT_items[start_item];
+	}
 
 #ifdef FASTLED_DEBUG_COUNT_FRAME_RETRIES
 	_frame_cnt++;
 #endif
 
 	// -- Now, actually send the bits!
-	rmt_write_items(LED_RMT_CHANNEL, rmt_items, num_rmt_items, true);
-	free(rmt_items);
+	// rmt_write_items(mLED_RMT_CHANNEL, rmt_items, num_rmt_items, true);
+	// free(rmt_items);
+
+	// -- Wait for the last chunk of values to be sent
+	rmt_wait_tx_done(mLED_RMT_CHANNEL, RMT_MAX_WAIT);
 	
 	return __clock_cycles() - start;
     }

From 0bc8a61e57113314e9cfd5edbc2d6af204142ed7 Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Fri, 6 Oct 2017 22:28:43 -0400
Subject: [PATCH 007/204] Total rewrite using Martin's code

---
 platforms/esp/32/clockless_esp32.h | 442 +++++++++++++----------------
 1 file changed, 201 insertions(+), 241 deletions(-)

diff --git a/platforms/esp/32/clockless_esp32.h b/platforms/esp/32/clockless_esp32.h
index a0f7e5b0ec..114e8dcb21 100644
--- a/platforms/esp/32/clockless_esp32.h
+++ b/platforms/esp/32/clockless_esp32.h
@@ -1,283 +1,243 @@
+/*
+ * Integration into FastLED ClocklessController 2017 Thomas Basler
+ *
+ * Modifications Copyright (c) 2017 Martin F. Falatic
+ *
+ * Based on public domain code created 19 Nov 2016 by Chris Osborn <fozztexx@fozztexx.com>
+ * http://insentricity.com *
+ *
+ */
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
 #pragma once
 
 FASTLED_NAMESPACE_BEGIN
 
-#ifdef FASTLED_DEBUG_COUNT_FRAME_RETRIES
-extern uint32_t _frame_cnt;
-extern uint32_t _retry_cnt;
+#ifdef __cplusplus
+extern "C" {
 #endif
 
-#include <driver/rmt.h>
-
-// RMT Clock source is @ 80 MHz. Dividing it by 4 gives us 20 MHz frequency, or 50ns period.
-#define LED_STRIP_RMT_CLK_DIV  4   /* 8 still seems to work, but timings become marginal */
-#define RMT_DURATION_NS       12.5 /* minimum time of a single RMT duration based on clock ns */
+#include "esp32-hal.h"
+#include "esp_intr.h"
+#include "driver/gpio.h"
+#include "driver/rmt.h"
+#include "driver/periph_ctrl.h"
+#include "freertos/semphr.h"
+#include "soc/rmt_struct.h"
 
-// These macros help us convert from ESP32 clock cycles to RMT "ticks"
-#define PERIOD  50  /* RMT_DURATION_NS * LED_STRIP_RMT_CLK_DIV */
-#define TO_NS(_CLKS) (((((long)(_CLKS)) * 1000 - 999) / F_CPU_MHZ))
+#include "esp_log.h"
 
-#define RMT_MAX_WAIT 100  // Should really figure out how many ticks in 45us
-#define RMT_ITEMS_SIZE (20 * 3 * 8)  // Number of RMT items for 20 pixels -- 1840 bytes
+#ifdef __cplusplus
+}
+#endif
 
-static int Next_RMT_Channel = 0;
+#define DIVIDER             4 /* 8 still seems to work, but timings become marginal */
+#define MAX_PULSES         32 /* A channel has a 64 "pulse" buffer - we use half per pass */
+#define RMT_DURATION_NS  12.5 /* minimum time of a single RMT duration based on clock ns */
 
-// Info on reading cycle counter from https://github.com/kbeckmann/nodemcu-firmware/blob/ws2812-dual/app/modules/ws2812.c
-__attribute__ ((always_inline)) inline static uint32_t __clock_cycles() {
-  uint32_t cyc;
-  __asm__ __volatile__ ("rsr %0,ccount":"=a" (cyc));
-  return cyc;
-}
+#define CLKS_TO_NS(_CLKS) ((((long)(_CLKS)) * 1000 - 999) / F_CPU_MHZ)
 
 #define FASTLED_HAS_CLOCKLESS 1
 
+static uint8_t rmt_channels_used = 0;
+
 template <int DATA_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = RGB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 5>
-class ClocklessController : public CPixelLEDController<RGB_ORDER> {
+class ClocklessController : public CPixelLEDController<RGB_ORDER>
+{
+    rmt_item32_t mZero;
+    rmt_item32_t mOne;
+
+    rmt_channel_t mRMT_channel;
+    xSemaphoreHandle mTX_sem = NULL;
+    intr_handle_t mRMT_intr_handle = NULL;
+    
+    PixelController<RGB_ORDER> *local_pixels  = NULL;
+    uint16_t mRGB_channel;
+    uint16_t mCurPulse;
 
-    typedef typename FastPin<DATA_PIN>::port_ptr_t data_ptr_t;
-    typedef typename FastPin<DATA_PIN>::port_t data_t;
+public:
 
-    data_t mPinMask;
-    data_ptr_t mPort;
-    CMinWait<WAIT_TIME> mWait;
+    virtual void init()
+    {
+	// TRS = 50000;
+
+	// -- Precompute rmt items corresponding to a zero bit and a one bit
+	//    according to the timing values given in the template instantiation
+	mOne.level0 = 1;
+	mOne.duration0 = CLKS_TO_NS(T1 + T2) / (RMT_DURATION_NS * DIVIDER);
+	mOne.level1 = 0;
+	mOne.duration1 = CLKS_TO_NS(T3) / (RMT_DURATION_NS * DIVIDER);
+
+	mZero.level0 = 1;
+	mZero.duration0 = CLKS_TO_NS(T1) / (RMT_DURATION_NS * DIVIDER);
+	mZero.level1 = 0;
+	mZero.duration1 = CLKS_TO_NS(T2 + T3) / (RMT_DURATION_NS * DIVIDER);
+
+	// -- Sequentially assign RMT channels -- at most 8
+	mRMT_channel =  (rmt_channel_t) rmt_channels_used++;
+	if (mRMT_channel > 7) {
+	    assert("Only 8 RMT Channels are allowed");
+	}
 
-    uint16_t mT0H, mT1H, mT0L, mT1L;
-    rmt_channel_t mLED_RMT_CHANNEL;
-    rmt_config_t mRMT_config;    
-    rmt_item32_t mRMT_items[RMT_ITEMS_SIZE];
+	ESP_LOGI("fastled", "RMT Channel Init: %d", mRMT_channel);
 
-public:
-    virtual void init() {
-	// -- Assign RMT channels sequentially
-	if (Next_RMT_Channel > 7) {
-	    Serial.println("ERROR: Not enough RMT channels!");
-	}
-	mLED_RMT_CHANNEL = (rmt_channel_t) Next_RMT_Channel;
-	Next_RMT_Channel++;
-
-	FastPin<DATA_PIN>::setOutput();
-	mPinMask = FastPin<DATA_PIN>::mask();
-	mPort = FastPin<DATA_PIN>::port();
-
-	// -- Compute the timing values
-	//    We are converting from ESP32 clock cycles (~4ns) to RMT peripheral ticks (12.5ns)
-	mT0H = TO_NS(T1) / PERIOD;
-	mT1H = TO_NS(T1 + T2) / PERIOD;
-	mT0L = TO_NS(T2 + T3) / PERIOD;
-	mT1L = TO_NS(T3) / PERIOD;
-
-	// -- Set up the RMT peripheral
-	mRMT_config.rmt_mode = RMT_MODE_TX;
-	mRMT_config.channel = mLED_RMT_CHANNEL;
-	mRMT_config.clk_div = LED_STRIP_RMT_CLK_DIV;
-	mRMT_config.gpio_num = (gpio_num_t) DATA_PIN;
-	mRMT_config.mem_block_num = 1;
-
-	mRMT_config.tx_config.loop_en = false;
-	mRMT_config.tx_config.carrier_freq_hz = 100; // Not used, but has to be set to avoid divide by 0 err
-	mRMT_config.tx_config.carrier_duty_percent = 50;
-	mRMT_config.tx_config.carrier_level = RMT_CARRIER_LEVEL_LOW;
-	mRMT_config.tx_config.carrier_en = false;
-	mRMT_config.tx_config.idle_level = RMT_IDLE_LEVEL_LOW;
-	mRMT_config.tx_config.idle_output_en = true;
-
-	esp_err_t cfg_ok = rmt_config(&mRMT_config);
-	if (cfg_ok != ESP_OK) {
-	    Serial.println("RMT config failed");
-	    return;
-	}
-	esp_err_t install_ok = rmt_driver_install(mRMT_config.channel, 0, 0);
-	if (install_ok != ESP_OK) {
-	    Serial.println("RMT driver install failed");
-	    return;
-	}
+	// -- RMT set up magic
+	DPORT_SET_PERI_REG_MASK(DPORT_PERIP_CLK_EN_REG, DPORT_RMT_CLK_EN);
+	DPORT_CLEAR_PERI_REG_MASK(DPORT_PERIP_RST_EN_REG, DPORT_RMT_RST);
+
+	rmt_set_pin(static_cast<rmt_channel_t>(mRMT_channel),
+		    RMT_MODE_TX,
+		    static_cast<gpio_num_t>(DATA_PIN));
+
+	RMT.apb_conf.fifo_mask = 1;  //enable memory access, instead of FIFO mode.
+	RMT.apb_conf.mem_tx_wrap_en = 1; //wrap around when hitting end of buffer
+	
+	RMT.conf_ch[mRMT_channel].conf0.div_cnt = DIVIDER;
+	RMT.conf_ch[mRMT_channel].conf0.mem_size = 1;
+	RMT.conf_ch[mRMT_channel].conf0.carrier_en = 0;
+	RMT.conf_ch[mRMT_channel].conf0.carrier_out_lv = 1;
+	RMT.conf_ch[mRMT_channel].conf0.mem_pd = 0;
+	RMT.conf_ch[mRMT_channel].conf1.rx_en = 0;
+	RMT.conf_ch[mRMT_channel].conf1.mem_owner = 0;
+	RMT.conf_ch[mRMT_channel].conf1.tx_conti_mode = 0;    //loop back mode.
+	RMT.conf_ch[mRMT_channel].conf1.ref_always_on = 1;    // use apb clock: 80M
+	RMT.conf_ch[mRMT_channel].conf1.idle_out_en = 1;
+	RMT.conf_ch[mRMT_channel].conf1.idle_out_lv = 0;
+		
+	RMT.tx_lim_ch[mRMT_channel].limit = MAX_PULSES;
+	
+	RMT.int_ena.val |= BIT(24 + mRMT_channel); // set ch*_tx_thr_event
+	RMT.int_ena.val |= BIT(mRMT_channel * 3); // set ch*_tx_end
     }
 
     virtual uint16_t getMaxRefreshRate() const { return 400; }
 
 protected:
 
-    virtual void showPixels(PixelController<RGB_ORDER> & pixels) {
-	mWait.wait();
-	int cnt = FASTLED_INTERRUPT_RETRY_COUNT;
-	while((showRGBInternal(pixels)==0) && cnt--) {
-#ifdef FASTLED_DEBUG_COUNT_FRAME_RETRIES
-	    _retry_cnt++;
-#endif
-	    // ets_intr_unlock();
-	    delayMicroseconds(WAIT_TIME);
-	    // ets_intr_lock();
-	}
-	mWait.mark();
-    }
-
-#define _ESP_ADJ (0)
-#define _ESP_ADJ2 (0)
-
-    // -- convertBit
-    //    Translate a single bit into an RMT signal entry using the given timing variables
-    __attribute__ ((always_inline)) 
-    inline void convertBit(rmt_item32_t * item, register uint32_t b) 
+    virtual void showPixels(PixelController<RGB_ORDER> & pixels)
     {
-	if (b & 0x80000000L) {
-	    item->level0 = 1;
-	    item->duration0 = mT1H; // LED_STRIP_RMT_TICKS_BIT_1_HIGH_WS2812;
-	    item->level1 = 0;
-	    item->duration1 = mT1L; // LED_STRIP_RMT_TICKS_BIT_1_LOW_WS2812;
-	} else {
-	    item->level0 = 1;
-	    item->duration0 = mT0H; // LED_STRIP_RMT_TICKS_BIT_0_HIGH_WS2812;
-	    item->level1 = 0;
-	    item->duration1 = mT0L; // LED_STRIP_RMT_TICKS_BIT_0_LOW_WS2812;
-	}
-    }
-    
-    uint32_t showRGBInternal(PixelController<RGB_ORDER> pixels) {
-	
-	int start_item = 0;
-	rmt_item32_t * cur_item = & mRMT_items[0];
-
-	// Setup the pixel controller and load/scale the first byte
-	pixels.preStepFirstByteDithering();
-	register uint32_t b = pixels.loadAndScale0();
-	pixels.preStepFirstByteDithering();
-
-	uint32_t start = __clock_cycles();
-	while (pixels.has(1)) {
-
-	    // -- Prepare a chunk of RMT items for no more than 10 pixels
-	    int num_items = 0;
-	    while (pixels.has(1) && (num_items < (RMT_ITEMS_SIZE/2))) {
-
-		// Write first byte, read next byte
-		b <<= 24;
-		for (register uint32_t i = 8; i > 0; i--) {
-		    convertBit(cur_item, b);
-		    cur_item++;
-		    num_items++;
-		    b <<= 1;
-		}
-		b = pixels.loadAndScale1();
-	    
-		// Write second byte, read 3rd byte
-		b <<= 24;
-		for (register uint32_t i = 8; i > 0; i--) {
-		    convertBit(cur_item, b);
-		    cur_item++;
-		    num_items++;
-		    b <<= 1;
-		}
-		b = pixels.loadAndScale2();
-	    
-		// Write third byte, read 1st byte of next pixel
-		b <<= 24;
-		for (register uint32_t i = 8; i > 0; i--) {
-		    convertBit(cur_item, b);
-		    cur_item++;
-		    num_items++;
-		    b <<= 1;
-		}
-		b = pixels.advanceAndLoadAndScale0();
-	    
-		pixels.stepDithering();   
-	    }
+	esp_intr_alloc(ETS_RMT_INTR_SOURCE, 0, handleInterrupt, this, &mRMT_intr_handle);
 
-	    // -- Wait for the previous chunk of 10 items to finish
-	    rmt_wait_tx_done(mLED_RMT_CHANNEL, RMT_MAX_WAIT);
+	// -- Initialize the local state, save a pointer to the pixel data
+	local_pixels = &pixels;
+	mCurPulse = 0;
+	mRGB_channel = 0;
+		
+	// -- Fill both halves of the buffer
+	copyToRmtBlock_half();
+	copyToRmtBlock_half();
 
-	    // -- Send the new chunk
-	    rmt_write_items(mLED_RMT_CHANNEL, &mRMT_items[start_item], num_items, false);
+	mTX_sem = xSemaphoreCreateBinary();
 
-	    // -- Shift the window foward to compute the next chunk, wrapping around
-            //    as necessary
-	    start_item += num_items;
-	    if (start_item >= RMT_ITEMS_SIZE) start_item = 0;
-	    cur_item = & mRMT_items[start_item];
-	}
+	// -- Start the RMT TX operationb
+	RMT.conf_ch[mRMT_channel].conf1.mem_rd_rst = 1;
+	RMT.conf_ch[mRMT_channel].conf1.tx_start = 1;
 
-#ifdef FASTLED_DEBUG_COUNT_FRAME_RETRIES
-	_frame_cnt++;
-#endif
+	// -- Block until done
+	xSemaphoreTake(mTX_sem, portMAX_DELAY);
 
-	// -- Now, actually send the bits!
-	// rmt_write_items(mLED_RMT_CHANNEL, rmt_items, num_rmt_items, true);
-	// free(rmt_items);
+	// -- When we get here, all of the data has been sent
+	vSemaphoreDelete(mTX_sem);
+	mTX_sem = NULL;
 
-	// -- Wait for the last chunk of values to be sent
-	rmt_wait_tx_done(mLED_RMT_CHANNEL, RMT_MAX_WAIT);
-	
-	return __clock_cycles() - start;
+	esp_intr_free(mRMT_intr_handle);
     }
 
-    // -------------- OLD VERSION -------------------------------
-
-    template<int BITS> __attribute__ ((always_inline)) inline static void writeBits(register uint32_t & last_mark, register uint32_t b) {
-	b = ~b; b <<= 24;
-	for(register uint32_t i = BITS; i > 0; i--) {
-	    while((__clock_cycles() - last_mark) < (T1+T2+T3));
-	    last_mark = __clock_cycles();
-	    FastPin<DATA_PIN>::hi();
-	    
-	    while((__clock_cycles() - last_mark) < T1);
-	    if(b & 0x80000000L) { FastPin<DATA_PIN>::lo(); }
-	    b <<= 1;
-	    
-	    while((__clock_cycles() - last_mark) < (T1+T2));
-	    FastPin<DATA_PIN>::lo();
-	}
-    }
+    static void handleInterrupt(void *arg)
+    {
+	ClocklessController* c = static_cast<ClocklessController*>(arg);
+	rmt_channel_t rmt_channel = c->mRMT_channel;
 
-    // This method is made static to force making register Y available to use for data on AVR - if the method is non-static, then
-    // gcc will use register Y for the this pointer.
-    static uint32_t showRGBInternalOLD(PixelController<RGB_ORDER> pixels) {
-	// Setup the pixel controller and load/scale the first byte
-	pixels.preStepFirstByteDithering();
-	register uint32_t b = pixels.loadAndScale0();
-	pixels.preStepFirstByteDithering();
-
-	ets_intr_lock();
-
-	uint32_t start = __clock_cycles();
-	uint32_t last_mark = start;
-	while(pixels.has(1)) {
-
-	    // Write first byte, read next byte
-	    writeBits<8+XTRA0>(last_mark, b);
-	    b = pixels.loadAndScale1();
-	    
-	    // Write second byte, read 3rd byte
-	    writeBits<8+XTRA0>(last_mark, b);
-	    b = pixels.loadAndScale2();
-	    
-	    // Write third byte, read 1st byte of next pixel
-	    writeBits<8+XTRA0>(last_mark, b);
-	    b = pixels.advanceAndLoadAndScale0();
-	    
-#if (FASTLED_ALLOW_INTERRUPTS == 1)
-	    ets_intr_unlock();	    
-#endif
+	portBASE_TYPE xHigherPriorityTaskWoken  = 0;
 
-	    pixels.stepDithering();
-	    
-#if (FASTLED_ALLOW_INTERRUPTS == 1)
-	    ets_intr_lock();
-	    // if interrupts took longer than 45µs, punt on the current frame
-	    if((int32_t)(__clock_cycles()-last_mark) > 0) {
-		if((int32_t)(__clock_cycles()-last_mark) > (T1+T2+T3+((WAIT_TIME-INTERRUPT_THRESHOLD)*CLKS_PER_US))) {
-		    ets_intr_unlock();
-		    return 0; 
-		}
+	if (RMT.int_st.val & BIT(24 + rmt_channel)) { // check if ch*_tx_thr_event is set
+	    // -- Interrupt is telling us the RMT is ready for the next set of pulses
+	    c->copyToRmtBlock_half();
+	    RMT.int_clr.val |= BIT(24 + rmt_channel); // set ch*_tx_thr_event
+	}
+	else if ((RMT.int_st.val & BIT(rmt_channel * 3)) && c->mTX_sem) { // check if ch*_tx_end is set
+	    // -- Interrupt is telling us the RMT is done -- release the semaphore
+	    xSemaphoreGiveFromISR(c->mTX_sem, &xHigherPriorityTaskWoken);
+	    RMT.int_clr.val |= BIT(rmt_channel * 3); // set ch*_tx_end
+
+	    if (xHigherPriorityTaskWoken == pdTRUE) {
+		portYIELD_FROM_ISR();
 	    }
-#endif
-	};
+	}
+    }
 
-	ets_intr_unlock();
+    void copyToRmtBlock_half()
+    {
+	// -- Fill half of the RMT pulse buffer
+	//    The buffer holds MAX_PULSES*2 total items, so this loop converts as many pixels
+	//    as can fit in MAX_PULSES items. In our case, each pixel consists of three bytes,
+	//    each bit turns into one pulse item. So, MAX_PULSES is four bytes, or 1 1/3 of
+	//    a pixel. The member variable mCurPulse keeps track of which of the 64 items we
+	//    are writing, and it wraps around as necessary. When we run out of pixel data,
+	//    just fill the remaining items with zero pulses.
+
+	uint16_t pulse_count = 0;
+	uint32_t byteval;
+	while (local_pixels->has(1) && pulse_count < MAX_PULSES) {
+	    // -- Cycle through the R,G, and B values in the right order
+	    switch (mRGB_channel) {
+	    case 0:
+		byteval = local_pixels->loadAndScale0();
+		mRGB_channel = 1;
+		break;
+	    case 1:
+		byteval = local_pixels->loadAndScale1();
+		mRGB_channel = 2;
+		break;
+	    case 2:
+		byteval = local_pixels->loadAndScale2();
+		local_pixels->advanceData();
+		local_pixels->stepDithering();
+		mRGB_channel = 0;
+		break;
+	    default:
+		break;
+	    }
 
-#ifdef FASTLED_DEBUG_COUNT_FRAME_RETRIES
-	_frame_cnt++;
-#endif
+	    byteval <<= 24;
+	    // Shift bits out, MSB first, setting RMTMEM.chan[n].data32[x] to the rmt_item32_t value corresponding to the buffered bit value
+	    for (register uint32_t j = 0; j < 8; j++) {
+		uint32_t val = (byteval & 0x80000000L) ? mOne.val : mZero.val;
+		RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = val;
+		byteval <<= 1;
+		mCurPulse++;
+		pulse_count++;
+	    }
+	}
+	
+	// -- Fill the remaining items with zero pulses
+	while (pulse_count < MAX_PULSES) {
+	    RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = 0;
+	    mCurPulse++;
+	    pulse_count++;
+	}
 
-	return __clock_cycles() - start;
+	// -- When we have filled the back half the buffer, reset the position to the first half
+	if (mCurPulse >= MAX_PULSES*2)
+	    mCurPulse = 0;
     }
 };
 

From 8b912d8a2e4bb94293b1dfe512a3dfd49c3f8ee7 Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Fri, 6 Oct 2017 22:40:13 -0400
Subject: [PATCH 008/204] Better comments

---
 platforms/esp/32/clockless_esp32.h | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/platforms/esp/32/clockless_esp32.h b/platforms/esp/32/clockless_esp32.h
index 114e8dcb21..0e9a29301f 100644
--- a/platforms/esp/32/clockless_esp32.h
+++ b/platforms/esp/32/clockless_esp32.h
@@ -187,13 +187,21 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
     void copyToRmtBlock_half()
     {
 	// -- Fill half of the RMT pulse buffer
-	//    The buffer holds MAX_PULSES*2 total items, so this loop converts as many pixels
-	//    as can fit in MAX_PULSES items. In our case, each pixel consists of three bytes,
-	//    each bit turns into one pulse item. So, MAX_PULSES is four bytes, or 1 1/3 of
-	//    a pixel. The member variable mCurPulse keeps track of which of the 64 items we
-	//    are writing, and it wraps around as necessary. When we run out of pixel data,
-	//    just fill the remaining items with zero pulses.
 
+	//    The buffer holds 64 total pulse items, so this loop converts
+	//    as many pixels as can fit in half of the buffer (MAX_PULSES =
+	//    32 items). In our case, each pixel consists of three bytes,
+	//    each bit turns into one pulse item -- 24 items per pixel. So,
+	//    each half of the buffer can hold 1 and 1/3 of a pixel.
+
+	//    The member variable mCurPulse keeps track of which of the 64
+	//    items we are writing. During the first call to this method it
+	//    fills 0-31; in the second call it fills 32-63, and then wraps
+	//    back around to zero.
+
+	//    When we run out of pixel data, just fill the remaining items
+	//    with zero pulses.
+	
 	uint16_t pulse_count = 0;
 	uint32_t byteval;
 	while (local_pixels->has(1) && pulse_count < MAX_PULSES) {

From 78aa81473cd4cbd3010eacce495573f9de95671e Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Mon, 9 Oct 2017 21:39:54 -0400
Subject: [PATCH 009/204] Fixed the timing calculation

We were not doing the conversion from ESP32 cycles to RMT cycles correctly. Now it all works!
---
 platforms/esp/32/clockless_esp32.h | 28 ++++++++++++++++++++++------
 1 file changed, 22 insertions(+), 6 deletions(-)

diff --git a/platforms/esp/32/clockless_esp32.h b/platforms/esp/32/clockless_esp32.h
index 0e9a29301f..d9b1659036 100644
--- a/platforms/esp/32/clockless_esp32.h
+++ b/platforms/esp/32/clockless_esp32.h
@@ -49,13 +49,25 @@ extern "C" {
 }
 #endif
 
+#define FASTLED_HAS_CLOCKLESS 1
+
+// -- Configuration constants
 #define DIVIDER             4 /* 8 still seems to work, but timings become marginal */
 #define MAX_PULSES         32 /* A channel has a 64 "pulse" buffer - we use half per pass */
 #define RMT_DURATION_NS  12.5 /* minimum time of a single RMT duration based on clock ns */
 
-#define CLKS_TO_NS(_CLKS) ((((long)(_CLKS)) * 1000 - 999) / F_CPU_MHZ)
+// -- Convert ESP32 cycles back into nanoseconds
+#define ESPCLKS_TO_NS(_CLKS) (((long)(_CLKS) * 1000L) / F_CPU_MHZ)
 
-#define FASTLED_HAS_CLOCKLESS 1
+// -- Convert nanoseconds into RMT cycles
+#define F_CPU_RMT       (  80000000L)
+#define NS_PER_SEC      (1000000000L)
+#define CYCLES_PER_SEC  (F_CPU_RMT/DIVIDER)
+#define NS_PER_CYCLE    ( NS_PER_SEC / CYCLES_PER_SEC )
+#define NS_TO_CYCLES(n) ( (n) / NS_PER_CYCLE )
+
+// -- Convert ESP32 cycles to RMT cycles
+#define TO_RMT_CYCLES(_CLKS) NS_TO_CYCLES(ESPCLKS_TO_NS(_CLKS))
 
 static uint8_t rmt_channels_used = 0;
 
@@ -81,15 +93,19 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
 
 	// -- Precompute rmt items corresponding to a zero bit and a one bit
 	//    according to the timing values given in the template instantiation
+	// T1H
 	mOne.level0 = 1;
-	mOne.duration0 = CLKS_TO_NS(T1 + T2) / (RMT_DURATION_NS * DIVIDER);
+	mOne.duration0 = TO_RMT_CYCLES(T1+T2); // 900
+	// T1L
 	mOne.level1 = 0;
-	mOne.duration1 = CLKS_TO_NS(T3) / (RMT_DURATION_NS * DIVIDER);
+	mOne.duration1 = TO_RMT_CYCLES(T3); // 600
 
+	// T0H
 	mZero.level0 = 1;
-	mZero.duration0 = CLKS_TO_NS(T1) / (RMT_DURATION_NS * DIVIDER);
+	mZero.duration0 = TO_RMT_CYCLES(T1); // 400
+	// T0L
 	mZero.level1 = 0;
-	mZero.duration1 = CLKS_TO_NS(T2 + T3) / (RMT_DURATION_NS * DIVIDER);
+	mZero.duration1 = TO_RMT_CYCLES(T2 + T3); // 900
 
 	// -- Sequentially assign RMT channels -- at most 8
 	mRMT_channel =  (rmt_channel_t) rmt_channels_used++;

From 8a165f316440873c06ecf5a881eb21ef3c5b8608 Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Mon, 9 Oct 2017 21:42:52 -0400
Subject: [PATCH 010/204] Added Martin's changes

---
 platforms/esp/32/clockless_esp32.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/platforms/esp/32/clockless_esp32.h b/platforms/esp/32/clockless_esp32.h
index d9b1659036..5f71c5965e 100644
--- a/platforms/esp/32/clockless_esp32.h
+++ b/platforms/esp/32/clockless_esp32.h
@@ -82,7 +82,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
     intr_handle_t mRMT_intr_handle = NULL;
     
     PixelController<RGB_ORDER> *local_pixels  = NULL;
-    uint16_t mRGB_channel;
+    uint8_t mRGB_channel;
     uint16_t mCurPulse;
 
 public:
@@ -219,7 +219,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
 	//    with zero pulses.
 	
 	uint16_t pulse_count = 0;
-	uint32_t byteval;
+	uint32_t byteval = 0;
 	while (local_pixels->has(1) && pulse_count < MAX_PULSES) {
 	    // -- Cycle through the R,G, and B values in the right order
 	    switch (mRGB_channel) {

From 9e3ae2100ddc37a8b07a17d5116c3aa40757abf7 Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Mon, 9 Oct 2017 21:44:13 -0400
Subject: [PATCH 011/204] Removed confusing comments

---
 platforms/esp/32/clockless_esp32.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/platforms/esp/32/clockless_esp32.h b/platforms/esp/32/clockless_esp32.h
index 5f71c5965e..0a3750e863 100644
--- a/platforms/esp/32/clockless_esp32.h
+++ b/platforms/esp/32/clockless_esp32.h
@@ -95,17 +95,17 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
 	//    according to the timing values given in the template instantiation
 	// T1H
 	mOne.level0 = 1;
-	mOne.duration0 = TO_RMT_CYCLES(T1+T2); // 900
+	mOne.duration0 = TO_RMT_CYCLES(T1+T2);
 	// T1L
 	mOne.level1 = 0;
-	mOne.duration1 = TO_RMT_CYCLES(T3); // 600
+	mOne.duration1 = TO_RMT_CYCLES(T3);
 
 	// T0H
 	mZero.level0 = 1;
-	mZero.duration0 = TO_RMT_CYCLES(T1); // 400
+	mZero.duration0 = TO_RMT_CYCLES(T1);
 	// T0L
 	mZero.level1 = 0;
-	mZero.duration1 = TO_RMT_CYCLES(T2 + T3); // 900
+	mZero.duration1 = TO_RMT_CYCLES(T2 + T3);
 
 	// -- Sequentially assign RMT channels -- at most 8
 	mRMT_channel =  (rmt_channel_t) rmt_channels_used++;

From 61e4af48eaff4c5df6bb5fbd32e3b6b410c37c02 Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Wed, 15 Nov 2017 11:57:20 -0500
Subject: [PATCH 012/204] Added my name!

---
 platforms/esp/32/clockless_esp32.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/platforms/esp/32/clockless_esp32.h b/platforms/esp/32/clockless_esp32.h
index 0a3750e863..bfaaba8c60 100644
--- a/platforms/esp/32/clockless_esp32.h
+++ b/platforms/esp/32/clockless_esp32.h
@@ -2,6 +2,7 @@
  * Integration into FastLED ClocklessController 2017 Thomas Basler
  *
  * Modifications Copyright (c) 2017 Martin F. Falatic
+ * and Samuel Z. Guyer
  *
  * Based on public domain code created 19 Nov 2016 by Chris Osborn <fozztexx@fozztexx.com>
  * http://insentricity.com *

From 72d87b86ba402fa42e70230ef2d52d10612e091f Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Tue, 5 Dec 2017 07:30:36 -0500
Subject: [PATCH 013/204] Fixed ESP32 compile problem

On ESP platforms the dev kit provides the function __cxa_pure_virtual, so there is no need to define it.
---
 FastLED.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/FastLED.cpp b/FastLED.cpp
index 349e8c0ac1..b7e4233c35 100644
--- a/FastLED.cpp
+++ b/FastLED.cpp
@@ -237,7 +237,7 @@ extern "C" int atexit(void (* /*func*/ )()) { return 0; }
 #ifdef NEED_CXX_BITS
 namespace __cxxabiv1
 {
-	#ifndef ESP8266
+	#if !defined(ESP8266) && !defined(ESP32)
 	extern "C" void __cxa_pure_virtual (void) {}
 	#endif
 

From 2da0c93446fff04efc07e0b106657adcd3af389a Mon Sep 17 00:00:00 2001
From: h3ndrik <hendrik+dev@xd0.de>
Date: Tue, 23 Jan 2018 17:58:44 +0100
Subject: [PATCH 014/204] honor WAIT_TIME

for chipsets that need it (for example TM1829)
---
 platforms/esp/32/clockless_esp32.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/platforms/esp/32/clockless_esp32.h b/platforms/esp/32/clockless_esp32.h
index bfaaba8c60..96c2f8ade0 100644
--- a/platforms/esp/32/clockless_esp32.h
+++ b/platforms/esp/32/clockless_esp32.h
@@ -85,6 +85,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
     PixelController<RGB_ORDER> *local_pixels  = NULL;
     uint8_t mRGB_channel;
     uint16_t mCurPulse;
+    CMinWait<WAIT_TIME> mWait;
 
 public:
 
@@ -151,6 +152,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
 
     virtual void showPixels(PixelController<RGB_ORDER> & pixels)
     {
+        mWait.wait();
 	esp_intr_alloc(ETS_RMT_INTR_SOURCE, 0, handleInterrupt, this, &mRMT_intr_handle);
 
 	// -- Initialize the local state, save a pointer to the pixel data
@@ -176,6 +178,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
 	mTX_sem = NULL;
 
 	esp_intr_free(mRMT_intr_handle);
+	mWait.mark();
     }
 
     static void handleInterrupt(void *arg)

From 8f1755d1d48f752edc919de12f695144e76accd2 Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Tue, 6 Feb 2018 14:37:06 -0500
Subject: [PATCH 015/204] Better interrupt handling

Suggested by @h3ndrik: allocate the interrupt once at initialization and then just turn it on and off. This is also the strategy that the ESP32 core uses.
---
 FastLED.cpp~                            | 270 ++++++++++++++++++++++++
 platforms/esp/32/clockless_esp32.h      |  52 +++--
 platforms/esp/32/clockless_esp32.h-safe | 126 +++++++++++
 platforms/esp/32/clockless_esp32.h~     | 268 +++++++++++++++++++++++
 4 files changed, 700 insertions(+), 16 deletions(-)
 create mode 100644 FastLED.cpp~
 create mode 100644 platforms/esp/32/clockless_esp32.h-safe
 create mode 100644 platforms/esp/32/clockless_esp32.h~

diff --git a/FastLED.cpp~ b/FastLED.cpp~
new file mode 100644
index 0000000000..349e8c0ac1
--- /dev/null
+++ b/FastLED.cpp~
@@ -0,0 +1,270 @@
+#define FASTLED_INTERNAL
+#include "FastLED.h"
+
+
+#if defined(__SAM3X8E__)
+volatile uint32_t fuckit;
+#endif
+
+FASTLED_NAMESPACE_BEGIN
+
+void *pSmartMatrix = NULL;
+
+CFastLED FastLED;
+
+CLEDController *CLEDController::m_pHead = NULL;
+CLEDController *CLEDController::m_pTail = NULL;
+static uint32_t lastshow = 0;
+
+uint32_t _frame_cnt=0;
+uint32_t _retry_cnt=0;
+
+// uint32_t CRGB::Squant = ((uint32_t)((__TIME__[4]-'0') * 28))<<16 | ((__TIME__[6]-'0')*50)<<8 | ((__TIME__[7]-'0')*28);
+
+CFastLED::CFastLED() {
+	// clear out the array of led controllers
+	// m_nControllers = 0;
+	m_Scale = 255;
+	m_nFPS = 0;
+	m_pPowerFunc = NULL;
+	m_nPowerData = 0xFFFFFFFF;
+}
+
+CLEDController &CFastLED::addLeds(CLEDController *pLed,
+									   struct CRGB *data,
+									   int nLedsOrOffset, int nLedsIfOffset) {
+	int nOffset = (nLedsIfOffset > 0) ? nLedsOrOffset : 0;
+	int nLeds = (nLedsIfOffset > 0) ? nLedsIfOffset : nLedsOrOffset;
+
+	pLed->init();
+	pLed->setLeds(data + nOffset, nLeds);
+	FastLED.setMaxRefreshRate(pLed->getMaxRefreshRate(),true);
+	return *pLed;
+}
+
+void CFastLED::show(uint8_t scale) {
+	// guard against showing too rapidly
+	while(m_nMinMicros && ((micros()-lastshow) < m_nMinMicros));
+	lastshow = micros();
+
+	// If we have a function for computing power, use it!
+	if(m_pPowerFunc) {
+		scale = (*m_pPowerFunc)(scale, m_nPowerData);
+	}
+
+	CLEDController *pCur = CLEDController::head();
+	while(pCur) {
+		uint8_t d = pCur->getDither();
+		if(m_nFPS < 100) { pCur->setDither(0); }
+		pCur->showLeds(scale);
+		pCur->setDither(d);
+		pCur = pCur->next();
+	}
+	countFPS();
+}
+
+int CFastLED::count() {
+    int x = 0;
+	CLEDController *pCur = CLEDController::head();
+	while( pCur) {
+        x++;
+		pCur = pCur->next();
+	}
+    return x;
+}
+
+CLEDController & CFastLED::operator[](int x) {
+	CLEDController *pCur = CLEDController::head();
+	while(x-- && pCur) {
+		pCur = pCur->next();
+	}
+	if(pCur == NULL) {
+		return *(CLEDController::head());
+	} else {
+		return *pCur;
+	}
+}
+
+void CFastLED::showColor(const struct CRGB & color, uint8_t scale) {
+	while(m_nMinMicros && ((micros()-lastshow) < m_nMinMicros));
+	lastshow = micros();
+
+	// If we have a function for computing power, use it!
+	if(m_pPowerFunc) {
+		scale = (*m_pPowerFunc)(scale, m_nPowerData);
+	}
+
+	CLEDController *pCur = CLEDController::head();
+	while(pCur) {
+		uint8_t d = pCur->getDither();
+		if(m_nFPS < 100) { pCur->setDither(0); }
+		pCur->showColor(color, scale);
+		pCur->setDither(d);
+		pCur = pCur->next();
+	}
+	countFPS();
+}
+
+void CFastLED::clear(boolean writeData) {
+	if(writeData) {
+		showColor(CRGB(0,0,0), 0);
+	}
+    clearData();
+}
+
+void CFastLED::clearData() {
+	CLEDController *pCur = CLEDController::head();
+	while(pCur) {
+		pCur->clearLedData();
+		pCur = pCur->next();
+	}
+}
+
+void CFastLED::delay(unsigned long ms) {
+	unsigned long start = millis();
+        do {
+#ifndef FASTLED_ACCURATE_CLOCK
+		// make sure to allow at least one ms to pass to ensure the clock moves
+		// forward
+		::delay(1);
+#endif
+		show();
+#if defined(ARDUINO) && (ARDUINO > 150) && !defined(IS_BEAN) && !defined (ARDUINO_AVR_DIGISPARK)
+		yield();
+#endif
+	}
+	while((millis()-start) < ms);
+}
+
+void CFastLED::setTemperature(const struct CRGB & temp) {
+	CLEDController *pCur = CLEDController::head();
+	while(pCur) {
+		pCur->setTemperature(temp);
+		pCur = pCur->next();
+	}
+}
+
+void CFastLED::setCorrection(const struct CRGB & correction) {
+	CLEDController *pCur = CLEDController::head();
+	while(pCur) {
+		pCur->setCorrection(correction);
+		pCur = pCur->next();
+	}
+}
+
+void CFastLED::setDither(uint8_t ditherMode)  {
+	CLEDController *pCur = CLEDController::head();
+	while(pCur) {
+		pCur->setDither(ditherMode);
+		pCur = pCur->next();
+	}
+}
+
+//
+// template<int m, int n> void transpose8(unsigned char A[8], unsigned char B[8]) {
+// 	uint32_t x, y, t;
+//
+// 	// Load the array and pack it into x and y.
+//   	y = *(unsigned int*)(A);
+// 	x = *(unsigned int*)(A+4);
+//
+// 	// x = (A[0]<<24)   | (A[m]<<16)   | (A[2*m]<<8) | A[3*m];
+// 	// y = (A[4*m]<<24) | (A[5*m]<<16) | (A[6*m]<<8) | A[7*m];
+//
+        // // pre-transform x
+        // t = (x ^ (x >> 7)) & 0x00AA00AA;  x = x ^ t ^ (t << 7);
+        // t = (x ^ (x >>14)) & 0x0000CCCC;  x = x ^ t ^ (t <<14);
+				//
+        // // pre-transform y
+        // t = (y ^ (y >> 7)) & 0x00AA00AA;  y = y ^ t ^ (t << 7);
+        // t = (y ^ (y >>14)) & 0x0000CCCC;  y = y ^ t ^ (t <<14);
+				//
+        // // final transform
+        // t = (x & 0xF0F0F0F0) | ((y >> 4) & 0x0F0F0F0F);
+        // y = ((x << 4) & 0xF0F0F0F0) | (y & 0x0F0F0F0F);
+        // x = t;
+//
+// 	B[7*n] = y; y >>= 8;
+// 	B[6*n] = y; y >>= 8;
+// 	B[5*n] = y; y >>= 8;
+// 	B[4*n] = y;
+//
+//   B[3*n] = x; x >>= 8;
+// 	B[2*n] = x; x >>= 8;
+// 	B[n] = x; x >>= 8;
+// 	B[0] = x;
+// 	// B[0]=x>>24;    B[n]=x>>16;    B[2*n]=x>>8;  B[3*n]=x>>0;
+// 	// B[4*n]=y>>24;  B[5*n]=y>>16;  B[6*n]=y>>8;  B[7*n]=y>>0;
+// }
+//
+// void transposeLines(Lines & out, Lines & in) {
+// 	transpose8<1,2>(in.bytes, out.bytes);
+// 	transpose8<1,2>(in.bytes + 8, out.bytes + 1);
+// }
+
+extern int noise_min;
+extern int noise_max;
+
+void CFastLED::countFPS(int nFrames) {
+  static int br = 0;
+  static uint32_t lastframe = 0; // millis();
+
+  if(br++ >= nFrames) {
+		uint32_t now = millis();
+		now -= lastframe;
+		m_nFPS = (br * 1000) / now;
+    br = 0;
+    lastframe = millis();
+  }
+}
+
+void CFastLED::setMaxRefreshRate(uint16_t refresh, bool constrain) {
+  if(constrain) {
+    // if we're constraining, the new value of m_nMinMicros _must_ be higher than previously (because we're only
+    // allowed to slow things down if constraining)
+    if(refresh > 0) {
+      m_nMinMicros = ( (1000000/refresh) >  m_nMinMicros) ? (1000000/refresh) : m_nMinMicros;
+    }
+  } else if(refresh > 0) {
+    m_nMinMicros = 1000000 / refresh;
+  } else {
+    m_nMinMicros = 0;
+  }
+}
+
+extern "C" int atexit(void (* /*func*/ )()) { return 0; }
+
+#ifdef NEED_CXX_BITS
+namespace __cxxabiv1
+{
+	#ifndef ESP8266
+	extern "C" void __cxa_pure_virtual (void) {}
+	#endif
+
+	/* guard variables */
+
+	/* The ABI requires a 64-bit type.  */
+	__extension__ typedef int __guard __attribute__((mode(__DI__)));
+
+	extern "C" int __cxa_guard_acquire (__guard *) __attribute__((weak));
+	extern "C" void __cxa_guard_release (__guard *) __attribute__((weak));
+	extern "C" void __cxa_guard_abort (__guard *) __attribute__((weak));
+
+	extern "C" int __cxa_guard_acquire (__guard *g)
+	{
+		return !*(char *)(g);
+	}
+
+	extern "C" void __cxa_guard_release (__guard *g)
+	{
+		*(char *)g = 1;
+	}
+
+	extern "C" void __cxa_guard_abort (__guard *)
+	{
+
+	}
+}
+#endif
+
+FASTLED_NAMESPACE_END
diff --git a/platforms/esp/32/clockless_esp32.h b/platforms/esp/32/clockless_esp32.h
index 96c2f8ade0..d12b283b55 100644
--- a/platforms/esp/32/clockless_esp32.h
+++ b/platforms/esp/32/clockless_esp32.h
@@ -50,12 +50,17 @@ extern "C" {
 }
 #endif
 
+__attribute__ ((always_inline)) inline static uint32_t __clock_cycles() {
+  uint32_t cyc;
+  __asm__ __volatile__ ("rsr %0,ccount":"=a" (cyc));
+  return cyc;
+}
+
 #define FASTLED_HAS_CLOCKLESS 1
 
 // -- Configuration constants
-#define DIVIDER             4 /* 8 still seems to work, but timings become marginal */
+#define DIVIDER             2 /* 4, 8 still seem to work, but timings become marginal */
 #define MAX_PULSES         32 /* A channel has a 64 "pulse" buffer - we use half per pass */
-#define RMT_DURATION_NS  12.5 /* minimum time of a single RMT duration based on clock ns */
 
 // -- Convert ESP32 cycles back into nanoseconds
 #define ESPCLKS_TO_NS(_CLKS) (((long)(_CLKS) * 1000L) / F_CPU_MHZ)
@@ -81,6 +86,9 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
     rmt_channel_t mRMT_channel;
     xSemaphoreHandle mTX_sem = NULL;
     intr_handle_t mRMT_intr_handle = NULL;
+
+    // TRS = 50000;
+    uint16_t m_ResetDuration = NS_TO_CYCLES(50000);
     
     PixelController<RGB_ORDER> *local_pixels  = NULL;
     uint8_t mRGB_channel;
@@ -91,8 +99,6 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
 
     virtual void init()
     {
-	// TRS = 50000;
-
 	// -- Precompute rmt items corresponding to a zero bit and a one bit
 	//    according to the timing values given in the template instantiation
 	// T1H
@@ -141,9 +147,10 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
 	RMT.conf_ch[mRMT_channel].conf1.idle_out_lv = 0;
 		
 	RMT.tx_lim_ch[mRMT_channel].limit = MAX_PULSES;
-	
-	RMT.int_ena.val |= BIT(24 + mRMT_channel); // set ch*_tx_thr_event
-	RMT.int_ena.val |= BIT(mRMT_channel * 3); // set ch*_tx_end
+
+	// -- Allocate the interrupt, but don't enable it now
+	//    TBD: when do we deallocate it?
+	esp_intr_alloc(ETS_RMT_INTR_SOURCE, 0, handleInterrupt, this, &mRMT_intr_handle);
     }
 
     virtual uint16_t getMaxRefreshRate() const { return 400; }
@@ -153,17 +160,20 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
     virtual void showPixels(PixelController<RGB_ORDER> & pixels)
     {
         mWait.wait();
-	esp_intr_alloc(ETS_RMT_INTR_SOURCE, 0, handleInterrupt, this, &mRMT_intr_handle);
 
 	// -- Initialize the local state, save a pointer to the pixel data
 	local_pixels = &pixels;
 	mCurPulse = 0;
 	mRGB_channel = 0;
-		
+
 	// -- Fill both halves of the buffer
 	copyToRmtBlock_half();
 	copyToRmtBlock_half();
 
+	// -- Turn on the interrupts
+	RMT.int_ena.val |= BIT(mRMT_channel * 3);
+	RMT.int_ena.val |= BIT(mRMT_channel + 24);
+
 	mTX_sem = xSemaphoreCreateBinary();
 
 	// -- Start the RMT TX operationb
@@ -173,11 +183,15 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
 	// -- Block until done
 	xSemaphoreTake(mTX_sem, portMAX_DELAY);
 
+	// -- Turn off the interrupts
+	RMT.int_ena.val &= ~(BIT(mRMT_channel * 3));
+	RMT.int_ena.val &= ~(BIT(mRMT_channel + 24));
+
 	// -- When we get here, all of the data has been sent
 	vSemaphoreDelete(mTX_sem);
 	mTX_sem = NULL;
 
-	esp_intr_free(mRMT_intr_handle);
+	// esp_intr_free(mRMT_intr_handle);
 	mWait.mark();
     }
 
@@ -221,7 +235,6 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
 
 	//    When we run out of pixel data, just fill the remaining items
 	//    with zero pulses.
-	
 	uint16_t pulse_count = 0;
 	uint32_t byteval = 0;
 	while (local_pixels->has(1) && pulse_count < MAX_PULSES) {
@@ -256,11 +269,18 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
 	    }
 	}
 	
-	// -- Fill the remaining items with zero pulses
-	while (pulse_count < MAX_PULSES) {
-	    RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = 0;
-	    mCurPulse++;
-	    pulse_count++;
+	// -- At the end, stretch out the last pulse to signal to the strip
+        //    that we're done
+	if ( ! local_pixels->has(1)) {
+	    RMTMEM.chan[mRMT_channel].data32[mCurPulse-1].duration1 = m_ResetDuration;
+
+	    // -- And fill the remaining items with zero pulses. The zero values triggers
+	    //    the tx_done interrupt.
+	    while (pulse_count < MAX_PULSES) {
+		RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = 0;
+		mCurPulse++;
+		pulse_count++;
+	    }
 	}
 
 	// -- When we have filled the back half the buffer, reset the position to the first half
diff --git a/platforms/esp/32/clockless_esp32.h-safe b/platforms/esp/32/clockless_esp32.h-safe
new file mode 100644
index 0000000000..605ba28530
--- /dev/null
+++ b/platforms/esp/32/clockless_esp32.h-safe
@@ -0,0 +1,126 @@
+#pragma once
+
+FASTLED_NAMESPACE_BEGIN
+
+#ifdef FASTLED_DEBUG_COUNT_FRAME_RETRIES
+extern uint32_t _frame_cnt;
+extern uint32_t _retry_cnt;
+#endif
+
+// Info on reading cycle counter from https://github.com/kbeckmann/nodemcu-firmware/blob/ws2812-dual/app/modules/ws2812.c
+__attribute__ ((always_inline)) inline static uint32_t __clock_cycles() {
+  uint32_t cyc;
+  __asm__ __volatile__ ("rsr %0,ccount":"=a" (cyc));
+  return cyc;
+}
+
+#define FASTLED_HAS_CLOCKLESS 1
+
+template <int DATA_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = RGB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 5>
+class ClocklessController : public CPixelLEDController<RGB_ORDER> {
+
+    typedef typename FastPin<DATA_PIN>::port_ptr_t data_ptr_t;
+    typedef typename FastPin<DATA_PIN>::port_t data_t;
+
+    data_t mPinMask;
+    data_ptr_t mPort;
+    CMinWait<WAIT_TIME> mWait;
+public:
+    virtual void init() {
+	FastPin<DATA_PIN>::setOutput();
+	mPinMask = FastPin<DATA_PIN>::mask();
+	mPort = FastPin<DATA_PIN>::port();
+    }
+
+    virtual uint16_t getMaxRefreshRate() const { return 400; }
+
+protected:
+
+    virtual void showPixels(PixelController<RGB_ORDER> & pixels) {
+	mWait.wait();
+	int cnt = FASTLED_INTERRUPT_RETRY_COUNT;
+	while((showRGBInternal(pixels)==0) && cnt--) {
+#ifdef FASTLED_DEBUG_COUNT_FRAME_RETRIES
+	    _retry_cnt++;
+#endif
+	    ets_intr_unlock();
+	    delayMicroseconds(WAIT_TIME);
+	    ets_intr_lock();
+	}
+	mWait.mark();
+    }
+
+#define _ESP_ADJ (0)
+#define _ESP_ADJ2 (0)
+
+    template<int BITS> __attribute__ ((always_inline)) inline static void writeBits(register uint32_t & last_mark, register uint32_t b) {
+	b = ~b; b <<= 24;
+	for(register uint32_t i = BITS; i > 0; i--) {
+	    while((__clock_cycles() - last_mark) < (T1+T2+T3));
+	    last_mark = __clock_cycles();
+	    FastPin<DATA_PIN>::hi();
+	    
+	    while((__clock_cycles() - last_mark) < T1);
+	    if(b & 0x80000000L) { FastPin<DATA_PIN>::lo(); }
+	    b <<= 1;
+	    
+	    while((__clock_cycles() - last_mark) < (T1+T2));
+	    FastPin<DATA_PIN>::lo();
+	}
+    }
+
+    // This method is made static to force making register Y available to use for data on AVR - if the method is non-static, then
+    // gcc will use register Y for the this pointer.
+    static uint32_t showRGBInternal(PixelController<RGB_ORDER> pixels) {
+	// Setup the pixel controller and load/scale the first byte
+	pixels.preStepFirstByteDithering();
+	register uint32_t b = pixels.loadAndScale0();
+	pixels.preStepFirstByteDithering();
+
+	ets_intr_lock();
+
+	uint32_t start = __clock_cycles();
+	uint32_t last_mark = start;
+	while(pixels.has(1)) {
+
+	    // Write first byte, read next byte
+	    writeBits<8+XTRA0>(last_mark, b);
+	    b = pixels.loadAndScale1();
+	    
+	    // Write second byte, read 3rd byte
+	    writeBits<8+XTRA0>(last_mark, b);
+	    b = pixels.loadAndScale2();
+	    
+	    // Write third byte, read 1st byte of next pixel
+	    writeBits<8+XTRA0>(last_mark, b);
+	    b = pixels.advanceAndLoadAndScale0();
+	    
+#if (FASTLED_ALLOW_INTERRUPTS == 1)
+	    ets_intr_unlock();	    
+#endif
+
+	    pixels.stepDithering();
+	    
+#if (FASTLED_ALLOW_INTERRUPTS == 1)
+	    ets_intr_lock();
+	    // if interrupts took longer than 45µs, punt on the current frame
+	    if((int32_t)(__clock_cycles()-last_mark) > 0) {
+		if((int32_t)(__clock_cycles()-last_mark) > (T1+T2+T3+((WAIT_TIME-INTERRUPT_THRESHOLD)*CLKS_PER_US))) {
+		    ets_intr_unlock();
+		    return 0; 
+		}
+	    }
+#endif
+	};
+
+	ets_intr_unlock();
+
+#ifdef FASTLED_DEBUG_COUNT_FRAME_RETRIES
+	_frame_cnt++;
+#endif
+
+	return __clock_cycles() - start;
+    }
+};
+
+FASTLED_NAMESPACE_END
diff --git a/platforms/esp/32/clockless_esp32.h~ b/platforms/esp/32/clockless_esp32.h~
new file mode 100644
index 0000000000..0a3750e863
--- /dev/null
+++ b/platforms/esp/32/clockless_esp32.h~
@@ -0,0 +1,268 @@
+/*
+ * Integration into FastLED ClocklessController 2017 Thomas Basler
+ *
+ * Modifications Copyright (c) 2017 Martin F. Falatic
+ *
+ * Based on public domain code created 19 Nov 2016 by Chris Osborn <fozztexx@fozztexx.com>
+ * http://insentricity.com *
+ *
+ */
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#pragma once
+
+FASTLED_NAMESPACE_BEGIN
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "esp32-hal.h"
+#include "esp_intr.h"
+#include "driver/gpio.h"
+#include "driver/rmt.h"
+#include "driver/periph_ctrl.h"
+#include "freertos/semphr.h"
+#include "soc/rmt_struct.h"
+
+#include "esp_log.h"
+
+#ifdef __cplusplus
+}
+#endif
+
+#define FASTLED_HAS_CLOCKLESS 1
+
+// -- Configuration constants
+#define DIVIDER             4 /* 8 still seems to work, but timings become marginal */
+#define MAX_PULSES         32 /* A channel has a 64 "pulse" buffer - we use half per pass */
+#define RMT_DURATION_NS  12.5 /* minimum time of a single RMT duration based on clock ns */
+
+// -- Convert ESP32 cycles back into nanoseconds
+#define ESPCLKS_TO_NS(_CLKS) (((long)(_CLKS) * 1000L) / F_CPU_MHZ)
+
+// -- Convert nanoseconds into RMT cycles
+#define F_CPU_RMT       (  80000000L)
+#define NS_PER_SEC      (1000000000L)
+#define CYCLES_PER_SEC  (F_CPU_RMT/DIVIDER)
+#define NS_PER_CYCLE    ( NS_PER_SEC / CYCLES_PER_SEC )
+#define NS_TO_CYCLES(n) ( (n) / NS_PER_CYCLE )
+
+// -- Convert ESP32 cycles to RMT cycles
+#define TO_RMT_CYCLES(_CLKS) NS_TO_CYCLES(ESPCLKS_TO_NS(_CLKS))
+
+static uint8_t rmt_channels_used = 0;
+
+template <int DATA_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = RGB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 5>
+class ClocklessController : public CPixelLEDController<RGB_ORDER>
+{
+    rmt_item32_t mZero;
+    rmt_item32_t mOne;
+
+    rmt_channel_t mRMT_channel;
+    xSemaphoreHandle mTX_sem = NULL;
+    intr_handle_t mRMT_intr_handle = NULL;
+    
+    PixelController<RGB_ORDER> *local_pixels  = NULL;
+    uint8_t mRGB_channel;
+    uint16_t mCurPulse;
+
+public:
+
+    virtual void init()
+    {
+	// TRS = 50000;
+
+	// -- Precompute rmt items corresponding to a zero bit and a one bit
+	//    according to the timing values given in the template instantiation
+	// T1H
+	mOne.level0 = 1;
+	mOne.duration0 = TO_RMT_CYCLES(T1+T2);
+	// T1L
+	mOne.level1 = 0;
+	mOne.duration1 = TO_RMT_CYCLES(T3);
+
+	// T0H
+	mZero.level0 = 1;
+	mZero.duration0 = TO_RMT_CYCLES(T1);
+	// T0L
+	mZero.level1 = 0;
+	mZero.duration1 = TO_RMT_CYCLES(T2 + T3);
+
+	// -- Sequentially assign RMT channels -- at most 8
+	mRMT_channel =  (rmt_channel_t) rmt_channels_used++;
+	if (mRMT_channel > 7) {
+	    assert("Only 8 RMT Channels are allowed");
+	}
+
+	ESP_LOGI("fastled", "RMT Channel Init: %d", mRMT_channel);
+
+	// -- RMT set up magic
+	DPORT_SET_PERI_REG_MASK(DPORT_PERIP_CLK_EN_REG, DPORT_RMT_CLK_EN);
+	DPORT_CLEAR_PERI_REG_MASK(DPORT_PERIP_RST_EN_REG, DPORT_RMT_RST);
+
+	rmt_set_pin(static_cast<rmt_channel_t>(mRMT_channel),
+		    RMT_MODE_TX,
+		    static_cast<gpio_num_t>(DATA_PIN));
+
+	RMT.apb_conf.fifo_mask = 1;  //enable memory access, instead of FIFO mode.
+	RMT.apb_conf.mem_tx_wrap_en = 1; //wrap around when hitting end of buffer
+	
+	RMT.conf_ch[mRMT_channel].conf0.div_cnt = DIVIDER;
+	RMT.conf_ch[mRMT_channel].conf0.mem_size = 1;
+	RMT.conf_ch[mRMT_channel].conf0.carrier_en = 0;
+	RMT.conf_ch[mRMT_channel].conf0.carrier_out_lv = 1;
+	RMT.conf_ch[mRMT_channel].conf0.mem_pd = 0;
+	RMT.conf_ch[mRMT_channel].conf1.rx_en = 0;
+	RMT.conf_ch[mRMT_channel].conf1.mem_owner = 0;
+	RMT.conf_ch[mRMT_channel].conf1.tx_conti_mode = 0;    //loop back mode.
+	RMT.conf_ch[mRMT_channel].conf1.ref_always_on = 1;    // use apb clock: 80M
+	RMT.conf_ch[mRMT_channel].conf1.idle_out_en = 1;
+	RMT.conf_ch[mRMT_channel].conf1.idle_out_lv = 0;
+		
+	RMT.tx_lim_ch[mRMT_channel].limit = MAX_PULSES;
+	
+	RMT.int_ena.val |= BIT(24 + mRMT_channel); // set ch*_tx_thr_event
+	RMT.int_ena.val |= BIT(mRMT_channel * 3); // set ch*_tx_end
+    }
+
+    virtual uint16_t getMaxRefreshRate() const { return 400; }
+
+protected:
+
+    virtual void showPixels(PixelController<RGB_ORDER> & pixels)
+    {
+	esp_intr_alloc(ETS_RMT_INTR_SOURCE, 0, handleInterrupt, this, &mRMT_intr_handle);
+
+	// -- Initialize the local state, save a pointer to the pixel data
+	local_pixels = &pixels;
+	mCurPulse = 0;
+	mRGB_channel = 0;
+		
+	// -- Fill both halves of the buffer
+	copyToRmtBlock_half();
+	copyToRmtBlock_half();
+
+	mTX_sem = xSemaphoreCreateBinary();
+
+	// -- Start the RMT TX operationb
+	RMT.conf_ch[mRMT_channel].conf1.mem_rd_rst = 1;
+	RMT.conf_ch[mRMT_channel].conf1.tx_start = 1;
+
+	// -- Block until done
+	xSemaphoreTake(mTX_sem, portMAX_DELAY);
+
+	// -- When we get here, all of the data has been sent
+	vSemaphoreDelete(mTX_sem);
+	mTX_sem = NULL;
+
+	esp_intr_free(mRMT_intr_handle);
+    }
+
+    static void handleInterrupt(void *arg)
+    {
+	ClocklessController* c = static_cast<ClocklessController*>(arg);
+	rmt_channel_t rmt_channel = c->mRMT_channel;
+
+	portBASE_TYPE xHigherPriorityTaskWoken  = 0;
+
+	if (RMT.int_st.val & BIT(24 + rmt_channel)) { // check if ch*_tx_thr_event is set
+	    // -- Interrupt is telling us the RMT is ready for the next set of pulses
+	    c->copyToRmtBlock_half();
+	    RMT.int_clr.val |= BIT(24 + rmt_channel); // set ch*_tx_thr_event
+	}
+	else if ((RMT.int_st.val & BIT(rmt_channel * 3)) && c->mTX_sem) { // check if ch*_tx_end is set
+	    // -- Interrupt is telling us the RMT is done -- release the semaphore
+	    xSemaphoreGiveFromISR(c->mTX_sem, &xHigherPriorityTaskWoken);
+	    RMT.int_clr.val |= BIT(rmt_channel * 3); // set ch*_tx_end
+
+	    if (xHigherPriorityTaskWoken == pdTRUE) {
+		portYIELD_FROM_ISR();
+	    }
+	}
+    }
+
+    void copyToRmtBlock_half()
+    {
+	// -- Fill half of the RMT pulse buffer
+
+	//    The buffer holds 64 total pulse items, so this loop converts
+	//    as many pixels as can fit in half of the buffer (MAX_PULSES =
+	//    32 items). In our case, each pixel consists of three bytes,
+	//    each bit turns into one pulse item -- 24 items per pixel. So,
+	//    each half of the buffer can hold 1 and 1/3 of a pixel.
+
+	//    The member variable mCurPulse keeps track of which of the 64
+	//    items we are writing. During the first call to this method it
+	//    fills 0-31; in the second call it fills 32-63, and then wraps
+	//    back around to zero.
+
+	//    When we run out of pixel data, just fill the remaining items
+	//    with zero pulses.
+	
+	uint16_t pulse_count = 0;
+	uint32_t byteval = 0;
+	while (local_pixels->has(1) && pulse_count < MAX_PULSES) {
+	    // -- Cycle through the R,G, and B values in the right order
+	    switch (mRGB_channel) {
+	    case 0:
+		byteval = local_pixels->loadAndScale0();
+		mRGB_channel = 1;
+		break;
+	    case 1:
+		byteval = local_pixels->loadAndScale1();
+		mRGB_channel = 2;
+		break;
+	    case 2:
+		byteval = local_pixels->loadAndScale2();
+		local_pixels->advanceData();
+		local_pixels->stepDithering();
+		mRGB_channel = 0;
+		break;
+	    default:
+		break;
+	    }
+
+	    byteval <<= 24;
+	    // Shift bits out, MSB first, setting RMTMEM.chan[n].data32[x] to the rmt_item32_t value corresponding to the buffered bit value
+	    for (register uint32_t j = 0; j < 8; j++) {
+		uint32_t val = (byteval & 0x80000000L) ? mOne.val : mZero.val;
+		RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = val;
+		byteval <<= 1;
+		mCurPulse++;
+		pulse_count++;
+	    }
+	}
+	
+	// -- Fill the remaining items with zero pulses
+	while (pulse_count < MAX_PULSES) {
+	    RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = 0;
+	    mCurPulse++;
+	    pulse_count++;
+	}
+
+	// -- When we have filled the back half the buffer, reset the position to the first half
+	if (mCurPulse >= MAX_PULSES*2)
+	    mCurPulse = 0;
+    }
+};
+
+FASTLED_NAMESPACE_END

From 4891cb416677ea64e51d3acadc37a8e5c6c6bca2 Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Mon, 12 Feb 2018 21:22:55 -0500
Subject: [PATCH 016/204] Major refactoring

Two major changes to the RMT driver. First, I realized that we can have only one interrupt handler attached to the RMT peripheral, so it needs to be able to handle all of the attached strips. To accomplish this, I store each ClocklessController in an array indexed by its RMT channel. The interrupt handler can then take the channel that triggered it and index into the array to get the right controller.

The second major change is that I replaced all of the explicit bit twiddling of the RMT configuration with calls to the proper functions in ESP32 core. That should make the code more stable if the core changes.
---
 platforms/esp/32/clockless_esp32.h | 362 +++++++++++++++--------------
 1 file changed, 190 insertions(+), 172 deletions(-)

diff --git a/platforms/esp/32/clockless_esp32.h b/platforms/esp/32/clockless_esp32.h
index d12b283b55..8f6a9adb2a 100644
--- a/platforms/esp/32/clockless_esp32.h
+++ b/platforms/esp/32/clockless_esp32.h
@@ -73,24 +73,32 @@ __attribute__ ((always_inline)) inline static uint32_t __clock_cycles() {
 #define NS_TO_CYCLES(n) ( (n) / NS_PER_CYCLE )
 
 // -- Convert ESP32 cycles to RMT cycles
-#define TO_RMT_CYCLES(_CLKS) NS_TO_CYCLES(ESPCLKS_TO_NS(_CLKS))
+#define TO_RMT_CYCLES(_CLKS) NS_TO_CYCLES(ESPCLKS_TO_NS(_CLKS))    
+
+// -- Number of cycles to reset the strip
+#define RMT_RESET_DURATION NS_TO_CYCLES(50000)
+
+// -- Global information for the interrupt handler
+static void * gControllers[8];
+static intr_handle_t gRMT_intr_handle;
+static uint8_t gNext_channel;
 
-static uint8_t rmt_channels_used = 0;
 
 template <int DATA_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = RGB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 5>
 class ClocklessController : public CPixelLEDController<RGB_ORDER>
 {
-    rmt_item32_t mZero;
-    rmt_item32_t mOne;
-
+    // -- RMT has 8 channels, numbered 0 to 7
     rmt_channel_t mRMT_channel;
+
+    // -- Semaphore to signal when show() is done
     xSemaphoreHandle mTX_sem = NULL;
-    intr_handle_t mRMT_intr_handle = NULL;
 
-    // TRS = 50000;
-    uint16_t m_ResetDuration = NS_TO_CYCLES(50000);
-    
-    PixelController<RGB_ORDER> *local_pixels  = NULL;
+    // -- Timing values for zero and one bits
+    rmt_item32_t mZero;
+    rmt_item32_t mOne;
+
+    // -- State information for keeping track of where we are in the pixel data
+    PixelController<RGB_ORDER> * mPixels  = NULL;
     uint8_t mRGB_channel;
     uint16_t mCurPulse;
     CMinWait<WAIT_TIME> mWait;
@@ -99,58 +107,57 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
 
     virtual void init()
     {
-	// -- Precompute rmt items corresponding to a zero bit and a one bit
-	//    according to the timing values given in the template instantiation
-	// T1H
-	mOne.level0 = 1;
-	mOne.duration0 = TO_RMT_CYCLES(T1+T2);
-	// T1L
-	mOne.level1 = 0;
-	mOne.duration1 = TO_RMT_CYCLES(T3);
-
-	// T0H
-	mZero.level0 = 1;
-	mZero.duration0 = TO_RMT_CYCLES(T1);
-	// T0L
-	mZero.level1 = 0;
-	mZero.duration1 = TO_RMT_CYCLES(T2 + T3);
-
-	// -- Sequentially assign RMT channels -- at most 8
-	mRMT_channel =  (rmt_channel_t) rmt_channels_used++;
-	if (mRMT_channel > 7) {
-	    assert("Only 8 RMT Channels are allowed");
-	}
-
-	ESP_LOGI("fastled", "RMT Channel Init: %d", mRMT_channel);
-
-	// -- RMT set up magic
-	DPORT_SET_PERI_REG_MASK(DPORT_PERIP_CLK_EN_REG, DPORT_RMT_CLK_EN);
-	DPORT_CLEAR_PERI_REG_MASK(DPORT_PERIP_RST_EN_REG, DPORT_RMT_RST);
-
-	rmt_set_pin(static_cast<rmt_channel_t>(mRMT_channel),
-		    RMT_MODE_TX,
-		    static_cast<gpio_num_t>(DATA_PIN));
-
-	RMT.apb_conf.fifo_mask = 1;  //enable memory access, instead of FIFO mode.
-	RMT.apb_conf.mem_tx_wrap_en = 1; //wrap around when hitting end of buffer
-	
-	RMT.conf_ch[mRMT_channel].conf0.div_cnt = DIVIDER;
-	RMT.conf_ch[mRMT_channel].conf0.mem_size = 1;
-	RMT.conf_ch[mRMT_channel].conf0.carrier_en = 0;
-	RMT.conf_ch[mRMT_channel].conf0.carrier_out_lv = 1;
-	RMT.conf_ch[mRMT_channel].conf0.mem_pd = 0;
-	RMT.conf_ch[mRMT_channel].conf1.rx_en = 0;
-	RMT.conf_ch[mRMT_channel].conf1.mem_owner = 0;
-	RMT.conf_ch[mRMT_channel].conf1.tx_conti_mode = 0;    //loop back mode.
-	RMT.conf_ch[mRMT_channel].conf1.ref_always_on = 1;    // use apb clock: 80M
-	RMT.conf_ch[mRMT_channel].conf1.idle_out_en = 1;
-	RMT.conf_ch[mRMT_channel].conf1.idle_out_lv = 0;
-		
-	RMT.tx_lim_ch[mRMT_channel].limit = MAX_PULSES;
-
-	// -- Allocate the interrupt, but don't enable it now
-	//    TBD: when do we deallocate it?
-	esp_intr_alloc(ETS_RMT_INTR_SOURCE, 0, handleInterrupt, this, &mRMT_intr_handle);
+        // -- Precompute rmt items corresponding to a zero bit and a one bit
+        //    according to the timing values given in the template instantiation
+        // T1H
+        mOne.level0 = 1;
+        mOne.duration0 = TO_RMT_CYCLES(T1+T2);
+        // T1L
+        mOne.level1 = 0;
+        mOne.duration1 = TO_RMT_CYCLES(T3);
+
+        // T0H
+        mZero.level0 = 1;
+        mZero.duration0 = TO_RMT_CYCLES(T1);
+        // T0L
+        mZero.level1 = 0;
+        mZero.duration1 = TO_RMT_CYCLES(T2 + T3);
+
+        // -- Sequentially assign RMT channels -- at most 8
+        mRMT_channel =  (rmt_channel_t) gNext_channel++;
+        if (mRMT_channel > 7) {
+            assert("Only 8 RMT Channels are allowed");
+        }
+
+        // -- Save this controller object, indexed by the RMT channel
+        //    This allows us to get the pointer inside the interrupt handler
+        gControllers[mRMT_channel] = this;
+
+        ESP_LOGI("fastled", "RMT Channel Init: %d", mRMT_channel);
+
+        // -- RMT configuration for transmission
+        rmt_config_t rmt_tx;
+        rmt_tx.channel = mRMT_channel;
+        rmt_tx.rmt_mode = RMT_MODE_TX;
+        rmt_tx.gpio_num = gpio_num_t(DATA_PIN);
+        rmt_tx.mem_block_num = 1;
+        rmt_tx.clk_div = DIVIDER;
+        rmt_tx.tx_config.loop_en = false;
+        rmt_tx.tx_config.carrier_level = RMT_CARRIER_LEVEL_LOW;
+        rmt_tx.tx_config.carrier_en = false;
+        rmt_tx.tx_config.idle_level = RMT_IDLE_LEVEL_LOW;
+        rmt_tx.tx_config.idle_output_en = true;
+        
+        // -- Apply the configuration
+        rmt_config(&rmt_tx);
+
+        // -- Set up the RMT to send 1/2 of the pulse buffer and then
+        //    generate an interrupt. When we get this interrupt we
+        //    fill the other half in preparation (kind of like double-buffering)
+        rmt_set_tx_thr_intr_en(mRMT_channel, true, MAX_PULSES);
+
+        // -- Semaphore to signal completion of each show()
+        mTX_sem = xSemaphoreCreateBinary();
     }
 
     virtual uint16_t getMaxRefreshRate() const { return 400; }
@@ -161,131 +168,142 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
     {
         mWait.wait();
 
-	// -- Initialize the local state, save a pointer to the pixel data
-	local_pixels = &pixels;
-	mCurPulse = 0;
-	mRGB_channel = 0;
-
-	// -- Fill both halves of the buffer
-	copyToRmtBlock_half();
-	copyToRmtBlock_half();
+        // -- Initialize the local state, save a pointer to the pixel data
+        mPixels = &pixels;
+        mCurPulse = 0;
+        mRGB_channel = 0;
 
-	// -- Turn on the interrupts
-	RMT.int_ena.val |= BIT(mRMT_channel * 3);
-	RMT.int_ena.val |= BIT(mRMT_channel + 24);
+        // -- Fill both halves of the buffer
+        fillHalfRMTBuffer();
+        fillHalfRMTBuffer();
 
-	mTX_sem = xSemaphoreCreateBinary();
+        // -- Allocate the interrupt if we have not done so yet
+        if (gRMT_intr_handle == NULL)
+            esp_intr_alloc(ETS_RMT_INTR_SOURCE, 0, handleInterrupt, 0, &gRMT_intr_handle);
 
-	// -- Start the RMT TX operationb
-	RMT.conf_ch[mRMT_channel].conf1.mem_rd_rst = 1;
-	RMT.conf_ch[mRMT_channel].conf1.tx_start = 1;
+        // -- Turn on the interrupts
+        rmt_set_tx_intr_en(mRMT_channel, true);
 
-	// -- Block until done
-	xSemaphoreTake(mTX_sem, portMAX_DELAY);
+        // -- Start the RMT TX operation
+        rmt_tx_start(mRMT_channel, true);
 
-	// -- Turn off the interrupts
-	RMT.int_ena.val &= ~(BIT(mRMT_channel * 3));
-	RMT.int_ena.val &= ~(BIT(mRMT_channel + 24));
+        // -- Block until done
+        //    All of the data transmission happens while we wait here
+        xSemaphoreTake(mTX_sem, portMAX_DELAY);
 
-	// -- When we get here, all of the data has been sent
-	vSemaphoreDelete(mTX_sem);
-	mTX_sem = NULL;
+        // -- Turn off the interrupts
+        rmt_set_tx_intr_en(mRMT_channel, false);
 
-	// esp_intr_free(mRMT_intr_handle);
-	mWait.mark();
+        mWait.mark();
     }
 
     static void handleInterrupt(void *arg)
     {
-	ClocklessController* c = static_cast<ClocklessController*>(arg);
-	rmt_channel_t rmt_channel = c->mRMT_channel;
-
-	portBASE_TYPE xHigherPriorityTaskWoken  = 0;
-
-	if (RMT.int_st.val & BIT(24 + rmt_channel)) { // check if ch*_tx_thr_event is set
-	    // -- Interrupt is telling us the RMT is ready for the next set of pulses
-	    c->copyToRmtBlock_half();
-	    RMT.int_clr.val |= BIT(24 + rmt_channel); // set ch*_tx_thr_event
-	}
-	else if ((RMT.int_st.val & BIT(rmt_channel * 3)) && c->mTX_sem) { // check if ch*_tx_end is set
-	    // -- Interrupt is telling us the RMT is done -- release the semaphore
-	    xSemaphoreGiveFromISR(c->mTX_sem, &xHigherPriorityTaskWoken);
-	    RMT.int_clr.val |= BIT(rmt_channel * 3); // set ch*_tx_end
-
-	    if (xHigherPriorityTaskWoken == pdTRUE) {
-		portYIELD_FROM_ISR();
-	    }
-	}
+        // -- The basic structure of this code is borrowed from the
+        //    interrupt handler in esp-idf/components/driver/rmt.c
+        uint32_t intr_st = RMT.int_st.val;
+        uint32_t i = 0;
+        uint8_t channel;
+        portBASE_TYPE HPTaskAwoken = 0;
+
+        // -- Loop over all the bits in the interrupt status word; the particular
+        //    bit set indicates both the meaning and the RMT channel to which it applies
+        for(i = 0; i < 32; i++) {
+            if(i < 24) {
+                // -- The low 24 bits consist of 3 bits per channel that indicate that
+                //    when a tx/rx operation completes
+                if(intr_st & BIT(i)) {
+                    channel = i / 3;
+                    if (i % 3 == 0) {
+                        // -- Transmission is complete, signal the semaphore that show() is finished
+                        ClocklessController * controller = static_cast<ClocklessController*>(gControllers[channel]);
+                        xSemaphoreGiveFromISR(controller->mTX_sem, &HPTaskAwoken);
+                        if(HPTaskAwoken == pdTRUE) portYIELD_FROM_ISR();
+                    }
+                    RMT.int_clr.val = BIT(i);
+                }
+            } else {
+                // -- The high 8 bits signal that the current 1/2 buffer has been sent, so we should
+                //    fill the next half and continue.
+                if(intr_st & (BIT(i))) {
+                    channel = i - 24;
+                    ClocklessController * controller = static_cast<ClocklessController*>(gControllers[channel]);
+                    controller->fillHalfRMTBuffer();
+                    RMT.int_clr.val = BIT(i);
+                }
+            }
+        }
     }
 
-    void copyToRmtBlock_half()
+    void fillHalfRMTBuffer()
     {
-	// -- Fill half of the RMT pulse buffer
-
-	//    The buffer holds 64 total pulse items, so this loop converts
-	//    as many pixels as can fit in half of the buffer (MAX_PULSES =
-	//    32 items). In our case, each pixel consists of three bytes,
-	//    each bit turns into one pulse item -- 24 items per pixel. So,
-	//    each half of the buffer can hold 1 and 1/3 of a pixel.
-
-	//    The member variable mCurPulse keeps track of which of the 64
-	//    items we are writing. During the first call to this method it
-	//    fills 0-31; in the second call it fills 32-63, and then wraps
-	//    back around to zero.
-
-	//    When we run out of pixel data, just fill the remaining items
-	//    with zero pulses.
-	uint16_t pulse_count = 0;
-	uint32_t byteval = 0;
-	while (local_pixels->has(1) && pulse_count < MAX_PULSES) {
-	    // -- Cycle through the R,G, and B values in the right order
-	    switch (mRGB_channel) {
-	    case 0:
-		byteval = local_pixels->loadAndScale0();
-		mRGB_channel = 1;
-		break;
-	    case 1:
-		byteval = local_pixels->loadAndScale1();
-		mRGB_channel = 2;
-		break;
-	    case 2:
-		byteval = local_pixels->loadAndScale2();
-		local_pixels->advanceData();
-		local_pixels->stepDithering();
-		mRGB_channel = 0;
-		break;
-	    default:
-		break;
-	    }
-
-	    byteval <<= 24;
-	    // Shift bits out, MSB first, setting RMTMEM.chan[n].data32[x] to the rmt_item32_t value corresponding to the buffered bit value
-	    for (register uint32_t j = 0; j < 8; j++) {
-		uint32_t val = (byteval & 0x80000000L) ? mOne.val : mZero.val;
-		RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = val;
-		byteval <<= 1;
-		mCurPulse++;
-		pulse_count++;
-	    }
-	}
-	
-	// -- At the end, stretch out the last pulse to signal to the strip
+        // -- Fill half of the RMT pulse buffer
+
+        //    The buffer holds 64 total pulse items, so this loop converts
+        //    as many pixels as can fit in half of the buffer (MAX_PULSES =
+        //    32 items). In our case, each pixel consists of three bytes,
+        //    each bit turns into one pulse item -- 24 items per pixel. So,
+        //    each half of the buffer can hold 1 and 1/3 of a pixel.
+
+        //    The member variable mCurPulse keeps track of which of the 64
+        //    items we are writing. During the first call to this method it
+        //    fills 0-31; in the second call it fills 32-63, and then wraps
+        //    back around to zero.
+
+        //    When we run out of pixel data, just fill the remaining items
+        //    with zero pulses.
+        uint16_t pulse_count = 0;
+        uint32_t byteval = 0;
+        while (mPixels->has(1) && pulse_count < MAX_PULSES) {
+            // -- Cycle through the R,G, and B values in the right order
+            switch (mRGB_channel) {
+            case 0:
+                byteval = mPixels->loadAndScale0();
+                mRGB_channel = 1;
+                break;
+            case 1:
+                byteval = mPixels->loadAndScale1();
+                mRGB_channel = 2;
+                break;
+            case 2:
+                byteval = mPixels->loadAndScale2();
+                mPixels->advanceData();
+                mPixels->stepDithering();
+                mRGB_channel = 0;
+                break;
+            default:
+                break;
+            }
+
+            byteval <<= 24;
+            // Shift bits out, MSB first, setting RMTMEM.chan[n].data32[x] to the 
+            // rmt_item32_t value corresponding to the buffered bit value
+            for (register uint32_t j = 0; j < 8; j++) {
+                uint32_t val = (byteval & 0x80000000L) ? mOne.val : mZero.val;
+                RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = val;
+                byteval <<= 1;
+                mCurPulse++;
+                pulse_count++;
+            }
+        }
+        
+        // -- At the end, stretch out the last pulse to signal to the strip
         //    that we're done
-	if ( ! local_pixels->has(1)) {
-	    RMTMEM.chan[mRMT_channel].data32[mCurPulse-1].duration1 = m_ResetDuration;
-
-	    // -- And fill the remaining items with zero pulses. The zero values triggers
-	    //    the tx_done interrupt.
-	    while (pulse_count < MAX_PULSES) {
-		RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = 0;
-		mCurPulse++;
-		pulse_count++;
-	    }
-	}
-
-	// -- When we have filled the back half the buffer, reset the position to the first half
-	if (mCurPulse >= MAX_PULSES*2)
-	    mCurPulse = 0;
+        if ( ! mPixels->has(1)) {
+            RMTMEM.chan[mRMT_channel].data32[mCurPulse-1].duration1 = RMT_RESET_DURATION;
+
+            // -- And fill the remaining items with zero pulses. The zero values triggers
+            //    the tx_done interrupt.
+            while (pulse_count < MAX_PULSES) {
+                RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = 0;
+                mCurPulse++;
+                pulse_count++;
+            }
+        }
+
+        // -- When we have filled the back half the buffer, reset the position to the first half
+        if (mCurPulse >= MAX_PULSES*2)
+            mCurPulse = 0;
     }
 };
 

From d0ca483c41276efc9b68199cd17972b1706e1d24 Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Wed, 14 Feb 2018 12:32:07 -0500
Subject: [PATCH 017/204] Fixed the interrupt dispatch

Since the interrupt handler is global for all channels, we need to store not just the controller, but also the buffer refill function for each strip.
---
 platforms/esp/32/clockless_esp32.h | 28 ++++++++++++++++++++--------
 1 file changed, 20 insertions(+), 8 deletions(-)

diff --git a/platforms/esp/32/clockless_esp32.h b/platforms/esp/32/clockless_esp32.h
index 8f6a9adb2a..cd65008682 100644
--- a/platforms/esp/32/clockless_esp32.h
+++ b/platforms/esp/32/clockless_esp32.h
@@ -2,7 +2,8 @@
  * Integration into FastLED ClocklessController 2017 Thomas Basler
  *
  * Modifications Copyright (c) 2017 Martin F. Falatic
- * and Samuel Z. Guyer
+ *
+ * Modifications Copyright (c) 2018 Samuel Z. Guyer
  *
  * Based on public domain code created 19 Nov 2016 by Chris Osborn <fozztexx@fozztexx.com>
  * http://insentricity.com *
@@ -79,11 +80,12 @@ __attribute__ ((always_inline)) inline static uint32_t __clock_cycles() {
 #define RMT_RESET_DURATION NS_TO_CYCLES(50000)
 
 // -- Global information for the interrupt handler
-static void * gControllers[8];
+static CLEDController * gControllers[8];
+typedef void (*RefillDispatcher_t)(uint8_t);
+static RefillDispatcher_t gRefillFunctions[8];
 static intr_handle_t gRMT_intr_handle;
 static uint8_t gNext_channel;
 
-
 template <int DATA_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = RGB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 5>
 class ClocklessController : public CPixelLEDController<RGB_ORDER>
 {
@@ -132,6 +134,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         // -- Save this controller object, indexed by the RMT channel
         //    This allows us to get the pointer inside the interrupt handler
         gControllers[mRMT_channel] = this;
+	gRefillFunctions[mRMT_channel] = &refillDispatcher;
 
         ESP_LOGI("fastled", "RMT Channel Init: %d", mRMT_channel);
 
@@ -177,9 +180,12 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         fillHalfRMTBuffer();
         fillHalfRMTBuffer();
 
-        // -- Allocate the interrupt if we have not done so yet
+        // -- Allocate the interrupt if we have not done so yet. This
+        // -- interrupt handler must work for all different kinds of
+        // -- strips, so it delegates to the refill function for each
+        // -- specific instantiation of ClocklessController.
         if (gRMT_intr_handle == NULL)
-            esp_intr_alloc(ETS_RMT_INTR_SOURCE, 0, handleInterrupt, 0, &gRMT_intr_handle);
+            esp_intr_alloc(ETS_RMT_INTR_SOURCE, 0, interruptHandler, 0, &gRMT_intr_handle);
 
         // -- Turn on the interrupts
         rmt_set_tx_intr_en(mRMT_channel, true);
@@ -197,7 +203,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         mWait.mark();
     }
 
-    static void handleInterrupt(void *arg)
+    static void interruptHandler(void *arg)
     {
         // -- The basic structure of this code is borrowed from the
         //    interrupt handler in esp-idf/components/driver/rmt.c
@@ -227,14 +233,20 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
                 //    fill the next half and continue.
                 if(intr_st & (BIT(i))) {
                     channel = i - 24;
-                    ClocklessController * controller = static_cast<ClocklessController*>(gControllers[channel]);
-                    controller->fillHalfRMTBuffer();
+		    // -- Look up the appropriate refill dispatcher and call it
+		    (gRefillFunctions[channel])(channel);
                     RMT.int_clr.val = BIT(i);
                 }
             }
         }
     }
 
+    static void refillDispatcher(uint8_t channel)
+    {
+	ClocklessController * controller = static_cast<ClocklessController*>(gControllers[channel]);
+	controller->fillHalfRMTBuffer();
+    }
+
     void fillHalfRMTBuffer()
     {
         // -- Fill half of the RMT pulse buffer

From dddc30cda03780c4e33bd0790ab079036089f645 Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Wed, 14 Feb 2018 16:16:19 -0500
Subject: [PATCH 018/204] Added a demo

This version of DemoReel100 spins off a separate task on core 0 that just performs the FastLED.show() operations. Regular code running on core 1 (the default for Arduino) signals this task to request a show().
---
 examples/DemoReelESP32/DemoReelESP32.ino | 182 +++++++++++++++++++++++
 1 file changed, 182 insertions(+)
 create mode 100644 examples/DemoReelESP32/DemoReelESP32.ino

diff --git a/examples/DemoReelESP32/DemoReelESP32.ino b/examples/DemoReelESP32/DemoReelESP32.ino
new file mode 100644
index 0000000000..0d72f02a0d
--- /dev/null
+++ b/examples/DemoReelESP32/DemoReelESP32.ino
@@ -0,0 +1,182 @@
+#include "FastLED.h"
+
+FASTLED_USING_NAMESPACE
+
+// FastLED "100-lines-of-code" demo reel, showing just a few 
+// of the kinds of animation patterns you can quickly and easily 
+// compose using FastLED.  
+//
+// This example also shows one easy way to define multiple 
+// animations patterns and have them automatically rotate.
+//
+// -Mark Kriegsman, December 2014
+
+#if defined(FASTLED_VERSION) && (FASTLED_VERSION < 3001000)
+#warning "Requires FastLED 3.1 or later; check github for latest code."
+#endif
+
+#define DATA_PIN    12
+//#define CLK_PIN   4
+#define LED_TYPE    WS2811
+#define COLOR_ORDER GRB
+#define NUM_LEDS    27
+CRGB leds[NUM_LEDS];
+
+#define BRIGHTNESS          60
+#define FRAMES_PER_SECOND  120
+
+// -- The core to run FastLED.show()
+#define FASTLED_SHOW_CORE 0
+
+// -- Task handles for use in the notifications
+static TaskHandle_t FastLEDshowTaskHandle = 0;
+static TaskHandle_t userTaskHandle = 0;
+
+/** show() for ESP32
+ *  Call this function instead of FastLED.show(). It signals core 0 to issue a show, 
+ *  then waits for a notification that it is done.
+ */
+void FastLEDshowESP32()
+{
+    if (userTaskHandle == 0) {
+        const TickType_t xMaxBlockTime = pdMS_TO_TICKS( 200 );
+        // -- Store the handle of the current task, so that the show task can
+        //    notify it when it's done
+        userTaskHandle = xTaskGetCurrentTaskHandle();
+
+        // -- Trigger the show task
+        xTaskNotifyGive(FastLEDshowTaskHandle);
+
+        // -- Wait to be notified that it's done
+        ulTaskNotifyTake(pdTRUE, xMaxBlockTime);
+        userTaskHandle = 0;
+    }
+}
+
+/** show Task
+ *  This function runs on core 0 and just waits for requests to call FastLED.show()
+ */
+void FastLEDshowTask(void *pvParameters)
+{
+    const TickType_t xMaxBlockTime = pdMS_TO_TICKS( 500 );
+    // -- Run forever...
+    for(;;) {
+        // -- Wait for the trigger
+        ulTaskNotifyTake(pdTRUE, xMaxBlockTime);
+
+        // -- Do the show (synchronously)
+        FastLED.show();
+
+        // -- Notify the calling task
+        xTaskNotifyGive(userTaskHandle);
+    }
+}
+
+void setup() {
+  delay(3000); // 3 second delay for recovery
+  Serial.begin(115200);
+  
+  // tell FastLED about the LED strip configuration
+  FastLED.addLeds<LED_TYPE,DATA_PIN,COLOR_ORDER>(leds, NUM_LEDS).setCorrection(TypicalLEDStrip);
+  //FastLED.addLeds<LED_TYPE,DATA_PIN,CLK_PIN,COLOR_ORDER>(leds, NUM_LEDS).setCorrection(TypicalLEDStrip);
+
+  // set master brightness control
+  FastLED.setBrightness(BRIGHTNESS);
+
+    int core = xPortGetCoreID();
+    Serial.print("Main code running on core ");
+    Serial.println(core);
+
+    // -- Create the FastLED show task
+    xTaskCreatePinnedToCore(FastLEDshowTask, "FastLEDshowTask", 2048, NULL, 2, &FastLEDshowTaskHandle, FASTLED_SHOW_CORE);
+}
+
+
+// List of patterns to cycle through.  Each is defined as a separate function below.
+typedef void (*SimplePatternList[])();
+SimplePatternList gPatterns = { rainbow, rainbowWithGlitter, confetti, sinelon, juggle, bpm };
+
+uint8_t gCurrentPatternNumber = 0; // Index number of which pattern is current
+uint8_t gHue = 0; // rotating "base color" used by many of the patterns
+  
+void loop()
+{
+  // Call the current pattern function once, updating the 'leds' array
+  gPatterns[gCurrentPatternNumber]();
+
+  // send the 'leds' array out to the actual LED strip
+  FastLEDshowESP32();
+  // FastLED.show();
+  // insert a delay to keep the framerate modest
+  FastLED.delay(1000/FRAMES_PER_SECOND); 
+
+  // do some periodic updates
+  EVERY_N_MILLISECONDS( 20 ) { gHue++; } // slowly cycle the "base color" through the rainbow
+  EVERY_N_SECONDS( 10 ) { nextPattern(); } // change patterns periodically
+}
+
+#define ARRAY_SIZE(A) (sizeof(A) / sizeof((A)[0]))
+
+void nextPattern()
+{
+  // add one to the current pattern number, and wrap around at the end
+  gCurrentPatternNumber = (gCurrentPatternNumber + 1) % ARRAY_SIZE( gPatterns);
+}
+
+void rainbow() 
+{
+  // FastLED's built-in rainbow generator
+  fill_rainbow( leds, NUM_LEDS, gHue, 7);
+}
+
+void rainbowWithGlitter() 
+{
+  // built-in FastLED rainbow, plus some random sparkly glitter
+  rainbow();
+  addGlitter(80);
+}
+
+void addGlitter( fract8 chanceOfGlitter) 
+{
+  if( random8() < chanceOfGlitter) {
+    leds[ random16(NUM_LEDS) ] += CRGB::White;
+  }
+}
+
+void confetti() 
+{
+  // random colored speckles that blink in and fade smoothly
+  fadeToBlackBy( leds, NUM_LEDS, 10);
+  int pos = random16(NUM_LEDS);
+  leds[pos] += CHSV( gHue + random8(64), 200, 255);
+}
+
+void sinelon()
+{
+  // a colored dot sweeping back and forth, with fading trails
+  fadeToBlackBy( leds, NUM_LEDS, 20);
+  int pos = beatsin16( 13, 0, NUM_LEDS-1 );
+  leds[pos] += CHSV( gHue, 255, 192);
+}
+
+void bpm()
+{
+  // colored stripes pulsing at a defined Beats-Per-Minute (BPM)
+  uint8_t BeatsPerMinute = 62;
+  CRGBPalette16 palette = PartyColors_p;
+  uint8_t beat = beatsin8( BeatsPerMinute, 64, 255);
+  for( int i = 0; i < NUM_LEDS; i++) { //9948
+    leds[i] = ColorFromPalette(palette, gHue+(i*2), beat-gHue+(i*10));
+  }
+}
+
+void juggle() {
+  // eight colored dots, weaving in and out of sync with each other
+  fadeToBlackBy( leds, NUM_LEDS, 20);
+  byte dothue = 0;
+  for( int i = 0; i < 8; i++) {
+    leds[beatsin16( i+7, 0, NUM_LEDS-1 )] |= CHSV(dothue, 200, 255);
+    dothue += 32;
+  }
+}
+

From ab3a0bc5acc652aece406b5e86facfec554cf816 Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Sun, 18 Feb 2018 21:45:10 -0500
Subject: [PATCH 019/204] Avoid unnecessary timeouts

Replaced the 500ms timeout on the show task's notification wait with portMAX_DELAY. There's really no point in timing out (and crashing the program) just because the application hasn't called show.
---
 examples/DemoReelESP32/DemoReelESP32.ino | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/examples/DemoReelESP32/DemoReelESP32.ino b/examples/DemoReelESP32/DemoReelESP32.ino
index 0d72f02a0d..3a32d4c4d0 100644
--- a/examples/DemoReelESP32/DemoReelESP32.ino
+++ b/examples/DemoReelESP32/DemoReelESP32.ino
@@ -39,7 +39,6 @@ static TaskHandle_t userTaskHandle = 0;
 void FastLEDshowESP32()
 {
     if (userTaskHandle == 0) {
-        const TickType_t xMaxBlockTime = pdMS_TO_TICKS( 200 );
         // -- Store the handle of the current task, so that the show task can
         //    notify it when it's done
         userTaskHandle = xTaskGetCurrentTaskHandle();
@@ -48,6 +47,7 @@ void FastLEDshowESP32()
         xTaskNotifyGive(FastLEDshowTaskHandle);
 
         // -- Wait to be notified that it's done
+        const TickType_t xMaxBlockTime = pdMS_TO_TICKS( 200 );
         ulTaskNotifyTake(pdTRUE, xMaxBlockTime);
         userTaskHandle = 0;
     }
@@ -58,11 +58,10 @@ void FastLEDshowESP32()
  */
 void FastLEDshowTask(void *pvParameters)
 {
-    const TickType_t xMaxBlockTime = pdMS_TO_TICKS( 500 );
     // -- Run forever...
     for(;;) {
         // -- Wait for the trigger
-        ulTaskNotifyTake(pdTRUE, xMaxBlockTime);
+        ulTaskNotifyTake(pdTRUE, portMAX_DELAY);
 
         // -- Do the show (synchronously)
         FastLED.show();

From bfd59e0b0d68dafb23a3bf23a5bec31f40706c84 Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Thu, 22 Feb 2018 16:25:14 -0500
Subject: [PATCH 020/204] Parallel output

Reworked the code again in order to support parallel output, which is now the default mode. You can also now ask it to use the built-in RMT driver if you have other parts of your code that need the RMT peripheral.

Two #defines control choices -- put either or both of these before including FastLED.h:

#define FASTLED_RMT_CORE_DRIVER

Uses the ESP core RMT driver. To do this, though, it allocates a big buffer to hold all of the pixel bits, so there is a memory and compute cost.

#define FASTLED_RMT_SERIAL_OUTPUT

Force serial output of each strip.
---
 platforms/esp/32/clockless_esp32.h | 222 +++++++++++++++++++++++------
 1 file changed, 179 insertions(+), 43 deletions(-)

diff --git a/platforms/esp/32/clockless_esp32.h b/platforms/esp/32/clockless_esp32.h
index cd65008682..4d2842da94 100644
--- a/platforms/esp/32/clockless_esp32.h
+++ b/platforms/esp/32/clockless_esp32.h
@@ -79,12 +79,33 @@ __attribute__ ((always_inline)) inline static uint32_t __clock_cycles() {
 // -- Number of cycles to reset the strip
 #define RMT_RESET_DURATION NS_TO_CYCLES(50000)
 
+// -- Global counter of channels used
+//    Each FastLED.addLeds uses the next consecutive channel
+static uint8_t gNextChannel;
+
 // -- Global information for the interrupt handler
+//    Information is indexed by the RMT channel, so we can get it 
+//    when we are in the interrupt handler.
 static CLEDController * gControllers[8];
+
 typedef void (*RefillDispatcher_t)(uint8_t);
 static RefillDispatcher_t gRefillFunctions[8];
+
 static intr_handle_t gRMT_intr_handle;
-static uint8_t gNext_channel;
+
+// -- Parallelize the output This works because most of the work of
+//    pumping out the bits is handled by the RMT peripheral, which we
+//    keep filled by responding to interrupts. All we need to do is
+//    detect when all of the channels have finished.
+
+// -- Global semaphore for the whole show process
+//    Only used in parallel output, to signal when all controllers are done
+static xSemaphoreHandle gTX_sem = NULL;
+
+// -- Globals to keep track of how many controllers have started and
+//    how many have finished
+static int gNumShowing = 0;
+static int gNumDone = 0;
 
 template <int DATA_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = RGB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 5>
 class ClocklessController : public CPixelLEDController<RGB_ORDER>
@@ -93,6 +114,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
     rmt_channel_t mRMT_channel;
 
     // -- Semaphore to signal when show() is done
+    //    Per-controller, so only needed for serial output
     xSemaphoreHandle mTX_sem = NULL;
 
     // -- Timing values for zero and one bits
@@ -100,11 +122,17 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
     rmt_item32_t mOne;
 
     // -- State information for keeping track of where we are in the pixel data
-    PixelController<RGB_ORDER> * mPixels  = NULL;
+    PixelController<RGB_ORDER> * mPixels = NULL;
+    void * mPixelSpace = NULL;
     uint8_t mRGB_channel;
     uint16_t mCurPulse;
     CMinWait<WAIT_TIME> mWait;
 
+    // -- Buffer to hold all of the pulses. For the version that uses
+    //    the RMT driver built into the ESP core.
+    rmt_item32_t * mBuffer;
+    uint16_t mBufferSize;
+
 public:
 
     virtual void init()
@@ -126,7 +154,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         mZero.duration1 = TO_RMT_CYCLES(T2 + T3);
 
         // -- Sequentially assign RMT channels -- at most 8
-        mRMT_channel =  (rmt_channel_t) gNext_channel++;
+        mRMT_channel =  (rmt_channel_t) gNextChannel++;
         if (mRMT_channel > 7) {
             assert("Only 8 RMT Channels are allowed");
         }
@@ -154,13 +182,32 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         // -- Apply the configuration
         rmt_config(&rmt_tx);
 
+	// -- Allocate space for a cope of the pixels
+	// mPixelSpace = malloc(sizeof(PixelController<RGB_ORDER>));
+
+#ifdef FASTLED_RMT_CORE_DRIVER
+	// -- Use the built-in RMT driver. The only reason to choose
+	//    this option is if you have other parts of your code that
+	//    are using the RMT peripheral, and you want them to
+	//    co-exist with FastLED.
+	rmt_driver_install(mRMT_channel, 0, 0);
+#else
+	// -- Use the custom RMT driver implemented here, which computes
+        //    pulses on demand to reduce memory requirements and latency.
+
         // -- Set up the RMT to send 1/2 of the pulse buffer and then
         //    generate an interrupt. When we get this interrupt we
         //    fill the other half in preparation (kind of like double-buffering)
         rmt_set_tx_thr_intr_en(mRMT_channel, true, MAX_PULSES);
 
+        // -- Turn on the interrupts
+        rmt_set_tx_intr_en(mRMT_channel, true);
+
         // -- Semaphore to signal completion of each show()
+        //    Only needed for serial output
         mTX_sem = xSemaphoreCreateBinary();
+
+#endif
     }
 
     virtual uint16_t getMaxRefreshRate() const { return 400; }
@@ -171,8 +218,39 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
     {
         mWait.wait();
 
-        // -- Initialize the local state, save a pointer to the pixel data
-        mPixels = &pixels;
+	gNumShowing++;
+
+#ifdef FASTLED_RMT_CORE_DRIVER
+	// -- Fill a big buffer with all of the pixel data
+	mBufferSize = pixels.size() * 3 * 8;
+	computeAllRMTItems(pixels);
+
+	// -- Serial or parallel
+	bool wait_done;
+#ifdef FASTLED_RMT_SERIAL_OUTPUT
+	wait_done = true;
+#else
+	// -- Only wait on the last channel
+	wait_done = (gNumShowing == gNextChannel);
+#endif
+
+	// -- Send it all at once using the built-in RMT driver
+	rmt_write_items(mRMT_channel, mBuffer, mBufferSize, wait_done);
+	return;
+#endif
+
+	// -- Create a global semaphore that signals when all the
+	//    controllers are done (only needed for parallel output).
+	if (gTX_sem == NULL)
+	    gTX_sem = xSemaphoreCreateBinary();
+
+        // -- Initialize the local state, save a pointer to the pixel
+        //    data. We need to make a copy because pixels is a local
+        //    variable in the calling function, and this data structure
+        //    needs to outlive this call to showPixels.
+        // mPixels = new (mPixelSpace) PixelController<RGB_ORDER>(pixels);
+	if (mPixels != NULL) delete mPixels;
+	mPixels = new PixelController<RGB_ORDER>(pixels);
         mCurPulse = 0;
         mRGB_channel = 0;
 
@@ -181,9 +259,9 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         fillHalfRMTBuffer();
 
         // -- Allocate the interrupt if we have not done so yet. This
-        // -- interrupt handler must work for all different kinds of
-        // -- strips, so it delegates to the refill function for each
-        // -- specific instantiation of ClocklessController.
+        //    interrupt handler must work for all different kinds of
+        //    strips, so it delegates to the refill function for each
+        //    specific instantiation of ClocklessController.
         if (gRMT_intr_handle == NULL)
             esp_intr_alloc(ETS_RMT_INTR_SOURCE, 0, interruptHandler, 0, &gRMT_intr_handle);
 
@@ -193,55 +271,63 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         // -- Start the RMT TX operation
         rmt_tx_start(mRMT_channel, true);
 
-        // -- Block until done
+#ifdef FASTLED_RMT_SERIAL_OUTPUT
+        // -- Block until this controller is done
         //    All of the data transmission happens while we wait here
-        xSemaphoreTake(mTX_sem, portMAX_DELAY);
-
+	xSemaphoreTake(mTX_sem, portMAX_DELAY);
+	
         // -- Turn off the interrupts
         rmt_set_tx_intr_en(mRMT_channel, false);
-
+#else
+	// -- If this is the last controller, then this is the place to
+        //    wait for all the data to be sent.
+	if (gNumShowing == gNextChannel) {
+	    xSemaphoreTake(gTX_sem, portMAX_DELAY);
+	    gNumDone = 0;
+	    gNumShowing = 0;
+	}
+#endif
         mWait.mark();
     }
 
-    static void interruptHandler(void *arg)
+    static IRAM_ATTR void interruptHandler(void *arg)
     {
         // -- The basic structure of this code is borrowed from the
         //    interrupt handler in esp-idf/components/driver/rmt.c
         uint32_t intr_st = RMT.int_st.val;
-        uint32_t i = 0;
         uint8_t channel;
         portBASE_TYPE HPTaskAwoken = 0;
 
-        // -- Loop over all the bits in the interrupt status word; the particular
-        //    bit set indicates both the meaning and the RMT channel to which it applies
-        for(i = 0; i < 32; i++) {
-            if(i < 24) {
-                // -- The low 24 bits consist of 3 bits per channel that indicate that
-                //    when a tx/rx operation completes
-                if(intr_st & BIT(i)) {
-                    channel = i / 3;
-                    if (i % 3 == 0) {
-                        // -- Transmission is complete, signal the semaphore that show() is finished
-                        ClocklessController * controller = static_cast<ClocklessController*>(gControllers[channel]);
-                        xSemaphoreGiveFromISR(controller->mTX_sem, &HPTaskAwoken);
-                        if(HPTaskAwoken == pdTRUE) portYIELD_FROM_ISR();
-                    }
-                    RMT.int_clr.val = BIT(i);
-                }
-            } else {
-                // -- The high 8 bits signal that the current 1/2 buffer has been sent, so we should
-                //    fill the next half and continue.
-                if(intr_st & (BIT(i))) {
-                    channel = i - 24;
-		    // -- Look up the appropriate refill dispatcher and call it
-		    (gRefillFunctions[channel])(channel);
-                    RMT.int_clr.val = BIT(i);
-                }
-            }
-        }
+	for (channel = 0; channel < 8; channel++) {
+	    int tx_done_bit = channel * 3;
+	    int tx_next_bit = channel + 24;
+	    if (intr_st & BIT(tx_done_bit)) {
+		// -- Transmission is complete, signal the semaphore that show() is finished
+		ClocklessController * controller = static_cast<ClocklessController*>(gControllers[channel]);
+		gNumDone++;
+#ifdef FASTLED_RMT_SERIAL_OUTPUT
+		xSemaphoreGiveFromISR(controller->mTX_sem, &HPTaskAwoken);
+#else
+		if (gNumDone == gNextChannel)
+		    xSemaphoreGiveFromISR(gTX_sem, &HPTaskAwoken);
+#endif
+		if(HPTaskAwoken == pdTRUE) portYIELD_FROM_ISR();
+		RMT.int_clr.val = BIT(tx_done_bit);
+	    }
+	    if (intr_st & BIT(tx_next_bit)) {
+		// -- Look up the appropriate refill dispatcher and call it
+		(gRefillFunctions[channel])(channel);
+		RMT.int_clr.val = BIT(tx_next_bit);
+	    }
+	}
     }
 
-    static void refillDispatcher(uint8_t channel)
+    /* Refill the RMT buffer
+     * We need this dispatch function because there will be one for each instantiation of this template
+     * class -- in particular, one for each possible RGB_ORDER. We need to dispatch to the correct one
+     * so that fillHalfRMTBuffer will use the right ordering for this strip.
+     */
+    static IRAM_ATTR void refillDispatcher(uint8_t channel)
     {
 	ClocklessController * controller = static_cast<ClocklessController*>(gControllers[channel]);
 	controller->fillHalfRMTBuffer();
@@ -264,7 +350,9 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
 
         //    When we run out of pixel data, just fill the remaining items
         //    with zero pulses.
-        uint16_t pulse_count = 0;
+
+	RMT.apb_conf.fifo_mask = RMT_DATA_MODE_MEM;
+        uint16_t pulse_count = 0; // Ranges from 0-31 (half a buffer)
         uint32_t byteval = 0;
         while (mPixels->has(1) && pulse_count < MAX_PULSES) {
             // -- Cycle through the R,G, and B values in the right order
@@ -317,6 +405,54 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         if (mCurPulse >= MAX_PULSES*2)
             mCurPulse = 0;
     }
+    
+    void computeAllRMTItems(PixelController<RGB_ORDER> & pixels)
+    {
+	// -- Compute the pulse values for the whole strip at once.
+	//    Requires a large buffer
+
+	// TODO: need a specific number here
+	if (mBuffer == NULL) {
+	    mBuffer = (rmt_item32_t *) calloc( mBufferSize, sizeof(rmt_item32_t));
+	}
+
+        mCurPulse = 0;
+        mRGB_channel = 0;
+        uint32_t byteval = 0;
+        while (pixels.has(1)) {
+            // -- Cycle through the R,G, and B values in the right order
+            switch (mRGB_channel) {
+            case 0:
+                byteval = pixels.loadAndScale0();
+                mRGB_channel = 1;
+                break;
+            case 1:
+                byteval = pixels.loadAndScale1();
+                mRGB_channel = 2;
+                break;
+            case 2:
+                byteval = pixels.loadAndScale2();
+                pixels.advanceData();
+                pixels.stepDithering();
+                mRGB_channel = 0;
+                break;
+            default:
+                break;
+            }
+
+            byteval <<= 24;
+            // Shift bits out, MSB first, storing in mBuffer[mCurPulse] the
+            // rmt_item32_t value (mOne or mZero) corresponding to each bit
+            for (register uint32_t j = 0; j < 8; j++) {
+		mBuffer[mCurPulse] = (byteval & 0x80000000L) ? mOne : mZero;
+                byteval <<= 1;
+                mCurPulse++;
+            }
+        }
+
+	mBuffer[mCurPulse-1].duration1 = RMT_RESET_DURATION;
+	assert(mCurPulse == mBufferSize);
+    }
 };
 
 FASTLED_NAMESPACE_END

From 7f624ca5a61a1a4da8ad165de44119b02a739161 Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Fri, 23 Feb 2018 10:40:06 -0500
Subject: [PATCH 021/204] Documentation

Describing the implementation and the compile-time switches
---
 platforms/esp/32/clockless_esp32.h | 58 ++++++++++++++++++++++++++++++
 1 file changed, 58 insertions(+)

diff --git a/platforms/esp/32/clockless_esp32.h b/platforms/esp/32/clockless_esp32.h
index 4d2842da94..8d29f283ea 100644
--- a/platforms/esp/32/clockless_esp32.h
+++ b/platforms/esp/32/clockless_esp32.h
@@ -5,6 +5,64 @@
  *
  * Modifications Copyright (c) 2018 Samuel Z. Guyer
  *
+ * ESP32 support is provided using the RMT peripheral device -- a unit
+ * on the chip designed specifically for generating (and receiving)
+ * precisely-timed digital signals. Nominally for use in infrared
+ * remote controls, we use it to generate the signals for clockless
+ * LED strips. The main advantage of using the RMT device is that,
+ * once programmed, it generates the signal asynchronously, allowing
+ * the CPU to continue executing other code. It is also not vulnerable
+ * to interrupts or other timing problems that could disrupt the signal.
+ *
+ * The implementation strategy is borrowed from previous work and from
+ * the RMT support built into the ESP32 IDF. The RMT device has 8
+ * channels, which can be programmed independently with sequences of
+ * high/low bits. Memory for each channel is limited, however, so in
+ * order to send a long sequence of bits, we need to continuously
+ * refill the buffer until all the data is sent. To do this, we fill
+ * half the buffer and then set an interrupt to go off when that half
+ * is sent. Then we refill that half while the second half is being
+ * sent. This strategy effectively overlaps computation (by the CPU)
+ * and communication (by the RMT).
+ *
+ * PARALLEL vs SERIAL
+ *
+ * By default, this driver sends the data for all LED strips in
+ * parallel. We get parallelism essentially for free because the RMT
+ * is an independent processing unit. It only interrupts the CPU when
+ * it needs more data to send, and the CPU is fast enough to keep all
+ * 8 channels filled.
+ *
+ * However, there may be cases where you want serial output -- that
+ * is, you want to send the data for each strip before moving on to
+ * the next one. The performance will be much lower, limiting the
+ * framerate. To force serial output, add this directive before you
+ * include FastLED.h:
+ *
+ *      #define FASTLED_RMT_SERIAL_OUTPUT
+ *
+ * OTHER RMT APPLICATIONS
+ *
+ * The default FastLED driver takes over control of the RMT
+ * interrupts, making it hard to use the RMT device for other
+ * (non-FastLED) purposes. You can change its behavior to use the ESP
+ * core driver instead, allowing other RMT applications to
+ * co-exist. To switch to this mode, add the following directive
+ * before you include FastLED.h:
+ *
+ *      #define FASTLED_RMT_CORE_DRIVER
+ *
+ * There is a performance penalty for using this mode. We need to
+ * compute the RMT signal for the entire LED strip ahead of time,
+ * rather than overlapping it with communication. We also need a large
+ * buffer to hold the signal specification. Each bit of pixel data is
+ * represented by a 32-bit pulse specification, so it is a 32X blow-up
+ * in memory use.
+ *
+ * This driver assigns channels to LED strips sequentially starting at
+ * zero. So, for other RMT applications make sure to choose a channel
+ * at the higher end to avoid collisions.
+ *
  * Based on public domain code created 19 Nov 2016 by Chris Osborn <fozztexx@fozztexx.com>
  * http://insentricity.com *
  *

From 29e62ea44d2055231c13d92551cb5be811da7fea Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Fri, 23 Feb 2018 16:56:13 -0500
Subject: [PATCH 022/204] Removing files that should not be there

---
 FastLED.cpp~                              | 270 ----------------------
 platforms/esp/32/clockless_esp32.h-safe   | 126 ----------
 platforms/esp/32/clockless_esp32.h~       | 268 ---------------------
 platforms/esp/32/led_sysdefs_esp32.h.orig |  37 ---
 4 files changed, 701 deletions(-)
 delete mode 100644 FastLED.cpp~
 delete mode 100644 platforms/esp/32/clockless_esp32.h-safe
 delete mode 100644 platforms/esp/32/clockless_esp32.h~
 delete mode 100644 platforms/esp/32/led_sysdefs_esp32.h.orig

diff --git a/FastLED.cpp~ b/FastLED.cpp~
deleted file mode 100644
index 349e8c0ac1..0000000000
--- a/FastLED.cpp~
+++ /dev/null
@@ -1,270 +0,0 @@
-#define FASTLED_INTERNAL
-#include "FastLED.h"
-
-
-#if defined(__SAM3X8E__)
-volatile uint32_t fuckit;
-#endif
-
-FASTLED_NAMESPACE_BEGIN
-
-void *pSmartMatrix = NULL;
-
-CFastLED FastLED;
-
-CLEDController *CLEDController::m_pHead = NULL;
-CLEDController *CLEDController::m_pTail = NULL;
-static uint32_t lastshow = 0;
-
-uint32_t _frame_cnt=0;
-uint32_t _retry_cnt=0;
-
-// uint32_t CRGB::Squant = ((uint32_t)((__TIME__[4]-'0') * 28))<<16 | ((__TIME__[6]-'0')*50)<<8 | ((__TIME__[7]-'0')*28);
-
-CFastLED::CFastLED() {
-	// clear out the array of led controllers
-	// m_nControllers = 0;
-	m_Scale = 255;
-	m_nFPS = 0;
-	m_pPowerFunc = NULL;
-	m_nPowerData = 0xFFFFFFFF;
-}
-
-CLEDController &CFastLED::addLeds(CLEDController *pLed,
-									   struct CRGB *data,
-									   int nLedsOrOffset, int nLedsIfOffset) {
-	int nOffset = (nLedsIfOffset > 0) ? nLedsOrOffset : 0;
-	int nLeds = (nLedsIfOffset > 0) ? nLedsIfOffset : nLedsOrOffset;
-
-	pLed->init();
-	pLed->setLeds(data + nOffset, nLeds);
-	FastLED.setMaxRefreshRate(pLed->getMaxRefreshRate(),true);
-	return *pLed;
-}
-
-void CFastLED::show(uint8_t scale) {
-	// guard against showing too rapidly
-	while(m_nMinMicros && ((micros()-lastshow) < m_nMinMicros));
-	lastshow = micros();
-
-	// If we have a function for computing power, use it!
-	if(m_pPowerFunc) {
-		scale = (*m_pPowerFunc)(scale, m_nPowerData);
-	}
-
-	CLEDController *pCur = CLEDController::head();
-	while(pCur) {
-		uint8_t d = pCur->getDither();
-		if(m_nFPS < 100) { pCur->setDither(0); }
-		pCur->showLeds(scale);
-		pCur->setDither(d);
-		pCur = pCur->next();
-	}
-	countFPS();
-}
-
-int CFastLED::count() {
-    int x = 0;
-	CLEDController *pCur = CLEDController::head();
-	while( pCur) {
-        x++;
-		pCur = pCur->next();
-	}
-    return x;
-}
-
-CLEDController & CFastLED::operator[](int x) {
-	CLEDController *pCur = CLEDController::head();
-	while(x-- && pCur) {
-		pCur = pCur->next();
-	}
-	if(pCur == NULL) {
-		return *(CLEDController::head());
-	} else {
-		return *pCur;
-	}
-}
-
-void CFastLED::showColor(const struct CRGB & color, uint8_t scale) {
-	while(m_nMinMicros && ((micros()-lastshow) < m_nMinMicros));
-	lastshow = micros();
-
-	// If we have a function for computing power, use it!
-	if(m_pPowerFunc) {
-		scale = (*m_pPowerFunc)(scale, m_nPowerData);
-	}
-
-	CLEDController *pCur = CLEDController::head();
-	while(pCur) {
-		uint8_t d = pCur->getDither();
-		if(m_nFPS < 100) { pCur->setDither(0); }
-		pCur->showColor(color, scale);
-		pCur->setDither(d);
-		pCur = pCur->next();
-	}
-	countFPS();
-}
-
-void CFastLED::clear(boolean writeData) {
-	if(writeData) {
-		showColor(CRGB(0,0,0), 0);
-	}
-    clearData();
-}
-
-void CFastLED::clearData() {
-	CLEDController *pCur = CLEDController::head();
-	while(pCur) {
-		pCur->clearLedData();
-		pCur = pCur->next();
-	}
-}
-
-void CFastLED::delay(unsigned long ms) {
-	unsigned long start = millis();
-        do {
-#ifndef FASTLED_ACCURATE_CLOCK
-		// make sure to allow at least one ms to pass to ensure the clock moves
-		// forward
-		::delay(1);
-#endif
-		show();
-#if defined(ARDUINO) && (ARDUINO > 150) && !defined(IS_BEAN) && !defined (ARDUINO_AVR_DIGISPARK)
-		yield();
-#endif
-	}
-	while((millis()-start) < ms);
-}
-
-void CFastLED::setTemperature(const struct CRGB & temp) {
-	CLEDController *pCur = CLEDController::head();
-	while(pCur) {
-		pCur->setTemperature(temp);
-		pCur = pCur->next();
-	}
-}
-
-void CFastLED::setCorrection(const struct CRGB & correction) {
-	CLEDController *pCur = CLEDController::head();
-	while(pCur) {
-		pCur->setCorrection(correction);
-		pCur = pCur->next();
-	}
-}
-
-void CFastLED::setDither(uint8_t ditherMode)  {
-	CLEDController *pCur = CLEDController::head();
-	while(pCur) {
-		pCur->setDither(ditherMode);
-		pCur = pCur->next();
-	}
-}
-
-//
-// template<int m, int n> void transpose8(unsigned char A[8], unsigned char B[8]) {
-// 	uint32_t x, y, t;
-//
-// 	// Load the array and pack it into x and y.
-//   	y = *(unsigned int*)(A);
-// 	x = *(unsigned int*)(A+4);
-//
-// 	// x = (A[0]<<24)   | (A[m]<<16)   | (A[2*m]<<8) | A[3*m];
-// 	// y = (A[4*m]<<24) | (A[5*m]<<16) | (A[6*m]<<8) | A[7*m];
-//
-        // // pre-transform x
-        // t = (x ^ (x >> 7)) & 0x00AA00AA;  x = x ^ t ^ (t << 7);
-        // t = (x ^ (x >>14)) & 0x0000CCCC;  x = x ^ t ^ (t <<14);
-				//
-        // // pre-transform y
-        // t = (y ^ (y >> 7)) & 0x00AA00AA;  y = y ^ t ^ (t << 7);
-        // t = (y ^ (y >>14)) & 0x0000CCCC;  y = y ^ t ^ (t <<14);
-				//
-        // // final transform
-        // t = (x & 0xF0F0F0F0) | ((y >> 4) & 0x0F0F0F0F);
-        // y = ((x << 4) & 0xF0F0F0F0) | (y & 0x0F0F0F0F);
-        // x = t;
-//
-// 	B[7*n] = y; y >>= 8;
-// 	B[6*n] = y; y >>= 8;
-// 	B[5*n] = y; y >>= 8;
-// 	B[4*n] = y;
-//
-//   B[3*n] = x; x >>= 8;
-// 	B[2*n] = x; x >>= 8;
-// 	B[n] = x; x >>= 8;
-// 	B[0] = x;
-// 	// B[0]=x>>24;    B[n]=x>>16;    B[2*n]=x>>8;  B[3*n]=x>>0;
-// 	// B[4*n]=y>>24;  B[5*n]=y>>16;  B[6*n]=y>>8;  B[7*n]=y>>0;
-// }
-//
-// void transposeLines(Lines & out, Lines & in) {
-// 	transpose8<1,2>(in.bytes, out.bytes);
-// 	transpose8<1,2>(in.bytes + 8, out.bytes + 1);
-// }
-
-extern int noise_min;
-extern int noise_max;
-
-void CFastLED::countFPS(int nFrames) {
-  static int br = 0;
-  static uint32_t lastframe = 0; // millis();
-
-  if(br++ >= nFrames) {
-		uint32_t now = millis();
-		now -= lastframe;
-		m_nFPS = (br * 1000) / now;
-    br = 0;
-    lastframe = millis();
-  }
-}
-
-void CFastLED::setMaxRefreshRate(uint16_t refresh, bool constrain) {
-  if(constrain) {
-    // if we're constraining, the new value of m_nMinMicros _must_ be higher than previously (because we're only
-    // allowed to slow things down if constraining)
-    if(refresh > 0) {
-      m_nMinMicros = ( (1000000/refresh) >  m_nMinMicros) ? (1000000/refresh) : m_nMinMicros;
-    }
-  } else if(refresh > 0) {
-    m_nMinMicros = 1000000 / refresh;
-  } else {
-    m_nMinMicros = 0;
-  }
-}
-
-extern "C" int atexit(void (* /*func*/ )()) { return 0; }
-
-#ifdef NEED_CXX_BITS
-namespace __cxxabiv1
-{
-	#ifndef ESP8266
-	extern "C" void __cxa_pure_virtual (void) {}
-	#endif
-
-	/* guard variables */
-
-	/* The ABI requires a 64-bit type.  */
-	__extension__ typedef int __guard __attribute__((mode(__DI__)));
-
-	extern "C" int __cxa_guard_acquire (__guard *) __attribute__((weak));
-	extern "C" void __cxa_guard_release (__guard *) __attribute__((weak));
-	extern "C" void __cxa_guard_abort (__guard *) __attribute__((weak));
-
-	extern "C" int __cxa_guard_acquire (__guard *g)
-	{
-		return !*(char *)(g);
-	}
-
-	extern "C" void __cxa_guard_release (__guard *g)
-	{
-		*(char *)g = 1;
-	}
-
-	extern "C" void __cxa_guard_abort (__guard *)
-	{
-
-	}
-}
-#endif
-
-FASTLED_NAMESPACE_END
diff --git a/platforms/esp/32/clockless_esp32.h-safe b/platforms/esp/32/clockless_esp32.h-safe
deleted file mode 100644
index 605ba28530..0000000000
--- a/platforms/esp/32/clockless_esp32.h-safe
+++ /dev/null
@@ -1,126 +0,0 @@
-#pragma once
-
-FASTLED_NAMESPACE_BEGIN
-
-#ifdef FASTLED_DEBUG_COUNT_FRAME_RETRIES
-extern uint32_t _frame_cnt;
-extern uint32_t _retry_cnt;
-#endif
-
-// Info on reading cycle counter from https://github.com/kbeckmann/nodemcu-firmware/blob/ws2812-dual/app/modules/ws2812.c
-__attribute__ ((always_inline)) inline static uint32_t __clock_cycles() {
-  uint32_t cyc;
-  __asm__ __volatile__ ("rsr %0,ccount":"=a" (cyc));
-  return cyc;
-}
-
-#define FASTLED_HAS_CLOCKLESS 1
-
-template <int DATA_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = RGB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 5>
-class ClocklessController : public CPixelLEDController<RGB_ORDER> {
-
-    typedef typename FastPin<DATA_PIN>::port_ptr_t data_ptr_t;
-    typedef typename FastPin<DATA_PIN>::port_t data_t;
-
-    data_t mPinMask;
-    data_ptr_t mPort;
-    CMinWait<WAIT_TIME> mWait;
-public:
-    virtual void init() {
-	FastPin<DATA_PIN>::setOutput();
-	mPinMask = FastPin<DATA_PIN>::mask();
-	mPort = FastPin<DATA_PIN>::port();
-    }
-
-    virtual uint16_t getMaxRefreshRate() const { return 400; }
-
-protected:
-
-    virtual void showPixels(PixelController<RGB_ORDER> & pixels) {
-	mWait.wait();
-	int cnt = FASTLED_INTERRUPT_RETRY_COUNT;
-	while((showRGBInternal(pixels)==0) && cnt--) {
-#ifdef FASTLED_DEBUG_COUNT_FRAME_RETRIES
-	    _retry_cnt++;
-#endif
-	    ets_intr_unlock();
-	    delayMicroseconds(WAIT_TIME);
-	    ets_intr_lock();
-	}
-	mWait.mark();
-    }
-
-#define _ESP_ADJ (0)
-#define _ESP_ADJ2 (0)
-
-    template<int BITS> __attribute__ ((always_inline)) inline static void writeBits(register uint32_t & last_mark, register uint32_t b) {
-	b = ~b; b <<= 24;
-	for(register uint32_t i = BITS; i > 0; i--) {
-	    while((__clock_cycles() - last_mark) < (T1+T2+T3));
-	    last_mark = __clock_cycles();
-	    FastPin<DATA_PIN>::hi();
-	    
-	    while((__clock_cycles() - last_mark) < T1);
-	    if(b & 0x80000000L) { FastPin<DATA_PIN>::lo(); }
-	    b <<= 1;
-	    
-	    while((__clock_cycles() - last_mark) < (T1+T2));
-	    FastPin<DATA_PIN>::lo();
-	}
-    }
-
-    // This method is made static to force making register Y available to use for data on AVR - if the method is non-static, then
-    // gcc will use register Y for the this pointer.
-    static uint32_t showRGBInternal(PixelController<RGB_ORDER> pixels) {
-	// Setup the pixel controller and load/scale the first byte
-	pixels.preStepFirstByteDithering();
-	register uint32_t b = pixels.loadAndScale0();
-	pixels.preStepFirstByteDithering();
-
-	ets_intr_lock();
-
-	uint32_t start = __clock_cycles();
-	uint32_t last_mark = start;
-	while(pixels.has(1)) {
-
-	    // Write first byte, read next byte
-	    writeBits<8+XTRA0>(last_mark, b);
-	    b = pixels.loadAndScale1();
-	    
-	    // Write second byte, read 3rd byte
-	    writeBits<8+XTRA0>(last_mark, b);
-	    b = pixels.loadAndScale2();
-	    
-	    // Write third byte, read 1st byte of next pixel
-	    writeBits<8+XTRA0>(last_mark, b);
-	    b = pixels.advanceAndLoadAndScale0();
-	    
-#if (FASTLED_ALLOW_INTERRUPTS == 1)
-	    ets_intr_unlock();	    
-#endif
-
-	    pixels.stepDithering();
-	    
-#if (FASTLED_ALLOW_INTERRUPTS == 1)
-	    ets_intr_lock();
-	    // if interrupts took longer than 45µs, punt on the current frame
-	    if((int32_t)(__clock_cycles()-last_mark) > 0) {
-		if((int32_t)(__clock_cycles()-last_mark) > (T1+T2+T3+((WAIT_TIME-INTERRUPT_THRESHOLD)*CLKS_PER_US))) {
-		    ets_intr_unlock();
-		    return 0; 
-		}
-	    }
-#endif
-	};
-
-	ets_intr_unlock();
-
-#ifdef FASTLED_DEBUG_COUNT_FRAME_RETRIES
-	_frame_cnt++;
-#endif
-
-	return __clock_cycles() - start;
-    }
-};
-
-FASTLED_NAMESPACE_END
diff --git a/platforms/esp/32/clockless_esp32.h~ b/platforms/esp/32/clockless_esp32.h~
deleted file mode 100644
index 0a3750e863..0000000000
--- a/platforms/esp/32/clockless_esp32.h~
+++ /dev/null
@@ -1,268 +0,0 @@
-/*
- * Integration into FastLED ClocklessController 2017 Thomas Basler
- *
- * Modifications Copyright (c) 2017 Martin F. Falatic
- *
- * Based on public domain code created 19 Nov 2016 by Chris Osborn <fozztexx@fozztexx.com>
- * http://insentricity.com *
- *
- */
-/*
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#pragma once
-
-FASTLED_NAMESPACE_BEGIN
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "esp32-hal.h"
-#include "esp_intr.h"
-#include "driver/gpio.h"
-#include "driver/rmt.h"
-#include "driver/periph_ctrl.h"
-#include "freertos/semphr.h"
-#include "soc/rmt_struct.h"
-
-#include "esp_log.h"
-
-#ifdef __cplusplus
-}
-#endif
-
-#define FASTLED_HAS_CLOCKLESS 1
-
-// -- Configuration constants
-#define DIVIDER             4 /* 8 still seems to work, but timings become marginal */
-#define MAX_PULSES         32 /* A channel has a 64 "pulse" buffer - we use half per pass */
-#define RMT_DURATION_NS  12.5 /* minimum time of a single RMT duration based on clock ns */
-
-// -- Convert ESP32 cycles back into nanoseconds
-#define ESPCLKS_TO_NS(_CLKS) (((long)(_CLKS) * 1000L) / F_CPU_MHZ)
-
-// -- Convert nanoseconds into RMT cycles
-#define F_CPU_RMT       (  80000000L)
-#define NS_PER_SEC      (1000000000L)
-#define CYCLES_PER_SEC  (F_CPU_RMT/DIVIDER)
-#define NS_PER_CYCLE    ( NS_PER_SEC / CYCLES_PER_SEC )
-#define NS_TO_CYCLES(n) ( (n) / NS_PER_CYCLE )
-
-// -- Convert ESP32 cycles to RMT cycles
-#define TO_RMT_CYCLES(_CLKS) NS_TO_CYCLES(ESPCLKS_TO_NS(_CLKS))
-
-static uint8_t rmt_channels_used = 0;
-
-template <int DATA_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = RGB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 5>
-class ClocklessController : public CPixelLEDController<RGB_ORDER>
-{
-    rmt_item32_t mZero;
-    rmt_item32_t mOne;
-
-    rmt_channel_t mRMT_channel;
-    xSemaphoreHandle mTX_sem = NULL;
-    intr_handle_t mRMT_intr_handle = NULL;
-    
-    PixelController<RGB_ORDER> *local_pixels  = NULL;
-    uint8_t mRGB_channel;
-    uint16_t mCurPulse;
-
-public:
-
-    virtual void init()
-    {
-	// TRS = 50000;
-
-	// -- Precompute rmt items corresponding to a zero bit and a one bit
-	//    according to the timing values given in the template instantiation
-	// T1H
-	mOne.level0 = 1;
-	mOne.duration0 = TO_RMT_CYCLES(T1+T2);
-	// T1L
-	mOne.level1 = 0;
-	mOne.duration1 = TO_RMT_CYCLES(T3);
-
-	// T0H
-	mZero.level0 = 1;
-	mZero.duration0 = TO_RMT_CYCLES(T1);
-	// T0L
-	mZero.level1 = 0;
-	mZero.duration1 = TO_RMT_CYCLES(T2 + T3);
-
-	// -- Sequentially assign RMT channels -- at most 8
-	mRMT_channel =  (rmt_channel_t) rmt_channels_used++;
-	if (mRMT_channel > 7) {
-	    assert("Only 8 RMT Channels are allowed");
-	}
-
-	ESP_LOGI("fastled", "RMT Channel Init: %d", mRMT_channel);
-
-	// -- RMT set up magic
-	DPORT_SET_PERI_REG_MASK(DPORT_PERIP_CLK_EN_REG, DPORT_RMT_CLK_EN);
-	DPORT_CLEAR_PERI_REG_MASK(DPORT_PERIP_RST_EN_REG, DPORT_RMT_RST);
-
-	rmt_set_pin(static_cast<rmt_channel_t>(mRMT_channel),
-		    RMT_MODE_TX,
-		    static_cast<gpio_num_t>(DATA_PIN));
-
-	RMT.apb_conf.fifo_mask = 1;  //enable memory access, instead of FIFO mode.
-	RMT.apb_conf.mem_tx_wrap_en = 1; //wrap around when hitting end of buffer
-	
-	RMT.conf_ch[mRMT_channel].conf0.div_cnt = DIVIDER;
-	RMT.conf_ch[mRMT_channel].conf0.mem_size = 1;
-	RMT.conf_ch[mRMT_channel].conf0.carrier_en = 0;
-	RMT.conf_ch[mRMT_channel].conf0.carrier_out_lv = 1;
-	RMT.conf_ch[mRMT_channel].conf0.mem_pd = 0;
-	RMT.conf_ch[mRMT_channel].conf1.rx_en = 0;
-	RMT.conf_ch[mRMT_channel].conf1.mem_owner = 0;
-	RMT.conf_ch[mRMT_channel].conf1.tx_conti_mode = 0;    //loop back mode.
-	RMT.conf_ch[mRMT_channel].conf1.ref_always_on = 1;    // use apb clock: 80M
-	RMT.conf_ch[mRMT_channel].conf1.idle_out_en = 1;
-	RMT.conf_ch[mRMT_channel].conf1.idle_out_lv = 0;
-		
-	RMT.tx_lim_ch[mRMT_channel].limit = MAX_PULSES;
-	
-	RMT.int_ena.val |= BIT(24 + mRMT_channel); // set ch*_tx_thr_event
-	RMT.int_ena.val |= BIT(mRMT_channel * 3); // set ch*_tx_end
-    }
-
-    virtual uint16_t getMaxRefreshRate() const { return 400; }
-
-protected:
-
-    virtual void showPixels(PixelController<RGB_ORDER> & pixels)
-    {
-	esp_intr_alloc(ETS_RMT_INTR_SOURCE, 0, handleInterrupt, this, &mRMT_intr_handle);
-
-	// -- Initialize the local state, save a pointer to the pixel data
-	local_pixels = &pixels;
-	mCurPulse = 0;
-	mRGB_channel = 0;
-		
-	// -- Fill both halves of the buffer
-	copyToRmtBlock_half();
-	copyToRmtBlock_half();
-
-	mTX_sem = xSemaphoreCreateBinary();
-
-	// -- Start the RMT TX operationb
-	RMT.conf_ch[mRMT_channel].conf1.mem_rd_rst = 1;
-	RMT.conf_ch[mRMT_channel].conf1.tx_start = 1;
-
-	// -- Block until done
-	xSemaphoreTake(mTX_sem, portMAX_DELAY);
-
-	// -- When we get here, all of the data has been sent
-	vSemaphoreDelete(mTX_sem);
-	mTX_sem = NULL;
-
-	esp_intr_free(mRMT_intr_handle);
-    }
-
-    static void handleInterrupt(void *arg)
-    {
-	ClocklessController* c = static_cast<ClocklessController*>(arg);
-	rmt_channel_t rmt_channel = c->mRMT_channel;
-
-	portBASE_TYPE xHigherPriorityTaskWoken  = 0;
-
-	if (RMT.int_st.val & BIT(24 + rmt_channel)) { // check if ch*_tx_thr_event is set
-	    // -- Interrupt is telling us the RMT is ready for the next set of pulses
-	    c->copyToRmtBlock_half();
-	    RMT.int_clr.val |= BIT(24 + rmt_channel); // set ch*_tx_thr_event
-	}
-	else if ((RMT.int_st.val & BIT(rmt_channel * 3)) && c->mTX_sem) { // check if ch*_tx_end is set
-	    // -- Interrupt is telling us the RMT is done -- release the semaphore
-	    xSemaphoreGiveFromISR(c->mTX_sem, &xHigherPriorityTaskWoken);
-	    RMT.int_clr.val |= BIT(rmt_channel * 3); // set ch*_tx_end
-
-	    if (xHigherPriorityTaskWoken == pdTRUE) {
-		portYIELD_FROM_ISR();
-	    }
-	}
-    }
-
-    void copyToRmtBlock_half()
-    {
-	// -- Fill half of the RMT pulse buffer
-
-	//    The buffer holds 64 total pulse items, so this loop converts
-	//    as many pixels as can fit in half of the buffer (MAX_PULSES =
-	//    32 items). In our case, each pixel consists of three bytes,
-	//    each bit turns into one pulse item -- 24 items per pixel. So,
-	//    each half of the buffer can hold 1 and 1/3 of a pixel.
-
-	//    The member variable mCurPulse keeps track of which of the 64
-	//    items we are writing. During the first call to this method it
-	//    fills 0-31; in the second call it fills 32-63, and then wraps
-	//    back around to zero.
-
-	//    When we run out of pixel data, just fill the remaining items
-	//    with zero pulses.
-	
-	uint16_t pulse_count = 0;
-	uint32_t byteval = 0;
-	while (local_pixels->has(1) && pulse_count < MAX_PULSES) {
-	    // -- Cycle through the R,G, and B values in the right order
-	    switch (mRGB_channel) {
-	    case 0:
-		byteval = local_pixels->loadAndScale0();
-		mRGB_channel = 1;
-		break;
-	    case 1:
-		byteval = local_pixels->loadAndScale1();
-		mRGB_channel = 2;
-		break;
-	    case 2:
-		byteval = local_pixels->loadAndScale2();
-		local_pixels->advanceData();
-		local_pixels->stepDithering();
-		mRGB_channel = 0;
-		break;
-	    default:
-		break;
-	    }
-
-	    byteval <<= 24;
-	    // Shift bits out, MSB first, setting RMTMEM.chan[n].data32[x] to the rmt_item32_t value corresponding to the buffered bit value
-	    for (register uint32_t j = 0; j < 8; j++) {
-		uint32_t val = (byteval & 0x80000000L) ? mOne.val : mZero.val;
-		RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = val;
-		byteval <<= 1;
-		mCurPulse++;
-		pulse_count++;
-	    }
-	}
-	
-	// -- Fill the remaining items with zero pulses
-	while (pulse_count < MAX_PULSES) {
-	    RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = 0;
-	    mCurPulse++;
-	    pulse_count++;
-	}
-
-	// -- When we have filled the back half the buffer, reset the position to the first half
-	if (mCurPulse >= MAX_PULSES*2)
-	    mCurPulse = 0;
-    }
-};
-
-FASTLED_NAMESPACE_END
diff --git a/platforms/esp/32/led_sysdefs_esp32.h.orig b/platforms/esp/32/led_sysdefs_esp32.h.orig
deleted file mode 100644
index 4063cd542b..0000000000
--- a/platforms/esp/32/led_sysdefs_esp32.h.orig
+++ /dev/null
@@ -1,37 +0,0 @@
-#pragma once
-
-#ifndef ESP32
-#define ESP32
-#endif
-
-#define FASTLED_ESP32
-
-// Use system millis timer
-#define FASTLED_HAS_MILLIS
-
-typedef volatile uint32_t RoReg;
-typedef volatile uint32_t RwReg;
-typedef unsigned long prog_uint32_t;
-typedef bool boolean;
-
-// Default to NOT using PROGMEM here
-#ifndef FASTLED_USE_PROGMEM
-# define FASTLED_USE_PROGMEM 0
-#endif
-
-#ifndef FASTLED_ALLOW_INTERRUPTS
-<<<<<<< HEAD
-# define FASTLED_ALLOW_INTERRUPTS 0
-=======
-# define FASTLED_ALLOW_INTERRUPTS 1
->>>>>>> upstream/master
-# define INTERRUPT_THRESHOLD 0
-#endif
-
-#define NEED_CXX_BITS
-
-// These can be overridden
-#   define FASTLED_ESP32_RAW_PIN_ORDER
-
-// #define cli() os_intr_lock();
-// #define sei() os_intr_lock();

From 8e5b12e5921a2bd4fc1ac71e3e68a968c1efdcb4 Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Mon, 5 Mar 2018 14:43:14 -0500
Subject: [PATCH 023/204] Fixed synchronization

The previous checkin had bugs in the synchronization that caused problems in parallel mode when strips are different lengths.
---
 platforms/esp/32/clockless_esp32.h | 318 +++++++++++++++++------------
 1 file changed, 189 insertions(+), 129 deletions(-)

diff --git a/platforms/esp/32/clockless_esp32.h b/platforms/esp/32/clockless_esp32.h
index 8d29f283ea..436ed488db 100644
--- a/platforms/esp/32/clockless_esp32.h
+++ b/platforms/esp/32/clockless_esp32.h
@@ -137,6 +137,16 @@ __attribute__ ((always_inline)) inline static uint32_t __clock_cycles() {
 // -- Number of cycles to reset the strip
 #define RMT_RESET_DURATION NS_TO_CYCLES(50000)
 
+// -- Parallel or serial outut
+#ifndef FASTLED_RMT_SERIAL_OUTPUT
+#define FASTLED_RMT_SERIAL_OUTPUT false
+#endif
+
+// -- Core or custom driver
+#ifndef FASTLED_RMT_CORE_DRIVER
+#define FASTLED_RMT_CORE_DRIVER false
+#endif
+
 // -- Global counter of channels used
 //    Each FastLED.addLeds uses the next consecutive channel
 static uint8_t gNextChannel;
@@ -162,6 +172,7 @@ static xSemaphoreHandle gTX_sem = NULL;
 
 // -- Globals to keep track of how many controllers have started and
 //    how many have finished
+static int gNumControllers = 0;
 static int gNumShowing = 0;
 static int gNumDone = 0;
 
@@ -211,16 +222,26 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         mZero.level1 = 0;
         mZero.duration1 = TO_RMT_CYCLES(T2 + T3);
 
+        // -- First time though: initialize the globals
+        if (gNextChannel == 0) {
+            for (int i = 0; i < 8; i++) {
+                gControllers[8] = 0;
+                gRefillFunctions[8] = 0;
+            }
+        }
+
         // -- Sequentially assign RMT channels -- at most 8
         mRMT_channel =  (rmt_channel_t) gNextChannel++;
         if (mRMT_channel > 7) {
             assert("Only 8 RMT Channels are allowed");
         }
 
+        gNumControllers++;
+
         // -- Save this controller object, indexed by the RMT channel
         //    This allows us to get the pointer inside the interrupt handler
         gControllers[mRMT_channel] = this;
-	gRefillFunctions[mRMT_channel] = &refillDispatcher;
+        gRefillFunctions[mRMT_channel] = &refillDispatcher;
 
         ESP_LOGI("fastled", "RMT Channel Init: %d", mRMT_channel);
 
@@ -240,32 +261,32 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         // -- Apply the configuration
         rmt_config(&rmt_tx);
 
-	// -- Allocate space for a cope of the pixels
-	// mPixelSpace = malloc(sizeof(PixelController<RGB_ORDER>));
-
-#ifdef FASTLED_RMT_CORE_DRIVER
-	// -- Use the built-in RMT driver. The only reason to choose
-	//    this option is if you have other parts of your code that
-	//    are using the RMT peripheral, and you want them to
-	//    co-exist with FastLED.
-	rmt_driver_install(mRMT_channel, 0, 0);
-#else
-	// -- Use the custom RMT driver implemented here, which computes
-        //    pulses on demand to reduce memory requirements and latency.
-
-        // -- Set up the RMT to send 1/2 of the pulse buffer and then
-        //    generate an interrupt. When we get this interrupt we
-        //    fill the other half in preparation (kind of like double-buffering)
-        rmt_set_tx_thr_intr_en(mRMT_channel, true, MAX_PULSES);
-
-        // -- Turn on the interrupts
-        rmt_set_tx_intr_en(mRMT_channel, true);
-
-        // -- Semaphore to signal completion of each show()
-        //    Only needed for serial output
-        mTX_sem = xSemaphoreCreateBinary();
-
-#endif
+        // -- Allocate space for a cope of the pixels
+        // mPixelSpace = malloc(sizeof(PixelController<RGB_ORDER>));
+
+        if (FASTLED_RMT_CORE_DRIVER) {
+            // -- Use the built-in RMT driver. The only reason to choose
+            //    this option is if you have other parts of your code that
+            //    are using the RMT peripheral, and you want them to
+            //    co-exist with FastLED.
+            rmt_driver_install(mRMT_channel, 0, 0);
+        } else {
+            // -- Use the custom RMT driver implemented here, which computes
+            //    pulses on demand to reduce memory requirements and latency.
+
+            // -- Set up the RMT to send 1/2 of the pulse buffer and then
+            //    generate an interrupt. When we get this interrupt we
+            //    fill the other half in preparation (kind of like double-buffering)
+            rmt_set_tx_thr_intr_en(mRMT_channel, true, MAX_PULSES);
+
+            // -- Turn on the interrupts
+            rmt_set_tx_intr_en(mRMT_channel, true);
+
+            // -- Semaphore to signal completion of each show()
+            //    Only needed for serial output
+            mTX_sem = xSemaphoreCreateBinary();
+            xSemaphoreGive(mTX_sem);
+        }
     }
 
     virtual uint16_t getMaxRefreshRate() const { return 400; }
@@ -276,75 +297,98 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
     {
         mWait.wait();
 
-	gNumShowing++;
+        gNumShowing++;
 
-#ifdef FASTLED_RMT_CORE_DRIVER
-	// -- Fill a big buffer with all of the pixel data
-	mBufferSize = pixels.size() * 3 * 8;
-	computeAllRMTItems(pixels);
+        if (FASTLED_RMT_CORE_DRIVER) {
+            // === Built-in RMT driver ===
 
-	// -- Serial or parallel
-	bool wait_done;
-#ifdef FASTLED_RMT_SERIAL_OUTPUT
-	wait_done = true;
-#else
-	// -- Only wait on the last channel
-	wait_done = (gNumShowing == gNextChannel);
-#endif
+            //    Fill a big buffer with all of the pixel data
+            mBufferSize = pixels.size() * 3 * 8;
+            computeAllRMTItems(pixels);
 
-	// -- Send it all at once using the built-in RMT driver
-	rmt_write_items(mRMT_channel, mBuffer, mBufferSize, wait_done);
-	return;
-#endif
+            // -- Serial or parallel
+            bool wait_done;
 
-	// -- Create a global semaphore that signals when all the
-	//    controllers are done (only needed for parallel output).
-	if (gTX_sem == NULL)
-	    gTX_sem = xSemaphoreCreateBinary();
-
-        // -- Initialize the local state, save a pointer to the pixel
-        //    data. We need to make a copy because pixels is a local
-        //    variable in the calling function, and this data structure
-        //    needs to outlive this call to showPixels.
-        // mPixels = new (mPixelSpace) PixelController<RGB_ORDER>(pixels);
-	if (mPixels != NULL) delete mPixels;
-	mPixels = new PixelController<RGB_ORDER>(pixels);
-        mCurPulse = 0;
-        mRGB_channel = 0;
+            if (FASTLED_RMT_SERIAL_OUTPUT) {
+                wait_done = true;
+            } else {
+                // -- Parallel: only wait on the last channel
+                wait_done = (gNumShowing == gNumControllers);
+            }
+
+            // -- Send it all at once using the built-in RMT driver
+            rmt_write_items(mRMT_channel, mBuffer, mBufferSize, wait_done);
+
+        } else {
+            // === Custom RMT driver ===
+
+            if (FASTLED_RMT_SERIAL_OUTPUT) {
+                // -- Local semaphore just for this controller
+                xSemaphoreTake(mTX_sem, portMAX_DELAY);
+            } else {
+                // -- Create a global semaphore that signals when all the
+                //    controllers are done
+                if (gTX_sem == NULL) {
+                    gTX_sem = xSemaphoreCreateBinary();
+                    xSemaphoreGive(gTX_sem);
+                }
+                if (gNumShowing == 1) {
+                    xSemaphoreTake(gTX_sem, portMAX_DELAY);
+                }
+            }
+
+            // -- Initialize the local state, save a pointer to the pixel
+            //    data. We need to make a copy because pixels is a local
+            //    variable in the calling function, and this data structure
+            //    needs to outlive this call to showPixels.
+            // mPixels = new (mPixelSpace) PixelController<RGB_ORDER>(pixels);
+            if (mPixels != NULL) 
+                delete mPixels;
+            mPixels = new PixelController<RGB_ORDER>(pixels);
+            mCurPulse = 0;
+            mRGB_channel = 0;
+
+            // -- Fill both halves of the buffer
+            fillHalfRMTBuffer();
+            fillHalfRMTBuffer();
+
+            // -- Allocate the interrupt if we have not done so yet. This
+            //    interrupt handler must work for all different kinds of
+            //    strips, so it delegates to the refill function for each
+            //    specific instantiation of ClocklessController.
+            if (gRMT_intr_handle == NULL)
+                esp_intr_alloc(ETS_RMT_INTR_SOURCE, 0, interruptHandler, 0, &gRMT_intr_handle);
+
+            // -- Turn on the interrupts
+            rmt_set_tx_intr_en(mRMT_channel, true);
+
+            // -- Start the RMT TX operation
+            rmt_tx_start(mRMT_channel, true);
+
+            if (FASTLED_RMT_SERIAL_OUTPUT) {
+                // -- Block until this controller is done
+                //    All of the data transmission happens while we wait here
+                xSemaphoreTake(mTX_sem, portMAX_DELAY);
+                xSemaphoreGive(mTX_sem);
+        
+                // -- Turn off the interrupts
+                rmt_set_tx_intr_en(mRMT_channel, false);
+            } else {
+                // -- If this is the last controller, then this is the place to
+                //    wait for all the data to be sent.
+                if (gNumShowing == gNumControllers) {
+                    xSemaphoreTake(gTX_sem, portMAX_DELAY);
+                    xSemaphoreGive(gTX_sem);
+                }
+            }
+        }
+
+        // -- All controllers are done: reset the counters
+        if (gNumShowing == gNumControllers) {
+            gNumDone = 0;
+            gNumShowing = 0;
+        }
 
-        // -- Fill both halves of the buffer
-        fillHalfRMTBuffer();
-        fillHalfRMTBuffer();
-
-        // -- Allocate the interrupt if we have not done so yet. This
-        //    interrupt handler must work for all different kinds of
-        //    strips, so it delegates to the refill function for each
-        //    specific instantiation of ClocklessController.
-        if (gRMT_intr_handle == NULL)
-            esp_intr_alloc(ETS_RMT_INTR_SOURCE, 0, interruptHandler, 0, &gRMT_intr_handle);
-
-        // -- Turn on the interrupts
-        rmt_set_tx_intr_en(mRMT_channel, true);
-
-        // -- Start the RMT TX operation
-        rmt_tx_start(mRMT_channel, true);
-
-#ifdef FASTLED_RMT_SERIAL_OUTPUT
-        // -- Block until this controller is done
-        //    All of the data transmission happens while we wait here
-	xSemaphoreTake(mTX_sem, portMAX_DELAY);
-	
-        // -- Turn off the interrupts
-        rmt_set_tx_intr_en(mRMT_channel, false);
-#else
-	// -- If this is the last controller, then this is the place to
-        //    wait for all the data to be sent.
-	if (gNumShowing == gNextChannel) {
-	    xSemaphoreTake(gTX_sem, portMAX_DELAY);
-	    gNumDone = 0;
-	    gNumShowing = 0;
-	}
-#endif
         mWait.mark();
     }
 
@@ -356,28 +400,36 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         uint8_t channel;
         portBASE_TYPE HPTaskAwoken = 0;
 
-	for (channel = 0; channel < 8; channel++) {
-	    int tx_done_bit = channel * 3;
-	    int tx_next_bit = channel + 24;
-	    if (intr_st & BIT(tx_done_bit)) {
-		// -- Transmission is complete, signal the semaphore that show() is finished
-		ClocklessController * controller = static_cast<ClocklessController*>(gControllers[channel]);
-		gNumDone++;
-#ifdef FASTLED_RMT_SERIAL_OUTPUT
-		xSemaphoreGiveFromISR(controller->mTX_sem, &HPTaskAwoken);
-#else
-		if (gNumDone == gNextChannel)
-		    xSemaphoreGiveFromISR(gTX_sem, &HPTaskAwoken);
-#endif
-		if(HPTaskAwoken == pdTRUE) portYIELD_FROM_ISR();
-		RMT.int_clr.val = BIT(tx_done_bit);
-	    }
-	    if (intr_st & BIT(tx_next_bit)) {
-		// -- Look up the appropriate refill dispatcher and call it
-		(gRefillFunctions[channel])(channel);
-		RMT.int_clr.val = BIT(tx_next_bit);
-	    }
-	}
+        for (channel = 0; channel < 8; channel++) {
+            int tx_done_bit = channel * 3;
+            int tx_next_bit = channel + 24;
+
+            if (intr_st & BIT(tx_done_bit)) {
+                // -- Transmission is complete on this channel
+                RMT.int_clr.val |= BIT(tx_done_bit);
+                gNumDone++;
+
+                if (FASTLED_RMT_SERIAL_OUTPUT) {
+                    // -- Serial mode: unblock the call to showPixels for this strip
+                    ClocklessController * controller = static_cast<ClocklessController*>(gControllers[channel]);
+                    xSemaphoreGiveFromISR(controller->mTX_sem, &HPTaskAwoken);
+                } else {
+                    // -- Parallel mode: unblock the global semaphore when all strips are done
+                    if (gNumDone == gNumControllers)
+                        xSemaphoreGiveFromISR(gTX_sem, &HPTaskAwoken);
+                }
+
+                if(HPTaskAwoken == pdTRUE) portYIELD_FROM_ISR();
+            }
+
+            if (intr_st & BIT(tx_next_bit)) {
+                // -- More to send on this channel: call the appropriate refill function
+                //    Note that we refill the half of the buffer that we just finished,
+                //    allowing the other half to proceed.
+                RMT.int_clr.val |= BIT(tx_next_bit);
+                (gRefillFunctions[channel])(channel);
+            }
+        }
     }
 
     /* Refill the RMT buffer
@@ -387,11 +439,11 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
      */
     static IRAM_ATTR void refillDispatcher(uint8_t channel)
     {
-	ClocklessController * controller = static_cast<ClocklessController*>(gControllers[channel]);
-	controller->fillHalfRMTBuffer();
+        ClocklessController * controller = static_cast<ClocklessController*>(gControllers[channel]);
+        controller->fillHalfRMTBuffer();
     }
 
-    void fillHalfRMTBuffer()
+    IRAM_ATTR void fillHalfRMTBuffer()
     {
         // -- Fill half of the RMT pulse buffer
 
@@ -409,10 +461,18 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         //    When we run out of pixel data, just fill the remaining items
         //    with zero pulses.
 
-	RMT.apb_conf.fifo_mask = RMT_DATA_MODE_MEM;
         uint16_t pulse_count = 0; // Ranges from 0-31 (half a buffer)
         uint32_t byteval = 0;
-        while (mPixels->has(1) && pulse_count < MAX_PULSES) {
+        uint32_t one_val = mOne.val;
+        uint32_t zero_val = mZero.val;
+        bool done_strip = false;
+
+        while (pulse_count < MAX_PULSES) {
+            if (! mPixels->has(1)) {
+                done_strip = true;
+                break;
+            }
+
             // -- Cycle through the R,G, and B values in the right order
             switch (mRGB_channel) {
             case 0:
@@ -437,7 +497,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
             // Shift bits out, MSB first, setting RMTMEM.chan[n].data32[x] to the 
             // rmt_item32_t value corresponding to the buffered bit value
             for (register uint32_t j = 0; j < 8; j++) {
-                uint32_t val = (byteval & 0x80000000L) ? mOne.val : mZero.val;
+                uint32_t val = (byteval & 0x80000000L) ? one_val : zero_val;
                 RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = val;
                 byteval <<= 1;
                 mCurPulse++;
@@ -447,7 +507,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         
         // -- At the end, stretch out the last pulse to signal to the strip
         //    that we're done
-        if ( ! mPixels->has(1)) {
+        if (done_strip) {
             RMTMEM.chan[mRMT_channel].data32[mCurPulse-1].duration1 = RMT_RESET_DURATION;
 
             // -- And fill the remaining items with zero pulses. The zero values triggers
@@ -466,13 +526,13 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
     
     void computeAllRMTItems(PixelController<RGB_ORDER> & pixels)
     {
-	// -- Compute the pulse values for the whole strip at once.
-	//    Requires a large buffer
+        // -- Compute the pulse values for the whole strip at once.
+        //    Requires a large buffer
 
-	// TODO: need a specific number here
-	if (mBuffer == NULL) {
-	    mBuffer = (rmt_item32_t *) calloc( mBufferSize, sizeof(rmt_item32_t));
-	}
+        // TODO: need a specific number here
+        if (mBuffer == NULL) {
+            mBuffer = (rmt_item32_t *) calloc( mBufferSize, sizeof(rmt_item32_t));
+        }
 
         mCurPulse = 0;
         mRGB_channel = 0;
@@ -502,14 +562,14 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
             // Shift bits out, MSB first, setting RMTMEM.chan[n].data32[x] to the 
             // rmt_item32_t value corresponding to the buffered bit value
             for (register uint32_t j = 0; j < 8; j++) {
-		mBuffer[mCurPulse] = (byteval & 0x80000000L) ? mOne : mZero;
+                mBuffer[mCurPulse] = (byteval & 0x80000000L) ? mOne : mZero;
                 byteval <<= 1;
                 mCurPulse++;
             }
         }
 
-	mBuffer[mCurPulse-1].duration1 = RMT_RESET_DURATION;
-	assert(mCurPulse == mBufferSize);
+        mBuffer[mCurPulse-1].duration1 = RMT_RESET_DURATION;
+        assert(mCurPulse == mBufferSize);
     }
 };
 

From 38dffc4b80f737da9613ee495894146f4f6df856 Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Thu, 5 Apr 2018 23:23:46 -0400
Subject: [PATCH 024/204] Fixed a stupid bug

Made the code bullet-proof in a few ways, but most importantly fixed a terrible integer underflow bug in the code that fills the RMT buffer.
---
 platforms/esp/32/clockless_esp32.h  |  54 +--
 platforms/esp/32/clockless_esp32.h~ | 582 ++++++++++++++++++++++++++++
 2 files changed, 609 insertions(+), 27 deletions(-)
 create mode 100644 platforms/esp/32/clockless_esp32.h~

diff --git a/platforms/esp/32/clockless_esp32.h b/platforms/esp/32/clockless_esp32.h
index 436ed488db..e3fdeb944e 100644
--- a/platforms/esp/32/clockless_esp32.h
+++ b/platforms/esp/32/clockless_esp32.h
@@ -149,7 +149,7 @@ __attribute__ ((always_inline)) inline static uint32_t __clock_cycles() {
 
 // -- Global counter of channels used
 //    Each FastLED.addLeds uses the next consecutive channel
-static uint8_t gNextChannel;
+static uint8_t gNextChannel = 0;
 
 // -- Global information for the interrupt handler
 //    Information is indexed by the RMT channel, so we can get it 
@@ -332,9 +332,6 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
                     gTX_sem = xSemaphoreCreateBinary();
                     xSemaphoreGive(gTX_sem);
                 }
-                if (gNumShowing == 1) {
-                    xSemaphoreTake(gTX_sem, portMAX_DELAY);
-                }
             }
 
             // -- Initialize the local state, save a pointer to the pixel
@@ -404,30 +401,32 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
             int tx_done_bit = channel * 3;
             int tx_next_bit = channel + 24;
 
-            if (intr_st & BIT(tx_done_bit)) {
-                // -- Transmission is complete on this channel
-                RMT.int_clr.val |= BIT(tx_done_bit);
-                gNumDone++;
-
-                if (FASTLED_RMT_SERIAL_OUTPUT) {
-                    // -- Serial mode: unblock the call to showPixels for this strip
-                    ClocklessController * controller = static_cast<ClocklessController*>(gControllers[channel]);
-                    xSemaphoreGiveFromISR(controller->mTX_sem, &HPTaskAwoken);
-                } else {
-                    // -- Parallel mode: unblock the global semaphore when all strips are done
-                    if (gNumDone == gNumControllers)
-                        xSemaphoreGiveFromISR(gTX_sem, &HPTaskAwoken);
+            if (gRefillFunctions[channel]) {
+                if (intr_st & BIT(tx_done_bit)) {
+                    // -- Transmission is complete on this channel
+                    RMT.int_clr.val |= BIT(tx_done_bit);
+                    gNumDone++;
+
+                    if (FASTLED_RMT_SERIAL_OUTPUT) {
+                        // -- Serial mode: unblock the call to showPixels for this strip
+                        ClocklessController * controller = static_cast<ClocklessController*>(gControllers[channel]);
+                        xSemaphoreGiveFromISR(controller->mTX_sem, &HPTaskAwoken);
+                    } else {
+                        // -- Parallel mode: unblock the global semaphore when all strips are done
+                        if (gNumDone == gNumControllers)
+                            xSemaphoreGiveFromISR(gTX_sem, &HPTaskAwoken);
+                    }
+
+                    if(HPTaskAwoken == pdTRUE) portYIELD_FROM_ISR();
                 }
 
-                if(HPTaskAwoken == pdTRUE) portYIELD_FROM_ISR();
-            }
-
-            if (intr_st & BIT(tx_next_bit)) {
-                // -- More to send on this channel: call the appropriate refill function
-                //    Note that we refill the half of the buffer that we just finished,
-                //    allowing the other half to proceed.
-                RMT.int_clr.val |= BIT(tx_next_bit);
-                (gRefillFunctions[channel])(channel);
+                if (intr_st & BIT(tx_next_bit)) {
+                    // -- More to send on this channel: call the appropriate refill function
+                    //    Note that we refill the half of the buffer that we just finished,
+                    //    allowing the other half to proceed.
+                    RMT.int_clr.val |= BIT(tx_next_bit);
+                    (gRefillFunctions[channel])(channel);
+                }
             }
         }
     }
@@ -508,7 +507,8 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         // -- At the end, stretch out the last pulse to signal to the strip
         //    that we're done
         if (done_strip) {
-            RMTMEM.chan[mRMT_channel].data32[mCurPulse-1].duration1 = RMT_RESET_DURATION;
+	    // AAAAAGGGG Integer underflow!!!
+            // RMTMEM.chan[mRMT_channel].data32[mCurPulse-1].duration1 = RMT_RESET_DURATION;
 
             // -- And fill the remaining items with zero pulses. The zero values triggers
             //    the tx_done interrupt.
diff --git a/platforms/esp/32/clockless_esp32.h~ b/platforms/esp/32/clockless_esp32.h~
new file mode 100644
index 0000000000..70533b9f5e
--- /dev/null
+++ b/platforms/esp/32/clockless_esp32.h~
@@ -0,0 +1,582 @@
+/*
+ * Integration into FastLED ClocklessController 2017 Thomas Basler
+ *
+ * Modifications Copyright (c) 2017 Martin F. Falatic
+ *
+ * Modifications Copyright (c) 2018 Samuel Z. Guyer
+ *
+ * ESP32 support is provided using the RMT peripheral device -- a unit
+ * on the chip designed specifically for generating (and receiving)
+ * precisely-timed digital signals. Nominally for use in infrared
+ * remote controls, we use it to generate the signals for clockless
+ * LED strips. The main advantage of using the RMT device is that,
+ * once programmed, it generates the signal asynchronously, allowing
+ * the CPU to continue executing other code. It is also not vulnerable
+ * to interrupts or other timing problems that could disrupt the signal.
+ *
+ * The implementation strategy is borrowed from previous work and from
+ * the RMT support built into the ESP32 IDF. The RMT device has 8
+ * channels, which can be programmed independently with sequences of
+ * high/low bits. Memory for each channel is limited, however, so in
+ * order to send a long sequence of bits, we need to continuously
+ * refill the buffer until all the data is sent. To do this, we fill
+ * half the buffer and then set an interrupt to go off when that half
+ * is sent. Then we refill that half while the second half is being
+ * sent. This strategy effectively overlaps computation (by the CPU)
+ * and communication (by the RMT).
+ *
+ * PARALLEL vs SERIAL
+ *
+ * By default, this driver sends the data for all LED strips in
+ * parallel. We get parallelism essentially for free because the RMT
+ * is an independent processing unit. It only interrupts the CPU when
+ * it needs more data to send, and the CPU is fast enough to keep all
+ * 8 channels filled.
+ *
+ * However, there may be cases where you want serial output -- that
+ * is, you want to send the data for each strip before moving on to
+ * the next one. The performance will be much lower, limiting the
+ * framerate. To force serial output, add this directive before you
+ * include FastLED.h:
+ *
+ *      #define FASTLED_RMT_SERIAL_OUTPUT
+ *
+ * OTHER RMT APPLICATIONS
+ *
+ * The default FastLED driver takes over control of the RMT
+ * interrupts, making it hard to use the RMT device for other
+ * (non-FastLED) purposes. You can change it's behavior to use the ESP
+ * core driver instead, allowing other RMT applications to
+ * co-exist. To switch to this mode, add the following directive
+ * before you include FastLED.h:
+ *
+ *      #define FASTLED_RMT_CORE_DRIVER
+ *
+ * There is a performance penalty for using this mode. We need to
+ * compute the RMT signal for the entire LED strip ahead of time,
+ * rather than overlapping it with communication. We also need a large
+ * buffer to hold the signal specification. Each bit of pixel data is
+ * represented by a 32-bit pulse specification, so it is a 32X blow-up
+ * in memory use.
+ *
+ * This driver assigns channels to LED strips sequentially starting at
+ * zero. So, for other RMT applications make sure to choose a channel
+ * at the higher end to avoid collisions.
+ *
+ * Based on public domain code created 19 Nov 2016 by Chris Osborn <fozztexx@fozztexx.com>
+ * http://insentricity.com *
+ *
+ */
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#pragma once
+
+FASTLED_NAMESPACE_BEGIN
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "esp32-hal.h"
+#include "esp_intr.h"
+#include "driver/gpio.h"
+#include "driver/rmt.h"
+#include "driver/periph_ctrl.h"
+#include "freertos/semphr.h"
+#include "soc/rmt_struct.h"
+
+#include "esp_log.h"
+
+#ifdef __cplusplus
+}
+#endif
+
+__attribute__ ((always_inline)) inline static uint32_t __clock_cycles() {
+  uint32_t cyc;
+  __asm__ __volatile__ ("rsr %0,ccount":"=a" (cyc));
+  return cyc;
+}
+
+#define FASTLED_HAS_CLOCKLESS 1
+
+// -- Configuration constants
+#define DIVIDER             2 /* 4, 8 still seem to work, but timings become marginal */
+#define MAX_PULSES         32 /* A channel has a 64 "pulse" buffer - we use half per pass */
+
+// -- Convert ESP32 cycles back into nanoseconds
+#define ESPCLKS_TO_NS(_CLKS) (((long)(_CLKS) * 1000L) / F_CPU_MHZ)
+
+// -- Convert nanoseconds into RMT cycles
+#define F_CPU_RMT       (  80000000L)
+#define NS_PER_SEC      (1000000000L)
+#define CYCLES_PER_SEC  (F_CPU_RMT/DIVIDER)
+#define NS_PER_CYCLE    ( NS_PER_SEC / CYCLES_PER_SEC )
+#define NS_TO_CYCLES(n) ( (n) / NS_PER_CYCLE )
+
+// -- Convert ESP32 cycles to RMT cycles
+#define TO_RMT_CYCLES(_CLKS) NS_TO_CYCLES(ESPCLKS_TO_NS(_CLKS))    
+
+// -- Number of cycles to reset the strip
+#define RMT_RESET_DURATION NS_TO_CYCLES(50000)
+
+// -- Parallel or serial outut
+#ifndef FASTLED_RMT_SERIAL_OUTPUT
+#define FASTLED_RMT_SERIAL_OUTPUT false
+#endif
+
+// -- Core or custom driver
+#ifndef FASTLED_RMT_CORE_DRIVER
+#define FASTLED_RMT_CORE_DRIVER false
+#endif
+
+// -- Global counter of channels used
+//    Each FastLED.addLeds uses the next consecutive channel
+static uint8_t gNextChannel = 1;
+
+// -- Global information for the interrupt handler
+//    Information is indexed by the RMT channel, so we can get it 
+//    when we are in the interrupt handler.
+static CLEDController * gControllers[8];
+
+typedef void (*RefillDispatcher_t)(uint8_t);
+static RefillDispatcher_t gRefillFunctions[8];
+
+static intr_handle_t gRMT_intr_handle;
+
+// -- Parallelize the output This works because most of the work of
+//    pumping out the bits is handled by the RMT peripheral, which we
+//    keep filled by responding to interrupts. All we need to do is
+//    detect when all of the channels have finished.
+
+// -- Global semaphore for the whole show process
+//    Only used in parallel output, to signal when all controllers are done
+static xSemaphoreHandle gTX_sem = NULL;
+
+// -- Globals to keep track of how many controllers have started and
+//    how many have finished
+static int gNumControllers = 0;
+static int gNumShowing = 0;
+static int gNumDone = 0;
+
+template <int DATA_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = RGB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 5>
+class ClocklessController : public CPixelLEDController<RGB_ORDER>
+{
+    // -- RMT has 8 channels, numbered 0 to 7
+    rmt_channel_t mRMT_channel;
+
+    // -- Semaphore to signal when show() is done
+    //    Per-controller, so only needed for serial output
+    xSemaphoreHandle mTX_sem = NULL;
+
+    // -- Timing values for zero and one bits
+    rmt_item32_t mZero;
+    rmt_item32_t mOne;
+
+    // -- State information for keeping track of where we are in the pixel data
+    PixelController<RGB_ORDER> * mPixels = NULL;
+    void * mPixelSpace = NULL;
+    uint8_t mRGB_channel;
+    uint16_t mCurPulse;
+    CMinWait<WAIT_TIME> mWait;
+
+    // -- Buffer to hold all of the pulses. For the version that uses
+    //    the RMT driver built into the ESP core.
+    rmt_item32_t * mBuffer;
+    uint16_t mBufferSize;
+
+public:
+
+    virtual void init()
+    {
+        // -- Precompute rmt items corresponding to a zero bit and a one bit
+        //    according to the timing values given in the template instantiation
+        // T1H
+        mOne.level0 = 1;
+        mOne.duration0 = TO_RMT_CYCLES(T1+T2);
+        // T1L
+        mOne.level1 = 0;
+        mOne.duration1 = TO_RMT_CYCLES(T3);
+
+        // T0H
+        mZero.level0 = 1;
+        mZero.duration0 = TO_RMT_CYCLES(T1);
+        // T0L
+        mZero.level1 = 0;
+        mZero.duration1 = TO_RMT_CYCLES(T2 + T3);
+
+        // -- First time though: initialize the globals
+        if (gNextChannel == 0) {
+            for (int i = 0; i < 8; i++) {
+                gControllers[8] = 0;
+                gRefillFunctions[8] = 0;
+            }
+        }
+
+        // -- Sequentially assign RMT channels -- at most 8
+        mRMT_channel =  (rmt_channel_t) gNextChannel++;
+        if (mRMT_channel > 7) {
+            assert("Only 8 RMT Channels are allowed");
+        }
+
+        gNumControllers++;
+
+        // -- Save this controller object, indexed by the RMT channel
+        //    This allows us to get the pointer inside the interrupt handler
+        gControllers[mRMT_channel] = this;
+        gRefillFunctions[mRMT_channel] = &refillDispatcher;
+
+        ESP_LOGI("fastled", "RMT Channel Init: %d", mRMT_channel);
+
+        // -- RMT configuration for transmission
+        rmt_config_t rmt_tx;
+        rmt_tx.channel = mRMT_channel;
+        rmt_tx.rmt_mode = RMT_MODE_TX;
+        rmt_tx.gpio_num = gpio_num_t(DATA_PIN);
+        rmt_tx.mem_block_num = 1;
+        rmt_tx.clk_div = DIVIDER;
+        rmt_tx.tx_config.loop_en = false;
+        rmt_tx.tx_config.carrier_level = RMT_CARRIER_LEVEL_LOW;
+        rmt_tx.tx_config.carrier_en = false;
+        rmt_tx.tx_config.idle_level = RMT_IDLE_LEVEL_LOW;
+        rmt_tx.tx_config.idle_output_en = true;
+        
+        // -- Apply the configuration
+        rmt_config(&rmt_tx);
+
+        // -- Allocate space for a cope of the pixels
+        // mPixelSpace = malloc(sizeof(PixelController<RGB_ORDER>));
+
+        if (FASTLED_RMT_CORE_DRIVER) {
+            // -- Use the built-in RMT driver. The only reason to choose
+            //    this option is if you have other parts of your code that
+            //    are using the RMT peripheral, and you want them to
+            //    co-exist with FastLED.
+            rmt_driver_install(mRMT_channel, 0, 0);
+        } else {
+            // -- Use the custom RMT driver implemented here, which computes
+            //    pulses on demand to reduce memory requirements and latency.
+
+            // -- Set up the RMT to send 1/2 of the pulse buffer and then
+            //    generate an interrupt. When we get this interrupt we
+            //    fill the other half in preparation (kind of like double-buffering)
+            rmt_set_tx_thr_intr_en(mRMT_channel, true, MAX_PULSES);
+
+            // -- Turn on the interrupts
+            rmt_set_tx_intr_en(mRMT_channel, true);
+
+            // -- Semaphore to signal completion of each show()
+            //    Only needed for serial output
+            mTX_sem = xSemaphoreCreateBinary();
+            xSemaphoreGive(mTX_sem);
+        }
+    }
+
+    virtual uint16_t getMaxRefreshRate() const { return 400; }
+
+protected:
+
+    virtual void showPixels(PixelController<RGB_ORDER> & pixels)
+    {
+        mWait.wait();
+
+        gNumShowing++;
+
+        if (FASTLED_RMT_CORE_DRIVER) {
+            // === Built-in RMT driver ===
+
+            //    Fill a big buffer with all of the pixel data
+            mBufferSize = pixels.size() * 3 * 8;
+            computeAllRMTItems(pixels);
+
+            // -- Serial or parallel
+            bool wait_done;
+
+            if (FASTLED_RMT_SERIAL_OUTPUT) {
+                wait_done = true;
+            } else {
+                // -- Parallel: only wait on the last channel
+                wait_done = (gNumShowing == gNumControllers);
+            }
+
+            // -- Send it all at once using the built-in RMT driver
+            rmt_write_items(mRMT_channel, mBuffer, mBufferSize, wait_done);
+
+        } else {
+            // === Custom RMT driver ===
+
+            if (FASTLED_RMT_SERIAL_OUTPUT) {
+                // -- Local semaphore just for this controller
+                xSemaphoreTake(mTX_sem, portMAX_DELAY);
+            } else {
+                // -- Create a global semaphore that signals when all the
+                //    controllers are done
+                if (gTX_sem == NULL) {
+                    gTX_sem = xSemaphoreCreateBinary();
+                    xSemaphoreGive(gTX_sem);
+                }
+                if (gNumShowing == 1) {
+                    xSemaphoreGive(gTX_sem);
+                }
+            }
+
+            // -- Initialize the local state, save a pointer to the pixel
+            //    data. We need to make a copy because pixels is a local
+            //    variable in the calling function, and this data structure
+            //    needs to outlive this call to showPixels.
+            // mPixels = new (mPixelSpace) PixelController<RGB_ORDER>(pixels);
+            if (mPixels != NULL) 
+                delete mPixels;
+            mPixels = new PixelController<RGB_ORDER>(pixels);
+            mCurPulse = 0;
+            mRGB_channel = 0;
+
+            // -- Fill both halves of the buffer
+            fillHalfRMTBuffer();
+            fillHalfRMTBuffer();
+
+            // -- Allocate the interrupt if we have not done so yet. This
+            //    interrupt handler must work for all different kinds of
+            //    strips, so it delegates to the refill function for each
+            //    specific instantiation of ClocklessController.
+            if (gRMT_intr_handle == NULL)
+                esp_intr_alloc(ETS_RMT_INTR_SOURCE, 0, interruptHandler, 0, &gRMT_intr_handle);
+
+            // -- Turn on the interrupts
+            rmt_set_tx_intr_en(mRMT_channel, true);
+
+            // -- Start the RMT TX operation
+            rmt_tx_start(mRMT_channel, true);
+
+            if (FASTLED_RMT_SERIAL_OUTPUT) {
+                // -- Block until this controller is done
+                //    All of the data transmission happens while we wait here
+                xSemaphoreTake(mTX_sem, portMAX_DELAY);
+                xSemaphoreGive(mTX_sem);
+        
+                // -- Turn off the interrupts
+                rmt_set_tx_intr_en(mRMT_channel, false);
+            } else {
+                // -- If this is the last controller, then this is the place to
+                //    wait for all the data to be sent.
+                if (gNumShowing == gNumControllers) {
+                    xSemaphoreTake(gTX_sem, portMAX_DELAY);
+                    xSemaphoreGive(gTX_sem);
+                }
+            }
+        }
+
+        // -- All controllers are done: reset the counters
+        if (gNumShowing == gNumControllers) {
+            gNumDone = 0;
+            gNumShowing = 0;
+        }
+
+        //mWait.mark();
+    }
+
+    static IRAM_ATTR void interruptHandler(void *arg)
+    {
+        // -- The basic structure of this code is borrowed from the
+        //    interrupt handler in esp-idf/components/driver/rmt.c
+        uint32_t intr_st = RMT.int_st.val;
+        uint8_t channel;
+        portBASE_TYPE HPTaskAwoken = 0;
+
+        for (channel = 0; channel < 8; channel++) {
+            int tx_done_bit = channel * 3;
+            int tx_next_bit = channel + 24;
+
+            if (intr_st & BIT(tx_done_bit)) {
+                // -- Transmission is complete on this channel
+                RMT.int_clr.val |= BIT(tx_done_bit);
+		if (gRefillFunctions[channel]) {
+		    gNumDone++;
+
+		    if (FASTLED_RMT_SERIAL_OUTPUT) {
+			// -- Serial mode: unblock the call to showPixels for this strip
+			ClocklessController * controller = static_cast<ClocklessController*>(gControllers[channel]);
+			xSemaphoreGiveFromISR(controller->mTX_sem, &HPTaskAwoken);
+		    } else {
+			// -- Parallel mode: unblock the global semaphore when all strips are done
+			if (gNumDone == gNumControllers)
+			    xSemaphoreGiveFromISR(gTX_sem, &HPTaskAwoken);
+		    }
+		}
+
+                if(HPTaskAwoken == pdTRUE) portYIELD_FROM_ISR();
+            }
+
+            if (intr_st & BIT(tx_next_bit)) {
+                // -- More to send on this channel: call the appropriate refill function
+                //    Note that we refill the half of the buffer that we just finished,
+                //    allowing the other half to proceed.
+                RMT.int_clr.val |= BIT(tx_next_bit);
+                (gRefillFunctions[channel])(channel);
+            }
+	}
+    }
+
+    /* Refill the RMT buffer
+     * We need this dispatch function because there will be one for each instantiation of this template
+     * class -- in particular, one for each possible RGB_ORDER. We need to dispatch to the correct one
+     * so that fillHalfRMTBuffer will use the right ordering for this strip.
+     */
+    static IRAM_ATTR void refillDispatcher(uint8_t channel)
+    {
+        ClocklessController * controller = static_cast<ClocklessController*>(gControllers[channel]);
+        controller->fillHalfRMTBuffer();
+    }
+
+    IRAM_ATTR void fillHalfRMTBuffer()
+    {
+        // -- Fill half of the RMT pulse buffer
+
+        //    The buffer holds 64 total pulse items, so this loop converts
+        //    as many pixels as can fit in half of the buffer (MAX_PULSES =
+        //    32 items). In our case, each pixel consists of three bytes,
+        //    each bit turns into one pulse item -- 24 items per pixel. So,
+        //    each half of the buffer can hold 1 and 1/3 of a pixel.
+
+        //    The member variable mCurPulse keeps track of which of the 64
+        //    items we are writing. During the first call to this method it
+        //    fills 0-31; in the second call it fills 32-63, and then wraps
+        //    back around to zero.
+
+        //    When we run out of pixel data, just fill the remaining items
+        //    with zero pulses.
+
+        uint16_t pulse_count = 0; // Ranges from 0-31 (half a buffer)
+        uint32_t byteval = 0;
+        uint32_t one_val = mOne.val;
+        uint32_t zero_val = mZero.val;
+        bool done_strip = false;
+
+	if (gNumDone == gNumControllers) {
+	    if (mPixels->has(1)) ;
+	}
+
+        while (pulse_count < MAX_PULSES) {
+            if (! mPixels->has(1)) {
+                done_strip = true;
+                break;
+            }
+
+            // -- Cycle through the R,G, and B values in the right order
+            switch (mRGB_channel) {
+            case 0:
+                byteval = mPixels->loadAndScale0();
+                mRGB_channel = 1;
+                break;
+            case 1:
+                byteval = mPixels->loadAndScale1();
+                mRGB_channel = 2;
+                break;
+            case 2:
+                byteval = mPixels->loadAndScale2();
+                mPixels->advanceData();
+                mPixels->stepDithering();
+                mRGB_channel = 0;
+                break;
+            default:
+                break;
+            }
+
+            byteval <<= 24;
+            // Shift bits out, MSB first, setting RMTMEM.chan[n].data32[x] to the 
+            // rmt_item32_t value corresponding to the buffered bit value
+            for (register uint32_t j = 0; j < 8; j++) {
+                uint32_t val = (byteval & 0x80000000L) ? one_val : zero_val;
+                RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = val;
+                byteval <<= 1;
+                mCurPulse++;
+                pulse_count++;
+            }
+        }
+        
+        // -- At the end, stretch out the last pulse to signal to the strip
+        //    that we're done
+        if (done_strip) {
+            // RMTMEM.chan[mRMT_channel].data32[mCurPulse-1].duration1 = RMT_RESET_DURATION;
+
+            // -- And fill the remaining items with zero pulses. The zero values triggers
+            //    the tx_done interrupt.
+            while (pulse_count < MAX_PULSES) {
+                RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = 0;
+                mCurPulse++;
+                pulse_count++;
+            }
+        }
+
+        // -- When we have filled the back half the buffer, reset the position to the first half
+        if (mCurPulse >= MAX_PULSES*2)
+            mCurPulse = 0;
+    }
+    
+    void computeAllRMTItems(PixelController<RGB_ORDER> & pixels)
+    {
+        // -- Compute the pulse values for the whole strip at once.
+        //    Requires a large buffer
+
+        // TODO: need a specific number here
+        if (mBuffer == NULL) {
+            mBuffer = (rmt_item32_t *) calloc( mBufferSize, sizeof(rmt_item32_t));
+        }
+
+        mCurPulse = 0;
+        mRGB_channel = 0;
+        uint32_t byteval = 0;
+        while (pixels.has(1)) {
+            // -- Cycle through the R,G, and B values in the right order
+            switch (mRGB_channel) {
+            case 0:
+                byteval = pixels.loadAndScale0();
+                mRGB_channel = 1;
+                break;
+            case 1:
+                byteval = pixels.loadAndScale1();
+                mRGB_channel = 2;
+                break;
+            case 2:
+                byteval = pixels.loadAndScale2();
+                pixels.advanceData();
+                pixels.stepDithering();
+                mRGB_channel = 0;
+                break;
+            default:
+                break;
+            }
+
+            byteval <<= 24;
+            // Shift bits out, MSB first, setting RMTMEM.chan[n].data32[x] to the 
+            // rmt_item32_t value corresponding to the buffered bit value
+            for (register uint32_t j = 0; j < 8; j++) {
+                mBuffer[mCurPulse] = (byteval & 0x80000000L) ? mOne : mZero;
+                byteval <<= 1;
+                mCurPulse++;
+            }
+        }
+
+        mBuffer[mCurPulse-1].duration1 = RMT_RESET_DURATION;
+        assert(mCurPulse == mBufferSize);
+    }
+};
+
+FASTLED_NAMESPACE_END

From cad7225f2c7c333e8647801b7dfb235d6b175675 Mon Sep 17 00:00:00 2001
From: Marc MERLIN <marc_soft@merlins.org>
Date: Sun, 15 Apr 2018 17:23:39 -0700
Subject: [PATCH 025/204] WAIT_TIME=20 to allow interrupts without breaking
 matrix output.

WAIT_TIME set to 5 was too short and FastLED aborted frames too early
and restarted them incorrectly.
(as explained by Daniel Garcia in
https://plus.google.com/communities/109127054924227823508 )

Tested on ESP8266 with 768 LED matrix in both serialized output
and 3x 256 output.
---
 platforms/esp/8266/clockless_block_esp8266.h | 2 +-
 platforms/esp/8266/clockless_esp8266.h       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/platforms/esp/8266/clockless_block_esp8266.h b/platforms/esp/8266/clockless_block_esp8266.h
index e3341d1613..8ea18bb0a6 100644
--- a/platforms/esp/8266/clockless_block_esp8266.h
+++ b/platforms/esp/8266/clockless_block_esp8266.h
@@ -17,7 +17,7 @@ extern uint32_t _frame_cnt;
 extern uint32_t _retry_cnt;
 #endif
 
-template <uint8_t LANES, int FIRST_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = GRB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 5>
+template <uint8_t LANES, int FIRST_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = GRB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 20>
 class InlineBlockClocklessController : public CPixelLEDController<RGB_ORDER, LANES, PORT_MASK> {
 	typedef typename FastPin<FIRST_PIN>::port_ptr_t data_ptr_t;
 	typedef typename FastPin<FIRST_PIN>::port_t data_t;
diff --git a/platforms/esp/8266/clockless_esp8266.h b/platforms/esp/8266/clockless_esp8266.h
index 20ae641270..80fe60ec32 100644
--- a/platforms/esp/8266/clockless_esp8266.h
+++ b/platforms/esp/8266/clockless_esp8266.h
@@ -16,7 +16,7 @@ __attribute__ ((always_inline)) inline static uint32_t __clock_cycles() {
 
 #define FASTLED_HAS_CLOCKLESS 1
 
-template <int DATA_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = RGB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 5>
+template <int DATA_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = RGB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 20>
 class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 	typedef typename FastPin<DATA_PIN>::port_ptr_t data_ptr_t;
 	typedef typename FastPin<DATA_PIN>::port_t data_t;

From 38bca0ea6b4ca458a96d903099e16831badf7126 Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Wed, 2 May 2018 22:10:53 -0400
Subject: [PATCH 026/204] Another major overhaul

The big change in this version is the ability to support more than 8 controllers. Instead of assigning RMT channels to controllers in a fixed mapping, channels are assigned on the fly, allowing the driver to reuse channels as they become available.
---
 platforms/esp/32/clockless_esp32.h      | 474 +++++++++----------
 platforms/esp/32/clockless_esp32.h-safe | 579 ++++++++++++++++++++++++
 platforms/esp/32/clockless_esp32.h~     | 469 +++++++++----------
 3 files changed, 1017 insertions(+), 505 deletions(-)
 create mode 100644 platforms/esp/32/clockless_esp32.h-safe

diff --git a/platforms/esp/32/clockless_esp32.h b/platforms/esp/32/clockless_esp32.h
index e3fdeb944e..2163a2bd87 100644
--- a/platforms/esp/32/clockless_esp32.h
+++ b/platforms/esp/32/clockless_esp32.h
@@ -25,21 +25,21 @@
  * sent. This strategy effectively overlaps computation (by the CPU)
  * and communication (by the RMT).
  *
- * PARALLEL vs SERIAL
+ * Since the RMT device only has 8 channels, we need a strategy to
+ * allow more than 8 LED controllers. Our driver assigns controllers
+ * to channels on the fly, queuing up controllers as necessary until a
+ * channel is free. The main showPixels routine just fires off the
+ * first 8 controllers; the interrupt handler starts new controllers
+ * asynchronously as previous ones finish. So, for example, it should
+ * be able to send the data for 8 controllers at once, but 16
+ * controllers would take approximately twice as much time.
  *
- * By default, this driver sends the data for all LED strips in
- * parallel. We get parallelism essentially for free because the RMT
- * is an independent processing unit. It only interrupts the CPU when
- * it needs more data to send, and the CPU is fast enough to keep all
- * 8 channels filled.
+ * There is a #define that allows a program to control the total
+ * number of channels that the driver is allowed to use. It defaults
+ * to 8 -- use all the channels. Setting it to 1, for example, results
+ * in fully serial output:
  *
- * However, there may be cases where you want serial output -- that
- * is, you want to send the data for each strip before moving on to
- * the next one. The performance will be much lower, limiting the
- * framerate. To force serial output, add this directive before you
- * include FastLED.h:
- *
- *      #define FASTLED_RMT_SERIAL_OUTPUT
+ *     #define FASTLED_RMT_MAX_CHANNELS 1
  *
  * OTHER RMT APPLICATIONS
  *
@@ -50,18 +50,15 @@
  * co-exist. To switch to this mode, add the following directive
  * before you include FastLED.h:
  *
- *      #define FASTLED_RMT_CORE_DRIVER
+ *      #define FASTLED_RMT_BUILTIN_DRIVER
  *
- * There is a performance penalty for using this mode. We need to
+ * There may be a performance penalty for using this mode. We need to
  * compute the RMT signal for the entire LED strip ahead of time,
  * rather than overlapping it with communication. We also need a large
  * buffer to hold the signal specification. Each bit of pixel data is
  * represented by a 32-bit pulse specification, so it is a 32X blow-up
  * in memory use.
  *
- * This driver assigns channels to LED strips sequentially starting at
- * zero. So, for other RMT applications make sure to choose a channel
- * at the higher end to avoid collisions.
  *
  * Based on public domain code created 19 Nov 2016 by Chris Osborn <fozztexx@fozztexx.com>
  * http://insentricity.com *
@@ -134,47 +131,39 @@ __attribute__ ((always_inline)) inline static uint32_t __clock_cycles() {
 // -- Convert ESP32 cycles to RMT cycles
 #define TO_RMT_CYCLES(_CLKS) NS_TO_CYCLES(ESPCLKS_TO_NS(_CLKS))    
 
-// -- Number of cycles to reset the strip
+// -- Number of cycles to signal the strip to latch
 #define RMT_RESET_DURATION NS_TO_CYCLES(50000)
 
-// -- Parallel or serial outut
-#ifndef FASTLED_RMT_SERIAL_OUTPUT
-#define FASTLED_RMT_SERIAL_OUTPUT false
-#endif
-
 // -- Core or custom driver
-#ifndef FASTLED_RMT_CORE_DRIVER
-#define FASTLED_RMT_CORE_DRIVER false
+#ifndef FASTLED_RMT_BUILTIN_DRIVER
+#define FASTLED_RMT_BUILTIN_DRIVER false
 #endif
 
-// -- Global counter of channels used
-//    Each FastLED.addLeds uses the next consecutive channel
-static uint8_t gNextChannel = 0;
+// -- Array of all controllers
+static CLEDController * gControllers[32];
 
-// -- Global information for the interrupt handler
-//    Information is indexed by the RMT channel, so we can get it 
-//    when we are in the interrupt handler.
-static CLEDController * gControllers[8];
+// -- Number of RMT channels to use (up to 8)
+//    Redefine this value to 1 to force serial output
+#ifndef FASTLED_RMT_MAX_CHANNELS
+#define FASTLED_RMT_MAX_CHANNELS 8
+#endif
 
-typedef void (*RefillDispatcher_t)(uint8_t);
-static RefillDispatcher_t gRefillFunctions[8];
+// -- Current set of active controllers, indexed by the RMT
+//    channel assigned to them.
+static CLEDController * gOnChannel[FASTLED_RMT_MAX_CHANNELS];
 
-static intr_handle_t gRMT_intr_handle;
+static int gNumControllers = 0;
+static int gNumStarted = 0;
+static int gNumDone = 0;
+static int gNext = 0;
 
-// -- Parallelize the output This works because most of the work of
-//    pumping out the bits is handled by the RMT peripheral, which we
-//    keep filled by responding to interrupts. All we need to do is
-//    detect when all of the channels have finished.
+static intr_handle_t gRMT_intr_handle;
 
 // -- Global semaphore for the whole show process
-//    Only used in parallel output, to signal when all controllers are done
+//    Semaphore is not given until all data has been sent
 static xSemaphoreHandle gTX_sem = NULL;
 
-// -- Globals to keep track of how many controllers have started and
-//    how many have finished
-static int gNumControllers = 0;
-static int gNumShowing = 0;
-static int gNumDone = 0;
+static bool gInitialized = false;
 
 template <int DATA_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = RGB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 5>
 class ClocklessController : public CPixelLEDController<RGB_ORDER>
@@ -182,9 +171,8 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
     // -- RMT has 8 channels, numbered 0 to 7
     rmt_channel_t mRMT_channel;
 
-    // -- Semaphore to signal when show() is done
-    //    Per-controller, so only needed for serial output
-    xSemaphoreHandle mTX_sem = NULL;
+    // -- Store the GPIO pin
+    gpio_num_t mPin;
 
     // -- Timing values for zero and one bits
     rmt_item32_t mZero;
@@ -222,227 +210,214 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         mZero.level1 = 0;
         mZero.duration1 = TO_RMT_CYCLES(T2 + T3);
 
-        // -- First time though: initialize the globals
-        if (gNextChannel == 0) {
-            for (int i = 0; i < 8; i++) {
-                gControllers[8] = 0;
-                gRefillFunctions[8] = 0;
-            }
-        }
-
-        // -- Sequentially assign RMT channels -- at most 8
-        mRMT_channel =  (rmt_channel_t) gNextChannel++;
-        if (mRMT_channel > 7) {
-            assert("Only 8 RMT Channels are allowed");
-        }
-
+	gControllers[gNumControllers] = this;
         gNumControllers++;
 
-        // -- Save this controller object, indexed by the RMT channel
-        //    This allows us to get the pointer inside the interrupt handler
-        gControllers[mRMT_channel] = this;
-        gRefillFunctions[mRMT_channel] = &refillDispatcher;
-
-        ESP_LOGI("fastled", "RMT Channel Init: %d", mRMT_channel);
-
-        // -- RMT configuration for transmission
-        rmt_config_t rmt_tx;
-        rmt_tx.channel = mRMT_channel;
-        rmt_tx.rmt_mode = RMT_MODE_TX;
-        rmt_tx.gpio_num = gpio_num_t(DATA_PIN);
-        rmt_tx.mem_block_num = 1;
-        rmt_tx.clk_div = DIVIDER;
-        rmt_tx.tx_config.loop_en = false;
-        rmt_tx.tx_config.carrier_level = RMT_CARRIER_LEVEL_LOW;
-        rmt_tx.tx_config.carrier_en = false;
-        rmt_tx.tx_config.idle_level = RMT_IDLE_LEVEL_LOW;
-        rmt_tx.tx_config.idle_output_en = true;
-        
-        // -- Apply the configuration
-        rmt_config(&rmt_tx);
-
-        // -- Allocate space for a cope of the pixels
-        // mPixelSpace = malloc(sizeof(PixelController<RGB_ORDER>));
-
-        if (FASTLED_RMT_CORE_DRIVER) {
-            // -- Use the built-in RMT driver. The only reason to choose
-            //    this option is if you have other parts of your code that
-            //    are using the RMT peripheral, and you want them to
-            //    co-exist with FastLED.
-            rmt_driver_install(mRMT_channel, 0, 0);
-        } else {
-            // -- Use the custom RMT driver implemented here, which computes
-            //    pulses on demand to reduce memory requirements and latency.
-
-            // -- Set up the RMT to send 1/2 of the pulse buffer and then
-            //    generate an interrupt. When we get this interrupt we
-            //    fill the other half in preparation (kind of like double-buffering)
-            rmt_set_tx_thr_intr_en(mRMT_channel, true, MAX_PULSES);
-
-            // -- Turn on the interrupts
-            rmt_set_tx_intr_en(mRMT_channel, true);
-
-            // -- Semaphore to signal completion of each show()
-            //    Only needed for serial output
-            mTX_sem = xSemaphoreCreateBinary();
-            xSemaphoreGive(mTX_sem);
-        }
+	mPin = gpio_num_t(DATA_PIN);
     }
 
     virtual uint16_t getMaxRefreshRate() const { return 400; }
 
 protected:
 
-    virtual void showPixels(PixelController<RGB_ORDER> & pixels)
+    void initRMT()
     {
-        mWait.wait();
-
-        gNumShowing++;
-
-        if (FASTLED_RMT_CORE_DRIVER) {
-            // === Built-in RMT driver ===
-
-            //    Fill a big buffer with all of the pixel data
-            mBufferSize = pixels.size() * 3 * 8;
-            computeAllRMTItems(pixels);
-
-            // -- Serial or parallel
-            bool wait_done;
+	for (int i = 0; i < 8; i++) {
+	    gOnChannel[i] = NULL;
+
+	    // -- RMT configuration for transmission
+	    rmt_config_t rmt_tx;
+	    rmt_tx.channel = rmt_channel_t(rmt_channel_t(i));
+	    rmt_tx.rmt_mode = RMT_MODE_TX;
+	    rmt_tx.gpio_num = mPin;  // The particular pin will be assigned later
+	    rmt_tx.mem_block_num = 1;
+	    rmt_tx.clk_div = DIVIDER;
+	    rmt_tx.tx_config.loop_en = false;
+	    rmt_tx.tx_config.carrier_level = RMT_CARRIER_LEVEL_LOW;
+	    rmt_tx.tx_config.carrier_en = false;
+	    rmt_tx.tx_config.idle_level = RMT_IDLE_LEVEL_LOW;
+	    rmt_tx.tx_config.idle_output_en = true;
+		
+	    // -- Apply the configuration
+	    rmt_config(&rmt_tx);
+
+	    // -- Set up the RMT to send 1/2 of the pulse buffer and then
+	    //    generate an interrupt. When we get this interrupt we
+	    //    fill the other half in preparation (kind of like double-buffering)
+	    rmt_set_tx_thr_intr_en(rmt_channel_t(i), true, MAX_PULSES);
+	}
+
+	// -- Create a semaphore to block execution until all the controllers are done
+	if (gTX_sem == NULL) {
+	    gTX_sem = xSemaphoreCreateBinary();
+	    xSemaphoreGive(gTX_sem);
+	}
+		
+	// -- Allocate the interrupt if we have not done so yet. This
+	//    interrupt handler must work for all different kinds of
+	//    strips, so it delegates to the refill function for each
+	//    specific instantiation of ClocklessController.
+	if (gRMT_intr_handle == NULL)
+	    esp_intr_alloc(ETS_RMT_INTR_SOURCE, 0, interruptHandler, 0, &gRMT_intr_handle);
+
+	gInitialized = true;
+    }
 
-            if (FASTLED_RMT_SERIAL_OUTPUT) {
-                wait_done = true;
-            } else {
-                // -- Parallel: only wait on the last channel
-                wait_done = (gNumShowing == gNumControllers);
-            }
+    virtual void showPixels(PixelController<RGB_ORDER> & pixels)
+    {
+	if (gNumStarted == 0) {
+	    // -- First controller: make sure everything is set up
+	    if (! gInitialized) initRMT();
+
+	    xSemaphoreTake(gTX_sem, portMAX_DELAY);
+	}
+
+	// -- Initialize the local state, save a pointer to the pixel
+	//    data. We need to make a copy because pixels is a local
+	//    variable in the calling function, and this data structure
+	//    needs to outlive this call to showPixels.
+
+	if (mPixels != NULL) 
+	    delete mPixels;
+
+	mPixels = new PixelController<RGB_ORDER>(pixels);
+	
+	// -- Keep track of the number of strips we've seen
+	gNumStarted++;
+
+	// -- The last call to showPixels is the one responsible for doing
+	//    all of the actual worl
+	if (gNumStarted == gNumControllers) {
+	    gNext = 0;
+
+	    // -- First, fill all the available channels
+	    int channel = 0;
+	    while (channel < FASTLED_RMT_MAX_CHANNELS && gNext < gNumControllers) {
+		startNext(channel);
+		channel++;
+	    }
+
+	    // -- Wait here while the rest of the data is sent. The interrupt handler
+	    //    will keep refilling the RMT buffers until it is all sent; then it
+	    //    gives the semaphore back.
+	    xSemaphoreTake(gTX_sem, portMAX_DELAY);
+	    xSemaphoreGive(gTX_sem);
+
+	    // -- Reset the counters
+	    gNumStarted = 0;
+	    gNumDone = 0;
+	    gNext = 0;
+	}
+    }
 
-            // -- Send it all at once using the built-in RMT driver
-            rmt_write_items(mRMT_channel, mBuffer, mBufferSize, wait_done);
-
-        } else {
-            // === Custom RMT driver ===
-
-            if (FASTLED_RMT_SERIAL_OUTPUT) {
-                // -- Local semaphore just for this controller
-                xSemaphoreTake(mTX_sem, portMAX_DELAY);
-            } else {
-                // -- Create a global semaphore that signals when all the
-                //    controllers are done
-                if (gTX_sem == NULL) {
-                    gTX_sem = xSemaphoreCreateBinary();
-                    xSemaphoreGive(gTX_sem);
-                }
-            }
+    // -- Start up the next controller
+    //    This method is static so that it can dispatch to the appropriate
+    //    startOnChannel method of the given controller.
+    static void startNext(int channel)
+    {
+	if (gNext < gNumControllers) {
+	    ClocklessController * pController = static_cast<ClocklessController*>(gControllers[gNext]);
+	    pController->startOnChannel(channel);
+	    gNext++;
+	}
+    }
 
-            // -- Initialize the local state, save a pointer to the pixel
-            //    data. We need to make a copy because pixels is a local
-            //    variable in the calling function, and this data structure
-            //    needs to outlive this call to showPixels.
-            // mPixels = new (mPixelSpace) PixelController<RGB_ORDER>(pixels);
-            if (mPixels != NULL) 
-                delete mPixels;
-            mPixels = new PixelController<RGB_ORDER>(pixels);
-            mCurPulse = 0;
-            mRGB_channel = 0;
-
-            // -- Fill both halves of the buffer
-            fillHalfRMTBuffer();
-            fillHalfRMTBuffer();
-
-            // -- Allocate the interrupt if we have not done so yet. This
-            //    interrupt handler must work for all different kinds of
-            //    strips, so it delegates to the refill function for each
-            //    specific instantiation of ClocklessController.
-            if (gRMT_intr_handle == NULL)
-                esp_intr_alloc(ETS_RMT_INTR_SOURCE, 0, interruptHandler, 0, &gRMT_intr_handle);
-
-            // -- Turn on the interrupts
-            rmt_set_tx_intr_en(mRMT_channel, true);
-
-            // -- Start the RMT TX operation
-            rmt_tx_start(mRMT_channel, true);
-
-            if (FASTLED_RMT_SERIAL_OUTPUT) {
-                // -- Block until this controller is done
-                //    All of the data transmission happens while we wait here
-                xSemaphoreTake(mTX_sem, portMAX_DELAY);
-                xSemaphoreGive(mTX_sem);
-        
-                // -- Turn off the interrupts
-                rmt_set_tx_intr_en(mRMT_channel, false);
-            } else {
-                // -- If this is the last controller, then this is the place to
-                //    wait for all the data to be sent.
-                if (gNumShowing == gNumControllers) {
-                    xSemaphoreTake(gTX_sem, portMAX_DELAY);
-                    xSemaphoreGive(gTX_sem);
-                }
-            }
-        }
+    virtual void startOnChannel(int channel)
+    {
+	// -- Begin sending this controller's data on the given RMT channel
+	mRMT_channel = rmt_channel_t(channel);
+
+	// -- Store a reference to this controller, so we can get it
+	//    inside the interrupt handler
+	gOnChannel[channel] = this;
+
+	// -- Assign the pin to this channel
+	rmt_set_pin(mRMT_channel, RMT_MODE_TX, mPin);
+
+	if (FASTLED_RMT_BUILTIN_DRIVER) {
+	    // -- Use the built-in RMT driver to send all the data in one shot;
+	    rmt_register_tx_end_callback(doneOnChannel, 0);
+	    writeAllRMTItems();
+	} else {
+	    // -- Use our custom driver to send the data incrementally
+
+	    // -- Turn on the interrupts
+	    rmt_set_tx_intr_en(mRMT_channel, true);
+	
+	    // -- Initialize the counters that keep track of where we are in
+	    //    the pixel data.
+	    mCurPulse = 0;
+	    mRGB_channel = 0;
+
+	    // -- Fill both halves of the buffer
+	    fillHalfRMTBuffer();
+	    fillHalfRMTBuffer();
+
+	    // -- Turn on the interrupts (NOTE(review): repeats the call above -- likely redundant)
+	    rmt_set_tx_intr_en(mRMT_channel, true);
+	    
+	    // -- Start the RMT TX operation
+	    rmt_tx_start(mRMT_channel, true);
+	}
+    }
 
-        // -- All controllers are done: reset the counters
-        if (gNumShowing == gNumControllers) {
-            gNumDone = 0;
-            gNumShowing = 0;
-        }
+    static void doneOnChannel(rmt_channel_t channel, void * arg)
+    {
+	ClocklessController * controller = static_cast<ClocklessController*>(gOnChannel[channel]);
+        portBASE_TYPE HPTaskAwoken = 0;
 
-        mWait.mark();
+	// -- Turn off output on the pin
+	gpio_matrix_out(controller->mPin, 0x100, 0, 0);
+
+	gOnChannel[channel] = NULL;
+	gNumDone++;
+
+	if (gNumDone == gNumControllers) {
+	    // -- If this is the last controller, signal that we are all done
+	    xSemaphoreGiveFromISR(gTX_sem, &HPTaskAwoken);
+	    if(HPTaskAwoken == pdTRUE) portYIELD_FROM_ISR();
+	} else {
+	    // -- Otherwise, if there are still controllers waiting, then
+	    //    start the next one on this channel
+	    if (gNext < gNumControllers)
+		startNext(channel);
+	}
     }
-
+    
     static IRAM_ATTR void interruptHandler(void *arg)
     {
         // -- The basic structure of this code is borrowed from the
         //    interrupt handler in esp-idf/components/driver/rmt.c
         uint32_t intr_st = RMT.int_st.val;
         uint8_t channel;
-        portBASE_TYPE HPTaskAwoken = 0;
 
         for (channel = 0; channel < 8; channel++) {
             int tx_done_bit = channel * 3;
             int tx_next_bit = channel + 24;
 
-            if (gRefillFunctions[channel]) {
-                if (intr_st & BIT(tx_done_bit)) {
-                    // -- Transmission is complete on this channel
-                    RMT.int_clr.val |= BIT(tx_done_bit);
-                    gNumDone++;
-
-                    if (FASTLED_RMT_SERIAL_OUTPUT) {
-                        // -- Serial mode: unblock the call to showPixels for this strip
-                        ClocklessController * controller = static_cast<ClocklessController*>(gControllers[channel]);
-                        xSemaphoreGiveFromISR(controller->mTX_sem, &HPTaskAwoken);
-                    } else {
-                        // -- Parallel mode: unblock the global semaphore when all strips are done
-                        if (gNumDone == gNumControllers)
-                            xSemaphoreGiveFromISR(gTX_sem, &HPTaskAwoken);
-                    }
-
-                    if(HPTaskAwoken == pdTRUE) portYIELD_FROM_ISR();
-                }
+            if (gOnChannel[channel] != NULL) {
+
+		ClocklessController * controller = static_cast<ClocklessController*>(gOnChannel[channel]);
 
+		// -- More to send on this channel
                 if (intr_st & BIT(tx_next_bit)) {
-                    // -- More to send on this channel: call the appropriate refill function
-                    //    Note that we refill the half of the buffer that we just finished,
+		    RMT.int_clr.val |= BIT(tx_next_bit);
+
+                    // -- Refill the half of the buffer that we just finished,
                     //    allowing the other half to proceed.
-                    RMT.int_clr.val |= BIT(tx_next_bit);
-                    (gRefillFunctions[channel])(channel);
+		    controller->fillHalfRMTBuffer();
+                }
+
+		// -- Transmission is complete on this channel
+                if (intr_st & BIT(tx_done_bit)) {
+                    RMT.int_clr.val |= BIT(tx_done_bit);
+		    doneOnChannel(rmt_channel_t(channel), 0);
                 }
             }
         }
     }
 
-    /* Refill the RMT buffer
-     * We need this dispatch function because there will be one for each instantiation of this template
-     * class -- in particular, one for each possible RGB_ORDER. We need to dispatch to the correct one
-     * so that fillHalfRMTBuffer will use the right ordering for this strip.
-     */
-    static IRAM_ATTR void refillDispatcher(uint8_t channel)
-    {
-        ClocklessController * controller = static_cast<ClocklessController*>(gControllers[channel]);
-        controller->fillHalfRMTBuffer();
-    }
-
-    IRAM_ATTR void fillHalfRMTBuffer()
+    virtual void fillHalfRMTBuffer()
     {
         // -- Fill half of the RMT pulse buffer
 
@@ -502,14 +477,12 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
                 mCurPulse++;
                 pulse_count++;
             }
+
+	    if (done_strip)
+		RMTMEM.chan[mRMT_channel].data32[mCurPulse-1].duration1 = RMT_RESET_DURATION;
         }
         
-        // -- At the end, stretch out the last pulse to signal to the strip
-        //    that we're done
         if (done_strip) {
-	    // AAAAAGGGG Integer underflow!!!
-            // RMTMEM.chan[mRMT_channel].data32[mCurPulse-1].duration1 = RMT_RESET_DURATION;
-
             // -- And fill the remaining items with zero pulses. The zero values triggers
             //    the tx_done interrupt.
             while (pulse_count < MAX_PULSES) {
@@ -523,11 +496,12 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         if (mCurPulse >= MAX_PULSES*2)
             mCurPulse = 0;
     }
-    
-    void computeAllRMTItems(PixelController<RGB_ORDER> & pixels)
+
+    virtual void writeAllRMTItems()
     {
         // -- Compute the pulse values for the whole strip at once.
         //    Requires a large buffer
+	mBufferSize = mPixels->size() * 3 * 8;
 
         // TODO: need a specific number here
         if (mBuffer == NULL) {
@@ -537,21 +511,21 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         mCurPulse = 0;
         mRGB_channel = 0;
         uint32_t byteval = 0;
-        while (pixels.has(1)) {
+        while (mPixels->has(1)) {
             // -- Cycle through the R,G, and B values in the right order
             switch (mRGB_channel) {
             case 0:
-                byteval = pixels.loadAndScale0();
+                byteval = mPixels->loadAndScale0();
                 mRGB_channel = 1;
                 break;
             case 1:
-                byteval = pixels.loadAndScale1();
+                byteval = mPixels->loadAndScale1();
                 mRGB_channel = 2;
                 break;
             case 2:
-                byteval = pixels.loadAndScale2();
-                pixels.advanceData();
-                pixels.stepDithering();
+                byteval = mPixels->loadAndScale2();
+                mPixels->advanceData();
+                mPixels->stepDithering();
                 mRGB_channel = 0;
                 break;
             default:
@@ -570,6 +544,8 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
 
         mBuffer[mCurPulse-1].duration1 = RMT_RESET_DURATION;
         assert(mCurPulse == mBufferSize);
+
+	rmt_write_items(mRMT_channel, mBuffer, mBufferSize, false);
     }
 };
 
diff --git a/platforms/esp/32/clockless_esp32.h-safe b/platforms/esp/32/clockless_esp32.h-safe
new file mode 100644
index 0000000000..e86842bfac
--- /dev/null
+++ b/platforms/esp/32/clockless_esp32.h-safe
@@ -0,0 +1,579 @@
+/*
+ * Integration into FastLED ClocklessController 2017 Thomas Basler
+ *
+ * Modifications Copyright (c) 2017 Martin F. Falatic
+ *
+ * Modifications Copyright (c) 2018 Samuel Z. Guyer
+ *
+ * ESP32 support is provided using the RMT peripheral device -- a unit
+ * on the chip designed specifically for generating (and receiving)
+ * precisely-timed digital signals. Nominally for use in infrared
+ * remote controls, we use it to generate the signals for clockless
+ * LED strips. The main advantage of using the RMT device is that,
+ * once programmed, it generates the signal asynchronously, allowing
+ * the CPU to continue executing other code. It is also not vulnerable
+ * to interrupts or other timing problems that could disrupt the signal.
+ *
+ * The implementation strategy is borrowed from previous work and from
+ * the RMT support built into the ESP32 IDF. The RMT device has 8
+ * channels, which can be programmed independently with sequences of
+ * high/low bits. Memory for each channel is limited, however, so in
+ * order to send a long sequence of bits, we need to continuously
+ * refill the buffer until all the data is sent. To do this, we fill
+ * half the buffer and then set an interrupt to go off when that half
+ * is sent. Then we refill that half while the second half is being
+ * sent. This strategy effectively overlaps computation (by the CPU)
+ * and communication (by the RMT).
+ *
+ * PARALLEL vs SERIAL
+ *
+ * By default, this driver sends the data for all LED strips in
+ * parallel. We get parallelism essentially for free because the RMT
+ * is an independent processing unit. It only interrupts the CPU when
+ * it needs more data to send, and the CPU is fast enough to keep all
+ * 8 channels filled.
+ *
+ * However, there may be cases where you want serial output -- that
+ * is, you want to send the data for each strip before moving on to
+ * the next one. The performance will be much lower, limiting the
+ * framerate. To force serial output, add this directive before you
+ * include FastLED.h:
+ *
+ *      #define FASTLED_RMT_SERIAL_OUTPUT true
+ *
+ * OTHER RMT APPLICATIONS
+ *
+ * The default FastLED driver takes over control of the RMT
+ * interrupts, making it hard to use the RMT device for other
+ * (non-FastLED) purposes. You can change its behavior to use the ESP
+ * core driver instead, allowing other RMT applications to
+ * co-exist. To switch to this mode, add the following directive
+ * before you include FastLED.h:
+ *
+ *      #define FASTLED_RMT_CORE_DRIVER true
+ *
+ * There is a performance penalty for using this mode. We need to
+ * compute the RMT signal for the entire LED strip ahead of time,
+ * rather than overlapping it with communication. We also need a large
+ * buffer to hold the signal specification. Each bit of pixel data is
+ * represented by a 32-bit pulse specification, so it is a 32X blow-up
+ * in memory use.
+ *
+ * This driver assigns channels to LED strips sequentially starting at
+ * zero. So, for other RMT applications make sure to choose a channel
+ * at the higher end to avoid collisions.
+ *
+ * Based on public domain code created 19 Nov 2016 by Chris Osborn <fozztexx@fozztexx.com>
+ * http://insentricity.com *
+ *
+ */
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#pragma once
+
+FASTLED_NAMESPACE_BEGIN
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "esp32-hal.h"
+#include "esp_intr.h"
+#include "driver/gpio.h"
+#include "driver/rmt.h"
+#include "driver/periph_ctrl.h"
+#include "freertos/semphr.h"
+#include "soc/rmt_struct.h"
+
+#include "esp_log.h"
+
+#ifdef __cplusplus
+}
+#endif
+
+__attribute__ ((always_inline)) inline static uint32_t __clock_cycles() { // Returns the current value of the `ccount` register
+  uint32_t cyc;
+  __asm__ __volatile__ ("rsr %0,ccount":"=a" (cyc)); // `rsr` copies ccount into cyc (presumably the CPU cycle counter -- confirm)
+  return cyc;
+}
+
+#define FASTLED_HAS_CLOCKLESS 1
+
+// -- Configuration constants
+#define DIVIDER             2 /* 4, 8 still seem to work, but timings become marginal */
+#define MAX_PULSES         32 /* A channel has a 64 "pulse" buffer - we use half per pass */
+
+// -- Convert ESP32 cycles back into nanoseconds
+#define ESPCLKS_TO_NS(_CLKS) (((long)(_CLKS) * 1000L) / F_CPU_MHZ)
+
+// -- Convert nanoseconds into RMT cycles
+#define F_CPU_RMT       (  80000000L)
+#define NS_PER_SEC      (1000000000L)
+#define CYCLES_PER_SEC  (F_CPU_RMT/DIVIDER)
+#define NS_PER_CYCLE    ( NS_PER_SEC / CYCLES_PER_SEC )
+#define NS_TO_CYCLES(n) ( (n) / NS_PER_CYCLE )
+
+// -- Convert ESP32 cycles to RMT cycles
+#define TO_RMT_CYCLES(_CLKS) NS_TO_CYCLES(ESPCLKS_TO_NS(_CLKS))    
+
+// -- Number of cycles to reset the strip
+#define RMT_RESET_DURATION NS_TO_CYCLES(50000)
+
+// -- Parallel or serial output
+#ifndef FASTLED_RMT_SERIAL_OUTPUT
+#define FASTLED_RMT_SERIAL_OUTPUT false
+#endif
+
+// -- Core or custom driver
+#ifndef FASTLED_RMT_CORE_DRIVER
+#define FASTLED_RMT_CORE_DRIVER false
+#endif
+
+// -- Global counter of channels used
+//    Each FastLED.addLeds uses the next consecutive channel
+static uint8_t gNextChannel = 0;
+
+// -- Global information for the interrupt handler
+//    Information is indexed by the RMT channel, so we can get it 
+//    when we are in the interrupt handler.
+static CLEDController * gControllers[8];
+
+typedef void (*RefillDispatcher_t)(uint8_t);
+static RefillDispatcher_t gRefillFunctions[8];
+
+static intr_handle_t gRMT_intr_handle;
+
+// -- Parallelize the output. This works because most of the work of
+//    pumping out the bits is handled by the RMT peripheral, which we
+//    keep filled by responding to interrupts. All we need to do is
+//    detect when all of the channels have finished.
+
+// -- Global semaphore for the whole show process
+//    Only used in parallel output, to signal when all controllers are done
+static xSemaphoreHandle gTX_sem = NULL;
+
+// -- Globals to keep track of how many controllers have started and
+//    how many have finished
+static int gNumControllers = 0;
+static int gNumShowing = 0;
+static int gNumDone = 0;
+
+template <int DATA_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = RGB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 5>
+class ClocklessController : public CPixelLEDController<RGB_ORDER>
+{
+    // -- RMT channel assigned to this controller (RMT has 8, numbered 0 to 7)
+    rmt_channel_t mRMT_channel;
+
+    // -- Semaphore to signal when show() is done
+    //    Per-controller, so only needed for serial output
+    xSemaphoreHandle mTX_sem = NULL;
+
+    // -- Timing values for zero and one bits (filled in by init())
+    rmt_item32_t mZero;
+    rmt_item32_t mOne;
+
+    // -- State information for keeping track of where we are in the pixel data
+    PixelController<RGB_ORDER> * mPixels = NULL;
+    void * mPixelSpace = NULL;
+    uint8_t mRGB_channel;
+    uint16_t mCurPulse;
+    CMinWait<WAIT_TIME> mWait;
+
+    // -- Buffer to hold all of the pulses, for the core-driver version. It
+    //    starts out NULL because computeAllRMTItems allocates it on first use.
+    rmt_item32_t * mBuffer = NULL;
+    uint16_t mBufferSize = 0;
+
+public:
+
+    // -- One-time setup for this strip: precompute the bit timings, claim the
+    //    next RMT channel, and configure either the core or the custom driver.
+    virtual void init()
+    {
+        // -- Precompute rmt items corresponding to a zero bit and a one bit
+        //    according to the timing values given in the template instantiation
+        mOne.level0 = 1;                            // T1H
+        mOne.duration0 = TO_RMT_CYCLES(T1+T2);
+        mOne.level1 = 0;                            // T1L
+        mOne.duration1 = TO_RMT_CYCLES(T3);
+
+        mZero.level0 = 1;                           // T0H
+        mZero.duration0 = TO_RMT_CYCLES(T1);
+        mZero.level1 = 0;                           // T0L
+        mZero.duration1 = TO_RMT_CYCLES(T2 + T3);
+
+        // -- First time through: clear every slot of the per-channel tables.
+        //    Both arrays have 8 entries, so the valid indices are 0..7.
+        if (gNextChannel == 0) {
+            for (int i = 0; i < 8; i++) {
+                gControllers[i] = 0;
+                gRefillFunctions[i] = 0;
+            }
+        }
+
+        // -- Sequentially assign RMT channels -- at most 8
+        mRMT_channel =  (rmt_channel_t) gNextChannel++;
+        if (mRMT_channel > 7) {
+            // -- A bare string literal is always true; `false &&` makes this fire
+            assert(false && "Only 8 RMT Channels are allowed");
+        }
+
+        gNumControllers++;
+
+        // -- Save this controller object, indexed by the RMT channel
+        //    This allows us to get the pointer inside the interrupt handler
+        gControllers[mRMT_channel] = this;
+        gRefillFunctions[mRMT_channel] = &refillDispatcher;
+
+        ESP_LOGI("fastled", "RMT Channel Init: %d", mRMT_channel);
+
+        // -- RMT configuration for transmission
+        rmt_config_t rmt_tx;
+        rmt_tx.channel = mRMT_channel;
+        rmt_tx.rmt_mode = RMT_MODE_TX;
+        rmt_tx.gpio_num = gpio_num_t(DATA_PIN);
+        rmt_tx.mem_block_num = 1;
+        rmt_tx.clk_div = DIVIDER;
+        rmt_tx.tx_config.loop_en = false;
+        rmt_tx.tx_config.carrier_level = RMT_CARRIER_LEVEL_LOW;
+        rmt_tx.tx_config.carrier_en = false;
+        rmt_tx.tx_config.idle_level = RMT_IDLE_LEVEL_LOW;
+        rmt_tx.tx_config.idle_output_en = true;
+        
+        // -- Apply the configuration
+        rmt_config(&rmt_tx);
+
+        // -- Allocate space for a copy of the pixels
+        // mPixelSpace = malloc(sizeof(PixelController<RGB_ORDER>));
+
+        if (FASTLED_RMT_CORE_DRIVER) {
+            // -- Use the built-in RMT driver. The only reason to choose
+            //    this option is if you have other parts of your code that
+            //    are using the RMT peripheral, and you want them to
+            //    co-exist with FastLED.
+            rmt_driver_install(mRMT_channel, 0, 0);
+        } else {
+            // -- Use the custom RMT driver implemented here, which computes
+            //    pulses on demand to reduce memory requirements and latency.
+
+            // -- Set up the RMT to send 1/2 of the pulse buffer and then
+            //    generate an interrupt. When we get this interrupt we
+            //    fill the other half in preparation (kind of like double-buffering)
+            rmt_set_tx_thr_intr_en(mRMT_channel, true, MAX_PULSES);
+
+            // -- Turn on the interrupts
+            rmt_set_tx_intr_en(mRMT_channel, true);
+
+            // -- Semaphore to signal completion of each show()
+            //    Only needed for serial output
+            mTX_sem = xSemaphoreCreateBinary();
+            xSemaphoreGive(mTX_sem);
+        }
+    }
+
+    virtual uint16_t getMaxRefreshRate() const { return 400; } // fixed cap reported for this controller (presumably frames per second -- confirm)
+
+protected:
+
+    virtual void showPixels(PixelController<RGB_ORDER> & pixels)
+    {   // -- Send one frame of pixel data for this strip through the RMT
+        mWait.wait();
+
+        gNumShowing++;
+
+        if (FASTLED_RMT_CORE_DRIVER) {
+            // === Built-in RMT driver ===
+
+            // -- Fill a big buffer with all of the pixel data (3 bytes x 8 items per pixel)
+            mBufferSize = pixels.size() * 3 * 8;
+            computeAllRMTItems(pixels);
+
+            // -- Serial or parallel
+            bool wait_done;
+
+            if (FASTLED_RMT_SERIAL_OUTPUT) {
+                wait_done = true;
+            } else {
+                // -- Parallel: only wait on the last channel
+                wait_done = (gNumShowing == gNumControllers);
+            }
+
+            // -- Send it all at once using the built-in RMT driver
+            rmt_write_items(mRMT_channel, mBuffer, mBufferSize, wait_done);
+
+        } else {
+            // === Custom RMT driver ===
+
+            if (FASTLED_RMT_SERIAL_OUTPUT) {
+                // -- Local semaphore just for this controller
+                xSemaphoreTake(mTX_sem, portMAX_DELAY);
+            } else {
+                // -- Create a global semaphore that signals when all the
+                //    controllers are done
+                if (gTX_sem == NULL) {
+                    gTX_sem = xSemaphoreCreateBinary();
+                    xSemaphoreGive(gTX_sem);
+                }
+		if (gNumShowing == 1) { // first controller of the frame takes the global semaphore
+		    xSemaphoreTake(gTX_sem, portMAX_DELAY);
+		}
+            }
+
+            // -- Initialize the local state, save a pointer to the pixel
+            //    data. We need to make a copy because pixels is a local
+            //    variable in the calling function, and this data structure
+            //    needs to outlive this call to showPixels.
+            // mPixels = new (mPixelSpace) PixelController<RGB_ORDER>(pixels);
+            if (mPixels != NULL) 
+                delete mPixels;
+            mPixels = new PixelController<RGB_ORDER>(pixels);
+            mCurPulse = 0;
+            mRGB_channel = 0;
+
+            // -- Fill both halves of the buffer
+            fillHalfRMTBuffer();
+            fillHalfRMTBuffer();
+
+            // -- Allocate the interrupt if we have not done so yet. This
+            //    interrupt handler must work for all different kinds of
+            //    strips, so it delegates to the refill function for each
+            //    specific instantiation of ClocklessController.
+            if (gRMT_intr_handle == NULL)
+                esp_intr_alloc(ETS_RMT_INTR_SOURCE, 0, interruptHandler, 0, &gRMT_intr_handle);
+
+            // -- Turn on the interrupts
+            rmt_set_tx_intr_en(mRMT_channel, true);
+
+            // -- Start the RMT TX operation
+            rmt_tx_start(mRMT_channel, true);
+
+            if (FASTLED_RMT_SERIAL_OUTPUT) {
+                // -- Block until this controller is done
+                //    All of the data transmission happens while we wait here
+                xSemaphoreTake(mTX_sem, portMAX_DELAY);
+                xSemaphoreGive(mTX_sem);
+        
+                // -- Turn off the interrupts
+                rmt_set_tx_intr_en(mRMT_channel, false);
+            } else {
+                // -- If this is the last controller, then this is the place to
+                //    wait for all the data to be sent.
+                if (gNumShowing == gNumControllers) {
+                    xSemaphoreTake(gTX_sem, portMAX_DELAY);
+                    xSemaphoreGive(gTX_sem);
+                }
+            }
+        }
+
+        // -- Every controller has started its show for this frame: reset the counters
+        if (gNumShowing == gNumControllers) {
+            gNumDone = 0;
+            gNumShowing = 0;
+        }
+
+        mWait.mark();
+    }
+
+    static IRAM_ATTR void interruptHandler(void *arg)
+    {
+        // -- Shared RMT interrupt handler: services every registered channel.
+        //    Structure borrowed from esp-idf/components/driver/rmt.c
+        uint32_t intr_st = RMT.int_st.val; // snapshot of the RMT interrupt status
+        uint8_t channel;
+        portBASE_TYPE HPTaskAwoken = 0;
+
+        for (channel = 0; channel < 8; channel++) {
+            int tx_done_bit = channel * 3;  // status bit tested for "transmission complete"
+            int tx_next_bit = channel + 24; // status bit tested for "more to send"
+
+            if (gRefillFunctions[channel]) { // only channels that init() registered
+                if (intr_st & BIT(tx_done_bit)) {
+                    // -- Transmission is complete on this channel
+                    RMT.int_clr.val |= BIT(tx_done_bit);
+                    gNumDone++;
+
+                    if (FASTLED_RMT_SERIAL_OUTPUT) {
+                        // -- Serial mode: unblock the call to showPixels for this strip
+                        ClocklessController * controller = static_cast<ClocklessController*>(gControllers[channel]);
+                        xSemaphoreGiveFromISR(controller->mTX_sem, &HPTaskAwoken);
+                    } else {
+                        // -- Parallel mode: unblock the global semaphore when all strips are done
+                        if (gNumDone == gNumControllers)
+                            xSemaphoreGiveFromISR(gTX_sem, &HPTaskAwoken);
+                    }
+
+                    if(HPTaskAwoken == pdTRUE) portYIELD_FROM_ISR();
+                }
+
+                if (intr_st & BIT(tx_next_bit)) {
+                    // -- More to send on this channel: call the appropriate refill function
+                    //    Note that we refill the half of the buffer that we just finished,
+                    //    allowing the other half to proceed.
+                    RMT.int_clr.val |= BIT(tx_next_bit);
+                    (gRefillFunctions[channel])(channel);
+                }
+            }
+        }
+    }
+
+    /* Refill the RMT buffer
+     * We need this dispatch function because there will be one for each instantiation of this template
+     * class -- in particular, one for each possible RGB_ORDER. We need to dispatch to the correct one
+     * so that fillHalfRMTBuffer will use the right ordering for this strip.
+     */
+    static IRAM_ATTR void refillDispatcher(uint8_t channel)
+    {
+        // -- Look up the controller registered for this channel and refill its buffer
+        static_cast<ClocklessController*>(gControllers[channel])->fillHalfRMTBuffer();
+    }
+
+    IRAM_ATTR void fillHalfRMTBuffer()
+    {
+        // -- Fill half of the RMT pulse buffer
+
+        //    The buffer holds 64 total pulse items, so this loop converts
+        //    as many pixels as can fit in half of the buffer (MAX_PULSES =
+        //    32 items). In our case, each pixel consists of three bytes,
+        //    each bit turns into one pulse item -- 24 items per pixel. So,
+        //    each half of the buffer can hold 1 and 1/3 of a pixel.
+
+        //    The member variable mCurPulse keeps track of which of the 64
+        //    items we are writing. During the first call to this method it
+        //    fills 0-31; in the second call it fills 32-63, and then wraps
+        //    back around to zero.
+
+        //    When we run out of pixel data, just fill the remaining items
+        //    with zero pulses.
+
+        uint16_t pulse_count = 0; // Ranges from 0-31 (half a buffer)
+        uint32_t byteval = 0;
+        uint32_t one_val = mOne.val;
+        uint32_t zero_val = mZero.val;
+        bool done_strip = false; // set once mPixels reports no more data
+
+        while (pulse_count < MAX_PULSES) {
+            if (! mPixels->has(1)) {
+                done_strip = true;
+                break;
+            }
+
+            // -- Cycle through the R,G, and B values in the right order
+            switch (mRGB_channel) {
+            case 0:
+                byteval = mPixels->loadAndScale0();
+                mRGB_channel = 1;
+                break;
+            case 1:
+                byteval = mPixels->loadAndScale1();
+                mRGB_channel = 2;
+                break;
+            case 2:
+                byteval = mPixels->loadAndScale2();
+                mPixels->advanceData();
+                mPixels->stepDithering();
+                mRGB_channel = 0;
+                break;
+            default:
+                break;
+            }
+
+            byteval <<= 24; // move the byte to the top so bit 31 is its MSB
+            // Shift bits out, MSB first, setting RMTMEM.chan[n].data32[x] to the 
+            // rmt_item32_t value corresponding to the buffered bit value
+            for (register uint32_t j = 0; j < 8; j++) {
+                uint32_t val = (byteval & 0x80000000L) ? one_val : zero_val;
+                RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = val;
+                byteval <<= 1;
+                mCurPulse++;
+                pulse_count++;
+            }
+        }
+        
+        // -- At the end, the last pulse was meant to be stretched to signal
+        //    to the strip that we're done (currently disabled, see below)
+        if (done_strip) {
+	    // -- Disabled: mCurPulse can be 0 here (it wraps at MAX_PULSES*2), so mCurPulse-1 is not a valid index
+            // RMTMEM.chan[mRMT_channel].data32[mCurPulse-1].duration1 = RMT_RESET_DURATION;
+
+            // -- And fill the remaining items with zero pulses. The zero values triggers
+            //    the tx_done interrupt.
+            while (pulse_count < MAX_PULSES) {
+                RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = 0;
+                mCurPulse++;
+                pulse_count++;
+            }
+        }
+
+        // -- When we have filled the back half the buffer, reset the position to the first half
+        if (mCurPulse >= MAX_PULSES*2)
+            mCurPulse = 0;
+    }
+    
+    // -- Compute the pulse values for the whole strip at once (core-driver
+    //    path). Requires a large buffer: mBufferSize items, set by the caller.
+    void computeAllRMTItems(PixelController<RGB_ORDER> & pixels)
+    {
+        // TODO: need a specific number here (the buffer is sized on first use only)
+        if (mBuffer == NULL) {
+            mBuffer = (rmt_item32_t *) calloc( mBufferSize, sizeof(rmt_item32_t));
+        }
+        if (mBuffer == NULL) return; // allocation failed or zero-sized: nothing to fill
+
+        mCurPulse = 0;
+        mRGB_channel = 0;
+        uint32_t byteval = 0;
+        while (pixels.has(1)) {
+            // -- Cycle through the R,G, and B values in the right order
+            switch (mRGB_channel) {
+            case 0:
+                byteval = pixels.loadAndScale0();
+                mRGB_channel = 1;
+                break;
+            case 1:
+                byteval = pixels.loadAndScale1();
+                mRGB_channel = 2;
+                break;
+            case 2:
+                byteval = pixels.loadAndScale2();
+                pixels.advanceData();
+                pixels.stepDithering();
+                mRGB_channel = 0;
+                break;
+            default:
+                break;
+            }
+
+            byteval <<= 24;
+            // Shift bits out, MSB first, setting mBuffer[x] to the
+            // rmt_item32_t value corresponding to the buffered bit value
+            for (register uint32_t j = 0; j < 8; j++) {
+                mBuffer[mCurPulse] = (byteval & 0x80000000L) ? mOne : mZero;
+                byteval <<= 1;
+                mCurPulse++;
+            }
+        }
+        // -- Stretch the last pulse into the reset gap (no items => nothing to stretch)
+        if (mCurPulse > 0) mBuffer[mCurPulse-1].duration1 = RMT_RESET_DURATION;
+        assert(mCurPulse == mBufferSize);
+    }
+};
+
+FASTLED_NAMESPACE_END
diff --git a/platforms/esp/32/clockless_esp32.h~ b/platforms/esp/32/clockless_esp32.h~
index 70533b9f5e..2c1bbf5bc0 100644
--- a/platforms/esp/32/clockless_esp32.h~
+++ b/platforms/esp/32/clockless_esp32.h~
@@ -25,21 +25,8 @@
  * sent. This strategy effectively overlaps computation (by the CPU)
  * and communication (by the RMT).
  *
- * PARALLEL vs SERIAL
+ * 
  *
- * By default, this driver sends the data for all LED strips in
- * parallel. We get parallelism essentially for free because the RMT
- * is an independent processing unit. It only interrupts the CPU when
- * it needs more data to send, and the CPU is fast enough to keep all
- * 8 channels filled.
- *
- * However, there may be cases where you want serial output -- that
- * is, you want to send the data for each strip before moving on to
- * the next one. The performance will be much lower, limiting the
- * framerate. To force serial output, add this directive before you
- * include FastLED.h:
- *
- *      #define FASTLED_RMT_SERIAL_OUTPUT
  *
  * OTHER RMT APPLICATIONS
  *
@@ -50,18 +37,15 @@
  * co-exist. To switch to this mode, add the following directive
  * before you include FastLED.h:
  *
- *      #define FASTLED_RMT_CORE_DRIVER
+ *      #define FASTLED_RMT_BUILTIN_DRIVER
  *
- * There is a performance penalty for using this mode. We need to
+ * There may be a performance penalty for using this mode. We need to
  * compute the RMT signal for the entire LED strip ahead of time,
  * rather than overlapping it with communication. We also need a large
  * buffer to hold the signal specification. Each bit of pixel data is
  * represented by a 32-bit pulse specification, so it is a 32X blow-up
  * in memory use.
  *
- * This driver assigns channels to LED strips sequentially starting at
- * zero. So, for other RMT applications make sure to choose a channel
- * at the higher end to avoid collisions.
  *
  * Based on public domain code created 19 Nov 2016 by Chris Osborn <fozztexx@fozztexx.com>
  * http://insentricity.com *
@@ -134,47 +118,39 @@ __attribute__ ((always_inline)) inline static uint32_t __clock_cycles() {
 // -- Convert ESP32 cycles to RMT cycles
 #define TO_RMT_CYCLES(_CLKS) NS_TO_CYCLES(ESPCLKS_TO_NS(_CLKS))    
 
-// -- Number of cycles to reset the strip
+// -- Number of cycles to signal the strip to latch
 #define RMT_RESET_DURATION NS_TO_CYCLES(50000)
 
-// -- Parallel or serial outut
-#ifndef FASTLED_RMT_SERIAL_OUTPUT
-#define FASTLED_RMT_SERIAL_OUTPUT false
-#endif
-
 // -- Core or custom driver
-#ifndef FASTLED_RMT_CORE_DRIVER
-#define FASTLED_RMT_CORE_DRIVER false
+#ifndef FASTLED_RMT_BUILTIN_DRIVER
+#define FASTLED_RMT_BUILTIN_DRIVER false
 #endif
 
-// -- Global counter of channels used
-//    Each FastLED.addLeds uses the next consecutive channel
-static uint8_t gNextChannel = 1;
+// -- Array of all controllers
+static CLEDController * gControllers[32];
+
+// -- Number of RMT channels to use (up to 8)
+//    Redefine this value to 1 to force serial output
+#ifndef FASTLED_RMT_MAX_CHANNELS
+#define FASTLED_RMT_MAX_CHANNELS 8
+#endif
 
-// -- Global information for the interrupt handler
-//    Information is indexed by the RMT channel, so we can get it 
-//    when we are in the interrupt handler.
-static CLEDController * gControllers[8];
+// -- Current set of active controllers, indexed by the RMT
+//    channel assigned to them.
+static CLEDController * gOnChannel[FASTLED_RMT_MAX_CHANNELS];
 
-typedef void (*RefillDispatcher_t)(uint8_t);
-static RefillDispatcher_t gRefillFunctions[8];
+static int gNumControllers = 0;
+static int gNumStarted = 0;
+static int gNumDone = 0;
+static int gNext = 0;
 
 static intr_handle_t gRMT_intr_handle;
 
-// -- Parallelize the output This works because most of the work of
-//    pumping out the bits is handled by the RMT peripheral, which we
-//    keep filled by responding to interrupts. All we need to do is
-//    detect when all of the channels have finished.
-
 // -- Global semaphore for the whole show process
-//    Only used in parallel output, to signal when all controllers are done
+//    Semaphore is not given until all data has been sent
 static xSemaphoreHandle gTX_sem = NULL;
 
-// -- Globals to keep track of how many controllers have started and
-//    how many have finished
-static int gNumControllers = 0;
-static int gNumShowing = 0;
-static int gNumDone = 0;
+static bool gInitialized = false;
 
 template <int DATA_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = RGB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 5>
 class ClocklessController : public CPixelLEDController<RGB_ORDER>
@@ -182,9 +158,8 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
     // -- RMT has 8 channels, numbered 0 to 7
     rmt_channel_t mRMT_channel;
 
-    // -- Semaphore to signal when show() is done
-    //    Per-controller, so only needed for serial output
-    xSemaphoreHandle mTX_sem = NULL;
+    // -- Store the GPIO pin
+    gpio_num_t mPin;
 
     // -- Timing values for zero and one bits
     rmt_item32_t mZero;
@@ -222,230 +197,214 @@ public:
         mZero.level1 = 0;
         mZero.duration1 = TO_RMT_CYCLES(T2 + T3);
 
-        // -- First time though: initialize the globals
-        if (gNextChannel == 0) {
-            for (int i = 0; i < 8; i++) {
-                gControllers[8] = 0;
-                gRefillFunctions[8] = 0;
-            }
-        }
-
-        // -- Sequentially assign RMT channels -- at most 8
-        mRMT_channel =  (rmt_channel_t) gNextChannel++;
-        if (mRMT_channel > 7) {
-            assert("Only 8 RMT Channels are allowed");
-        }
-
+	gControllers[gNumControllers] = this;
         gNumControllers++;
 
-        // -- Save this controller object, indexed by the RMT channel
-        //    This allows us to get the pointer inside the interrupt handler
-        gControllers[mRMT_channel] = this;
-        gRefillFunctions[mRMT_channel] = &refillDispatcher;
-
-        ESP_LOGI("fastled", "RMT Channel Init: %d", mRMT_channel);
-
-        // -- RMT configuration for transmission
-        rmt_config_t rmt_tx;
-        rmt_tx.channel = mRMT_channel;
-        rmt_tx.rmt_mode = RMT_MODE_TX;
-        rmt_tx.gpio_num = gpio_num_t(DATA_PIN);
-        rmt_tx.mem_block_num = 1;
-        rmt_tx.clk_div = DIVIDER;
-        rmt_tx.tx_config.loop_en = false;
-        rmt_tx.tx_config.carrier_level = RMT_CARRIER_LEVEL_LOW;
-        rmt_tx.tx_config.carrier_en = false;
-        rmt_tx.tx_config.idle_level = RMT_IDLE_LEVEL_LOW;
-        rmt_tx.tx_config.idle_output_en = true;
-        
-        // -- Apply the configuration
-        rmt_config(&rmt_tx);
-
-        // -- Allocate space for a cope of the pixels
-        // mPixelSpace = malloc(sizeof(PixelController<RGB_ORDER>));
-
-        if (FASTLED_RMT_CORE_DRIVER) {
-            // -- Use the built-in RMT driver. The only reason to choose
-            //    this option is if you have other parts of your code that
-            //    are using the RMT peripheral, and you want them to
-            //    co-exist with FastLED.
-            rmt_driver_install(mRMT_channel, 0, 0);
-        } else {
-            // -- Use the custom RMT driver implemented here, which computes
-            //    pulses on demand to reduce memory requirements and latency.
-
-            // -- Set up the RMT to send 1/2 of the pulse buffer and then
-            //    generate an interrupt. When we get this interrupt we
-            //    fill the other half in preparation (kind of like double-buffering)
-            rmt_set_tx_thr_intr_en(mRMT_channel, true, MAX_PULSES);
-
-            // -- Turn on the interrupts
-            rmt_set_tx_intr_en(mRMT_channel, true);
-
-            // -- Semaphore to signal completion of each show()
-            //    Only needed for serial output
-            mTX_sem = xSemaphoreCreateBinary();
-            xSemaphoreGive(mTX_sem);
-        }
+	mPin = gpio_num_t(DATA_PIN);
     }
 
     virtual uint16_t getMaxRefreshRate() const { return 400; }
 
 protected:
 
-    virtual void showPixels(PixelController<RGB_ORDER> & pixels)
+    void initRMT()
     {
-        mWait.wait();
+	for (int i = 0; i < 8; i++) {
+	    gOnChannel[i] = NULL;
+
+	    // -- RMT configuration for transmission
+	    rmt_config_t rmt_tx;
+	    rmt_tx.channel = rmt_channel_t(rmt_channel_t(i));
+	    rmt_tx.rmt_mode = RMT_MODE_TX;
+	    rmt_tx.gpio_num = mPin;  // The particular pin will be assigned later
+	    rmt_tx.mem_block_num = 1;
+	    rmt_tx.clk_div = DIVIDER;
+	    rmt_tx.tx_config.loop_en = false;
+	    rmt_tx.tx_config.carrier_level = RMT_CARRIER_LEVEL_LOW;
+	    rmt_tx.tx_config.carrier_en = false;
+	    rmt_tx.tx_config.idle_level = RMT_IDLE_LEVEL_LOW;
+	    rmt_tx.tx_config.idle_output_en = true;
+		
+	    // -- Apply the configuration
+	    rmt_config(&rmt_tx);
+
+	    // -- Set up the RMT to send 1/2 of the pulse buffer and then
+	    //    generate an interrupt. When we get this interrupt we
+	    //    fill the other half in preparation (kind of like double-buffering)
+	    rmt_set_tx_thr_intr_en(rmt_channel_t(i), true, MAX_PULSES);
+	}
 
-        gNumShowing++;
+	// -- Create a semaphore to block execution until all the controllers are done
+	if (gTX_sem == NULL) {
+	    gTX_sem = xSemaphoreCreateBinary();
+	    xSemaphoreGive(gTX_sem);
+	}
+		
+	// -- Allocate the interrupt if we have not done so yet. This
+	//    interrupt handler must work for all different kinds of
+	//    strips, so it delegates to the refill function for each
+	//    specific instantiation of ClocklessController.
+	if (gRMT_intr_handle == NULL)
+	    esp_intr_alloc(ETS_RMT_INTR_SOURCE, 0, interruptHandler, 0, &gRMT_intr_handle);
+
+	gInitialized = true;
+    }
 
-        if (FASTLED_RMT_CORE_DRIVER) {
-            // === Built-in RMT driver ===
+    virtual void showPixels(PixelController<RGB_ORDER> & pixels)
+    {
+	if (gNumStarted == 0) {
+	    // -- First controller: make sure everything is set up
+	    if (! gInitialized) initRMT();
 
-            //    Fill a big buffer with all of the pixel data
-            mBufferSize = pixels.size() * 3 * 8;
-            computeAllRMTItems(pixels);
+	    xSemaphoreTake(gTX_sem, portMAX_DELAY);
+	}
 
-            // -- Serial or parallel
-            bool wait_done;
+	// -- Initialize the local state, save a pointer to the pixel
+	//    data. We need to make a copy because pixels is a local
+	//    variable in the calling function, and this data structure
+	//    needs to outlive this call to showPixels.
+
+	if (mPixels != NULL) 
+	    delete mPixels;
+
+	mPixels = new PixelController<RGB_ORDER>(pixels);
+	
+	// -- Keep track of the number of strips we've seen
+	gNumStarted++;
+
+	// -- The last call to showPixels is the one responsible for doing
+	//    all of the actual worl
+	if (gNumStarted == gNumControllers) {
+	    gNext = 0;
+
+	    // -- First, fill all the available channels
+	    int channel = 0;
+	    while (channel < FASTLED_RMT_MAX_CHANNELS && gNext < gNumControllers) {
+		startNext(channel);
+		channel++;
+	    }
+
+	    // -- Wait here while the rest of the data is sent. The interrupt handler
+	    //    will keep refilling the RMT buffers until it is all sent; then it
+	    //    gives the semaphore back.
+	    xSemaphoreTake(gTX_sem, portMAX_DELAY);
+	    xSemaphoreGive(gTX_sem);
+
+	    // -- Reset the counters
+	    gNumStarted = 0;
+	    gNumDone = 0;
+	    gNext = 0;
+	}
+    }
 
-            if (FASTLED_RMT_SERIAL_OUTPUT) {
-                wait_done = true;
-            } else {
-                // -- Parallel: only wait on the last channel
-                wait_done = (gNumShowing == gNumControllers);
-            }
+    // -- Start up the next controller
+    //    This method is static so that it can dispatch to the appropriate
+    //    startOnChannel method of the given controller.
+    static void startNext(int channel)
+    {
+	if (gNext < gNumControllers) {
+	    ClocklessController * pController = static_cast<ClocklessController*>(gControllers[gNext]);
+	    pController->startOnChannel(channel);
+	    gNext++;
+	}
+    }
 
-            // -- Send it all at once using the built-in RMT driver
-            rmt_write_items(mRMT_channel, mBuffer, mBufferSize, wait_done);
-
-        } else {
-            // === Custom RMT driver ===
-
-            if (FASTLED_RMT_SERIAL_OUTPUT) {
-                // -- Local semaphore just for this controller
-                xSemaphoreTake(mTX_sem, portMAX_DELAY);
-            } else {
-                // -- Create a global semaphore that signals when all the
-                //    controllers are done
-                if (gTX_sem == NULL) {
-                    gTX_sem = xSemaphoreCreateBinary();
-                    xSemaphoreGive(gTX_sem);
-                }
-                if (gNumShowing == 1) {
-                    xSemaphoreGive(gTX_sem);
-                }
-            }
+    virtual void startOnChannel(int channel)
+    {
+	// -- Assign this channel and configure the RMT
+	mRMT_channel = rmt_channel_t(channel);
+
+	// -- Store a reference to this controller, so we can get it
+	//    inside the interrupt handler
+	gOnChannel[channel] = this;
+
+	// -- Assign the pin to this channel
+	rmt_set_pin(mRMT_channel, RMT_MODE_TX, mPin);
+
+	if (FASTLED_RMT_BUILTIN_DRIVER) {
+	    // -- Use the built-in RMT driver to send all the data in one shot
+	    rmt_register_tx_end_callback(doneOnChannel, 0);
+	    writeAllRMTItems();
+	} else {
+	    // -- Use our custom driver to send the data incrementally
+
+	    // -- Turn on the interrupts
+	    rmt_set_tx_intr_en(mRMT_channel, true);
+	
+	    // -- Initialize the counters that keep track of where we are in
+	    //    the pixel data.
+	    mCurPulse = 0;
+	    mRGB_channel = 0;
+
+	    // -- Fill both halves of the buffer
+	    fillHalfRMTBuffer();
+	    fillHalfRMTBuffer();
+
+	    // -- Turn on the interrupts
+	    rmt_set_tx_intr_en(mRMT_channel, true);
+	    
+	    // -- Start the RMT TX operation
+	    rmt_tx_start(mRMT_channel, true);
+	}
+    }
 
-            // -- Initialize the local state, save a pointer to the pixel
-            //    data. We need to make a copy because pixels is a local
-            //    variable in the calling function, and this data structure
-            //    needs to outlive this call to showPixels.
-            // mPixels = new (mPixelSpace) PixelController<RGB_ORDER>(pixels);
-            if (mPixels != NULL) 
-                delete mPixels;
-            mPixels = new PixelController<RGB_ORDER>(pixels);
-            mCurPulse = 0;
-            mRGB_channel = 0;
-
-            // -- Fill both halves of the buffer
-            fillHalfRMTBuffer();
-            fillHalfRMTBuffer();
-
-            // -- Allocate the interrupt if we have not done so yet. This
-            //    interrupt handler must work for all different kinds of
-            //    strips, so it delegates to the refill function for each
-            //    specific instantiation of ClocklessController.
-            if (gRMT_intr_handle == NULL)
-                esp_intr_alloc(ETS_RMT_INTR_SOURCE, 0, interruptHandler, 0, &gRMT_intr_handle);
-
-            // -- Turn on the interrupts
-            rmt_set_tx_intr_en(mRMT_channel, true);
-
-            // -- Start the RMT TX operation
-            rmt_tx_start(mRMT_channel, true);
-
-            if (FASTLED_RMT_SERIAL_OUTPUT) {
-                // -- Block until this controller is done
-                //    All of the data transmission happens while we wait here
-                xSemaphoreTake(mTX_sem, portMAX_DELAY);
-                xSemaphoreGive(mTX_sem);
-        
-                // -- Turn off the interrupts
-                rmt_set_tx_intr_en(mRMT_channel, false);
-            } else {
-                // -- If this is the last controller, then this is the place to
-                //    wait for all the data to be sent.
-                if (gNumShowing == gNumControllers) {
-                    xSemaphoreTake(gTX_sem, portMAX_DELAY);
-                    xSemaphoreGive(gTX_sem);
-                }
-            }
-        }
+    static void doneOnChannel(rmt_channel_t channel, void * arg)
+    {
+	ClocklessController * controller = static_cast<ClocklessController*>(gOnChannel[channel]);
+        portBASE_TYPE HPTaskAwoken = 0;
 
-        // -- All controllers are done: reset the counters
-        if (gNumShowing == gNumControllers) {
-            gNumDone = 0;
-            gNumShowing = 0;
-        }
+	// -- Turn off output on the pin
+	gpio_matrix_out(controller->mPin, 0x100, 0, 0);
 
-        //mWait.mark();
-    }
+	gOnChannel[channel] = NULL;
+	gNumDone++;
 
+	if (gNumDone == gNumControllers) {
+	    // -- If this is the last controller, signal that we are all done
+	    xSemaphoreGiveFromISR(gTX_sem, &HPTaskAwoken);
+	    if(HPTaskAwoken == pdTRUE) portYIELD_FROM_ISR();
+	} else {
+	    // -- Otherwise, if there are still controllers waiting, then
+	    //    start the next one on this channel
+	    if (gNext < gNumControllers)
+		startNext(channel);
+	}
+    }
+    
     static IRAM_ATTR void interruptHandler(void *arg)
     {
         // -- The basic structure of this code is borrowed from the
         //    interrupt handler in esp-idf/components/driver/rmt.c
         uint32_t intr_st = RMT.int_st.val;
         uint8_t channel;
-        portBASE_TYPE HPTaskAwoken = 0;
 
         for (channel = 0; channel < 8; channel++) {
             int tx_done_bit = channel * 3;
             int tx_next_bit = channel + 24;
 
-            if (intr_st & BIT(tx_done_bit)) {
-                // -- Transmission is complete on this channel
-                RMT.int_clr.val |= BIT(tx_done_bit);
-		if (gRefillFunctions[channel]) {
-		    gNumDone++;
-
-		    if (FASTLED_RMT_SERIAL_OUTPUT) {
-			// -- Serial mode: unblock the call to showPixels for this strip
-			ClocklessController * controller = static_cast<ClocklessController*>(gControllers[channel]);
-			xSemaphoreGiveFromISR(controller->mTX_sem, &HPTaskAwoken);
-		    } else {
-			// -- Parallel mode: unblock the global semaphore when all strips are done
-			if (gNumDone == gNumControllers)
-			    xSemaphoreGiveFromISR(gTX_sem, &HPTaskAwoken);
-		    }
-		}
-
-                if(HPTaskAwoken == pdTRUE) portYIELD_FROM_ISR();
-            }
+            if (gOnChannel[channel] != NULL) {
 
-            if (intr_st & BIT(tx_next_bit)) {
-                // -- More to send on this channel: call the appropriate refill function
-                //    Note that we refill the half of the buffer that we just finished,
-                //    allowing the other half to proceed.
-                RMT.int_clr.val |= BIT(tx_next_bit);
-                (gRefillFunctions[channel])(channel);
-            }
-	}
-    }
+		ClocklessController * controller = static_cast<ClocklessController*>(gOnChannel[channel]);
 
-    /* Refill the RMT buffer
-     * We need this dispatch function because there will be one for each instantiation of this template
-     * class -- in particular, one for each possible RGB_ORDER. We need to dispatch to the correct one
-     * so that fillHalfRMTBuffer will use the right ordering for this strip.
-     */
-    static IRAM_ATTR void refillDispatcher(uint8_t channel)
-    {
-        ClocklessController * controller = static_cast<ClocklessController*>(gControllers[channel]);
-        controller->fillHalfRMTBuffer();
+		// -- More to send on this channel
+                if (intr_st & BIT(tx_next_bit)) {
+		    RMT.int_clr.val |= BIT(tx_next_bit);
+
+                    // -- Refill the half of the buffer that we just finished,
+                    //    allowing the other half to proceed.
+		    controller->fillHalfRMTBuffer();
+                }
+
+		// -- Transmission is complete on this channel
+                if (intr_st & BIT(tx_done_bit)) {
+                    RMT.int_clr.val |= BIT(tx_done_bit);
+		    doneOnChannel(rmt_channel_t(channel), 0);
+                }
+            }
+        }
     }
 
-    IRAM_ATTR void fillHalfRMTBuffer()
+    virtual void fillHalfRMTBuffer()
     {
         // -- Fill half of the RMT pulse buffer
 
@@ -469,10 +428,6 @@ protected:
         uint32_t zero_val = mZero.val;
         bool done_strip = false;
 
-	if (gNumDone == gNumControllers) {
-	    if (mPixels->has(1)) ;
-	}
-
         while (pulse_count < MAX_PULSES) {
             if (! mPixels->has(1)) {
                 done_strip = true;
@@ -509,13 +464,12 @@ protected:
                 mCurPulse++;
                 pulse_count++;
             }
+
+	    if (done_strip)
+		RMTMEM.chan[mRMT_channel].data32[mCurPulse-1].duration1 = RMT_RESET_DURATION;
         }
         
-        // -- At the end, stretch out the last pulse to signal to the strip
-        //    that we're done
         if (done_strip) {
-            // RMTMEM.chan[mRMT_channel].data32[mCurPulse-1].duration1 = RMT_RESET_DURATION;
-
             // -- And fill the remaining items with zero pulses. The zero values triggers
             //    the tx_done interrupt.
             while (pulse_count < MAX_PULSES) {
@@ -529,11 +483,12 @@ protected:
         if (mCurPulse >= MAX_PULSES*2)
             mCurPulse = 0;
     }
-    
-    void computeAllRMTItems(PixelController<RGB_ORDER> & pixels)
+
+    virtual void writeAllRMTItems()
     {
         // -- Compute the pulse values for the whole strip at once.
         //    Requires a large buffer
+	mBufferSize = mPixels->size() * 3 * 8;
 
         // TODO: need a specific number here
         if (mBuffer == NULL) {
@@ -543,21 +498,21 @@ protected:
         mCurPulse = 0;
         mRGB_channel = 0;
         uint32_t byteval = 0;
-        while (pixels.has(1)) {
+        while (mPixels->has(1)) {
             // -- Cycle through the R,G, and B values in the right order
             switch (mRGB_channel) {
             case 0:
-                byteval = pixels.loadAndScale0();
+                byteval = mPixels->loadAndScale0();
                 mRGB_channel = 1;
                 break;
             case 1:
-                byteval = pixels.loadAndScale1();
+                byteval = mPixels->loadAndScale1();
                 mRGB_channel = 2;
                 break;
             case 2:
-                byteval = pixels.loadAndScale2();
-                pixels.advanceData();
-                pixels.stepDithering();
+                byteval = mPixels->loadAndScale2();
+                mPixels->advanceData();
+                mPixels->stepDithering();
                 mRGB_channel = 0;
                 break;
             default:
@@ -576,6 +531,8 @@ protected:
 
         mBuffer[mCurPulse-1].duration1 = RMT_RESET_DURATION;
         assert(mCurPulse == mBufferSize);
+
+	rmt_write_items(mRMT_channel, mBuffer, mBufferSize, false);
     }
 };
 

From 7901f7d6f773dcb41e0187a77c7c2b97432bad16 Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Wed, 2 May 2018 22:55:00 -0400
Subject: [PATCH 027/204] Oops

Didn't mean to check these in.
---
 platforms/esp/32/clockless_esp32.h-safe | 579 ------------------------
 platforms/esp/32/clockless_esp32.h~     | 539 ----------------------
 2 files changed, 1118 deletions(-)
 delete mode 100644 platforms/esp/32/clockless_esp32.h-safe
 delete mode 100644 platforms/esp/32/clockless_esp32.h~

diff --git a/platforms/esp/32/clockless_esp32.h-safe b/platforms/esp/32/clockless_esp32.h-safe
deleted file mode 100644
index e86842bfac..0000000000
--- a/platforms/esp/32/clockless_esp32.h-safe
+++ /dev/null
@@ -1,579 +0,0 @@
-/*
- * Integration into FastLED ClocklessController 2017 Thomas Basler
- *
- * Modifications Copyright (c) 2017 Martin F. Falatic
- *
- * Modifications Copyright (c) 2018 Samuel Z. Guyer
- *
- * ESP32 support is provided using the RMT peripheral device -- a unit
- * on the chip designed specifically for generating (and receiving)
- * precisely-timed digital signals. Nominally for use in infrared
- * remote controls, we use it to generate the signals for clockless
- * LED strips. The main advantage of using the RMT device is that,
- * once programmed, it generates the signal asynchronously, allowing
- * the CPU to continue executing other code. It is also not vulnerable
- * to interrupts or other timing problems that could disrupt the signal.
- *
- * The implementation strategy is borrowed from previous work and from
- * the RMT support built into the ESP32 IDF. The RMT device has 8
- * channels, which can be programmed independently with sequences of
- * high/low bits. Memory for each channel is limited, however, so in
- * order to send a long sequence of bits, we need to continuously
- * refill the buffer until all the data is sent. To do this, we fill
- * half the buffer and then set an interrupt to go off when that half
- * is sent. Then we refill that half while the second half is being
- * sent. This strategy effectively overlaps computation (by the CPU)
- * and communication (by the RMT).
- *
- * PARALLEL vs SERIAL
- *
- * By default, this driver sends the data for all LED strips in
- * parallel. We get parallelism essentially for free because the RMT
- * is an independent processing unit. It only interrupts the CPU when
- * it needs more data to send, and the CPU is fast enough to keep all
- * 8 channels filled.
- *
- * However, there may be cases where you want serial output -- that
- * is, you want to send the data for each strip before moving on to
- * the next one. The performance will be much lower, limiting the
- * framerate. To force serial output, add this directive before you
- * include FastLED.h:
- *
- *      #define FASTLED_RMT_SERIAL_OUTPUT
- *
- * OTHER RMT APPLICATIONS
- *
- * The default FastLED driver takes over control of the RMT
- * interrupts, making it hard to use the RMT device for other
- * (non-FastLED) purposes. You can change it's behavior to use the ESP
- * core driver instead, allowing other RMT applications to
- * co-exist. To switch to this mode, add the following directive
- * before you include FastLED.h:
- *
- *      #define FASTLED_RMT_CORE_DRIVER
- *
- * There is a performance penalty for using this mode. We need to
- * compute the RMT signal for the entire LED strip ahead of time,
- * rather than overlapping it with communication. We also need a large
- * buffer to hold the signal specification. Each bit of pixel data is
- * represented by a 32-bit pulse specification, so it is a 32X blow-up
- * in memory use.
- *
- * This driver assigns channels to LED strips sequentially starting at
- * zero. So, for other RMT applications make sure to choose a channel
- * at the higher end to avoid collisions.
- *
- * Based on public domain code created 19 Nov 2016 by Chris Osborn <fozztexx@fozztexx.com>
- * http://insentricity.com *
- *
- */
-/*
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#pragma once
-
-FASTLED_NAMESPACE_BEGIN
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "esp32-hal.h"
-#include "esp_intr.h"
-#include "driver/gpio.h"
-#include "driver/rmt.h"
-#include "driver/periph_ctrl.h"
-#include "freertos/semphr.h"
-#include "soc/rmt_struct.h"
-
-#include "esp_log.h"
-
-#ifdef __cplusplus
-}
-#endif
-
-__attribute__ ((always_inline)) inline static uint32_t __clock_cycles() {
-  uint32_t cyc;
-  __asm__ __volatile__ ("rsr %0,ccount":"=a" (cyc));
-  return cyc;
-}
-
-#define FASTLED_HAS_CLOCKLESS 1
-
-// -- Configuration constants
-#define DIVIDER             2 /* 4, 8 still seem to work, but timings become marginal */
-#define MAX_PULSES         32 /* A channel has a 64 "pulse" buffer - we use half per pass */
-
-// -- Convert ESP32 cycles back into nanoseconds
-#define ESPCLKS_TO_NS(_CLKS) (((long)(_CLKS) * 1000L) / F_CPU_MHZ)
-
-// -- Convert nanoseconds into RMT cycles
-#define F_CPU_RMT       (  80000000L)
-#define NS_PER_SEC      (1000000000L)
-#define CYCLES_PER_SEC  (F_CPU_RMT/DIVIDER)
-#define NS_PER_CYCLE    ( NS_PER_SEC / CYCLES_PER_SEC )
-#define NS_TO_CYCLES(n) ( (n) / NS_PER_CYCLE )
-
-// -- Convert ESP32 cycles to RMT cycles
-#define TO_RMT_CYCLES(_CLKS) NS_TO_CYCLES(ESPCLKS_TO_NS(_CLKS))    
-
-// -- Number of cycles to reset the strip
-#define RMT_RESET_DURATION NS_TO_CYCLES(50000)
-
-// -- Parallel or serial outut
-#ifndef FASTLED_RMT_SERIAL_OUTPUT
-#define FASTLED_RMT_SERIAL_OUTPUT false
-#endif
-
-// -- Core or custom driver
-#ifndef FASTLED_RMT_CORE_DRIVER
-#define FASTLED_RMT_CORE_DRIVER false
-#endif
-
-// -- Global counter of channels used
-//    Each FastLED.addLeds uses the next consecutive channel
-static uint8_t gNextChannel = 0;
-
-// -- Global information for the interrupt handler
-//    Information is indexed by the RMT channel, so we can get it 
-//    when we are in the interrupt handler.
-static CLEDController * gControllers[8];
-
-typedef void (*RefillDispatcher_t)(uint8_t);
-static RefillDispatcher_t gRefillFunctions[8];
-
-static intr_handle_t gRMT_intr_handle;
-
-// -- Parallelize the output This works because most of the work of
-//    pumping out the bits is handled by the RMT peripheral, which we
-//    keep filled by responding to interrupts. All we need to do is
-//    detect when all of the channels have finished.
-
-// -- Global semaphore for the whole show process
-//    Only used in parallel output, to signal when all controllers are done
-static xSemaphoreHandle gTX_sem = NULL;
-
-// -- Globals to keep track of how many controllers have started and
-//    how many have finished
-static int gNumControllers = 0;
-static int gNumShowing = 0;
-static int gNumDone = 0;
-
-template <int DATA_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = RGB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 5>
-class ClocklessController : public CPixelLEDController<RGB_ORDER>
-{
-    // -- RMT has 8 channels, numbered 0 to 7
-    rmt_channel_t mRMT_channel;
-
-    // -- Semaphore to signal when show() is done
-    //    Per-controller, so only needed for serial output
-    xSemaphoreHandle mTX_sem = NULL;
-
-    // -- Timing values for zero and one bits
-    rmt_item32_t mZero;
-    rmt_item32_t mOne;
-
-    // -- State information for keeping track of where we are in the pixel data
-    PixelController<RGB_ORDER> * mPixels = NULL;
-    void * mPixelSpace = NULL;
-    uint8_t mRGB_channel;
-    uint16_t mCurPulse;
-    CMinWait<WAIT_TIME> mWait;
-
-    // -- Buffer to hold all of the pulses. For the version that uses
-    //    the RMT driver built into the ESP core.
-    rmt_item32_t * mBuffer;
-    uint16_t mBufferSize;
-
-public:
-
-    virtual void init()
-    {
-        // -- Precompute rmt items corresponding to a zero bit and a one bit
-        //    according to the timing values given in the template instantiation
-        // T1H
-        mOne.level0 = 1;
-        mOne.duration0 = TO_RMT_CYCLES(T1+T2);
-        // T1L
-        mOne.level1 = 0;
-        mOne.duration1 = TO_RMT_CYCLES(T3);
-
-        // T0H
-        mZero.level0 = 1;
-        mZero.duration0 = TO_RMT_CYCLES(T1);
-        // T0L
-        mZero.level1 = 0;
-        mZero.duration1 = TO_RMT_CYCLES(T2 + T3);
-
-        // -- First time though: initialize the globals
-        if (gNextChannel == 0) {
-            for (int i = 0; i < 8; i++) {
-                gControllers[8] = 0;
-                gRefillFunctions[8] = 0;
-            }
-        }
-
-        // -- Sequentially assign RMT channels -- at most 8
-        mRMT_channel =  (rmt_channel_t) gNextChannel++;
-        if (mRMT_channel > 7) {
-            assert("Only 8 RMT Channels are allowed");
-        }
-
-        gNumControllers++;
-
-        // -- Save this controller object, indexed by the RMT channel
-        //    This allows us to get the pointer inside the interrupt handler
-        gControllers[mRMT_channel] = this;
-        gRefillFunctions[mRMT_channel] = &refillDispatcher;
-
-        ESP_LOGI("fastled", "RMT Channel Init: %d", mRMT_channel);
-
-        // -- RMT configuration for transmission
-        rmt_config_t rmt_tx;
-        rmt_tx.channel = mRMT_channel;
-        rmt_tx.rmt_mode = RMT_MODE_TX;
-        rmt_tx.gpio_num = gpio_num_t(DATA_PIN);
-        rmt_tx.mem_block_num = 1;
-        rmt_tx.clk_div = DIVIDER;
-        rmt_tx.tx_config.loop_en = false;
-        rmt_tx.tx_config.carrier_level = RMT_CARRIER_LEVEL_LOW;
-        rmt_tx.tx_config.carrier_en = false;
-        rmt_tx.tx_config.idle_level = RMT_IDLE_LEVEL_LOW;
-        rmt_tx.tx_config.idle_output_en = true;
-        
-        // -- Apply the configuration
-        rmt_config(&rmt_tx);
-
-        // -- Allocate space for a cope of the pixels
-        // mPixelSpace = malloc(sizeof(PixelController<RGB_ORDER>));
-
-        if (FASTLED_RMT_CORE_DRIVER) {
-            // -- Use the built-in RMT driver. The only reason to choose
-            //    this option is if you have other parts of your code that
-            //    are using the RMT peripheral, and you want them to
-            //    co-exist with FastLED.
-            rmt_driver_install(mRMT_channel, 0, 0);
-        } else {
-            // -- Use the custom RMT driver implemented here, which computes
-            //    pulses on demand to reduce memory requirements and latency.
-
-            // -- Set up the RMT to send 1/2 of the pulse buffer and then
-            //    generate an interrupt. When we get this interrupt we
-            //    fill the other half in preparation (kind of like double-buffering)
-            rmt_set_tx_thr_intr_en(mRMT_channel, true, MAX_PULSES);
-
-            // -- Turn on the interrupts
-            rmt_set_tx_intr_en(mRMT_channel, true);
-
-            // -- Semaphore to signal completion of each show()
-            //    Only needed for serial output
-            mTX_sem = xSemaphoreCreateBinary();
-            xSemaphoreGive(mTX_sem);
-        }
-    }
-
-    virtual uint16_t getMaxRefreshRate() const { return 400; }
-
-protected:
-
-    virtual void showPixels(PixelController<RGB_ORDER> & pixels)
-    {
-        mWait.wait();
-
-        gNumShowing++;
-
-        if (FASTLED_RMT_CORE_DRIVER) {
-            // === Built-in RMT driver ===
-
-            //    Fill a big buffer with all of the pixel data
-            mBufferSize = pixels.size() * 3 * 8;
-            computeAllRMTItems(pixels);
-
-            // -- Serial or parallel
-            bool wait_done;
-
-            if (FASTLED_RMT_SERIAL_OUTPUT) {
-                wait_done = true;
-            } else {
-                // -- Parallel: only wait on the last channel
-                wait_done = (gNumShowing == gNumControllers);
-            }
-
-            // -- Send it all at once using the built-in RMT driver
-            rmt_write_items(mRMT_channel, mBuffer, mBufferSize, wait_done);
-
-        } else {
-            // === Custom RMT driver ===
-
-            if (FASTLED_RMT_SERIAL_OUTPUT) {
-                // -- Local semaphore just for this controller
-                xSemaphoreTake(mTX_sem, portMAX_DELAY);
-            } else {
-                // -- Create a global semaphore that signals when all the
-                //    controllers are done
-                if (gTX_sem == NULL) {
-                    gTX_sem = xSemaphoreCreateBinary();
-                    xSemaphoreGive(gTX_sem);
-                }
-		if (gNumShowing == 1) {
-		    xSemaphoreTake(gTX_sem, portMAX_DELAY);
-		}
-            }
-
-            // -- Initialize the local state, save a pointer to the pixel
-            //    data. We need to make a copy because pixels is a local
-            //    variable in the calling function, and this data structure
-            //    needs to outlive this call to showPixels.
-            // mPixels = new (mPixelSpace) PixelController<RGB_ORDER>(pixels);
-            if (mPixels != NULL) 
-                delete mPixels;
-            mPixels = new PixelController<RGB_ORDER>(pixels);
-            mCurPulse = 0;
-            mRGB_channel = 0;
-
-            // -- Fill both halves of the buffer
-            fillHalfRMTBuffer();
-            fillHalfRMTBuffer();
-
-            // -- Allocate the interrupt if we have not done so yet. This
-            //    interrupt handler must work for all different kinds of
-            //    strips, so it delegates to the refill function for each
-            //    specific instantiation of ClocklessController.
-            if (gRMT_intr_handle == NULL)
-                esp_intr_alloc(ETS_RMT_INTR_SOURCE, 0, interruptHandler, 0, &gRMT_intr_handle);
-
-            // -- Turn on the interrupts
-            rmt_set_tx_intr_en(mRMT_channel, true);
-
-            // -- Start the RMT TX operation
-            rmt_tx_start(mRMT_channel, true);
-
-            if (FASTLED_RMT_SERIAL_OUTPUT) {
-                // -- Block until this controller is done
-                //    All of the data transmission happens while we wait here
-                xSemaphoreTake(mTX_sem, portMAX_DELAY);
-                xSemaphoreGive(mTX_sem);
-        
-                // -- Turn off the interrupts
-                rmt_set_tx_intr_en(mRMT_channel, false);
-            } else {
-                // -- If this is the last controller, then this is the place to
-                //    wait for all the data to be sent.
-                if (gNumShowing == gNumControllers) {
-                    xSemaphoreTake(gTX_sem, portMAX_DELAY);
-                    xSemaphoreGive(gTX_sem);
-                }
-            }
-        }
-
-        // -- All controllers are done: reset the counters
-        if (gNumShowing == gNumControllers) {
-            gNumDone = 0;
-            gNumShowing = 0;
-        }
-
-        mWait.mark();
-    }
-
-    static IRAM_ATTR void interruptHandler(void *arg)
-    {
-        // -- The basic structure of this code is borrowed from the
-        //    interrupt handler in esp-idf/components/driver/rmt.c
-        uint32_t intr_st = RMT.int_st.val;
-        uint8_t channel;
-        portBASE_TYPE HPTaskAwoken = 0;
-
-        for (channel = 0; channel < 8; channel++) {
-            int tx_done_bit = channel * 3;
-            int tx_next_bit = channel + 24;
-
-            if (gRefillFunctions[channel]) {
-                if (intr_st & BIT(tx_done_bit)) {
-                    // -- Transmission is complete on this channel
-                    RMT.int_clr.val |= BIT(tx_done_bit);
-                    gNumDone++;
-
-                    if (FASTLED_RMT_SERIAL_OUTPUT) {
-                        // -- Serial mode: unblock the call to showPixels for this strip
-                        ClocklessController * controller = static_cast<ClocklessController*>(gControllers[channel]);
-                        xSemaphoreGiveFromISR(controller->mTX_sem, &HPTaskAwoken);
-                    } else {
-                        // -- Parallel mode: unblock the global semaphore when all strips are done
-                        if (gNumDone == gNumControllers)
-                            xSemaphoreGiveFromISR(gTX_sem, &HPTaskAwoken);
-                    }
-
-                    if(HPTaskAwoken == pdTRUE) portYIELD_FROM_ISR();
-                }
-
-                if (intr_st & BIT(tx_next_bit)) {
-                    // -- More to send on this channel: call the appropriate refill function
-                    //    Note that we refill the half of the buffer that we just finished,
-                    //    allowing the other half to proceed.
-                    RMT.int_clr.val |= BIT(tx_next_bit);
-                    (gRefillFunctions[channel])(channel);
-                }
-            }
-        }
-    }
-
-    /* Refill the RMT buffer
-     * We need this dispatch function because there will be one for each instantiation of this template
-     * class -- in particular, one for each possible RGB_ORDER. We need to dispatch to the correct one
-     * so that fillHalfRMTBuffer will use the right ordering for this strip.
-     */
-    static IRAM_ATTR void refillDispatcher(uint8_t channel)
-    {
-        ClocklessController * controller = static_cast<ClocklessController*>(gControllers[channel]);
-        controller->fillHalfRMTBuffer();
-    }
-
-    IRAM_ATTR void fillHalfRMTBuffer()
-    {
-        // -- Fill half of the RMT pulse buffer
-
-        //    The buffer holds 64 total pulse items, so this loop converts
-        //    as many pixels as can fit in half of the buffer (MAX_PULSES =
-        //    32 items). In our case, each pixel consists of three bytes,
-        //    each bit turns into one pulse item -- 24 items per pixel. So,
-        //    each half of the buffer can hold 1 and 1/3 of a pixel.
-
-        //    The member variable mCurPulse keeps track of which of the 64
-        //    items we are writing. During the first call to this method it
-        //    fills 0-31; in the second call it fills 32-63, and then wraps
-        //    back around to zero.
-
-        //    When we run out of pixel data, just fill the remaining items
-        //    with zero pulses.
-
-        uint16_t pulse_count = 0; // Ranges from 0-31 (half a buffer)
-        uint32_t byteval = 0;
-        uint32_t one_val = mOne.val;
-        uint32_t zero_val = mZero.val;
-        bool done_strip = false;
-
-        while (pulse_count < MAX_PULSES) {
-            if (! mPixels->has(1)) {
-                done_strip = true;
-                break;
-            }
-
-            // -- Cycle through the R,G, and B values in the right order
-            switch (mRGB_channel) {
-            case 0:
-                byteval = mPixels->loadAndScale0();
-                mRGB_channel = 1;
-                break;
-            case 1:
-                byteval = mPixels->loadAndScale1();
-                mRGB_channel = 2;
-                break;
-            case 2:
-                byteval = mPixels->loadAndScale2();
-                mPixels->advanceData();
-                mPixels->stepDithering();
-                mRGB_channel = 0;
-                break;
-            default:
-                break;
-            }
-
-            byteval <<= 24;
-            // Shift bits out, MSB first, setting RMTMEM.chan[n].data32[x] to the 
-            // rmt_item32_t value corresponding to the buffered bit value
-            for (register uint32_t j = 0; j < 8; j++) {
-                uint32_t val = (byteval & 0x80000000L) ? one_val : zero_val;
-                RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = val;
-                byteval <<= 1;
-                mCurPulse++;
-                pulse_count++;
-            }
-        }
-        
-        // -- At the end, stretch out the last pulse to signal to the strip
-        //    that we're done
-        if (done_strip) {
-	    // AAAAAGGGG Integer underflow!!!
-            // RMTMEM.chan[mRMT_channel].data32[mCurPulse-1].duration1 = RMT_RESET_DURATION;
-
-            // -- And fill the remaining items with zero pulses. The zero values triggers
-            //    the tx_done interrupt.
-            while (pulse_count < MAX_PULSES) {
-                RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = 0;
-                mCurPulse++;
-                pulse_count++;
-            }
-        }
-
-        // -- When we have filled the back half the buffer, reset the position to the first half
-        if (mCurPulse >= MAX_PULSES*2)
-            mCurPulse = 0;
-    }
-    
-    void computeAllRMTItems(PixelController<RGB_ORDER> & pixels)
-    {
-        // -- Compute the pulse values for the whole strip at once.
-        //    Requires a large buffer
-
-        // TODO: need a specific number here
-        if (mBuffer == NULL) {
-            mBuffer = (rmt_item32_t *) calloc( mBufferSize, sizeof(rmt_item32_t));
-        }
-
-        mCurPulse = 0;
-        mRGB_channel = 0;
-        uint32_t byteval = 0;
-        while (pixels.has(1)) {
-            // -- Cycle through the R,G, and B values in the right order
-            switch (mRGB_channel) {
-            case 0:
-                byteval = pixels.loadAndScale0();
-                mRGB_channel = 1;
-                break;
-            case 1:
-                byteval = pixels.loadAndScale1();
-                mRGB_channel = 2;
-                break;
-            case 2:
-                byteval = pixels.loadAndScale2();
-                pixels.advanceData();
-                pixels.stepDithering();
-                mRGB_channel = 0;
-                break;
-            default:
-                break;
-            }
-
-            byteval <<= 24;
-            // Shift bits out, MSB first, setting RMTMEM.chan[n].data32[x] to the 
-            // rmt_item32_t value corresponding to the buffered bit value
-            for (register uint32_t j = 0; j < 8; j++) {
-                mBuffer[mCurPulse] = (byteval & 0x80000000L) ? mOne : mZero;
-                byteval <<= 1;
-                mCurPulse++;
-            }
-        }
-
-        mBuffer[mCurPulse-1].duration1 = RMT_RESET_DURATION;
-        assert(mCurPulse == mBufferSize);
-    }
-};
-
-FASTLED_NAMESPACE_END
diff --git a/platforms/esp/32/clockless_esp32.h~ b/platforms/esp/32/clockless_esp32.h~
deleted file mode 100644
index 2c1bbf5bc0..0000000000
--- a/platforms/esp/32/clockless_esp32.h~
+++ /dev/null
@@ -1,539 +0,0 @@
-/*
- * Integration into FastLED ClocklessController 2017 Thomas Basler
- *
- * Modifications Copyright (c) 2017 Martin F. Falatic
- *
- * Modifications Copyright (c) 2018 Samuel Z. Guyer
- *
- * ESP32 support is provided using the RMT peripheral device -- a unit
- * on the chip designed specifically for generating (and receiving)
- * precisely-timed digital signals. Nominally for use in infrared
- * remote controls, we use it to generate the signals for clockless
- * LED strips. The main advantage of using the RMT device is that,
- * once programmed, it generates the signal asynchronously, allowing
- * the CPU to continue executing other code. It is also not vulnerable
- * to interrupts or other timing problems that could disrupt the signal.
- *
- * The implementation strategy is borrowed from previous work and from
- * the RMT support built into the ESP32 IDF. The RMT device has 8
- * channels, which can be programmed independently with sequences of
- * high/low bits. Memory for each channel is limited, however, so in
- * order to send a long sequence of bits, we need to continuously
- * refill the buffer until all the data is sent. To do this, we fill
- * half the buffer and then set an interrupt to go off when that half
- * is sent. Then we refill that half while the second half is being
- * sent. This strategy effectively overlaps computation (by the CPU)
- * and communication (by the RMT).
- *
- * 
- *
- *
- * OTHER RMT APPLICATIONS
- *
- * The default FastLED driver takes over control of the RMT
- * interrupts, making it hard to use the RMT device for other
- * (non-FastLED) purposes. You can change it's behavior to use the ESP
- * core driver instead, allowing other RMT applications to
- * co-exist. To switch to this mode, add the following directive
- * before you include FastLED.h:
- *
- *      #define FASTLED_RMT_BUILTIN_DRIVER
- *
- * There may be a performance penalty for using this mode. We need to
- * compute the RMT signal for the entire LED strip ahead of time,
- * rather than overlapping it with communication. We also need a large
- * buffer to hold the signal specification. Each bit of pixel data is
- * represented by a 32-bit pulse specification, so it is a 32X blow-up
- * in memory use.
- *
- *
- * Based on public domain code created 19 Nov 2016 by Chris Osborn <fozztexx@fozztexx.com>
- * http://insentricity.com *
- *
- */
-/*
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#pragma once
-
-FASTLED_NAMESPACE_BEGIN
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "esp32-hal.h"
-#include "esp_intr.h"
-#include "driver/gpio.h"
-#include "driver/rmt.h"
-#include "driver/periph_ctrl.h"
-#include "freertos/semphr.h"
-#include "soc/rmt_struct.h"
-
-#include "esp_log.h"
-
-#ifdef __cplusplus
-}
-#endif
-
-__attribute__ ((always_inline)) inline static uint32_t __clock_cycles() {
-  uint32_t cyc;
-  __asm__ __volatile__ ("rsr %0,ccount":"=a" (cyc));
-  return cyc;
-}
-
-#define FASTLED_HAS_CLOCKLESS 1
-
-// -- Configuration constants
-#define DIVIDER             2 /* 4, 8 still seem to work, but timings become marginal */
-#define MAX_PULSES         32 /* A channel has a 64 "pulse" buffer - we use half per pass */
-
-// -- Convert ESP32 cycles back into nanoseconds
-#define ESPCLKS_TO_NS(_CLKS) (((long)(_CLKS) * 1000L) / F_CPU_MHZ)
-
-// -- Convert nanoseconds into RMT cycles
-#define F_CPU_RMT       (  80000000L)
-#define NS_PER_SEC      (1000000000L)
-#define CYCLES_PER_SEC  (F_CPU_RMT/DIVIDER)
-#define NS_PER_CYCLE    ( NS_PER_SEC / CYCLES_PER_SEC )
-#define NS_TO_CYCLES(n) ( (n) / NS_PER_CYCLE )
-
-// -- Convert ESP32 cycles to RMT cycles
-#define TO_RMT_CYCLES(_CLKS) NS_TO_CYCLES(ESPCLKS_TO_NS(_CLKS))    
-
-// -- Number of cycles to signal the strip to latch
-#define RMT_RESET_DURATION NS_TO_CYCLES(50000)
-
-// -- Core or custom driver
-#ifndef FASTLED_RMT_BUILTIN_DRIVER
-#define FASTLED_RMT_BUILTIN_DRIVER false
-#endif
-
-// -- Array of all controllers
-static CLEDController * gControllers[32];
-
-// -- Number of RMT channels to use (up to 8)
-//    Redefine this value to 1 to force serial output
-#ifndef FASTLED_RMT_MAX_CHANNELS
-#define FASTLED_RMT_MAX_CHANNELS 8
-#endif
-
-// -- Current set of active controllers, indexed by the RMT
-//    channel assigned to them.
-static CLEDController * gOnChannel[FASTLED_RMT_MAX_CHANNELS];
-
-static int gNumControllers = 0;
-static int gNumStarted = 0;
-static int gNumDone = 0;
-static int gNext = 0;
-
-static intr_handle_t gRMT_intr_handle;
-
-// -- Global semaphore for the whole show process
-//    Semaphore is not given until all data has been sent
-static xSemaphoreHandle gTX_sem = NULL;
-
-static bool gInitialized = false;
-
-template <int DATA_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = RGB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 5>
-class ClocklessController : public CPixelLEDController<RGB_ORDER>
-{
-    // -- RMT has 8 channels, numbered 0 to 7
-    rmt_channel_t mRMT_channel;
-
-    // -- Store the GPIO pin
-    gpio_num_t mPin;
-
-    // -- Timing values for zero and one bits
-    rmt_item32_t mZero;
-    rmt_item32_t mOne;
-
-    // -- State information for keeping track of where we are in the pixel data
-    PixelController<RGB_ORDER> * mPixels = NULL;
-    void * mPixelSpace = NULL;
-    uint8_t mRGB_channel;
-    uint16_t mCurPulse;
-    CMinWait<WAIT_TIME> mWait;
-
-    // -- Buffer to hold all of the pulses. For the version that uses
-    //    the RMT driver built into the ESP core.
-    rmt_item32_t * mBuffer;
-    uint16_t mBufferSize;
-
-public:
-
-    virtual void init()
-    {
-        // -- Precompute rmt items corresponding to a zero bit and a one bit
-        //    according to the timing values given in the template instantiation
-        // T1H
-        mOne.level0 = 1;
-        mOne.duration0 = TO_RMT_CYCLES(T1+T2);
-        // T1L
-        mOne.level1 = 0;
-        mOne.duration1 = TO_RMT_CYCLES(T3);
-
-        // T0H
-        mZero.level0 = 1;
-        mZero.duration0 = TO_RMT_CYCLES(T1);
-        // T0L
-        mZero.level1 = 0;
-        mZero.duration1 = TO_RMT_CYCLES(T2 + T3);
-
-	gControllers[gNumControllers] = this;
-        gNumControllers++;
-
-	mPin = gpio_num_t(DATA_PIN);
-    }
-
-    virtual uint16_t getMaxRefreshRate() const { return 400; }
-
-protected:
-
-    void initRMT()
-    {
-	for (int i = 0; i < 8; i++) {
-	    gOnChannel[i] = NULL;
-
-	    // -- RMT configuration for transmission
-	    rmt_config_t rmt_tx;
-	    rmt_tx.channel = rmt_channel_t(rmt_channel_t(i));
-	    rmt_tx.rmt_mode = RMT_MODE_TX;
-	    rmt_tx.gpio_num = mPin;  // The particular pin will be assigned later
-	    rmt_tx.mem_block_num = 1;
-	    rmt_tx.clk_div = DIVIDER;
-	    rmt_tx.tx_config.loop_en = false;
-	    rmt_tx.tx_config.carrier_level = RMT_CARRIER_LEVEL_LOW;
-	    rmt_tx.tx_config.carrier_en = false;
-	    rmt_tx.tx_config.idle_level = RMT_IDLE_LEVEL_LOW;
-	    rmt_tx.tx_config.idle_output_en = true;
-		
-	    // -- Apply the configuration
-	    rmt_config(&rmt_tx);
-
-	    // -- Set up the RMT to send 1/2 of the pulse buffer and then
-	    //    generate an interrupt. When we get this interrupt we
-	    //    fill the other half in preparation (kind of like double-buffering)
-	    rmt_set_tx_thr_intr_en(rmt_channel_t(i), true, MAX_PULSES);
-	}
-
-	// -- Create a semaphore to block execution until all the controllers are done
-	if (gTX_sem == NULL) {
-	    gTX_sem = xSemaphoreCreateBinary();
-	    xSemaphoreGive(gTX_sem);
-	}
-		
-	// -- Allocate the interrupt if we have not done so yet. This
-	//    interrupt handler must work for all different kinds of
-	//    strips, so it delegates to the refill function for each
-	//    specific instantiation of ClocklessController.
-	if (gRMT_intr_handle == NULL)
-	    esp_intr_alloc(ETS_RMT_INTR_SOURCE, 0, interruptHandler, 0, &gRMT_intr_handle);
-
-	gInitialized = true;
-    }
-
-    virtual void showPixels(PixelController<RGB_ORDER> & pixels)
-    {
-	if (gNumStarted == 0) {
-	    // -- First controller: make sure everything is set up
-	    if (! gInitialized) initRMT();
-
-	    xSemaphoreTake(gTX_sem, portMAX_DELAY);
-	}
-
-	// -- Initialize the local state, save a pointer to the pixel
-	//    data. We need to make a copy because pixels is a local
-	//    variable in the calling function, and this data structure
-	//    needs to outlive this call to showPixels.
-
-	if (mPixels != NULL) 
-	    delete mPixels;
-
-	mPixels = new PixelController<RGB_ORDER>(pixels);
-	
-	// -- Keep track of the number of strips we've seen
-	gNumStarted++;
-
-	// -- The last call to showPixels is the one responsible for doing
-	//    all of the actual worl
-	if (gNumStarted == gNumControllers) {
-	    gNext = 0;
-
-	    // -- First, fill all the available channels
-	    int channel = 0;
-	    while (channel < FASTLED_RMT_MAX_CHANNELS && gNext < gNumControllers) {
-		startNext(channel);
-		channel++;
-	    }
-
-	    // -- Wait here while the rest of the data is sent. The interrupt handler
-	    //    will keep refilling the RMT buffers until it is all sent; then it
-	    //    gives the semaphore back.
-	    xSemaphoreTake(gTX_sem, portMAX_DELAY);
-	    xSemaphoreGive(gTX_sem);
-
-	    // -- Reset the counters
-	    gNumStarted = 0;
-	    gNumDone = 0;
-	    gNext = 0;
-	}
-    }
-
-    // -- Start up the next controller
-    //    This method is static so that it can dispatch to the appropriate
-    //    startOnChannel method of the given controller.
-    static void startNext(int channel)
-    {
-	if (gNext < gNumControllers) {
-	    ClocklessController * pController = static_cast<ClocklessController*>(gControllers[gNext]);
-	    pController->startOnChannel(channel);
-	    gNext++;
-	}
-    }
-
-    virtual void startOnChannel(int channel)
-    {
-	// -- Assign this channel and configure the RMT
-	mRMT_channel = rmt_channel_t(channel);
-
-	// -- Store a reference to this controller, so we can get it
-	//    inside the interrupt handler
-	gOnChannel[channel] = this;
-
-	// -- Assign the pin to this channel
-	rmt_set_pin(mRMT_channel, RMT_MODE_TX, mPin);
-
-	if (FASTLED_RMT_BUILTIN_DRIVER) {
-	    // -- Use the built-in RMT driver to send all the data in one shot
-	    rmt_register_tx_end_callback(doneOnChannel, 0);
-	    writeAllRMTItems();
-	} else {
-	    // -- Use our custom driver to send the data incrementally
-
-	    // -- Turn on the interrupts
-	    rmt_set_tx_intr_en(mRMT_channel, true);
-	
-	    // -- Initialize the counters that keep track of where we are in
-	    //    the pixel data.
-	    mCurPulse = 0;
-	    mRGB_channel = 0;
-
-	    // -- Fill both halves of the buffer
-	    fillHalfRMTBuffer();
-	    fillHalfRMTBuffer();
-
-	    // -- Turn on the interrupts
-	    rmt_set_tx_intr_en(mRMT_channel, true);
-	    
-	    // -- Start the RMT TX operation
-	    rmt_tx_start(mRMT_channel, true);
-	}
-    }
-
-    static void doneOnChannel(rmt_channel_t channel, void * arg)
-    {
-	ClocklessController * controller = static_cast<ClocklessController*>(gOnChannel[channel]);
-        portBASE_TYPE HPTaskAwoken = 0;
-
-	// -- Turn off output on the pin
-	gpio_matrix_out(controller->mPin, 0x100, 0, 0);
-
-	gOnChannel[channel] = NULL;
-	gNumDone++;
-
-	if (gNumDone == gNumControllers) {
-	    // -- If this is the last controller, signal that we are all done
-	    xSemaphoreGiveFromISR(gTX_sem, &HPTaskAwoken);
-	    if(HPTaskAwoken == pdTRUE) portYIELD_FROM_ISR();
-	} else {
-	    // -- Otherwise, if there are still controllers waiting, then
-	    //    start the next one on this channel
-	    if (gNext < gNumControllers)
-		startNext(channel);
-	}
-    }
-    
-    static IRAM_ATTR void interruptHandler(void *arg)
-    {
-        // -- The basic structure of this code is borrowed from the
-        //    interrupt handler in esp-idf/components/driver/rmt.c
-        uint32_t intr_st = RMT.int_st.val;
-        uint8_t channel;
-
-        for (channel = 0; channel < 8; channel++) {
-            int tx_done_bit = channel * 3;
-            int tx_next_bit = channel + 24;
-
-            if (gOnChannel[channel] != NULL) {
-
-		ClocklessController * controller = static_cast<ClocklessController*>(gOnChannel[channel]);
-
-		// -- More to send on this channel
-                if (intr_st & BIT(tx_next_bit)) {
-		    RMT.int_clr.val |= BIT(tx_next_bit);
-
-                    // -- Refill the half of the buffer that we just finished,
-                    //    allowing the other half to proceed.
-		    controller->fillHalfRMTBuffer();
-                }
-
-		// -- Transmission is complete on this channel
-                if (intr_st & BIT(tx_done_bit)) {
-                    RMT.int_clr.val |= BIT(tx_done_bit);
-		    doneOnChannel(rmt_channel_t(channel), 0);
-                }
-            }
-        }
-    }
-
-    virtual void fillHalfRMTBuffer()
-    {
-        // -- Fill half of the RMT pulse buffer
-
-        //    The buffer holds 64 total pulse items, so this loop converts
-        //    as many pixels as can fit in half of the buffer (MAX_PULSES =
-        //    32 items). In our case, each pixel consists of three bytes,
-        //    each bit turns into one pulse item -- 24 items per pixel. So,
-        //    each half of the buffer can hold 1 and 1/3 of a pixel.
-
-        //    The member variable mCurPulse keeps track of which of the 64
-        //    items we are writing. During the first call to this method it
-        //    fills 0-31; in the second call it fills 32-63, and then wraps
-        //    back around to zero.
-
-        //    When we run out of pixel data, just fill the remaining items
-        //    with zero pulses.
-
-        uint16_t pulse_count = 0; // Ranges from 0-31 (half a buffer)
-        uint32_t byteval = 0;
-        uint32_t one_val = mOne.val;
-        uint32_t zero_val = mZero.val;
-        bool done_strip = false;
-
-        while (pulse_count < MAX_PULSES) {
-            if (! mPixels->has(1)) {
-                done_strip = true;
-                break;
-            }
-
-            // -- Cycle through the R,G, and B values in the right order
-            switch (mRGB_channel) {
-            case 0:
-                byteval = mPixels->loadAndScale0();
-                mRGB_channel = 1;
-                break;
-            case 1:
-                byteval = mPixels->loadAndScale1();
-                mRGB_channel = 2;
-                break;
-            case 2:
-                byteval = mPixels->loadAndScale2();
-                mPixels->advanceData();
-                mPixels->stepDithering();
-                mRGB_channel = 0;
-                break;
-            default:
-                break;
-            }
-
-            byteval <<= 24;
-            // Shift bits out, MSB first, setting RMTMEM.chan[n].data32[x] to the 
-            // rmt_item32_t value corresponding to the buffered bit value
-            for (register uint32_t j = 0; j < 8; j++) {
-                uint32_t val = (byteval & 0x80000000L) ? one_val : zero_val;
-                RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = val;
-                byteval <<= 1;
-                mCurPulse++;
-                pulse_count++;
-            }
-
-	    if (done_strip)
-		RMTMEM.chan[mRMT_channel].data32[mCurPulse-1].duration1 = RMT_RESET_DURATION;
-        }
-        
-        if (done_strip) {
-            // -- And fill the remaining items with zero pulses. The zero values triggers
-            //    the tx_done interrupt.
-            while (pulse_count < MAX_PULSES) {
-                RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = 0;
-                mCurPulse++;
-                pulse_count++;
-            }
-        }
-
-        // -- When we have filled the back half the buffer, reset the position to the first half
-        if (mCurPulse >= MAX_PULSES*2)
-            mCurPulse = 0;
-    }
-
-    virtual void writeAllRMTItems()
-    {
-        // -- Compute the pulse values for the whole strip at once.
-        //    Requires a large buffer
-	mBufferSize = mPixels->size() * 3 * 8;
-
-        // TODO: need a specific number here
-        if (mBuffer == NULL) {
-            mBuffer = (rmt_item32_t *) calloc( mBufferSize, sizeof(rmt_item32_t));
-        }
-
-        mCurPulse = 0;
-        mRGB_channel = 0;
-        uint32_t byteval = 0;
-        while (mPixels->has(1)) {
-            // -- Cycle through the R,G, and B values in the right order
-            switch (mRGB_channel) {
-            case 0:
-                byteval = mPixels->loadAndScale0();
-                mRGB_channel = 1;
-                break;
-            case 1:
-                byteval = mPixels->loadAndScale1();
-                mRGB_channel = 2;
-                break;
-            case 2:
-                byteval = mPixels->loadAndScale2();
-                mPixels->advanceData();
-                mPixels->stepDithering();
-                mRGB_channel = 0;
-                break;
-            default:
-                break;
-            }
-
-            byteval <<= 24;
-            // Shift bits out, MSB first, setting RMTMEM.chan[n].data32[x] to the 
-            // rmt_item32_t value corresponding to the buffered bit value
-            for (register uint32_t j = 0; j < 8; j++) {
-                mBuffer[mCurPulse] = (byteval & 0x80000000L) ? mOne : mZero;
-                byteval <<= 1;
-                mCurPulse++;
-            }
-        }
-
-        mBuffer[mCurPulse-1].duration1 = RMT_RESET_DURATION;
-        assert(mCurPulse == mBufferSize);
-
-	rmt_write_items(mRMT_channel, mBuffer, mBufferSize, false);
-    }
-};
-
-FASTLED_NAMESPACE_END

From ccef6df9a8276f9ac713f6e20e4acece962444b5 Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Fri, 4 May 2018 22:30:14 -0400
Subject: [PATCH 028/204] Fixed built-in driver mode

Fixed the code so that it works with the built-in RMT driver. There's nothing special to do to enable it -- just #define FASTLED_RMT_BUILTIN_DRIVER true
---
 platforms/esp/32/clockless_esp32.h | 30 ++++++++++++++++++------------
 1 file changed, 18 insertions(+), 12 deletions(-)

diff --git a/platforms/esp/32/clockless_esp32.h b/platforms/esp/32/clockless_esp32.h
index 2163a2bd87..87ff8e5ff5 100644
--- a/platforms/esp/32/clockless_esp32.h
+++ b/platforms/esp/32/clockless_esp32.h
@@ -222,12 +222,12 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
 
     void initRMT()
     {
-	for (int i = 0; i < 8; i++) {
+	for (int i = 0; i < FASTLED_RMT_MAX_CHANNELS; i++) {
 	    gOnChannel[i] = NULL;
 
 	    // -- RMT configuration for transmission
 	    rmt_config_t rmt_tx;
-	    rmt_tx.channel = rmt_channel_t(rmt_channel_t(i));
+	    rmt_tx.channel = rmt_channel_t(i);
 	    rmt_tx.rmt_mode = RMT_MODE_TX;
 	    rmt_tx.gpio_num = mPin;  // The particular pin will be assigned later
 	    rmt_tx.mem_block_num = 1;
@@ -241,10 +241,14 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
 	    // -- Apply the configuration
 	    rmt_config(&rmt_tx);
 
-	    // -- Set up the RMT to send 1/2 of the pulse buffer and then
-	    //    generate an interrupt. When we get this interrupt we
-	    //    fill the other half in preparation (kind of like double-buffering)
-	    rmt_set_tx_thr_intr_en(rmt_channel_t(i), true, MAX_PULSES);
+	    if (FASTLED_RMT_BUILTIN_DRIVER) {
+		rmt_driver_install(rmt_channel_t(i), 0, 0);
+	    } else {
+		// -- Set up the RMT to send 1/2 of the pulse buffer and then
+		//    generate an interrupt. When we get this interrupt we
+		//    fill the other half in preparation (kind of like double-buffering)
+		rmt_set_tx_thr_intr_en(rmt_channel_t(i), true, MAX_PULSES);
+	    }
 	}
 
 	// -- Create a semaphore to block execution until all the controllers are done
@@ -253,12 +257,14 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
 	    xSemaphoreGive(gTX_sem);
 	}
 		
-	// -- Allocate the interrupt if we have not done so yet. This
-	//    interrupt handler must work for all different kinds of
-	//    strips, so it delegates to the refill function for each
-	//    specific instantiation of ClocklessController.
-	if (gRMT_intr_handle == NULL)
-	    esp_intr_alloc(ETS_RMT_INTR_SOURCE, 0, interruptHandler, 0, &gRMT_intr_handle);
+	if ( ! FASTLED_RMT_BUILTIN_DRIVER) {
+	    // -- Allocate the interrupt if we have not done so yet. This
+	    //    interrupt handler must work for all different kinds of
+	    //    strips, so it delegates to the refill function for each
+	    //    specific instantiation of ClocklessController.
+	    if (gRMT_intr_handle == NULL)
+		esp_intr_alloc(ETS_RMT_INTR_SOURCE, 0, interruptHandler, 0, &gRMT_intr_handle);
+	}
 
 	gInitialized = true;
     }

From a631e24ca1f73246b59868da9ad8db0ef72f87aa Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Mon, 7 May 2018 12:45:59 -0400
Subject: [PATCH 029/204] Cleanup

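Fix wording in the header documentation and clean up configuration: add a FASTLED_RMT_MAX_CONTROLLERS define (default 32) for the size of the controller array, initialize gRMT_intr_handle to NULL, make initRMT() return early if already initialized, and loop over FASTLED_RMT_MAX_CHANNELS instead of a hard-coded 8 in the interrupt handler.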
---
 platforms/esp/32/clockless_esp32.h | 58 ++++++++++++++++--------------
 1 file changed, 31 insertions(+), 27 deletions(-)

diff --git a/platforms/esp/32/clockless_esp32.h b/platforms/esp/32/clockless_esp32.h
index 87ff8e5ff5..6ab277006d 100644
--- a/platforms/esp/32/clockless_esp32.h
+++ b/platforms/esp/32/clockless_esp32.h
@@ -16,9 +16,9 @@
  *
  * The implementation strategy is borrowed from previous work and from
  * the RMT support built into the ESP32 IDF. The RMT device has 8
- * channels, which can be programmed independently with sequences of
- * high/low bits. Memory for each channel is limited, however, so in
- * order to send a long sequence of bits, we need to continuously
+ * channels, which can be programmed independently to send sequences
+ * of high/low bits. Memory for each channel is limited, however, so
+ * in order to send a long sequence of bits, we need to continuously
  * refill the buffer until all the data is sent. To do this, we fill
  * half the buffer and then set an interrupt to go off when that half
  * is sent. Then we refill that half while the second half is being
@@ -30,9 +30,9 @@
  * to channels on the fly, queuing up controllers as necessary until a
  * channel is free. The main showPixels routine just fires off the
  * first 8 controllers; the interrupt handler starts new controllers
- * asynchronously as previous ones finish. So, for example, it should
- * be able to send the data for 8 controllers at once, but 16
- * controllers would take approximately twice as much time.
+ * asynchronously as previous ones finish. So, for example, it can
+ * send the data for 8 controllers simultaneously, but 16 controllers
+ * would take approximately twice as much time.
  *
  * There is a #define that allows a program to control the total
  * number of channels that the driver is allowed to use. It defaults
@@ -43,8 +43,8 @@
  *
  * OTHER RMT APPLICATIONS
  *
- * The default FastLED driver takes over control of the RMT
- * interrupts, making it hard to use the RMT device for other
+ * The default FastLED driver takes over control of the RMT interrupt
+ * handler, making it hard to use the RMT device for other
  * (non-FastLED) purposes. You can change it's behavior to use the ESP
  * core driver instead, allowing other RMT applications to
  * co-exist. To switch to this mode, add the following directive
@@ -139,8 +139,10 @@ __attribute__ ((always_inline)) inline static uint32_t __clock_cycles() {
 #define FASTLED_RMT_BUILTIN_DRIVER false
 #endif
 
-// -- Array of all controllers
-static CLEDController * gControllers[32];
+// -- Max number of controllers we can support
+#ifndef FASTLED_RMT_MAX_CONTROLLERS
+#define FASTLED_RMT_MAX_CONTROLLERS 32
+#endif
 
 // -- Number of RMT channels to use (up to 8)
 //    Redefine this value to 1 to force serial output
@@ -148,6 +150,9 @@ static CLEDController * gControllers[32];
 #define FASTLED_RMT_MAX_CHANNELS 8
 #endif
 
+// -- Array of all controllers
+static CLEDController * gControllers[FASTLED_RMT_MAX_CONTROLLERS];
+
 // -- Current set of active controllers, indexed by the RMT
 //    channel assigned to them.
 static CLEDController * gOnChannel[FASTLED_RMT_MAX_CHANNELS];
@@ -157,7 +162,7 @@ static int gNumStarted = 0;
 static int gNumDone = 0;
 static int gNext = 0;
 
-static intr_handle_t gRMT_intr_handle;
+static intr_handle_t gRMT_intr_handle = NULL;
 
 // -- Global semaphore for the whole show process
 //    Semaphore is not given until all data has been sent
@@ -169,26 +174,25 @@ template <int DATA_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = RGB, int XTRA
 class ClocklessController : public CPixelLEDController<RGB_ORDER>
 {
     // -- RMT has 8 channels, numbered 0 to 7
-    rmt_channel_t mRMT_channel;
+    rmt_channel_t  mRMT_channel;
 
     // -- Store the GPIO pin
-    gpio_num_t mPin;
+    gpio_num_t     mPin;
 
-    // -- Timing values for zero and one bits
-    rmt_item32_t mZero;
-    rmt_item32_t mOne;
+    // -- Timing values for zero and one bits, derived from T1, T2, and T3
+    rmt_item32_t   mZero;
+    rmt_item32_t   mOne;
 
     // -- State information for keeping track of where we are in the pixel data
     PixelController<RGB_ORDER> * mPixels = NULL;
-    void * mPixelSpace = NULL;
-    uint8_t mRGB_channel;
-    uint16_t mCurPulse;
-    CMinWait<WAIT_TIME> mWait;
+    void *         mPixelSpace = NULL;
+    uint8_t        mRGB_channel;
+    uint16_t       mCurPulse;
 
     // -- Buffer to hold all of the pulses. For the version that uses
     //    the RMT driver built into the ESP core.
     rmt_item32_t * mBuffer;
-    uint16_t mBufferSize;
+    uint16_t       mBufferSize;
 
 public:
 
@@ -222,6 +226,9 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
 
     void initRMT()
     {
+	// -- Only need to do this once
+	if (gInitialized) return;
+
 	for (int i = 0; i < FASTLED_RMT_MAX_CHANNELS; i++) {
 	    gOnChannel[i] = NULL;
 
@@ -273,8 +280,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
     {
 	if (gNumStarted == 0) {
 	    // -- First controller: make sure everything is set up
-	    if (! gInitialized) initRMT();
-
+	    initRMT();
 	    xSemaphoreTake(gTX_sem, portMAX_DELAY);
 	}
 
@@ -283,9 +289,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
 	//    variable in the calling function, and this data structure
 	//    needs to outlive this call to showPixels.
 
-	if (mPixels != NULL) 
-	    delete mPixels;
-
+	if (mPixels != NULL) delete mPixels;
 	mPixels = new PixelController<RGB_ORDER>(pixels);
 	
 	// -- Keep track of the number of strips we've seen
@@ -397,7 +401,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         uint32_t intr_st = RMT.int_st.val;
         uint8_t channel;
 
-        for (channel = 0; channel < 8; channel++) {
+        for (channel = 0; channel < FASTLED_RMT_MAX_CHANNELS; channel++) {
             int tx_done_bit = channel * 3;
             int tx_next_bit = channel + 24;
 

From b2b4443038ffa030fa727d3f2e661956be9fca70 Mon Sep 17 00:00:00 2001
From: Daniel Garcia <danielgarcia@gmail.com>
Date: Fri, 22 Jun 2018 17:26:48 -0700
Subject: [PATCH 030/204] Update clockless_block_esp8266.h

---
 platforms/esp/8266/clockless_block_esp8266.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/platforms/esp/8266/clockless_block_esp8266.h b/platforms/esp/8266/clockless_block_esp8266.h
index 8ea18bb0a6..40c91612f7 100644
--- a/platforms/esp/8266/clockless_block_esp8266.h
+++ b/platforms/esp/8266/clockless_block_esp8266.h
@@ -17,7 +17,7 @@ extern uint32_t _frame_cnt;
 extern uint32_t _retry_cnt;
 #endif
 
-template <uint8_t LANES, int FIRST_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = GRB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 20>
+template <uint8_t LANES, int FIRST_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = GRB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 50>
 class InlineBlockClocklessController : public CPixelLEDController<RGB_ORDER, LANES, PORT_MASK> {
 	typedef typename FastPin<FIRST_PIN>::port_ptr_t data_ptr_t;
 	typedef typename FastPin<FIRST_PIN>::port_t data_t;

From ad04baa3abfc3cde54afdac160076af1878845e6 Mon Sep 17 00:00:00 2001
From: Daniel Garcia <danielgarcia@gmail.com>
Date: Fri, 22 Jun 2018 17:27:05 -0700
Subject: [PATCH 031/204] Update clockless_esp8266.h

---
 platforms/esp/8266/clockless_esp8266.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/platforms/esp/8266/clockless_esp8266.h b/platforms/esp/8266/clockless_esp8266.h
index 80fe60ec32..f799635ae3 100644
--- a/platforms/esp/8266/clockless_esp8266.h
+++ b/platforms/esp/8266/clockless_esp8266.h
@@ -16,7 +16,7 @@ __attribute__ ((always_inline)) inline static uint32_t __clock_cycles() {
 
 #define FASTLED_HAS_CLOCKLESS 1
 
-template <int DATA_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = RGB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 20>
+template <int DATA_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = RGB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 50>
 class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 	typedef typename FastPin<DATA_PIN>::port_ptr_t data_ptr_t;
 	typedef typename FastPin<DATA_PIN>::port_t data_t;

From 0839d30d39cc4ff8be3ca457c11d969278fb5187 Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Sun, 8 Jul 2018 19:54:27 -0400
Subject: [PATCH 032/204] Rewrite of fastpin

I've been needing to rewrite fastpin_esp32.h for the ESP32 ports and masks. This file also makes sure we don't use pins that won't work, even with clockless chips like the WS2812.
---
 platforms/esp/32/clockless_esp32.h |  11 +++-
 platforms/esp/32/fastpin_esp32.h   | 101 ++++++++++++++++-------------
 2 files changed, 63 insertions(+), 49 deletions(-)

diff --git a/platforms/esp/32/clockless_esp32.h b/platforms/esp/32/clockless_esp32.h
index 6ab277006d..95ca9c0ca6 100644
--- a/platforms/esp/32/clockless_esp32.h
+++ b/platforms/esp/32/clockless_esp32.h
@@ -179,6 +179,9 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
     // -- Store the GPIO pin
     gpio_num_t     mPin;
 
+    // -- This instantiation forces a check on the pin choice
+    FastPin<DATA_PIN> mFastPin;
+
     // -- Timing values for zero and one bits, derived from T1, T2, and T3
     rmt_item32_t   mZero;
     rmt_item32_t   mOne;
@@ -453,6 +456,11 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
 
         while (pulse_count < MAX_PULSES) {
             if (! mPixels->has(1)) {
+		if (mCurPulse > 0) {
+		    // -- Extend the last pulse to force the strip to latch. Honestly, I'm not
+                    //    sure if this is really necessary.
+		    // RMTMEM.chan[mRMT_channel].data32[mCurPulse-1].duration1 = RMT_RESET_DURATION;
+		}
                 done_strip = true;
                 break;
             }
@@ -487,9 +495,6 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
                 mCurPulse++;
                 pulse_count++;
             }
-
-	    if (done_strip)
-		RMTMEM.chan[mRMT_channel].data32[mCurPulse-1].duration1 = RMT_RESET_DURATION;
         }
         
         if (done_strip) {
diff --git a/platforms/esp/32/fastpin_esp32.h b/platforms/esp/32/fastpin_esp32.h
index 188cb800af..e6b120b9ad 100644
--- a/platforms/esp/32/fastpin_esp32.h
+++ b/platforms/esp/32/fastpin_esp32.h
@@ -2,19 +2,6 @@
 
 FASTLED_NAMESPACE_BEGIN
 
-struct FASTLED_ESP_IO {
-  volatile uint32_t _GPO;
-  volatile uint32_t _GPOS;
-  volatile uint32_t _GPOC;
-};
-
-#define _GPB0 (*(FASTLED_ESP_IO*)(GPIO_OUT_REG))
-// #define _GPB0 (*(FASTLED_ESP_IO*)(DR_REG_GPIO_BASE))
-// #define _GPB1 (*(FASTLED_ESP_IO*)(0x3ff44010))
-//THERE'S a second register for pins 32-39 (33 for outputs) but let's get one working first
-#define OUTPUT_PIN_LIMIT 31
-
-
 template<uint8_t PIN, uint32_t MASK> class _ESPPIN {
 
 public:
@@ -24,68 +11,90 @@ template<uint8_t PIN, uint32_t MASK> class _ESPPIN {
   inline static void setOutput() { pinMode(PIN, OUTPUT); }
   inline static void setInput() { pinMode(PIN, INPUT); }
 
-  inline static void hi() __attribute__ ((always_inline)) { if(PIN < OUTPUT_PIN_LIMIT) { _GPB0._GPOS = MASK; } }
-  // inline static void hi() __attribute__ ((always_inline)) { gpio_set_level((gpio_num_t)PIN, HIGH); }
+  inline static void hi() __attribute__ ((always_inline)) { 
+      if (PIN < 32) GPIO.out_w1ts = MASK;
+      else if(PIN < 34) GPIO.out1_w1ts.val = MASK;
+  }
+
+  inline static void lo() __attribute__ ((always_inline)) {
+      if (PIN < 32) GPIO.out_w1tc = MASK;
+      else if(PIN < 34) GPIO.out1_w1tc.val = MASK;
+  }
 
-  inline static void lo() __attribute__ ((always_inline)) { if (PIN < OUTPUT_PIN_LIMIT){ _GPB0._GPOC = MASK; } }
-  // inline static void lo() __attribute__ ((always_inline)) { gpio_set_level((gpio_num_t)PIN, LOW); }
-  inline static void set(register port_t val) __attribute__ ((always_inline)) { if (PIN < OUTPUT_PIN_LIMIT){ _GPB0._GPO = val; }}
-  // inline static void set(register port_t val) __attribute__ ((always_inline)) { gpio_set_level((gpio_num_t)PIN, val); }
+  inline static void set(register port_t val) __attribute__ ((always_inline)) {
+      if (val) hi();
+      else lo();
+  }
 
   inline static void strobe() __attribute__ ((always_inline)) { toggle(); toggle(); }
 
-  inline static void toggle() __attribute__ ((always_inline)) { if (PIN < OUTPUT_PIN_LIMIT){ _GPB0._GPO = MASK; } }
+  inline static void toggle() __attribute__ ((always_inline)) {  }
 
   inline static void hi(register port_ptr_t port) __attribute__ ((always_inline)) { hi(); }
   inline static void lo(register port_ptr_t port) __attribute__ ((always_inline)) { lo(); }
   inline static void fastset(register port_ptr_t port, register port_t val) __attribute__ ((always_inline)) { *port = val; }
 
-  inline static port_t hival() __attribute__ ((always_inline)) { if (PIN<OUTPUT_PIN_LIMIT) { return GPIO_OUT_REG | MASK;    }}
-  inline static port_t loval() __attribute__ ((always_inline)) { if (PIN<OUTPUT_PIN_LIMIT) { return GPIO_OUT_REG & ~MASK;   }}
-  inline static port_ptr_t port() __attribute__ ((always_inline)) { if(PIN<OUTPUT_PIN_LIMIT) { return &_GPB0._GPO;   }}
-  inline static port_ptr_t sport() __attribute__ ((always_inline)) { if (PIN<OUTPUT_PIN_LIMIT) {return &_GPB0._GPOS; }}
-  inline static port_ptr_t cport() __attribute__ ((always_inline)) { if (PIN<OUTPUT_PIN_LIMIT) {return &_GPB0._GPOC; }}
+  inline static port_t hival() __attribute__ ((always_inline)) { return MASK; }
+  inline static port_t loval() __attribute__ ((always_inline)) { return MASK; }
+  inline static port_ptr_t port() __attribute__ ((always_inline)) { return 0; }
+
+  inline static port_ptr_t sport() __attribute__ ((always_inline)) { 
+      if (PIN < 32) return &GPIO.out_w1ts;
+      else if(PIN < 34) return &GPIO.out1_w1ts.val;
+      else return 0;
+  }
+
+  inline static port_ptr_t cport() __attribute__ ((always_inline)) {
+      if (PIN < 32) return &GPIO.out_w1tc;
+      else if(PIN < 34) return &GPIO.out1_w1tc.val;
+      else return 0;
+  }
+
   inline static port_t mask() __attribute__ ((always_inline)) { return MASK; }
 
   inline static bool isset() __attribute__ ((always_inline)) { return (0x004 & MASK); }
 };
 
-#define _DEFPIN_ESP32(PIN, REAL_PIN) template<> class FastPin<PIN> : public _ESPPIN<REAL_PIN, (1<<(REAL_PIN & 0xFF))> {};
-
+#define _DEFPIN_ESP32(PIN)  template<> class FastPin<PIN> : public _ESPPIN<PIN, ((uint32_t)1 << PIN)> {};
+#define _DEFPIN_32_33_ESP32(PIN) template<> class FastPin<PIN> : public _ESPPIN<PIN, ((uint32_t)1 << (PIN-32))> {};
 
-#ifdef FASTLED_ESP32_RAW_PIN_ORDER
+_DEFPIN_ESP32(0);
+// _DEFPIN_ESP32(1); Using TX causes flashiness when uploading
+_DEFPIN_ESP32(2); 
+// _DEFPIN_ESP32(3); Using RX causes flashiness when uploading
+_DEFPIN_ESP32(4);
+_DEFPIN_ESP32(5);
 
-_DEFPIN_ESP32(0,0); _DEFPIN_ESP32(1,1); _DEFPIN_ESP32(2,2); 
-_DEFPIN_ESP32(3,3); _DEFPIN_ESP32(4,4); _DEFPIN_ESP32(5,5); 
-
-// -- These are not safe to use:
+// -- These pins are not safe to use:
 // _DEFPIN_ESP32(6,6); _DEFPIN_ESP32(7,7); _DEFPIN_ESP32(8,8); 
 // _DEFPIN_ESP32(9,9); _DEFPIN_ESP32(10,10); _DEFPIN_ESP32(11,11); 
 
-_DEFPIN_ESP32(12,12); _DEFPIN_ESP32(13,13);
-_DEFPIN_ESP32(14,14); _DEFPIN_ESP32(15,15); _DEFPIN_ESP32(16,16);
-_DEFPIN_ESP32(17,17); _DEFPIN_ESP32(18,18); _DEFPIN_ESP32(19,19);
+_DEFPIN_ESP32(12);
+_DEFPIN_ESP32(13);
+_DEFPIN_ESP32(14);
+_DEFPIN_ESP32(15);
+_DEFPIN_ESP32(16);
+_DEFPIN_ESP32(17);
+_DEFPIN_ESP32(18);
+_DEFPIN_ESP32(19);
 
 // No pin 20 : _DEFPIN_ESP32(20,20); 
 
-_DEFPIN_ESP32(21,21); _DEFPIN_ESP32(22,22); _DEFPIN_ESP32(23,23); 
+_DEFPIN_ESP32(21); // Works, but note that GPIO21 is I2C SDA
+_DEFPIN_ESP32(22); // Works, but note that GPIO22 is I2C SCL
+_DEFPIN_ESP32(23); 
 
 // No pin 24 : _DEFPIN_ESP32(24,24); 
 
-_DEFPIN_ESP32(25,25); _DEFPIN_ESP32(26,26); _DEFPIN_ESP32(27,27); 
+_DEFPIN_ESP32(25);
+_DEFPIN_ESP32(26);
+_DEFPIN_ESP32(27); 
 
 // No pin 28-31: _DEFPIN_ESP32(28,28); _DEFPIN_ESP32(29,29); _DEFPIN_ESP32(30,30); _DEFPIN_ESP32(31,31);
 
 // Need special handling for pins > 31
-// _DEFPIN_ESP32(32,32); _DEFPIN_ESP32(33,33);
-
-#define PORTA_FIRST_PIN 32
-// The rest of the pins - these are generally not available
-// _DEFPIN_ESP32(11,6);
-// _DEFPIN_ESP32(12,7); _DEFPIN_ESP32(13,8); _DEFPIN_ESP32(14,9); _DEFPIN_ESP32(15,10);
-// _DEFPIN_ESP32(16,11);
-
-#endif
+_DEFPIN_32_33_ESP32(32); 
+_DEFPIN_32_33_ESP32(33);
 
 #define HAS_HARDWARE_PIN_SUPPORT
 

From 4a3b3d4f259899e035e92021593a80f0982aa066 Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Sun, 8 Jul 2018 19:57:35 -0400
Subject: [PATCH 033/204] Got rid of tabs

Replaced the tabs in clockless_esp32.h with spaces; the mixed indentation was making the code ugly. Whitespace-only change.
---
 platforms/esp/32/clockless_esp32.h | 318 ++++++++++++++---------------
 1 file changed, 159 insertions(+), 159 deletions(-)

diff --git a/platforms/esp/32/clockless_esp32.h b/platforms/esp/32/clockless_esp32.h
index 95ca9c0ca6..126135cd14 100644
--- a/platforms/esp/32/clockless_esp32.h
+++ b/platforms/esp/32/clockless_esp32.h
@@ -217,10 +217,10 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         mZero.level1 = 0;
         mZero.duration1 = TO_RMT_CYCLES(T2 + T3);
 
-	gControllers[gNumControllers] = this;
+        gControllers[gNumControllers] = this;
         gNumControllers++;
 
-	mPin = gpio_num_t(DATA_PIN);
+        mPin = gpio_num_t(DATA_PIN);
     }
 
     virtual uint16_t getMaxRefreshRate() const { return 400; }
@@ -229,98 +229,98 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
 
     void initRMT()
     {
-	// -- Only need to do this once
-	if (gInitialized) return;
-
-	for (int i = 0; i < FASTLED_RMT_MAX_CHANNELS; i++) {
-	    gOnChannel[i] = NULL;
-
-	    // -- RMT configuration for transmission
-	    rmt_config_t rmt_tx;
-	    rmt_tx.channel = rmt_channel_t(i);
-	    rmt_tx.rmt_mode = RMT_MODE_TX;
-	    rmt_tx.gpio_num = mPin;  // The particular pin will be assigned later
-	    rmt_tx.mem_block_num = 1;
-	    rmt_tx.clk_div = DIVIDER;
-	    rmt_tx.tx_config.loop_en = false;
-	    rmt_tx.tx_config.carrier_level = RMT_CARRIER_LEVEL_LOW;
-	    rmt_tx.tx_config.carrier_en = false;
-	    rmt_tx.tx_config.idle_level = RMT_IDLE_LEVEL_LOW;
-	    rmt_tx.tx_config.idle_output_en = true;
-		
-	    // -- Apply the configuration
-	    rmt_config(&rmt_tx);
-
-	    if (FASTLED_RMT_BUILTIN_DRIVER) {
-		rmt_driver_install(rmt_channel_t(i), 0, 0);
-	    } else {
-		// -- Set up the RMT to send 1/2 of the pulse buffer and then
-		//    generate an interrupt. When we get this interrupt we
-		//    fill the other half in preparation (kind of like double-buffering)
-		rmt_set_tx_thr_intr_en(rmt_channel_t(i), true, MAX_PULSES);
-	    }
-	}
-
-	// -- Create a semaphore to block execution until all the controllers are done
-	if (gTX_sem == NULL) {
-	    gTX_sem = xSemaphoreCreateBinary();
-	    xSemaphoreGive(gTX_sem);
-	}
-		
-	if ( ! FASTLED_RMT_BUILTIN_DRIVER) {
-	    // -- Allocate the interrupt if we have not done so yet. This
-	    //    interrupt handler must work for all different kinds of
-	    //    strips, so it delegates to the refill function for each
-	    //    specific instantiation of ClocklessController.
-	    if (gRMT_intr_handle == NULL)
-		esp_intr_alloc(ETS_RMT_INTR_SOURCE, 0, interruptHandler, 0, &gRMT_intr_handle);
-	}
-
-	gInitialized = true;
+        // -- Only need to do this once
+        if (gInitialized) return;
+
+        for (int i = 0; i < FASTLED_RMT_MAX_CHANNELS; i++) {
+            gOnChannel[i] = NULL;
+
+            // -- RMT configuration for transmission
+            rmt_config_t rmt_tx;
+            rmt_tx.channel = rmt_channel_t(i);
+            rmt_tx.rmt_mode = RMT_MODE_TX;
+            rmt_tx.gpio_num = mPin;  // The particular pin will be assigned later
+            rmt_tx.mem_block_num = 1;
+            rmt_tx.clk_div = DIVIDER;
+            rmt_tx.tx_config.loop_en = false;
+            rmt_tx.tx_config.carrier_level = RMT_CARRIER_LEVEL_LOW;
+            rmt_tx.tx_config.carrier_en = false;
+            rmt_tx.tx_config.idle_level = RMT_IDLE_LEVEL_LOW;
+            rmt_tx.tx_config.idle_output_en = true;
+                
+            // -- Apply the configuration
+            rmt_config(&rmt_tx);
+
+            if (FASTLED_RMT_BUILTIN_DRIVER) {
+                rmt_driver_install(rmt_channel_t(i), 0, 0);
+            } else {
+                // -- Set up the RMT to send 1/2 of the pulse buffer and then
+                //    generate an interrupt. When we get this interrupt we
+                //    fill the other half in preparation (kind of like double-buffering)
+                rmt_set_tx_thr_intr_en(rmt_channel_t(i), true, MAX_PULSES);
+            }
+        }
+
+        // -- Create a semaphore to block execution until all the controllers are done
+        if (gTX_sem == NULL) {
+            gTX_sem = xSemaphoreCreateBinary();
+            xSemaphoreGive(gTX_sem);
+        }
+                
+        if ( ! FASTLED_RMT_BUILTIN_DRIVER) {
+            // -- Allocate the interrupt if we have not done so yet. This
+            //    interrupt handler must work for all different kinds of
+            //    strips, so it delegates to the refill function for each
+            //    specific instantiation of ClocklessController.
+            if (gRMT_intr_handle == NULL)
+                esp_intr_alloc(ETS_RMT_INTR_SOURCE, 0, interruptHandler, 0, &gRMT_intr_handle);
+        }
+
+        gInitialized = true;
     }
 
     virtual void showPixels(PixelController<RGB_ORDER> & pixels)
     {
-	if (gNumStarted == 0) {
-	    // -- First controller: make sure everything is set up
-	    initRMT();
-	    xSemaphoreTake(gTX_sem, portMAX_DELAY);
-	}
-
-	// -- Initialize the local state, save a pointer to the pixel
-	//    data. We need to make a copy because pixels is a local
-	//    variable in the calling function, and this data structure
-	//    needs to outlive this call to showPixels.
-
-	if (mPixels != NULL) delete mPixels;
-	mPixels = new PixelController<RGB_ORDER>(pixels);
-	
-	// -- Keep track of the number of strips we've seen
-	gNumStarted++;
-
-	// -- The last call to showPixels is the one responsible for doing
-	//    all of the actual worl
-	if (gNumStarted == gNumControllers) {
-	    gNext = 0;
-
-	    // -- First, fill all the available channels
-	    int channel = 0;
-	    while (channel < FASTLED_RMT_MAX_CHANNELS && gNext < gNumControllers) {
-		startNext(channel);
-		channel++;
-	    }
-
-	    // -- Wait here while the rest of the data is sent. The interrupt handler
-	    //    will keep refilling the RMT buffers until it is all sent; then it
-	    //    gives the semaphore back.
-	    xSemaphoreTake(gTX_sem, portMAX_DELAY);
-	    xSemaphoreGive(gTX_sem);
-
-	    // -- Reset the counters
-	    gNumStarted = 0;
-	    gNumDone = 0;
-	    gNext = 0;
-	}
+        if (gNumStarted == 0) {
+            // -- First controller: make sure everything is set up
+            initRMT();
+            xSemaphoreTake(gTX_sem, portMAX_DELAY);
+        }
+
+        // -- Initialize the local state, save a pointer to the pixel
+        //    data. We need to make a copy because pixels is a local
+        //    variable in the calling function, and this data structure
+        //    needs to outlive this call to showPixels.
+
+        if (mPixels != NULL) delete mPixels;
+        mPixels = new PixelController<RGB_ORDER>(pixels);
+        
+        // -- Keep track of the number of strips we've seen
+        gNumStarted++;
+
+        // -- The last call to showPixels is the one responsible for doing
+        //    all of the actual worl
+        if (gNumStarted == gNumControllers) {
+            gNext = 0;
+
+            // -- First, fill all the available channels
+            int channel = 0;
+            while (channel < FASTLED_RMT_MAX_CHANNELS && gNext < gNumControllers) {
+                startNext(channel);
+                channel++;
+            }
+
+            // -- Wait here while the rest of the data is sent. The interrupt handler
+            //    will keep refilling the RMT buffers until it is all sent; then it
+            //    gives the semaphore back.
+            xSemaphoreTake(gTX_sem, portMAX_DELAY);
+            xSemaphoreGive(gTX_sem);
+
+            // -- Reset the counters
+            gNumStarted = 0;
+            gNumDone = 0;
+            gNext = 0;
+        }
     }
 
     // -- Start up the next controller
@@ -328,73 +328,73 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
     //    startOnChannel method of the given controller.
     static void startNext(int channel)
     {
-	if (gNext < gNumControllers) {
-	    ClocklessController * pController = static_cast<ClocklessController*>(gControllers[gNext]);
-	    pController->startOnChannel(channel);
-	    gNext++;
-	}
+        if (gNext < gNumControllers) {
+            ClocklessController * pController = static_cast<ClocklessController*>(gControllers[gNext]);
+            pController->startOnChannel(channel);
+            gNext++;
+        }
     }
 
     virtual void startOnChannel(int channel)
     {
-	// -- Assign this channel and configure the RMT
-	mRMT_channel = rmt_channel_t(channel);
-
-	// -- Store a reference to this controller, so we can get it
-	//    inside the interrupt handler
-	gOnChannel[channel] = this;
-
-	// -- Assign the pin to this channel
-	rmt_set_pin(mRMT_channel, RMT_MODE_TX, mPin);
-
-	if (FASTLED_RMT_BUILTIN_DRIVER) {
-	    // -- Use the built-in RMT driver to send all the data in one shot
-	    rmt_register_tx_end_callback(doneOnChannel, 0);
-	    writeAllRMTItems();
-	} else {
-	    // -- Use our custom driver to send the data incrementally
-
-	    // -- Turn on the interrupts
-	    rmt_set_tx_intr_en(mRMT_channel, true);
-	
-	    // -- Initialize the counters that keep track of where we are in
-	    //    the pixel data.
-	    mCurPulse = 0;
-	    mRGB_channel = 0;
-
-	    // -- Fill both halves of the buffer
-	    fillHalfRMTBuffer();
-	    fillHalfRMTBuffer();
-
-	    // -- Turn on the interrupts
-	    rmt_set_tx_intr_en(mRMT_channel, true);
-	    
-	    // -- Start the RMT TX operation
-	    rmt_tx_start(mRMT_channel, true);
-	}
+        // -- Assign this channel and configure the RMT
+        mRMT_channel = rmt_channel_t(channel);
+
+        // -- Store a reference to this controller, so we can get it
+        //    inside the interrupt handler
+        gOnChannel[channel] = this;
+
+        // -- Assign the pin to this channel
+        rmt_set_pin(mRMT_channel, RMT_MODE_TX, mPin);
+
+        if (FASTLED_RMT_BUILTIN_DRIVER) {
+            // -- Use the built-in RMT driver to send all the data in one shot
+            rmt_register_tx_end_callback(doneOnChannel, 0);
+            writeAllRMTItems();
+        } else {
+            // -- Use our custom driver to send the data incrementally
+
+            // -- Turn on the interrupts
+            rmt_set_tx_intr_en(mRMT_channel, true);
+        
+            // -- Initialize the counters that keep track of where we are in
+            //    the pixel data.
+            mCurPulse = 0;
+            mRGB_channel = 0;
+
+            // -- Fill both halves of the buffer
+            fillHalfRMTBuffer();
+            fillHalfRMTBuffer();
+
+            // -- Turn on the interrupts
+            rmt_set_tx_intr_en(mRMT_channel, true);
+            
+            // -- Start the RMT TX operation
+            rmt_tx_start(mRMT_channel, true);
+        }
     }
 
     static void doneOnChannel(rmt_channel_t channel, void * arg)
     {
-	ClocklessController * controller = static_cast<ClocklessController*>(gOnChannel[channel]);
+        ClocklessController * controller = static_cast<ClocklessController*>(gOnChannel[channel]);
         portBASE_TYPE HPTaskAwoken = 0;
 
-	// -- Turn off output on the pin
-	gpio_matrix_out(controller->mPin, 0x100, 0, 0);
-
-	gOnChannel[channel] = NULL;
-	gNumDone++;
-
-	if (gNumDone == gNumControllers) {
-	    // -- If this is the last controller, signal that we are all done
-	    xSemaphoreGiveFromISR(gTX_sem, &HPTaskAwoken);
-	    if(HPTaskAwoken == pdTRUE) portYIELD_FROM_ISR();
-	} else {
-	    // -- Otherwise, if there are still controllers waiting, then
-	    //    start the next one on this channel
-	    if (gNext < gNumControllers)
-		startNext(channel);
-	}
+        // -- Turn off output on the pin
+        gpio_matrix_out(controller->mPin, 0x100, 0, 0);
+
+        gOnChannel[channel] = NULL;
+        gNumDone++;
+
+        if (gNumDone == gNumControllers) {
+            // -- If this is the last controller, signal that we are all done
+            xSemaphoreGiveFromISR(gTX_sem, &HPTaskAwoken);
+            if(HPTaskAwoken == pdTRUE) portYIELD_FROM_ISR();
+        } else {
+            // -- Otherwise, if there are still controllers waiting, then
+            //    start the next one on this channel
+            if (gNext < gNumControllers)
+                startNext(channel);
+        }
     }
     
     static IRAM_ATTR void interruptHandler(void *arg)
@@ -410,21 +410,21 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
 
             if (gOnChannel[channel] != NULL) {
 
-		ClocklessController * controller = static_cast<ClocklessController*>(gOnChannel[channel]);
+                ClocklessController * controller = static_cast<ClocklessController*>(gOnChannel[channel]);
 
-		// -- More to send on this channel
+                // -- More to send on this channel
                 if (intr_st & BIT(tx_next_bit)) {
-		    RMT.int_clr.val |= BIT(tx_next_bit);
+                    RMT.int_clr.val |= BIT(tx_next_bit);
 
                     // -- Refill the half of the buffer that we just finished,
                     //    allowing the other half to proceed.
-		    controller->fillHalfRMTBuffer();
+                    controller->fillHalfRMTBuffer();
                 }
 
-		// -- Transmission is complete on this channel
+                // -- Transmission is complete on this channel
                 if (intr_st & BIT(tx_done_bit)) {
                     RMT.int_clr.val |= BIT(tx_done_bit);
-		    doneOnChannel(rmt_channel_t(channel), 0);
+                    doneOnChannel(rmt_channel_t(channel), 0);
                 }
             }
         }
@@ -456,11 +456,11 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
 
         while (pulse_count < MAX_PULSES) {
             if (! mPixels->has(1)) {
-		if (mCurPulse > 0) {
-		    // -- Extend the last pulse to force the strip to latch. Honestly, I'm not
+                if (mCurPulse > 0) {
+                    // -- Extend the last pulse to force the strip to latch. Honestly, I'm not
                     //    sure if this is really necessary.
-		    // RMTMEM.chan[mRMT_channel].data32[mCurPulse-1].duration1 = RMT_RESET_DURATION;
-		}
+                    // RMTMEM.chan[mRMT_channel].data32[mCurPulse-1].duration1 = RMT_RESET_DURATION;
+                }
                 done_strip = true;
                 break;
             }
@@ -516,7 +516,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
     {
         // -- Compute the pulse values for the whole strip at once.
         //    Requires a large buffer
-	mBufferSize = mPixels->size() * 3 * 8;
+        mBufferSize = mPixels->size() * 3 * 8;
 
         // TODO: need a specific number here
         if (mBuffer == NULL) {
@@ -560,7 +560,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         mBuffer[mCurPulse-1].duration1 = RMT_RESET_DURATION;
         assert(mCurPulse == mBufferSize);
 
-	rmt_write_items(mRMT_channel, mBuffer, mBufferSize, false);
+        rmt_write_items(mRMT_channel, mBuffer, mBufferSize, false);
     }
 };
 

From 48654876c13a6e33b5908630686392cba3d57ec6 Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Tue, 10 Jul 2018 10:56:10 -0400
Subject: [PATCH 034/204] Minor tweaks

Added proper definitions for port() and toggle() to use the GPIO.out register. Changed the pin number test to avoid unnecessary conditions.
---
 platforms/esp/32/fastpin_esp32.h | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/platforms/esp/32/fastpin_esp32.h b/platforms/esp/32/fastpin_esp32.h
index e6b120b9ad..595314de73 100644
--- a/platforms/esp/32/fastpin_esp32.h
+++ b/platforms/esp/32/fastpin_esp32.h
@@ -13,12 +13,12 @@ template<uint8_t PIN, uint32_t MASK> class _ESPPIN {
 
   inline static void hi() __attribute__ ((always_inline)) { 
       if (PIN < 32) GPIO.out_w1ts = MASK;
-      else if(PIN < 34) GPIO.out1_w1ts.val = MASK;
+      else GPIO.out1_w1ts.val = MASK;
   }
 
   inline static void lo() __attribute__ ((always_inline)) {
       if (PIN < 32) GPIO.out_w1tc = MASK;
-      else if(PIN < 34) GPIO.out1_w1tc.val = MASK;
+      else GPIO.out1_w1tc.val = MASK;
   }
 
   inline static void set(register port_t val) __attribute__ ((always_inline)) {
@@ -28,26 +28,28 @@ template<uint8_t PIN, uint32_t MASK> class _ESPPIN {
 
   inline static void strobe() __attribute__ ((always_inline)) { toggle(); toggle(); }
 
-  inline static void toggle() __attribute__ ((always_inline)) {  }
+  inline static void toggle() __attribute__ ((always_inline)) { if(PIN < 32) { GPIO.out ^= MASK; } else { GPIO.out1.val ^=MASK; } }
 
   inline static void hi(register port_ptr_t port) __attribute__ ((always_inline)) { hi(); }
   inline static void lo(register port_ptr_t port) __attribute__ ((always_inline)) { lo(); }
   inline static void fastset(register port_ptr_t port, register port_t val) __attribute__ ((always_inline)) { *port = val; }
 
   inline static port_t hival() __attribute__ ((always_inline)) { return MASK; }
-  inline static port_t loval() __attribute__ ((always_inline)) { return MASK; }
-  inline static port_ptr_t port() __attribute__ ((always_inline)) { return 0; }
+  inline static port_t loval() __attribute__ ((always_inline)) { return ~MASK; }
+
+  inline static port_ptr_t port() __attribute__ ((always_inline)) {
+      if (PIN < 32) return &GPIO.out;
+      else return &GPIO.out1.val;
+  }
 
   inline static port_ptr_t sport() __attribute__ ((always_inline)) { 
       if (PIN < 32) return &GPIO.out_w1ts;
-      else if(PIN < 34) return &GPIO.out1_w1ts.val;
-      else return 0;
+      else return &GPIO.out1_w1ts.val;
   }
 
   inline static port_ptr_t cport() __attribute__ ((always_inline)) {
       if (PIN < 32) return &GPIO.out_w1tc;
-      else if(PIN < 34) return &GPIO.out1_w1tc.val;
-      else return 0;
+      else return &GPIO.out1_w1tc.val;
   }
 
   inline static port_t mask() __attribute__ ((always_inline)) { return MASK; }

From 08e5f1b0740f51b60f48c98f00d1c2a2aa8a3e72 Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Wed, 11 Jul 2018 15:41:27 -0400
Subject: [PATCH 035/204] Allow TX and RX pins

---
 platforms/esp/32/fastpin_esp32.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/platforms/esp/32/fastpin_esp32.h b/platforms/esp/32/fastpin_esp32.h
index 595314de73..688e4e4420 100644
--- a/platforms/esp/32/fastpin_esp32.h
+++ b/platforms/esp/32/fastpin_esp32.h
@@ -61,9 +61,9 @@ template<uint8_t PIN, uint32_t MASK> class _ESPPIN {
 #define _DEFPIN_32_33_ESP32(PIN) template<> class FastPin<PIN> : public _ESPPIN<PIN, ((uint32_t)1 << (PIN-32))> {};
 
 _DEFPIN_ESP32(0);
-// _DEFPIN_ESP32(1); Using TX causes flashiness when uploading
+_DEFPIN_ESP32(1); // WARNING: Using TX causes flashiness when uploading
 _DEFPIN_ESP32(2); 
-// _DEFPIN_ESP32(3); Using RX causes flashiness when uploading
+_DEFPIN_ESP32(3); // WARNING: Using RX causes flashiness when uploading
 _DEFPIN_ESP32(4);
 _DEFPIN_ESP32(5);
 

From 7cb840de8f1819b0ab82ab79f8c83bd82302cb83 Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Wed, 11 Jul 2018 21:11:21 -0400
Subject: [PATCH 036/204] Fixed pin access methods

This should be the right set of definitions -- consistent with the other platforms.
---
 platforms/esp/32/fastpin_esp32.h | 25 +++++++++++++++++++------
 1 file changed, 19 insertions(+), 6 deletions(-)

diff --git a/platforms/esp/32/fastpin_esp32.h b/platforms/esp/32/fastpin_esp32.h
index 688e4e4420..fd03d5c813 100644
--- a/platforms/esp/32/fastpin_esp32.h
+++ b/platforms/esp/32/fastpin_esp32.h
@@ -22,20 +22,30 @@ template<uint8_t PIN, uint32_t MASK> class _ESPPIN {
   }
 
   inline static void set(register port_t val) __attribute__ ((always_inline)) {
-      if (val) hi();
-      else lo();
+      if (PIN < 32) GPIO.out = val;
+      else GPIO.out1.val = val;
   }
 
   inline static void strobe() __attribute__ ((always_inline)) { toggle(); toggle(); }
 
-  inline static void toggle() __attribute__ ((always_inline)) { if(PIN < 32) { GPIO.out ^= MASK; } else { GPIO.out1.val ^=MASK; } }
+  inline static void toggle() __attribute__ ((always_inline)) { 
+      if(PIN < 32) { GPIO.out ^= MASK; } 
+      else { GPIO.out1.val ^=MASK; } 
+  }
 
   inline static void hi(register port_ptr_t port) __attribute__ ((always_inline)) { hi(); }
   inline static void lo(register port_ptr_t port) __attribute__ ((always_inline)) { lo(); }
   inline static void fastset(register port_ptr_t port, register port_t val) __attribute__ ((always_inline)) { *port = val; }
 
-  inline static port_t hival() __attribute__ ((always_inline)) { return MASK; }
-  inline static port_t loval() __attribute__ ((always_inline)) { return ~MASK; }
+  inline static port_t hival() __attribute__ ((always_inline)) {
+      if (PIN < 32) return GPIO.out | MASK;
+      else return GPIO.out1.val | MASK;
+  }
+
+  inline static port_t loval() __attribute__ ((always_inline)) {
+      if (PIN < 32) return GPIO.out & ~MASK;
+      else return GPIO.out1.val & ~MASK;
+  }
 
   inline static port_ptr_t port() __attribute__ ((always_inline)) {
       if (PIN < 32) return &GPIO.out;
@@ -54,7 +64,10 @@ template<uint8_t PIN, uint32_t MASK> class _ESPPIN {
 
   inline static port_t mask() __attribute__ ((always_inline)) { return MASK; }
 
-  inline static bool isset() __attribute__ ((always_inline)) { return (0x004 & MASK); }
+  inline static bool isset() __attribute__ ((always_inline)) {
+      if (PIN < 32) return GPIO.out & MASK;
+      else return GPIO.out1.val & MASK;
+  }
 };
 
 #define _DEFPIN_ESP32(PIN)  template<> class FastPin<PIN> : public _ESPPIN<PIN, ((uint32_t)1 << PIN)> {};

From c09c4bc796918e2600da9fe3f568e199c4028699 Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Wed, 18 Jul 2018 22:45:27 -0400
Subject: [PATCH 037/204] Experimental

Do not merge this code
---
 platforms/esp/32/clockless_esp32.h | 71 +++++++++++++++++++++++++++---
 1 file changed, 65 insertions(+), 6 deletions(-)

diff --git a/platforms/esp/32/clockless_esp32.h b/platforms/esp/32/clockless_esp32.h
index 126135cd14..be6eccfe02 100644
--- a/platforms/esp/32/clockless_esp32.h
+++ b/platforms/esp/32/clockless_esp32.h
@@ -187,11 +187,14 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
     rmt_item32_t   mOne;
 
     // -- State information for keeping track of where we are in the pixel data
-    PixelController<RGB_ORDER> * mPixels = NULL;
-    void *         mPixelSpace = NULL;
+    //PixelController<RGB_ORDER> * mPixels = NULL;
     uint8_t        mRGB_channel;
     uint16_t       mCurPulse;
 
+    uint8_t *      mPixelData = NULL;
+    int            mSize = 0;
+    int            mCur;
+
     // -- Buffer to hold all of the pulses. For the version that uses
     //    the RMT driver built into the ESP core.
     rmt_item32_t * mBuffer;
@@ -279,6 +282,24 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         gInitialized = true;
     }
 
+    void copyPixels(PixelController<RGB_ORDER> & pixels)
+    {
+        if (mPixelData == NULL) {
+            mSize = pixels.size() * 3;
+            mPixelData = (uint8_t *) malloc( mSize);
+        }
+
+        int cur = 0;
+        while (pixels.has(1)) {
+            // -- Cycle through the R,G, and B values in the right order
+            mPixelData[cur++] = pixels.loadAndScale0();
+            mPixelData[cur++] = pixels.loadAndScale1();
+            mPixelData[cur++] = pixels.loadAndScale2();
+            pixels.advanceData();
+            pixels.stepDithering();
+        }
+    }
+
     virtual void showPixels(PixelController<RGB_ORDER> & pixels)
     {
         if (gNumStarted == 0) {
@@ -292,9 +313,10 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         //    variable in the calling function, and this data structure
         //    needs to outlive this call to showPixels.
 
-        if (mPixels != NULL) delete mPixels;
-        mPixels = new PixelController<RGB_ORDER>(pixels);
-        
+        //if (mPixels != NULL) delete mPixels;
+        //mPixels = new PixelController<RGB_ORDER>(pixels);
+        copyPixels(pixels);
+
         // -- Keep track of the number of strips we've seen
         gNumStarted++;
 
@@ -350,7 +372,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         if (FASTLED_RMT_BUILTIN_DRIVER) {
             // -- Use the built-in RMT driver to send all the data in one shot
             rmt_register_tx_end_callback(doneOnChannel, 0);
-            writeAllRMTItems();
+            // writeAllRMTItems();
         } else {
             // -- Use our custom driver to send the data incrementally
 
@@ -361,6 +383,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
             //    the pixel data.
             mCurPulse = 0;
             mRGB_channel = 0;
+            mCur = 0;
 
             // -- Fill both halves of the buffer
             fillHalfRMTBuffer();
@@ -430,6 +453,41 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         }
     }
 
+    virtual void fillHalfRMTBuffer()
+    {
+        uint32_t one_val = mOne.val;
+        uint32_t zero_val = mZero.val;
+
+        int pulses = 0;
+        uint32_t byteval;
+        while (pulses < 32 && mCur < mSize) {
+            byteval = mPixelData[mCur++];
+            byteval <<= 24;
+            // Shift bits out, MSB first, setting RMTMEM.chan[n].data32[x] to the 
+            // rmt_item32_t value corresponding to the buffered bit value
+            for (register uint32_t j = 0; j < 8; j++) {
+                uint32_t val = (byteval & 0x80000000L) ? one_val : zero_val;
+                RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = val;
+                byteval <<= 1;
+                mCurPulse++;
+                pulses++;
+            }
+        }
+
+        if (mCur == mSize) {
+            while (pulses < 32) {
+                RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = 0;
+                mCurPulse++;
+                pulses++;
+            }
+        }
+        
+        // -- When we have filled the back half the buffer, reset the position to the first half
+        if (mCurPulse >= MAX_PULSES*2)
+            mCurPulse = 0;
+    }
+
+    /*
     virtual void fillHalfRMTBuffer()
     {
         // -- Fill half of the RMT pulse buffer
@@ -562,6 +620,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
 
         rmt_write_items(mRMT_channel, mBuffer, mBufferSize, false);
     }
+    */
 };
 
 FASTLED_NAMESPACE_END

From 2c7d6a5fb96c1acc6ac991df6768373e957c5895 Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Sun, 22 Jul 2018 22:06:22 -0400
Subject: [PATCH 038/204] Change pixel buffering

The previous version of this code saved a copy of the PixelController every time show() is called. It appears that this causes massive memory fragmentation, eventually locking up the processor. This new version saves the pixel data in a separate buffer that is allocated only one time.
---
 platforms/esp/32/clockless_esp32.h | 251 ++++++++++-------------------
 1 file changed, 83 insertions(+), 168 deletions(-)

diff --git a/platforms/esp/32/clockless_esp32.h b/platforms/esp/32/clockless_esp32.h
index be6eccfe02..4d97cb9717 100644
--- a/platforms/esp/32/clockless_esp32.h
+++ b/platforms/esp/32/clockless_esp32.h
@@ -1,9 +1,8 @@
 /*
- * Integration into FastLED ClocklessController 2017 Thomas Basler
- *
- * Modifications Copyright (c) 2017 Martin F. Falatic
- *
- * Modifications Copyright (c) 2018 Samuel Z. Guyer
+ * Integration into FastLED ClocklessController
+ * Copyright (c) 2018 Samuel Z. Guyer
+ * Copyright (c) 2017 Thomas Basler
+ * Copyright (c) 2017 Martin F. Falatic
  *
  * ESP32 support is provided using the RMT peripheral device -- a unit
  * on the chip designed specifically for generating (and receiving)
@@ -187,13 +186,10 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
     rmt_item32_t   mOne;
 
     // -- State information for keeping track of where we are in the pixel data
-    //PixelController<RGB_ORDER> * mPixels = NULL;
-    uint8_t        mRGB_channel;
-    uint16_t       mCurPulse;
-
     uint8_t *      mPixelData = NULL;
     int            mSize = 0;
-    int            mCur;
+    int            mCurByte;
+    uint16_t       mCurPulse;
 
     // -- Buffer to hold all of the pulses. For the version that uses
     //    the RMT driver built into the ESP core.
@@ -282,16 +278,21 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         gInitialized = true;
     }
 
-    void copyPixels(PixelController<RGB_ORDER> & pixels)
+    virtual void copyPixelData(PixelController<RGB_ORDER> & pixels)
     {
-        if (mPixelData == NULL) {
-            mSize = pixels.size() * 3;
+	// -- Make sure we have a buffer of the right size
+	//    (3 bytes per pixel)
+	int size_needed = pixels.size() * 3;
+	if (size_needed > mSize) {
+	    if (mPixelData != NULL) free(mPixelData);
+	    mSize = size_needed;
             mPixelData = (uint8_t *) malloc( mSize);
         }
 
+	// -- Cycle through the R,G, and B values in the right order,
+	//    storing the resulting raw pixel data in the buffer.
         int cur = 0;
         while (pixels.has(1)) {
-            // -- Cycle through the R,G, and B values in the right order
             mPixelData[cur++] = pixels.loadAndScale0();
             mPixelData[cur++] = pixels.loadAndScale1();
             mPixelData[cur++] = pixels.loadAndScale2();
@@ -300,6 +301,47 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         }
     }
 
+    void convertByte(uint32_t byteval)
+    {
+	// -- Write one byte's worth of RMT pulses to the big buffer
+	byteval <<= 24;
+	for (register uint32_t j = 0; j < 8; j++) {
+	    mBuffer[mCurPulse] = (byteval & 0x80000000L) ? mOne : mZero;
+	    byteval <<= 1;
+	    mCurPulse++;
+	}
+    }
+
+    virtual void convertAllPixelData(PixelController<RGB_ORDER> & pixels)
+    {
+        // -- Compute the pulse values for the whole strip at once.
+        //    Requires a large buffer
+        mBufferSize = pixels.size() * 3 * 8;
+
+        if (mBuffer == NULL) {
+            mBuffer = (rmt_item32_t *) calloc( mBufferSize, sizeof(rmt_item32_t));
+        }
+
+	// -- Cycle through the R,G, and B values in the right order,
+	//    storing the pulses in the big buffer
+	mCurPulse = 0;
+        int cur = 0;
+	uint32_t byteval;
+        while (pixels.has(1)) {
+            byteval = pixels.loadAndScale0();
+	    convertByte(byteval);
+            byteval = pixels.loadAndScale1();
+	    convertByte(byteval);
+            byteval = pixels.loadAndScale2();
+	    convertByte(byteval);
+            pixels.advanceData();
+            pixels.stepDithering();
+        }
+
+        mBuffer[mCurPulse-1].duration1 = RMT_RESET_DURATION;
+        assert(mCurPulse == mBufferSize);
+    }
+
     virtual void showPixels(PixelController<RGB_ORDER> & pixels)
     {
         if (gNumStarted == 0) {
@@ -315,7 +357,10 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
 
         //if (mPixels != NULL) delete mPixels;
         //mPixels = new PixelController<RGB_ORDER>(pixels);
-        copyPixels(pixels);
+        if (FASTLED_RMT_BUILTIN_DRIVER)
+	    convertAllPixelData(pixels);
+	else
+	    copyPixelData(pixels);
 
         // -- Keep track of the number of strips we've seen
         gNumStarted++;
@@ -357,7 +402,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         }
     }
 
-    virtual void startOnChannel(int channel)
+    void startOnChannel(int channel)
     {
         // -- Assign this channel and configure the RMT
         mRMT_channel = rmt_channel_t(channel);
@@ -372,7 +417,8 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         if (FASTLED_RMT_BUILTIN_DRIVER) {
             // -- Use the built-in RMT driver to send all the data in one shot
             rmt_register_tx_end_callback(doneOnChannel, 0);
-            // writeAllRMTItems();
+	    rmt_write_items(mRMT_channel, mBuffer, mBufferSize, false);
+            //writeAllRMTItems();
         } else {
             // -- Use our custom driver to send the data incrementally
 
@@ -382,8 +428,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
             // -- Initialize the counters that keep track of where we are in
             //    the pixel data.
             mCurPulse = 0;
-            mRGB_channel = 0;
-            mCur = 0;
+            mCurByte = 0;
 
             // -- Fill both halves of the buffer
             fillHalfRMTBuffer();
@@ -453,28 +498,33 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         }
     }
 
-    virtual void fillHalfRMTBuffer()
+    void fillHalfRMTBuffer()
     {
         uint32_t one_val = mOne.val;
         uint32_t zero_val = mZero.val;
 
+	// -- Convert (up to) 32 bits of the raw pixel data into
+	//    into RMT pulses that encode the zeros and ones.
         int pulses = 0;
         uint32_t byteval;
-        while (pulses < 32 && mCur < mSize) {
-            byteval = mPixelData[mCur++];
-            byteval <<= 24;
-            // Shift bits out, MSB first, setting RMTMEM.chan[n].data32[x] to the 
-            // rmt_item32_t value corresponding to the buffered bit value
-            for (register uint32_t j = 0; j < 8; j++) {
-                uint32_t val = (byteval & 0x80000000L) ? one_val : zero_val;
-                RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = val;
-                byteval <<= 1;
-                mCurPulse++;
-                pulses++;
-            }
+        while (pulses < 32 && mCurByte < mSize) {
+	    // -- Get one byte
+            byteval = mPixelData[mCurByte++];
+	    byteval <<= 24;
+	    // Shift bits out, MSB first, setting RMTMEM.chan[n].data32[x] to the 
+	    // rmt_item32_t value corresponding to the buffered bit value
+	    for (register uint32_t j = 0; j < 8; j++) {
+		uint32_t val = (byteval & 0x80000000L) ? one_val : zero_val;
+		RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = val;
+		byteval <<= 1;
+		mCurPulse++;
+	    }
+	    pulses += 8;
         }
 
-        if (mCur == mSize) {
+	// -- When we reach the end of the pixel data, fill the rest of the
+	//    RMT buffer with 0's, which signals to the device that we're done.
+        if (mCurByte == mSize) {
             while (pulses < 32) {
                 RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = 0;
                 mCurPulse++;
@@ -486,141 +536,6 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         if (mCurPulse >= MAX_PULSES*2)
             mCurPulse = 0;
     }
-
-    /*
-    virtual void fillHalfRMTBuffer()
-    {
-        // -- Fill half of the RMT pulse buffer
-
-        //    The buffer holds 64 total pulse items, so this loop converts
-        //    as many pixels as can fit in half of the buffer (MAX_PULSES =
-        //    32 items). In our case, each pixel consists of three bytes,
-        //    each bit turns into one pulse item -- 24 items per pixel. So,
-        //    each half of the buffer can hold 1 and 1/3 of a pixel.
-
-        //    The member variable mCurPulse keeps track of which of the 64
-        //    items we are writing. During the first call to this method it
-        //    fills 0-31; in the second call it fills 32-63, and then wraps
-        //    back around to zero.
-
-        //    When we run out of pixel data, just fill the remaining items
-        //    with zero pulses.
-
-        uint16_t pulse_count = 0; // Ranges from 0-31 (half a buffer)
-        uint32_t byteval = 0;
-        uint32_t one_val = mOne.val;
-        uint32_t zero_val = mZero.val;
-        bool done_strip = false;
-
-        while (pulse_count < MAX_PULSES) {
-            if (! mPixels->has(1)) {
-                if (mCurPulse > 0) {
-                    // -- Extend the last pulse to force the strip to latch. Honestly, I'm not
-                    //    sure if this is really necessary.
-                    // RMTMEM.chan[mRMT_channel].data32[mCurPulse-1].duration1 = RMT_RESET_DURATION;
-                }
-                done_strip = true;
-                break;
-            }
-
-            // -- Cycle through the R,G, and B values in the right order
-            switch (mRGB_channel) {
-            case 0:
-                byteval = mPixels->loadAndScale0();
-                mRGB_channel = 1;
-                break;
-            case 1:
-                byteval = mPixels->loadAndScale1();
-                mRGB_channel = 2;
-                break;
-            case 2:
-                byteval = mPixels->loadAndScale2();
-                mPixels->advanceData();
-                mPixels->stepDithering();
-                mRGB_channel = 0;
-                break;
-            default:
-                break;
-            }
-
-            byteval <<= 24;
-            // Shift bits out, MSB first, setting RMTMEM.chan[n].data32[x] to the 
-            // rmt_item32_t value corresponding to the buffered bit value
-            for (register uint32_t j = 0; j < 8; j++) {
-                uint32_t val = (byteval & 0x80000000L) ? one_val : zero_val;
-                RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = val;
-                byteval <<= 1;
-                mCurPulse++;
-                pulse_count++;
-            }
-        }
-        
-        if (done_strip) {
-            // -- And fill the remaining items with zero pulses. The zero values triggers
-            //    the tx_done interrupt.
-            while (pulse_count < MAX_PULSES) {
-                RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = 0;
-                mCurPulse++;
-                pulse_count++;
-            }
-        }
-
-        // -- When we have filled the back half the buffer, reset the position to the first half
-        if (mCurPulse >= MAX_PULSES*2)
-            mCurPulse = 0;
-    }
-
-    virtual void writeAllRMTItems()
-    {
-        // -- Compute the pulse values for the whole strip at once.
-        //    Requires a large buffer
-        mBufferSize = mPixels->size() * 3 * 8;
-
-        // TODO: need a specific number here
-        if (mBuffer == NULL) {
-            mBuffer = (rmt_item32_t *) calloc( mBufferSize, sizeof(rmt_item32_t));
-        }
-
-        mCurPulse = 0;
-        mRGB_channel = 0;
-        uint32_t byteval = 0;
-        while (mPixels->has(1)) {
-            // -- Cycle through the R,G, and B values in the right order
-            switch (mRGB_channel) {
-            case 0:
-                byteval = mPixels->loadAndScale0();
-                mRGB_channel = 1;
-                break;
-            case 1:
-                byteval = mPixels->loadAndScale1();
-                mRGB_channel = 2;
-                break;
-            case 2:
-                byteval = mPixels->loadAndScale2();
-                mPixels->advanceData();
-                mPixels->stepDithering();
-                mRGB_channel = 0;
-                break;
-            default:
-                break;
-            }
-
-            byteval <<= 24;
-            // Shift bits out, MSB first, setting RMTMEM.chan[n].data32[x] to the 
-            // rmt_item32_t value corresponding to the buffered bit value
-            for (register uint32_t j = 0; j < 8; j++) {
-                mBuffer[mCurPulse] = (byteval & 0x80000000L) ? mOne : mZero;
-                byteval <<= 1;
-                mCurPulse++;
-            }
-        }
-
-        mBuffer[mCurPulse-1].duration1 = RMT_RESET_DURATION;
-        assert(mCurPulse == mBufferSize);
-
-        rmt_write_items(mRMT_channel, mBuffer, mBufferSize, false);
-    }
-    */
 };
 
 FASTLED_NAMESPACE_END

From 801ed803e5b513546f059013b12710d0be9bc4c7 Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Mon, 30 Jul 2018 10:17:25 -0400
Subject: [PATCH 039/204] Some rearranging of the code

Nothing major here. Added comments and put the functions in a better order. Added some defensive programming.
---
 platforms/esp/32/clockless_esp32.h | 218 ++++++++++++++++-------------
 1 file changed, 122 insertions(+), 96 deletions(-)

diff --git a/platforms/esp/32/clockless_esp32.h b/platforms/esp/32/clockless_esp32.h
index 4d97cb9717..248325ad3b 100644
--- a/platforms/esp/32/clockless_esp32.h
+++ b/platforms/esp/32/clockless_esp32.h
@@ -198,7 +198,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
 
 public:
 
-    virtual void init()
+    void init()
     {
         // -- Precompute rmt items corresponding to a zero bit and a one bit
         //    according to the timing values given in the template instantiation
@@ -278,70 +278,8 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         gInitialized = true;
     }
 
-    virtual void copyPixelData(PixelController<RGB_ORDER> & pixels)
-    {
-	// -- Make sure we have a buffer of the right size
-	//    (3 bytes per pixel)
-	int size_needed = pixels.size() * 3;
-	if (size_needed > mSize) {
-	    if (mPixelData != NULL) free(mPixelData);
-	    mSize = size_needed;
-            mPixelData = (uint8_t *) malloc( mSize);
-        }
-
-	// -- Cycle through the R,G, and B values in the right order,
-	//    storing the resulting raw pixel data in the buffer.
-        int cur = 0;
-        while (pixels.has(1)) {
-            mPixelData[cur++] = pixels.loadAndScale0();
-            mPixelData[cur++] = pixels.loadAndScale1();
-            mPixelData[cur++] = pixels.loadAndScale2();
-            pixels.advanceData();
-            pixels.stepDithering();
-        }
-    }
-
-    void convertByte(uint32_t byteval)
-    {
-	// -- Write one byte's worth of RMT pulses to the big buffer
-	byteval <<= 24;
-	for (register uint32_t j = 0; j < 8; j++) {
-	    mBuffer[mCurPulse] = (byteval & 0x80000000L) ? mOne : mZero;
-	    byteval <<= 1;
-	    mCurPulse++;
-	}
-    }
-
-    virtual void convertAllPixelData(PixelController<RGB_ORDER> & pixels)
-    {
-        // -- Compute the pulse values for the whole strip at once.
-        //    Requires a large buffer
-        mBufferSize = pixels.size() * 3 * 8;
-
-        if (mBuffer == NULL) {
-            mBuffer = (rmt_item32_t *) calloc( mBufferSize, sizeof(rmt_item32_t));
-        }
-
-	// -- Cycle through the R,G, and B values in the right order,
-	//    storing the pulses in the big buffer
-	mCurPulse = 0;
-        int cur = 0;
-	uint32_t byteval;
-        while (pixels.has(1)) {
-            byteval = pixels.loadAndScale0();
-	    convertByte(byteval);
-            byteval = pixels.loadAndScale1();
-	    convertByte(byteval);
-            byteval = pixels.loadAndScale2();
-	    convertByte(byteval);
-            pixels.advanceData();
-            pixels.stepDithering();
-        }
-
-        mBuffer[mCurPulse-1].duration1 = RMT_RESET_DURATION;
-        assert(mCurPulse == mBufferSize);
-    }
-
+    // -- Show pixels
+    //    This is the main entry point for the controller.
     virtual void showPixels(PixelController<RGB_ORDER> & pixels)
     {
         if (gNumStarted == 0) {
@@ -358,9 +296,9 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         //if (mPixels != NULL) delete mPixels;
         //mPixels = new PixelController<RGB_ORDER>(pixels);
         if (FASTLED_RMT_BUILTIN_DRIVER)
-	    convertAllPixelData(pixels);
-	else
-	    copyPixelData(pixels);
+            convertAllPixelData(pixels);
+        else
+            copyPixelData(pixels);
 
         // -- Keep track of the number of strips we've seen
         gNumStarted++;
@@ -390,9 +328,81 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         }
     }
 
+    // -- Copy pixel data
+    //    Make a safe copy of the pixel data, so that the FastLED show
+    //    function can continue to the next controller while the RMT
+    //    device starts sending this data asynchronously.
+    virtual void copyPixelData(PixelController<RGB_ORDER> & pixels)
+    {
+        // -- Make sure we have a buffer of the right size
+        //    (3 bytes per pixel)
+        int size_needed = pixels.size() * 3;
+        if (size_needed > mSize) {
+            if (mPixelData != NULL) free(mPixelData);
+            mSize = size_needed;
+            mPixelData = (uint8_t *) malloc( mSize);
+        }
+
+        // -- Cycle through the R,G, and B values in the right order,
+        //    storing the resulting raw pixel data in the buffer.
+        int cur = 0;
+        while (pixels.has(1)) {
+            mPixelData[cur++] = pixels.loadAndScale0();
+            mPixelData[cur++] = pixels.loadAndScale1();
+            mPixelData[cur++] = pixels.loadAndScale2();
+            pixels.advanceData();
+            pixels.stepDithering();
+        }
+    }
+
+    // -- Convert all pixels to RMT pulses
+    //    This function is only used when the user chooses to use the
+    //    built-in RMT driver, which needs all of the RMT pulses
+    //    up-front.
+    virtual void convertAllPixelData(PixelController<RGB_ORDER> & pixels)
+    {
+        // -- Compute the pulse values for the whole strip at once.
+        //    Requires a large buffer
+        mBufferSize = pixels.size() * 3 * 8;
+
+        if (mBuffer == NULL) {
+            mBuffer = (rmt_item32_t *) calloc( mBufferSize, sizeof(rmt_item32_t));
+        }
+
+        // -- Cycle through the R,G, and B values in the right order,
+        //    storing the pulses in the big buffer
+        mCurPulse = 0;
+        int cur = 0;
+        uint32_t byteval;
+        while (pixels.has(1)) {
+            byteval = pixels.loadAndScale0();
+            convertByte(byteval);
+            byteval = pixels.loadAndScale1();
+            convertByte(byteval);
+            byteval = pixels.loadAndScale2();
+            convertByte(byteval);
+            pixels.advanceData();
+            pixels.stepDithering();
+        }
+
+        mBuffer[mCurPulse-1].duration1 = RMT_RESET_DURATION;
+        assert(mCurPulse == mBufferSize);
+    }
+
+    void convertByte(uint32_t byteval)
+    {
+        // -- Write one byte's worth of RMT pulses to the big buffer
+        byteval <<= 24;
+        for (register uint32_t j = 0; j < 8; j++) {
+            mBuffer[mCurPulse] = (byteval & 0x80000000L) ? mOne : mZero;
+            byteval <<= 1;
+            mCurPulse++;
+        }
+    }
+
     // -- Start up the next controller
-    //    This method is static so that it can dispatch to the appropriate
-    //    startOnChannel method of the given controller.
+    //    This method is static so that it can dispatch to the
+    //    appropriate startOnChannel method of the given controller.
     static void startNext(int channel)
     {
         if (gNext < gNumControllers) {
@@ -402,6 +412,9 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         }
     }
 
+    // -- Start this controller on the given channel
+    //    This function just initiates the RMT write; it does not wait
+    //    for it to finish.
     void startOnChannel(int channel)
     {
         // -- Assign this channel and configure the RMT
@@ -417,8 +430,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         if (FASTLED_RMT_BUILTIN_DRIVER) {
             // -- Use the built-in RMT driver to send all the data in one shot
             rmt_register_tx_end_callback(doneOnChannel, 0);
-	    rmt_write_items(mRMT_channel, mBuffer, mBufferSize, false);
-            //writeAllRMTItems();
+            rmt_write_items(mRMT_channel, mBuffer, mBufferSize, false);
         } else {
             // -- Use our custom driver to send the data incrementally
 
@@ -442,6 +454,12 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         }
     }
 
+    // -- A controller is done 
+    //    This function is called when a controller finishes writing
+    //    its data. It is called either by the custom interrupt
+    //    handler (below), or as a callback from the built-in
+    //    interrupt handler. It is static because we don't know which
+    //    controller is done until we look it up.
     static void doneOnChannel(rmt_channel_t channel, void * arg)
     {
         ClocklessController * controller = static_cast<ClocklessController*>(gOnChannel[channel]);
@@ -465,6 +483,10 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         }
     }
     
+    // -- Custom interrupt handler
+    //    This interrupt handler handles two cases: a controller is
+    //    done writing its data, or a controller needs to fill the
+    //    next half of the RMT buffer with data.
     static IRAM_ATTR void interruptHandler(void *arg)
     {
         // -- The basic structure of this code is borrowed from the
@@ -478,52 +500,56 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
 
             if (gOnChannel[channel] != NULL) {
 
-                ClocklessController * controller = static_cast<ClocklessController*>(gOnChannel[channel]);
-
                 // -- More to send on this channel
                 if (intr_st & BIT(tx_next_bit)) {
                     RMT.int_clr.val |= BIT(tx_next_bit);
-
+                    
                     // -- Refill the half of the buffer that we just finished,
                     //    allowing the other half to proceed.
+                    ClocklessController * controller = static_cast<ClocklessController*>(gOnChannel[channel]);
                     controller->fillHalfRMTBuffer();
-                }
-
-                // -- Transmission is complete on this channel
-                if (intr_st & BIT(tx_done_bit)) {
-                    RMT.int_clr.val |= BIT(tx_done_bit);
-                    doneOnChannel(rmt_channel_t(channel), 0);
+                } else {
+                    // -- Transmission is complete on this channel
+                    if (intr_st & BIT(tx_done_bit)) {
+                        RMT.int_clr.val |= BIT(tx_done_bit);
+                        doneOnChannel(rmt_channel_t(channel), 0);
+                    }
                 }
             }
         }
     }
 
+    // -- Fill the RMT buffer
+    //    This function fills the next 32 slots in the RMT write
+    //    buffer with pixel data. It also handles the case where the
+    //    pixel data is exhausted, so we need to fill the RMT buffer
+    //    with zeros to signal that it's done.
     void fillHalfRMTBuffer()
     {
         uint32_t one_val = mOne.val;
         uint32_t zero_val = mZero.val;
 
-	// -- Convert (up to) 32 bits of the raw pixel data into
-	//    into RMT pulses that encode the zeros and ones.
+        // -- Convert (up to) 32 bits of the raw pixel data into
+        //    into RMT pulses that encode the zeros and ones.
         int pulses = 0;
         uint32_t byteval;
         while (pulses < 32 && mCurByte < mSize) {
-	    // -- Get one byte
+            // -- Get one byte
             byteval = mPixelData[mCurByte++];
-	    byteval <<= 24;
-	    // Shift bits out, MSB first, setting RMTMEM.chan[n].data32[x] to the 
-	    // rmt_item32_t value corresponding to the buffered bit value
-	    for (register uint32_t j = 0; j < 8; j++) {
-		uint32_t val = (byteval & 0x80000000L) ? one_val : zero_val;
-		RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = val;
-		byteval <<= 1;
-		mCurPulse++;
-	    }
-	    pulses += 8;
+            byteval <<= 24;
+            // Shift bits out, MSB first, setting RMTMEM.chan[n].data32[x] to the 
+            // rmt_item32_t value corresponding to the buffered bit value
+            for (register uint32_t j = 0; j < 8; j++) {
+                uint32_t val = (byteval & 0x80000000L) ? one_val : zero_val;
+                RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = val;
+                byteval <<= 1;
+                mCurPulse++;
+            }
+            pulses += 8;
         }
 
-	// -- When we reach the end of the pixel data, fill the rest of the
-	//    RMT buffer with 0's, which signals to the device that we're done.
+        // -- When we reach the end of the pixel data, fill the rest of the
+        //    RMT buffer with 0's, which signals to the device that we're done.
         if (mCurByte == mSize) {
             while (pulses < 32) {
                 RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = 0;

From c5bf6a320a0b5759fe1804f8fa95c8af591590aa Mon Sep 17 00:00:00 2001
From: Marc MERLIN <marc_soft@merlins.org>
Date: Sat, 5 Jan 2019 04:06:40 -0800
Subject: [PATCH 040/204] Delete confusing demo.

- standard demoreel works on ESP32 now
- https://github.com/FastLED/FastLED/issues/711 says this demo worked
  badly.
- https://github.com/FastLED/FastLED/issues/713 says it's incompatible
  with Wifi.
---
 examples/DemoReelESP32/DemoReelESP32.ino | 181 -----------------------
 1 file changed, 181 deletions(-)
 delete mode 100644 examples/DemoReelESP32/DemoReelESP32.ino

diff --git a/examples/DemoReelESP32/DemoReelESP32.ino b/examples/DemoReelESP32/DemoReelESP32.ino
deleted file mode 100644
index 3a32d4c4d0..0000000000
--- a/examples/DemoReelESP32/DemoReelESP32.ino
+++ /dev/null
@@ -1,181 +0,0 @@
-#include "FastLED.h"
-
-FASTLED_USING_NAMESPACE
-
-// FastLED "100-lines-of-code" demo reel, showing just a few 
-// of the kinds of animation patterns you can quickly and easily 
-// compose using FastLED.  
-//
-// This example also shows one easy way to define multiple 
-// animations patterns and have them automatically rotate.
-//
-// -Mark Kriegsman, December 2014
-
-#if defined(FASTLED_VERSION) && (FASTLED_VERSION < 3001000)
-#warning "Requires FastLED 3.1 or later; check github for latest code."
-#endif
-
-#define DATA_PIN    12
-//#define CLK_PIN   4
-#define LED_TYPE    WS2811
-#define COLOR_ORDER GRB
-#define NUM_LEDS    27
-CRGB leds[NUM_LEDS];
-
-#define BRIGHTNESS          60
-#define FRAMES_PER_SECOND  120
-
-// -- The core to run FastLED.show()
-#define FASTLED_SHOW_CORE 0
-
-// -- Task handles for use in the notifications
-static TaskHandle_t FastLEDshowTaskHandle = 0;
-static TaskHandle_t userTaskHandle = 0;
-
-/** show() for ESP32
- *  Call this function instead of FastLED.show(). It signals core 0 to issue a show, 
- *  then waits for a notification that it is done.
- */
-void FastLEDshowESP32()
-{
-    if (userTaskHandle == 0) {
-        // -- Store the handle of the current task, so that the show task can
-        //    notify it when it's done
-        userTaskHandle = xTaskGetCurrentTaskHandle();
-
-        // -- Trigger the show task
-        xTaskNotifyGive(FastLEDshowTaskHandle);
-
-        // -- Wait to be notified that it's done
-        const TickType_t xMaxBlockTime = pdMS_TO_TICKS( 200 );
-        ulTaskNotifyTake(pdTRUE, xMaxBlockTime);
-        userTaskHandle = 0;
-    }
-}
-
-/** show Task
- *  This function runs on core 0 and just waits for requests to call FastLED.show()
- */
-void FastLEDshowTask(void *pvParameters)
-{
-    // -- Run forever...
-    for(;;) {
-        // -- Wait for the trigger
-        ulTaskNotifyTake(pdTRUE, portMAX_DELAY);
-
-        // -- Do the show (synchronously)
-        FastLED.show();
-
-        // -- Notify the calling task
-        xTaskNotifyGive(userTaskHandle);
-    }
-}
-
-void setup() {
-  delay(3000); // 3 second delay for recovery
-  Serial.begin(115200);
-  
-  // tell FastLED about the LED strip configuration
-  FastLED.addLeds<LED_TYPE,DATA_PIN,COLOR_ORDER>(leds, NUM_LEDS).setCorrection(TypicalLEDStrip);
-  //FastLED.addLeds<LED_TYPE,DATA_PIN,CLK_PIN,COLOR_ORDER>(leds, NUM_LEDS).setCorrection(TypicalLEDStrip);
-
-  // set master brightness control
-  FastLED.setBrightness(BRIGHTNESS);
-
-    int core = xPortGetCoreID();
-    Serial.print("Main code running on core ");
-    Serial.println(core);
-
-    // -- Create the FastLED show task
-    xTaskCreatePinnedToCore(FastLEDshowTask, "FastLEDshowTask", 2048, NULL, 2, &FastLEDshowTaskHandle, FASTLED_SHOW_CORE);
-}
-
-
-// List of patterns to cycle through.  Each is defined as a separate function below.
-typedef void (*SimplePatternList[])();
-SimplePatternList gPatterns = { rainbow, rainbowWithGlitter, confetti, sinelon, juggle, bpm };
-
-uint8_t gCurrentPatternNumber = 0; // Index number of which pattern is current
-uint8_t gHue = 0; // rotating "base color" used by many of the patterns
-  
-void loop()
-{
-  // Call the current pattern function once, updating the 'leds' array
-  gPatterns[gCurrentPatternNumber]();
-
-  // send the 'leds' array out to the actual LED strip
-  FastLEDshowESP32();
-  // FastLED.show();
-  // insert a delay to keep the framerate modest
-  FastLED.delay(1000/FRAMES_PER_SECOND); 
-
-  // do some periodic updates
-  EVERY_N_MILLISECONDS( 20 ) { gHue++; } // slowly cycle the "base color" through the rainbow
-  EVERY_N_SECONDS( 10 ) { nextPattern(); } // change patterns periodically
-}
-
-#define ARRAY_SIZE(A) (sizeof(A) / sizeof((A)[0]))
-
-void nextPattern()
-{
-  // add one to the current pattern number, and wrap around at the end
-  gCurrentPatternNumber = (gCurrentPatternNumber + 1) % ARRAY_SIZE( gPatterns);
-}
-
-void rainbow() 
-{
-  // FastLED's built-in rainbow generator
-  fill_rainbow( leds, NUM_LEDS, gHue, 7);
-}
-
-void rainbowWithGlitter() 
-{
-  // built-in FastLED rainbow, plus some random sparkly glitter
-  rainbow();
-  addGlitter(80);
-}
-
-void addGlitter( fract8 chanceOfGlitter) 
-{
-  if( random8() < chanceOfGlitter) {
-    leds[ random16(NUM_LEDS) ] += CRGB::White;
-  }
-}
-
-void confetti() 
-{
-  // random colored speckles that blink in and fade smoothly
-  fadeToBlackBy( leds, NUM_LEDS, 10);
-  int pos = random16(NUM_LEDS);
-  leds[pos] += CHSV( gHue + random8(64), 200, 255);
-}
-
-void sinelon()
-{
-  // a colored dot sweeping back and forth, with fading trails
-  fadeToBlackBy( leds, NUM_LEDS, 20);
-  int pos = beatsin16( 13, 0, NUM_LEDS-1 );
-  leds[pos] += CHSV( gHue, 255, 192);
-}
-
-void bpm()
-{
-  // colored stripes pulsing at a defined Beats-Per-Minute (BPM)
-  uint8_t BeatsPerMinute = 62;
-  CRGBPalette16 palette = PartyColors_p;
-  uint8_t beat = beatsin8( BeatsPerMinute, 64, 255);
-  for( int i = 0; i < NUM_LEDS; i++) { //9948
-    leds[i] = ColorFromPalette(palette, gHue+(i*2), beat-gHue+(i*10));
-  }
-}
-
-void juggle() {
-  // eight colored dots, weaving in and out of sync with each other
-  fadeToBlackBy( leds, NUM_LEDS, 20);
-  byte dothue = 0;
-  for( int i = 0; i < 8; i++) {
-    leds[beatsin16( i+7, 0, NUM_LEDS-1 )] |= CHSV(dothue, 200, 255);
-    dothue += 32;
-  }
-}
-

From 19468df0ab094cecc5e5fecd31884baa1aba9299 Mon Sep 17 00:00:00 2001
From: Dominik Moritz <domoritz@gmail.com>
Date: Mon, 18 Feb 2019 11:12:50 -0800
Subject: [PATCH 041/204] Delete preview_changes.txt

---
 preview_changes.txt | 19 -------------------
 1 file changed, 19 deletions(-)
 delete mode 100644 preview_changes.txt

diff --git a/preview_changes.txt b/preview_changes.txt
deleted file mode 100644
index dbbd94668a..0000000000
--- a/preview_changes.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-FastLED 3.1 preview changes:
-* UART in SPI mode support on AVR 
-* Support for using both hardware SPI pinsets on the Teensy 3.x - 11/13 and 7/14.
-* Added UCS1904 support
-* Better AVR cycle counting
-* Split WS2812/WS2811 timings
-* Added DOTSTAR definition for adafruit dotstar pixels (aka APA102)
-* 8-way parallel output on teensy 3, 3.1 (portc,portd), due/digix (porta, portb, portd)
-* 12-way parallel output on teensy 3, 3.1 (portc)
-* 16-way parallel output on teensy 3, 3.1 (portc & portd paired)
-* refresh rate limiting
-* interrupt friendly code on teensy 3/3.1
-* -interrupt friendly code on AVR- <-- disabled for now
-* interrupt friendly code on the due
-* code re-org for future wider platform support
-* Spark Core support
-* arduino zero support (no hardware spi yet)
-* greatly improved clockless output for avr
-* greatly improved clockless output for arm m0 boards

From f82129ac38ef03acc5f84497ac95b4d810baeca1 Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Fri, 26 Apr 2019 22:47:37 -0400
Subject: [PATCH 042/204] New I2S driver for ESP32

---
 platforms/esp/32/clockless_esp32.h | 813 ++++++++++++++++-------------
 1 file changed, 457 insertions(+), 356 deletions(-)

diff --git a/platforms/esp/32/clockless_esp32.h b/platforms/esp/32/clockless_esp32.h
index 248325ad3b..51ceb8bd57 100644
--- a/platforms/esp/32/clockless_esp32.h
+++ b/platforms/esp/32/clockless_esp32.h
@@ -1,66 +1,5 @@
 /*
- * Integration into FastLED ClocklessController
- * Copyright (c) 2018 Samuel Z. Guyer
- * Copyright (c) 2017 Thomas Basler
- * Copyright (c) 2017 Martin F. Falatic
  *
- * ESP32 support is provided using the RMT peripheral device -- a unit
- * on the chip designed specifically for generating (and receiving)
- * precisely-timed digital signals. Nominally for use in infrared
- * remote controls, we use it to generate the signals for clockless
- * LED strips. The main advantage of using the RMT device is that,
- * once programmed, it generates the signal asynchronously, allowing
- * the CPU to continue executing other code. It is also not vulnerable
- * to interrupts or other timing problems that could disrupt the signal.
- *
- * The implementation strategy is borrowed from previous work and from
- * the RMT support built into the ESP32 IDF. The RMT device has 8
- * channels, which can be programmed independently to send sequences
- * of high/low bits. Memory for each channel is limited, however, so
- * in order to send a long sequence of bits, we need to continuously
- * refill the buffer until all the data is sent. To do this, we fill
- * half the buffer and then set an interrupt to go off when that half
- * is sent. Then we refill that half while the second half is being
- * sent. This strategy effectively overlaps computation (by the CPU)
- * and communication (by the RMT).
- *
- * Since the RMT device only has 8 channels, we need a strategy to
- * allow more than 8 LED controllers. Our driver assigns controllers
- * to channels on the fly, queuing up controllers as necessary until a
- * channel is free. The main showPixels routine just fires off the
- * first 8 controllers; the interrupt handler starts new controllers
- * asynchronously as previous ones finish. So, for example, it can
- * send the data for 8 controllers simultaneously, but 16 controllers
- * would take approximately twice as much time.
- *
- * There is a #define that allows a program to control the total
- * number of channels that the driver is allowed to use. It defaults
- * to 8 -- use all the channels. Setting it to 1, for example, results
- * in fully serial output:
- *
- *     #define FASTLED_RMT_MAX_CHANNELS 1
- *
- * OTHER RMT APPLICATIONS
- *
- * The default FastLED driver takes over control of the RMT interrupt
- * handler, making it hard to use the RMT device for other
- * (non-FastLED) purposes. You can change it's behavior to use the ESP
- * core driver instead, allowing other RMT applications to
- * co-exist. To switch to this mode, add the following directive
- * before you include FastLED.h:
- *
- *      #define FASTLED_RMT_BUILTIN_DRIVER
- *
- * There may be a performance penalty for using this mode. We need to
- * compute the RMT signal for the entire LED strip ahead of time,
- * rather than overlapping it with communication. We also need a large
- * buffer to hold the signal specification. Each bit of pixel data is
- * represented by a 32-bit pulse specification, so it is a 32X blow-up
- * in memory use.
- *
- *
- * Based on public domain code created 19 Nov 2016 by Chris Osborn <fozztexx@fozztexx.com>
- * http://insentricity.com *
  *
  */
 /*
@@ -91,14 +30,16 @@ FASTLED_NAMESPACE_BEGIN
 extern "C" {
 #endif
 
-#include "esp32-hal.h"
-#include "esp_intr.h"
+#include "esp_heap_caps.h"
+#include "soc/soc.h"
+#include "soc/gpio_sig_map.h"
+#include "soc/i2s_reg.h"
+#include "soc/i2s_struct.h"
+#include "soc/io_mux_reg.h"
 #include "driver/gpio.h"
-#include "driver/rmt.h"
 #include "driver/periph_ctrl.h"
-#include "freertos/semphr.h"
-#include "soc/rmt_struct.h"
-
+#include "rom/lldesc.h"
+#include "esp_intr.h"
 #include "esp_log.h"
 
 #ifdef __cplusplus
@@ -112,10 +53,20 @@ __attribute__ ((always_inline)) inline static uint32_t __clock_cycles() {
 }
 
 #define FASTLED_HAS_CLOCKLESS 1
+#define NUM_COLOR_CHANNELS 3
+
+// -- Choose which I2S device to use
+#ifndef I2S_DEVICE
+#define I2S_DEVICE 0
+#endif
+
+// -- Max number of controllers we can support
+#ifndef FASTLED_I2S_MAX_CONTROLLERS
+#define FASTLED_I2S_MAX_CONTROLLERS 24
+#endif
 
-// -- Configuration constants
-#define DIVIDER             2 /* 4, 8 still seem to work, but timings become marginal */
-#define MAX_PULSES         32 /* A channel has a 64 "pulse" buffer - we use half per pass */
+// -- I2S clock
+#define I2S_BASE_CLK (1600000000L)
 
 // -- Convert ESP32 cycles back into nanoseconds
 #define ESPCLKS_TO_NS(_CLKS) (((long)(_CLKS) * 1000L) / F_CPU_MHZ)
@@ -127,53 +78,43 @@ __attribute__ ((always_inline)) inline static uint32_t __clock_cycles() {
 #define NS_PER_CYCLE    ( NS_PER_SEC / CYCLES_PER_SEC )
 #define NS_TO_CYCLES(n) ( (n) / NS_PER_CYCLE )
 
-// -- Convert ESP32 cycles to RMT cycles
-#define TO_RMT_CYCLES(_CLKS) NS_TO_CYCLES(ESPCLKS_TO_NS(_CLKS))    
-
-// -- Number of cycles to signal the strip to latch
-#define RMT_RESET_DURATION NS_TO_CYCLES(50000)
-
-// -- Core or custom driver
-#ifndef FASTLED_RMT_BUILTIN_DRIVER
-#define FASTLED_RMT_BUILTIN_DRIVER false
-#endif
-
-// -- Max number of controllers we can support
-#ifndef FASTLED_RMT_MAX_CONTROLLERS
-#define FASTLED_RMT_MAX_CONTROLLERS 32
-#endif
-
-// -- Number of RMT channels to use (up to 8)
-//    Redefine this value to 1 to force serial output
-#ifndef FASTLED_RMT_MAX_CHANNELS
-#define FASTLED_RMT_MAX_CHANNELS 8
-#endif
-
 // -- Array of all controllers
-static CLEDController * gControllers[FASTLED_RMT_MAX_CONTROLLERS];
-
-// -- Current set of active controllers, indexed by the RMT
-//    channel assigned to them.
-static CLEDController * gOnChannel[FASTLED_RMT_MAX_CHANNELS];
-
+static CLEDController * gControllers[FASTLED_I2S_MAX_CONTROLLERS];
 static int gNumControllers = 0;
 static int gNumStarted = 0;
-static int gNumDone = 0;
-static int gNext = 0;
-
-static intr_handle_t gRMT_intr_handle = NULL;
 
 // -- Global semaphore for the whole show process
 //    Semaphore is not given until all data has been sent
 static xSemaphoreHandle gTX_sem = NULL;
 
+// -- I2S global configuration stuff
 static bool gInitialized = false;
 
+static intr_handle_t gI2S_intr_handle = NULL;
+
+static i2s_dev_t * i2s;          // A pointer to the memory-mapped structure: I2S0 or I2S1
+static int i2s_base_pin_index;   // I2S goes to these pins until we remap them using the GPIO matrix
+
+// --- I2S DMA buffers
+struct DMABuffer {
+    lldesc_t descriptor;
+    uint8_t * buffer;
+};
+
+#define NUM_DMA_BUFFERS 2
+static DMABuffer * dmaBuffers[NUM_DMA_BUFFERS];
+
+// -- Counters to track progress
+static int gCurBuffer = 0;
+static int gCurPixel = 0;
+static int gPixelsSent = 0;
+static int gMaxPixels = 0;
+
 template <int DATA_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = RGB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 5>
 class ClocklessController : public CPixelLEDController<RGB_ORDER>
 {
-    // -- RMT has 8 channels, numbered 0 to 7
-    rmt_channel_t  mRMT_channel;
+    // -- The index of this controller in the global gControllers array
+    int            m_index;
 
     // -- Store the GPIO pin
     gpio_num_t     mPin;
@@ -181,27 +122,20 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
     // -- This instantiation forces a check on the pin choice
     FastPin<DATA_PIN> mFastPin;
 
-    // -- Timing values for zero and one bits, derived from T1, T2, and T3
-    rmt_item32_t   mZero;
-    rmt_item32_t   mOne;
-
-    // -- State information for keeping track of where we are in the pixel data
-    uint8_t *      mPixelData = NULL;
+    // -- State information for keeping track of where we are in the
+    //    pixel data. For the I2S driver, it is more convenient to
+    //    store the data for each channel in a separate array.
+    uint8_t *      mPixelData[NUM_COLOR_CHANNELS];
     int            mSize = 0;
-    int            mCurByte;
-    uint16_t       mCurPulse;
-
-    // -- Buffer to hold all of the pulses. For the version that uses
-    //    the RMT driver built into the ESP core.
-    rmt_item32_t * mBuffer;
-    uint16_t       mBufferSize;
 
 public:
 
     void init()
     {
-        // -- Precompute rmt items corresponding to a zero bit and a one bit
-        //    according to the timing values given in the template instantiation
+        i2sInit();
+        
+        // TBD: Precompute the bit patterns based on the I2S sample rate
+        /*
         // T1H
         mOne.level0 = 1;
         mOne.duration0 = TO_RMT_CYCLES(T1+T2);
@@ -215,66 +149,165 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         // T0L
         mZero.level1 = 0;
         mZero.duration1 = TO_RMT_CYCLES(T2 + T3);
+        */
 
         gControllers[gNumControllers] = this;
+        m_index = gNumControllers;
         gNumControllers++;
 
+        // -- Set up the pin We have to do two things: configure the
+        //    actual GPIO pin, and route the output from the default
+        //    pin (determined by the I2S device) to the pin we
+        //    want. We compute the default pin using the index of this
+        //    controller in the array. This order is crucial because
+        //    the bits must go into the DMA buffer in the same order.
         mPin = gpio_num_t(DATA_PIN);
+
+        PIN_FUNC_SELECT(GPIO_PIN_MUX_REG[DATA_PIN], PIN_FUNC_GPIO);
+        gpio_set_direction(mPin, (gpio_mode_t)GPIO_MODE_DEF_OUTPUT);
+        pinMode(mPin,OUTPUT);
+        gpio_matrix_out(mPin, i2s_base_pin_index + m_index, false, false);
+
+        for (int i = 0; i < NUM_COLOR_CHANNELS; i++) {
+            mPixelData[i] = 0;
+        }
+
+        /*
+        Serial.print("Init controller ");
+        Serial.print(m_index);
+        Serial.print(" on pin ");
+        Serial.print(mPin);
+        Serial.print(" I2S signal ");
+        Serial.print(i2s_base_pin_index + m_index);
+        Serial.println();
+        */
     }
 
     virtual uint16_t getMaxRefreshRate() const { return 400; }
 
 protected:
 
-    void initRMT()
+    static DMABuffer * allocateDMABuffer(int bytes)
+    {
+        DMABuffer * b = (DMABuffer *)heap_caps_malloc(sizeof(DMABuffer), MALLOC_CAP_DMA);
+
+        b->buffer = (uint8_t *)heap_caps_malloc(bytes, MALLOC_CAP_DMA);
+        memset(b->buffer, 0, bytes);
+
+        b->descriptor.length = bytes;
+        b->descriptor.size = bytes;
+        b->descriptor.owner = 1;
+        b->descriptor.sosf = 1;
+        b->descriptor.buf = b->buffer;
+        b->descriptor.offset = 0;
+        b->descriptor.empty = 0;
+        b->descriptor.eof = 1;
+        b->descriptor.qe.stqe_next = 0;
+
+        return b;
+    }
+
+    static void i2sInit()
     {
         // -- Only need to do this once
         if (gInitialized) return;
 
-        for (int i = 0; i < FASTLED_RMT_MAX_CHANNELS; i++) {
-            gOnChannel[i] = NULL;
-
-            // -- RMT configuration for transmission
-            rmt_config_t rmt_tx;
-            rmt_tx.channel = rmt_channel_t(i);
-            rmt_tx.rmt_mode = RMT_MODE_TX;
-            rmt_tx.gpio_num = mPin;  // The particular pin will be assigned later
-            rmt_tx.mem_block_num = 1;
-            rmt_tx.clk_div = DIVIDER;
-            rmt_tx.tx_config.loop_en = false;
-            rmt_tx.tx_config.carrier_level = RMT_CARRIER_LEVEL_LOW;
-            rmt_tx.tx_config.carrier_en = false;
-            rmt_tx.tx_config.idle_level = RMT_IDLE_LEVEL_LOW;
-            rmt_tx.tx_config.idle_output_en = true;
-                
-            // -- Apply the configuration
-            rmt_config(&rmt_tx);
-
-            if (FASTLED_RMT_BUILTIN_DRIVER) {
-                rmt_driver_install(rmt_channel_t(i), 0, 0);
-            } else {
-                // -- Set up the RMT to send 1/2 of the pulse buffer and then
-                //    generate an interrupt. When we get this interrupt we
-                //    fill the other half in preparation (kind of like double-buffering)
-                rmt_set_tx_thr_intr_en(rmt_channel_t(i), true, MAX_PULSES);
-            }
+        // -- Choose whether to use I2S device 0 or device 1
+        //    Set up the various device-specific parameters
+        int interruptSource;
+        if (I2S_DEVICE == 0) {
+            i2s = &I2S0;
+            periph_module_enable(PERIPH_I2S0_MODULE);
+            interruptSource = ETS_I2S0_INTR_SOURCE;
+            i2s_base_pin_index = I2S0O_DATA_OUT0_IDX;
+        } else {
+            i2s = &I2S1;
+            periph_module_enable(PERIPH_I2S1_MODULE);
+            interruptSource = ETS_I2S1_INTR_SOURCE;
+            i2s_base_pin_index = I2S1O_DATA_OUT0_IDX;
         }
 
+        // -- Reset i2s
+        i2s->conf.tx_reset = 1;
+        i2s->conf.tx_reset = 0;
+        i2s->conf.rx_reset = 1;
+        i2s->conf.rx_reset = 0;
+
+        // -- Reset DMA
+        i2s->lc_conf.in_rst = 1;
+        i2s->lc_conf.in_rst = 0;
+        i2s->lc_conf.out_rst = 1;
+        i2s->lc_conf.out_rst = 0;
+
+        // -- Reset FIFO (Do we need this?)
+        i2s->conf.rx_fifo_reset = 1;
+        i2s->conf.rx_fifo_reset = 0;
+        i2s->conf.tx_fifo_reset = 1;
+        i2s->conf.tx_fifo_reset = 0;
+
+        // -- Main configuration 
+        i2s->conf.tx_msb_right = 1;
+        i2s->conf.tx_mono = 0;
+        i2s->conf.tx_short_sync = 0;
+        i2s->conf.tx_msb_shift = 0;
+        i2s->conf.tx_right_first = 1; // 0;//1;
+        i2s->conf.tx_slave_mod = 0;
+
+        // -- Set parallel mode
+        i2s->conf2.val = 0;
+        i2s->conf2.lcd_en = 1;
+        i2s->conf2.lcd_tx_wrx2_en = 0; // 0 for 16 or 32 parallel output
+        i2s->conf2.lcd_tx_sdx2_en = 0; // HN
+
+        // -- Set up the clock rate and sampling
+        i2s->sample_rate_conf.val = 0;
+        i2s->sample_rate_conf.tx_bits_mod = 32; // Number of parallel bits/pins
+        i2s->sample_rate_conf.tx_bck_div_num = 1;
+        i2s->clkm_conf.val = 0;
+        i2s->clkm_conf.clka_en = 0;
+
+        // -- Data clock is computed as Base/(div_num + (div_b/div_a))
+        //    Base is 80Mhz, so 80/(25 + 0/1) = 3.2Mhz
+        //    One cycle is 312.5ns
+        i2s->clkm_conf.clkm_div_a = 1;
+        i2s->clkm_conf.clkm_div_b = 0;
+        i2s->clkm_conf.clkm_div_num = 25;
+    
+        i2s->fifo_conf.val = 0;
+        i2s->fifo_conf.tx_fifo_mod_force_en = 1;
+        i2s->fifo_conf.tx_fifo_mod = 3;  // 32-bit single channel data
+        i2s->fifo_conf.tx_data_num = 32; // fifo length
+        i2s->fifo_conf.dscr_en = 1;      // fifo will use dma
+
+        i2s->conf1.val = 0;
+        i2s->conf1.tx_stop_en = 0;
+        i2s->conf1.tx_pcm_bypass = 1;
+
+        i2s->conf_chan.val = 0;
+        i2s->conf_chan.tx_chan_mod = 1; // Mono mode, with tx_msb_right = 1, everything goes to right-channel
+
+        i2s->timing.val = 0;
+
+        // -- Allocate two DMA buffers
+        dmaBuffers[0] = allocateDMABuffer(32 * NUM_COLOR_CHANNELS * 4);
+        dmaBuffers[1] = allocateDMABuffer(32 * NUM_COLOR_CHANNELS * 4);
+
+        // -- Arrange them as a circularly linked list
+        dmaBuffers[0]->descriptor.qe.stqe_next = &(dmaBuffers[1]->descriptor);
+        dmaBuffers[1]->descriptor.qe.stqe_next = &(dmaBuffers[0]->descriptor);
+
+        //allocate disabled i2s interrupt
+        SET_PERI_REG_BITS(I2S_INT_ENA_REG(I2S_DEVICE), I2S_OUT_EOF_INT_ENA_V, 1, I2S_OUT_EOF_INT_ENA_S);
+        esp_err_t e = esp_intr_alloc(interruptSource, 0, // ESP_INTR_FLAG_INTRDISABLED | ESP_INTR_FLAG_LEVEL3 | ESP_INTR_FLAG_IRAM,
+                       &interruptHandler, 0, &gI2S_intr_handle);
+
         // -- Create a semaphore to block execution until all the controllers are done
         if (gTX_sem == NULL) {
             gTX_sem = xSemaphoreCreateBinary();
             xSemaphoreGive(gTX_sem);
         }
                 
-        if ( ! FASTLED_RMT_BUILTIN_DRIVER) {
-            // -- Allocate the interrupt if we have not done so yet. This
-            //    interrupt handler must work for all different kinds of
-            //    strips, so it delegates to the refill function for each
-            //    specific instantiation of ClocklessController.
-            if (gRMT_intr_handle == NULL)
-                esp_intr_alloc(ETS_RMT_INTR_SOURCE, 0, interruptHandler, 0, &gRMT_intr_handle);
-        }
-
+        // Serial.println("Init I2S");
         gInitialized = true;
     }
 
@@ -284,36 +317,33 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
     {
         if (gNumStarted == 0) {
             // -- First controller: make sure everything is set up
-            initRMT();
             xSemaphoreTake(gTX_sem, portMAX_DELAY);
         }
 
         // -- Initialize the local state, save a pointer to the pixel
         //    data. We need to make a copy because pixels is a local
         //    variable in the calling function, and this data structure
-        //    needs to outlive this call to showPixels.
-
-        //if (mPixels != NULL) delete mPixels;
-        //mPixels = new PixelController<RGB_ORDER>(pixels);
-        if (FASTLED_RMT_BUILTIN_DRIVER)
-            convertAllPixelData(pixels);
-        else
-            copyPixelData(pixels);
+        //    needs to outlive this call to showPixels.]
+        copyPixelData(pixels);
 
         // -- Keep track of the number of strips we've seen
         gNumStarted++;
 
+        // Serial.print("Show pixels ");
+        // Serial.println(gNumStarted);
+
         // -- The last call to showPixels is the one responsible for doing
-        //    all of the actual worl
+        //    all of the actual work
         if (gNumStarted == gNumControllers) {
-            gNext = 0;
+            gCurPixel = 0;
+            gCurBuffer = 0;
+            gPixelsSent = 0;
 
-            // -- First, fill all the available channels
-            int channel = 0;
-            while (channel < FASTLED_RMT_MAX_CHANNELS && gNext < gNumControllers) {
-                startNext(channel);
-                channel++;
-            }
+            // -- Prefill both buffers
+            fillBuffer();
+            fillBuffer();
+
+            i2sStart();
 
             // -- Wait here while the rest of the data is sent. The interrupt handler
             //    will keep refilling the RMT buffers until it is all sent; then it
@@ -321,10 +351,11 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
             xSemaphoreTake(gTX_sem, portMAX_DELAY);
             xSemaphoreGive(gTX_sem);
 
+            i2sStop();
+            // Serial.println("...done");
+
             // -- Reset the counters
             gNumStarted = 0;
-            gNumDone = 0;
-            gNext = 0;
         }
     }
 
@@ -336,231 +367,301 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
     {
         // -- Make sure we have a buffer of the right size
         //    (3 bytes per pixel)
-        int size_needed = pixels.size() * 3;
+        int size_needed = pixels.size();
         if (size_needed > mSize) {
-            if (mPixelData != NULL) free(mPixelData);
             mSize = size_needed;
-            mPixelData = (uint8_t *) malloc( mSize);
+            for (int i = 0; i < NUM_COLOR_CHANNELS; i++) {
+                if (mPixelData[i] != NULL) free(mPixelData[i]);
+                mPixelData[i] = (uint8_t *) malloc( mSize);
+            }
+
+            if (gMaxPixels < mSize)
+                gMaxPixels = mSize;
         }
 
         // -- Cycle through the R,G, and B values in the right order,
         //    storing the resulting raw pixel data in the buffer.
         int cur = 0;
         while (pixels.has(1)) {
-            mPixelData[cur++] = pixels.loadAndScale0();
-            mPixelData[cur++] = pixels.loadAndScale1();
-            mPixelData[cur++] = pixels.loadAndScale2();
+            mPixelData[0][cur] = pixels.loadAndScale0();
+            mPixelData[1][cur] = pixels.loadAndScale1();
+            mPixelData[2][cur] = pixels.loadAndScale2();
             pixels.advanceData();
             pixels.stepDithering();
+            cur++;
         }
     }
 
-    // -- Convert all pixels to RMT pulses
-    //    This function is only used when the user chooses to use the
-    //    built-in RMT driver, which needs all of the RMT pulses
-    //    up-front.
-    virtual void convertAllPixelData(PixelController<RGB_ORDER> & pixels)
+    // -- Custom interrupt handler
+    static IRAM_ATTR void interruptHandler(void *arg)
     {
-        // -- Compute the pulse values for the whole strip at once.
-        //    Requires a large buffer
-        mBufferSize = pixels.size() * 3 * 8;
+        if (i2s->int_st.out_eof) {
+            i2s->int_clr.val = i2s->int_raw.val;
 
-        if (mBuffer == NULL) {
-            mBuffer = (rmt_item32_t *) calloc( mBufferSize, sizeof(rmt_item32_t));
+            gPixelsSent++;
+            if (gCurPixel < gMaxPixels) {
+                fillBuffer();
+            } else {
+                if (gPixelsSent == gMaxPixels) {
+                    portBASE_TYPE HPTaskAwoken = 0;
+                    xSemaphoreGiveFromISR(gTX_sem, &HPTaskAwoken);
+                    if(HPTaskAwoken == pdTRUE) portYIELD_FROM_ISR();
+                }
+            }
         }
+    }
 
-        // -- Cycle through the R,G, and B values in the right order,
-        //    storing the pulses in the big buffer
-        mCurPulse = 0;
-        int cur = 0;
-        uint32_t byteval;
-        while (pixels.has(1)) {
-            byteval = pixels.loadAndScale0();
-            convertByte(byteval);
-            byteval = pixels.loadAndScale1();
-            convertByte(byteval);
-            byteval = pixels.loadAndScale2();
-            convertByte(byteval);
-            pixels.advanceData();
-            pixels.stepDithering();
+    static void fillBuffer()
+    {
+        int pixel_num = gCurPixel;
+        gCurPixel++;
+
+        volatile uint32_t * buf = (uint32_t *) dmaBuffers[gCurBuffer]->buffer;
+        gCurBuffer = (gCurBuffer + 1) % NUM_DMA_BUFFERS;
+        // Serial.print("Fill "); Serial.print((uint32_t)buf); Serial.println();
+
+        uint8_t pixels[NUM_COLOR_CHANNELS][32];
+        memset(pixels, 0, NUM_COLOR_CHANNELS * 32);
+
+        // -- Get the requested pixel from each controller. Store the
+        //    data for each color channel in a separate array.
+        for (int i = 0; i < gNumControllers; i++) {
+            for (int j = 0; j < NUM_COLOR_CHANNELS; j++) {
+                ClocklessController * pController = static_cast<ClocklessController*>(gControllers[i]);
+                pixels[j][23-i] = pController->mPixelData[j][pixel_num];
+            }
         }
 
-        mBuffer[mCurPulse-1].duration1 = RMT_RESET_DURATION;
-        assert(mCurPulse == mBufferSize);
+        // -- Transpose and encode the pixel data for the DMA buffer
+        uint8_t bits[NUM_COLOR_CHANNELS][8][4];
+
+        for (int channel = 0; channel < NUM_COLOR_CHANNELS; channel++) {
+            // -- Tranpose each array: all the bit 7's, then all the bit 6's, ...
+            // transpose24x1_noinline(pixels[channel], bits[channel]);
+            transpose32(pixels[channel], & (bits[channel][0][0]) );
+
+            // -- Create the bit pattern in the actual DMA buffer. Each
+            //    bit in the data turns into 4 bits in the output. Those
+            //    four bits encode the timing of the signal to the LED
+            //    strip. The I2S device is set up so that each pulse is
+            //    312.5ns. Therefore, we can form the zero and one bit
+            //    timing for the WS2812 with the following bit patterns:
+            //
+            //    Zero bit: T0H is around 300-400ns, so we send 1000 (high for 312.5, low for the rest)
+            //    One bit:  T1H is around 700-900ns, so we send 1110 (high for 937.5)
+        
+            // Serial.print("Channel: "); Serial.println(channel);
+            for (int bitnum = 0; bitnum < 8; bitnum++) {
+                uint8_t * row = (uint8_t *) & (bits[channel][bitnum][0]);
+                uint32_t bit =  (row[0] << 24) | (row[1] << 16) | (row[2] << 8) | row[3];
+                // bit = bit >> 23;
+                // bit = bit << 1;
+                /*
+                Serial.print(bitnum); Serial.print(": ");
+                uint32_t bt = bit;
+                for (int k = 0; k < 32; k++) {
+                    if (bt & 0x80000000) Serial.print("1");
+                    else Serial.print("0");
+                    bt = bt << 1;
+                }
+                Serial.println();
+                */
+                // -- Now form the four-bit pattern: we can do this by
+                //    duplicating the bit we computed, and adding a 1
+                //    at the front and a zero at the back: 1bb0
+                buf[channel*32 + bitnum*4]   = 0xFFFFFFFF;
+                buf[channel*32 + bitnum*4+1] = bit;
+                buf[channel*32 + bitnum*4+2] = bit;
+                buf[channel*32 + bitnum*4+3] = 0x00000000;
+            }
+        }
     }
 
-    void convertByte(uint32_t byteval)
+    static void transpose32(uint8_t * pixels, uint8_t * bits)
     {
-        // -- Write one byte's worth of RMT pulses to the big buffer
-        byteval <<= 24;
-        for (register uint32_t j = 0; j < 8; j++) {
-            mBuffer[mCurPulse] = (byteval & 0x80000000L) ? mOne : mZero;
-            byteval <<= 1;
-            mCurPulse++;
+        transpose8rS32(& pixels[0],  1, 4, & bits[0]);
+        transpose8rS32(& pixels[8],  1, 4, & bits[1]);
+        transpose8rS32(& pixels[16], 1, 4, & bits[2]);
+        //transpose8rS32(& pixels[24], 1, 4, & bits[3]);
+        /*
+        Serial.println("Pixels:");
+        for (int m = 0; m < 24; m++) {
+            Serial.print(m); Serial.print(": ");
+            uint8_t bt = pixels[m];
+            for (int k = 0; k < 8; k++) {
+                if (bt & 0x80) Serial.print("1");
+                else Serial.print("0");
+                bt = bt << 1;
+            }
+            Serial.println();
         }
-    }
 
-    // -- Start up the next controller
-    //    This method is static so that it can dispatch to the
-    //    appropriate startOnChannel method of the given controller.
-    static void startNext(int channel)
-    {
-        if (gNext < gNumControllers) {
-            ClocklessController * pController = static_cast<ClocklessController*>(gControllers[gNext]);
-            pController->startOnChannel(channel);
-            gNext++;
+        Serial.println("Bits:");
+        for (int bitnum = 0; bitnum < 8; bitnum++) {
+            Serial.print(bitnum); Serial.print(": ");
+            for (int w = 0; w < 4; w++) {
+                uint8_t bt = bits[ bitnum*4 + w ];
+                for (int k = 0; k < 8; k++) {
+                    if (bt & 0x80) Serial.print("1");
+                    else Serial.print("0");
+                    bt = bt << 1;
+                }
+                Serial.print(" ");
+            }
+            Serial.println();
         }
+        */
     }
 
-    // -- Start this controller on the given channel
-    //    This function just initiates the RMT write; it does not wait
-    //    for it to finish.
-    void startOnChannel(int channel)
+    static void transpose8rS32(uint8_t * A, int m, int n, uint8_t * B) 
     {
-        // -- Assign this channel and configure the RMT
-        mRMT_channel = rmt_channel_t(channel);
+        uint32_t x, y, t;
 
-        // -- Store a reference to this controller, so we can get it
-        //    inside the interrupt handler
-        gOnChannel[channel] = this;
+        // Load the array and pack it into x and y.
 
-        // -- Assign the pin to this channel
-        rmt_set_pin(mRMT_channel, RMT_MODE_TX, mPin);
+        x = (A[0]<<24)   | (A[m]<<16)   | (A[2*m]<<8) | A[3*m];
+        y = (A[4*m]<<24) | (A[5*m]<<16) | (A[6*m]<<8) | A[7*m];
 
-        if (FASTLED_RMT_BUILTIN_DRIVER) {
-            // -- Use the built-in RMT driver to send all the data in one shot
-            rmt_register_tx_end_callback(doneOnChannel, 0);
-            rmt_write_items(mRMT_channel, mBuffer, mBufferSize, false);
-        } else {
-            // -- Use our custom driver to send the data incrementally
+        t = (x ^ (x >> 7)) & 0x00AA00AA;  x = x ^ t ^ (t << 7);
+        t = (y ^ (y >> 7)) & 0x00AA00AA;  y = y ^ t ^ (t << 7);
 
-            // -- Turn on the interrupts
-            rmt_set_tx_intr_en(mRMT_channel, true);
-        
-            // -- Initialize the counters that keep track of where we are in
-            //    the pixel data.
-            mCurPulse = 0;
-            mCurByte = 0;
-
-            // -- Fill both halves of the buffer
-            fillHalfRMTBuffer();
-            fillHalfRMTBuffer();
-
-            // -- Turn on the interrupts
-            rmt_set_tx_intr_en(mRMT_channel, true);
-            
-            // -- Start the RMT TX operation
-            rmt_tx_start(mRMT_channel, true);
-        }
+        t = (x ^ (x >>14)) & 0x0000CCCC;  x = x ^ t ^ (t <<14);
+        t = (y ^ (y >>14)) & 0x0000CCCC;  y = y ^ t ^ (t <<14);
+
+        t = (x & 0xF0F0F0F0) | ((y >> 4) & 0x0F0F0F0F);
+        y = ((x << 4) & 0xF0F0F0F0) | (y & 0x0F0F0F0F);
+        x = t;
+
+        B[0]=x>>24;    B[n]=x>>16;    B[2*n]=x>>8;  B[3*n]=x;
+        B[4*n]=y>>24;  B[5*n]=y>>16;  B[6*n]=y>>8;  B[7*n]=y;
     }
 
-    // -- A controller is done 
-    //    This function is called when a controller finishes writing
-    //    its data. It is called either by the custom interrupt
-    //    handler (below), or as a callback from the built-in
-    //    interrupt handler. It is static because we don't know which
-    //    controller is done until we look it up.
-    static void doneOnChannel(rmt_channel_t channel, void * arg)
+    /** Transpose 24 * 8 bits --> 8 * 24 bits
+     *
+     *  Important notes: the result is actually 8 * 32 bits, where
+     *  each set of bits only occupy the low 24 bits. As with other
+     *  transpose functions, the sets of bits are also in reverse
+     *  order from what we want -- that is, the least significant bit
+     *  (the bit we want to send first) is actually the last set
+     *  (index 7).
+     *
+     **/
+    static void transpose24x1_noinline(unsigned char *A, uint32_t *B) 
     {
-        ClocklessController * controller = static_cast<ClocklessController*>(gOnChannel[channel]);
-        portBASE_TYPE HPTaskAwoken = 0;
-
-        // -- Turn off output on the pin
-        gpio_matrix_out(controller->mPin, 0x100, 0, 0);
+        uint32_t  x, y, x1,y1,t,x2,y2;
+        
+        y = *(unsigned int*)(A);
+        x = *(unsigned int*)(A+4);
+        y1 = *(unsigned int*)(A+8);
+        x1 = *(unsigned int*)(A+12);
+        
+        y2 = *(unsigned int*)(A+16);
+        x2 = *(unsigned int*)(A+20);
+        
+        
+        // pre-transform x
+        t = (x ^ (x >> 7)) & 0x00AA00AA;  x = x ^ t ^ (t << 7);
+        t = (x ^ (x >>14)) & 0x0000CCCC;  x = x ^ t ^ (t <<14);
+        
+        t = (x1 ^ (x1 >> 7)) & 0x00AA00AA;  x1 = x1 ^ t ^ (t << 7);
+        t = (x1 ^ (x1 >>14)) & 0x0000CCCC;  x1 = x1 ^ t ^ (t <<14);
+        
+        t = (x2 ^ (x2 >> 7)) & 0x00AA00AA;  x2 = x2 ^ t ^ (t << 7);
+        t = (x2 ^ (x2 >>14)) & 0x0000CCCC;  x2 = x2 ^ t ^ (t <<14);
+        
+        // pre-transform y
+        t = (y ^ (y >> 7)) & 0x00AA00AA;  y = y ^ t ^ (t << 7);
+        t = (y ^ (y >>14)) & 0x0000CCCC;  y = y ^ t ^ (t <<14);
+        
+        t = (y1 ^ (y1 >> 7)) & 0x00AA00AA;  y1 = y1 ^ t ^ (t << 7);
+        t = (y1 ^ (y1 >>14)) & 0x0000CCCC;  y1 = y1 ^ t ^ (t <<14);
+        
+        t = (y2 ^ (y2 >> 7)) & 0x00AA00AA;  y2 = y2 ^ t ^ (t << 7);
+        t = (y2 ^ (y2 >>14)) & 0x0000CCCC;  y2 = y2 ^ t ^ (t <<14);
+        
+        // final transform
+        t = (x & 0xF0F0F0F0) | ((y >> 4) & 0x0F0F0F0F);
+        y = ((x << 4) & 0xF0F0F0F0) | (y & 0x0F0F0F0F);
+        x = t;
+        
+        t = (x1 & 0xF0F0F0F0) | ((y1 >> 4) & 0x0F0F0F0F);
+        y1 = ((x1 << 4) & 0xF0F0F0F0) | (y1 & 0x0F0F0F0F);
+        x1 = t;
+        
+        t = (x2 & 0xF0F0F0F0) | ((y2 >> 4) & 0x0F0F0F0F);
+        y2 = ((x2 << 4) & 0xF0F0F0F0) | (y2 & 0x0F0F0F0F);
+        x2 = t;
+        
+        *((uint32_t*)B)     = (uint32_t)(  (y &       0xff)       | ((y1 &       0xff) <<8)  | ((y2 &       0xff) <<16) );
+        *((uint32_t*)(B+1)) = (uint32_t)( ((y &     0xff00) >>8)  |  (y1 &     0xff00)       | ((y2 &     0xff00) <<8)  );
+        *((uint32_t*)(B+2)) = (uint32_t)( ((y &   0xff0000) >>16) | ((y1 &   0xff0000) >>8)  |  (y2 &   0xff0000)       );
+        *((uint32_t*)(B+3)) = (uint32_t)( ((y & 0xff000000) >>24) | ((y1 & 0xff000000) >>16) | ((y2 & 0xff000000) >> 8) );
+        
+        *((uint32_t*)(B+4)) = (uint32_t)(  (x &       0xff)       | ((x1 &       0xff) <<8)  | ((x2 &       0xff) <<16) );
+        *((uint32_t*)(B+5)) = (uint32_t)( ((x &     0xff00) >>8)  |  (x1 &     0xff00)       | ((x2 &     0xff00) <<8)  );
+        *((uint32_t*)(B+6)) = (uint32_t)( ((x &   0xff0000) >>16) | ((x1 &   0xff0000) >>8)  |  (x2 &   0xff0000)       );
+        *((uint32_t*)(B+7)) = (uint32_t)( ((x & 0xff000000) >>24) | ((x1 & 0xff000000) >>16) | ((x2 & 0xff000000) >> 8) );
+    }
 
-        gOnChannel[channel] = NULL;
-        gNumDone++;
 
-        if (gNumDone == gNumControllers) {
-            // -- If this is the last controller, signal that we are all done
-            xSemaphoreGiveFromISR(gTX_sem, &HPTaskAwoken);
-            if(HPTaskAwoken == pdTRUE) portYIELD_FROM_ISR();
-        } else {
-            // -- Otherwise, if there are still controllers waiting, then
-            //    start the next one on this channel
-            if (gNext < gNumControllers)
-                startNext(channel);
-        }
-    }
-    
-    // -- Custom interrupt handler
-    //    This interrupt handler handles two cases: a controller is
-    //    done writing its data, or a controller needs to fill the
-    //    next half of the RMT buffer with data.
-    static IRAM_ATTR void interruptHandler(void *arg)
+    /** Start I2S transmission
+     */
+    static void i2sStart()
     {
-        // -- The basic structure of this code is borrowed from the
-        //    interrupt handler in esp-idf/components/driver/rmt.c
-        uint32_t intr_st = RMT.int_st.val;
-        uint8_t channel;
-
-        for (channel = 0; channel < FASTLED_RMT_MAX_CHANNELS; channel++) {
-            int tx_done_bit = channel * 3;
-            int tx_next_bit = channel + 24;
-
-            if (gOnChannel[channel] != NULL) {
-
-                // -- More to send on this channel
-                if (intr_st & BIT(tx_next_bit)) {
-                    RMT.int_clr.val |= BIT(tx_next_bit);
-                    
-                    // -- Refill the half of the buffer that we just finished,
-                    //    allowing the other half to proceed.
-                    ClocklessController * controller = static_cast<ClocklessController*>(gOnChannel[channel]);
-                    controller->fillHalfRMTBuffer();
-                } else {
-                    // -- Transmission is complete on this channel
-                    if (intr_st & BIT(tx_done_bit)) {
-                        RMT.int_clr.val |= BIT(tx_done_bit);
-                        doneOnChannel(rmt_channel_t(channel), 0);
-                    }
-                }
-            }
-        }
+        // esp_intr_disable(gI2S_intr_handle);
+        // Serial.println("I2S start");
+        i2sReset();
+        //Serial.println(dmaBuffers[0]->sampleCount());
+        i2s->lc_conf.val=I2S_OUT_DATA_BURST_EN | I2S_OUTDSCR_BURST_EN | I2S_OUT_DATA_BURST_EN;
+        i2s->out_link.addr = (uint32_t) & (dmaBuffers[0]->descriptor);
+        i2s->out_link.start = 1;
+        ////vTaskDelay(5);
+        i2s->int_clr.val = i2s->int_raw.val;
+        // //vTaskDelay(5);
+        i2s->int_ena.out_dscr_err = 1;
+        //enable interrupt
+        ////vTaskDelay(5);
+        esp_intr_enable(gI2S_intr_handle);
+        // //vTaskDelay(5);
+        i2s->int_ena.val = 0;
+        i2s->int_ena.out_eof = 1;
+
+        //start transmission
+        i2s->conf.tx_start = 1;
     }
 
-    // -- Fill the RMT buffer
-    //    This function fills the next 32 slots in the RMT write
-    //    buffer with pixel data. It also handles the case where the
-    //    pixel data is exhausted, so we need to fill the RMT buffer
-    //    with zeros to signal that it's done.
-    void fillHalfRMTBuffer()
+    static void i2sReset()
     {
-        uint32_t one_val = mOne.val;
-        uint32_t zero_val = mZero.val;
-
-        // -- Convert (up to) 32 bits of the raw pixel data into
-        //    into RMT pulses that encode the zeros and ones.
-        int pulses = 0;
-        uint32_t byteval;
-        while (pulses < 32 && mCurByte < mSize) {
-            // -- Get one byte
-            byteval = mPixelData[mCurByte++];
-            byteval <<= 24;
-            // Shift bits out, MSB first, setting RMTMEM.chan[n].data32[x] to the 
-            // rmt_item32_t value corresponding to the buffered bit value
-            for (register uint32_t j = 0; j < 8; j++) {
-                uint32_t val = (byteval & 0x80000000L) ? one_val : zero_val;
-                RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = val;
-                byteval <<= 1;
-                mCurPulse++;
-            }
-            pulses += 8;
+        // Serial.println("I2S reset");
+        const unsigned long lc_conf_reset_flags = I2S_IN_RST_M | I2S_OUT_RST_M | I2S_AHBM_RST_M | I2S_AHBM_FIFO_RST_M;
+        i2s->lc_conf.val |= lc_conf_reset_flags;
+        i2s->lc_conf.val &= ~lc_conf_reset_flags;
+
+        const uint32_t conf_reset_flags = I2S_RX_RESET_M | I2S_RX_FIFO_RESET_M | I2S_TX_RESET_M | I2S_TX_FIFO_RESET_M;
+        i2s->conf.val |= conf_reset_flags;
+        i2s->conf.val &= ~conf_reset_flags;
+        //while (i2s->state.rx_fifo_reset_back)
+        //    ;
+        /*
+        static void dma_reset(i2s_dev_t *dev) {
+            dev->lc_conf.in_rst=1; dev->lc_conf.in_rst=0;
+            dev->lc_conf.out_rst=1; dev->lc_conf.out_rst=0;
         }
 
-        // -- When we reach the end of the pixel data, fill the rest of the
-        //    RMT buffer with 0's, which signals to the device that we're done.
-        if (mCurByte == mSize) {
-            while (pulses < 32) {
-                RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = 0;
-                mCurPulse++;
-                pulses++;
-            }
+        static void fifo_reset(i2s_dev_t *dev) {
+            dev->conf.rx_fifo_reset=1; dev->conf.rx_fifo_reset=0;
+            dev->conf.tx_fifo_reset=1; dev->conf.tx_fifo_reset=0;
         }
-        
-        // -- When we have filled the back half the buffer, reset the position to the first half
-        if (mCurPulse >= MAX_PULSES*2)
-            mCurPulse = 0;
+        */
+    }
+
+    static void i2sStop()
+    {
+        // Serial.println("I2S stop");
+        esp_intr_disable(gI2S_intr_handle);
+        i2sReset();
+        i2s->conf.rx_start = 0;
+        i2s->conf.tx_start = 0;
     }
 };
 

From 4666e06c7a3e7d43380c3fc0b78693fc99dbba2b Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Sat, 27 Apr 2019 18:20:43 -0400
Subject: [PATCH 043/204] Two updates: (1) avoid copying all the pixel data up
 front, and (2) use T1, T2, and T3 to encode thepulse patterns

---
 platforms/esp/32/clockless_esp32.h | 201 +++++++++++++++++------------
 1 file changed, 121 insertions(+), 80 deletions(-)

diff --git a/platforms/esp/32/clockless_esp32.h b/platforms/esp/32/clockless_esp32.h
index 51ceb8bd57..31bff30eb4 100644
--- a/platforms/esp/32/clockless_esp32.h
+++ b/platforms/esp/32/clockless_esp32.h
@@ -66,17 +66,15 @@ __attribute__ ((always_inline)) inline static uint32_t __clock_cycles() {
 #endif
 
 // -- I2S clock
-#define I2S_BASE_CLK (1600000000L)
+#define I2S_BASE_CLK (800000000L)
 
 // -- Convert ESP32 cycles back into nanoseconds
 #define ESPCLKS_TO_NS(_CLKS) (((long)(_CLKS) * 1000L) / F_CPU_MHZ)
 
-// -- Convert nanoseconds into RMT cycles
-#define F_CPU_RMT       (  80000000L)
-#define NS_PER_SEC      (1000000000L)
-#define CYCLES_PER_SEC  (F_CPU_RMT/DIVIDER)
-#define NS_PER_CYCLE    ( NS_PER_SEC / CYCLES_PER_SEC )
-#define NS_TO_CYCLES(n) ( (n) / NS_PER_CYCLE )
+// -- I2S bit encoding
+//    For now, this stuff is hard-coded
+#define FASTLED_I2S_CLOCK_DIVIDER     10  // 80MHz --> 8MHz
+#define FASTLED_I2S_NS_PER_PULSE     125  // == 125ns per cycle
 
 // -- Array of all controllers
 static CLEDController * gControllers[FASTLED_I2S_MAX_CONTROLLERS];
@@ -104,11 +102,25 @@ struct DMABuffer {
 #define NUM_DMA_BUFFERS 2
 static DMABuffer * dmaBuffers[NUM_DMA_BUFFERS];
 
+// -- Bit patterns
+//    We configure the I2S data clock so that each pulse is
+//    125ns. Depending on the kind of LED we compute a pattern of
+//    pulses that match the timing. For example, a "1" bit for the
+//    WS2812 consists of 700-900ns high, followed by 300-500ns
+//    low. Using 125ns per pulse, we can send a "1" bit using this
+//    pattern: 1111111000 (a total of 10 bits, or 1250ns)
+//
+//    For now, we require all strips to be the same chipset, so these
+//    are global variables.
+
+static int      gPulsesPerBit = 0;
+static uint32_t gOneBit[10] = {0,0,0,0,0,0,0,0,0,0};
+static uint32_t gZeroBit[10]  = {0,0,0,0,0,0,0,0,0,0};
+
 // -- Counters to track progress
 static int gCurBuffer = 0;
-static int gCurPixel = 0;
-static int gPixelsSent = 0;
-static int gMaxPixels = 0;
+static bool gDoneFilling = false;
+static bool gDoneSending = false;
 
 template <int DATA_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = RGB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 5>
 class ClocklessController : public CPixelLEDController<RGB_ORDER>
@@ -122,11 +134,8 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
     // -- This instantiation forces a check on the pin choice
     FastPin<DATA_PIN> mFastPin;
 
-    // -- State information for keeping track of where we are in the
-    //    pixel data. For the I2S driver, it is more convenient to
-    //    store the data for each channel in a separate array.
-    uint8_t *      mPixelData[NUM_COLOR_CHANNELS];
-    int            mSize = 0;
+    // -- Save the pixel controller
+    PixelController<RGB_ORDER> * mPixels;
 
 public:
 
@@ -134,22 +143,9 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
     {
         i2sInit();
         
-        // TBD: Precompute the bit patterns based on the I2S sample rate
-        /*
-        // T1H
-        mOne.level0 = 1;
-        mOne.duration0 = TO_RMT_CYCLES(T1+T2);
-        // T1L
-        mOne.level1 = 0;
-        mOne.duration1 = TO_RMT_CYCLES(T3);
-
-        // T0H
-        mZero.level0 = 1;
-        mZero.duration0 = TO_RMT_CYCLES(T1);
-        // T0L
-        mZero.level1 = 0;
-        mZero.duration1 = TO_RMT_CYCLES(T2 + T3);
-        */
+        // -- Allocate space to save the pixel controller
+        //    during parallel output
+        mPixels = (PixelController<RGB_ORDER> *) malloc(sizeof(PixelController<RGB_ORDER>));
 
         gControllers[gNumControllers] = this;
         m_index = gNumControllers;
@@ -167,26 +163,48 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         gpio_set_direction(mPin, (gpio_mode_t)GPIO_MODE_DEF_OUTPUT);
         pinMode(mPin,OUTPUT);
         gpio_matrix_out(mPin, i2s_base_pin_index + m_index, false, false);
-
-        for (int i = 0; i < NUM_COLOR_CHANNELS; i++) {
-            mPixelData[i] = 0;
-        }
-
-        /*
-        Serial.print("Init controller ");
-        Serial.print(m_index);
-        Serial.print(" on pin ");
-        Serial.print(mPin);
-        Serial.print(" I2S signal ");
-        Serial.print(i2s_base_pin_index + m_index);
-        Serial.println();
-        */
     }
 
     virtual uint16_t getMaxRefreshRate() const { return 400; }
 
 protected:
 
+    static void initBitPatterns()
+    {
+        // Precompute the bit patterns based on the I2S sample rate
+        uint32_t T1ns = ESPCLKS_TO_NS(T1);
+        uint32_t T2ns = ESPCLKS_TO_NS(T2);
+        uint32_t T3ns = ESPCLKS_TO_NS(T3);
+
+        gPulsesPerBit = (T1ns + T2ns + T3ns)/FASTLED_I2S_NS_PER_PULSE;
+
+        Serial.print("Pulses per bit: "); Serial.println(gPulsesPerBit);
+
+        int ones_for_one  = (T1ns + T2ns)/FASTLED_I2S_NS_PER_PULSE;
+        Serial.print("One bit:  "); Serial.print(ones_for_one); Serial.println(" 1 bits");
+        int i = 0;
+        while ( i < ones_for_one ) {
+            gOneBit[i] = 0xFFFFFF00;
+            i++;
+        }
+        while ( i < gPulsesPerBit ) {
+            gOneBit[i] = 0x00000000;
+            i++;
+        }
+
+        int ones_for_zero = (T1ns)/FASTLED_I2S_NS_PER_PULSE;
+        Serial.print("Zero bit: "); Serial.print(ones_for_zero); Serial.println(" 1 bits");
+        i = 0;
+        while ( i < ones_for_zero ) {
+            gZeroBit[i] = 0xFFFFFF00;
+            i++;
+        }
+        while ( i < gPulsesPerBit ) {
+            gZeroBit[i] = 0x00000000;
+            i++;
+        }
+    }
+
     static DMABuffer * allocateDMABuffer(int bytes)
     {
         DMABuffer * b = (DMABuffer *)heap_caps_malloc(sizeof(DMABuffer), MALLOC_CAP_DMA);
@@ -212,6 +230,9 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         // -- Only need to do this once
         if (gInitialized) return;
 
+        // -- Construct the bit patterns for ones and zeros
+        initBitPatterns();
+
         // -- Choose whether to use I2S device 0 or device 1
         //    Set up the various device-specific parameters
         int interruptSource;
@@ -267,11 +288,11 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         i2s->clkm_conf.clka_en = 0;
 
         // -- Data clock is computed as Base/(div_num + (div_b/div_a))
-        //    Base is 80Mhz, so 80/(25 + 0/1) = 3.2Mhz
-        //    One cycle is 312.5ns
+        //    Base is 80Mhz, so 80/(10 + 0/1) = 8Mhz
+        //    One cycle is 125ns
         i2s->clkm_conf.clkm_div_a = 1;
         i2s->clkm_conf.clkm_div_b = 0;
-        i2s->clkm_conf.clkm_div_num = 25;
+        i2s->clkm_conf.clkm_div_num = FASTLED_I2S_CLOCK_DIVIDER;
     
         i2s->fifo_conf.val = 0;
         i2s->fifo_conf.tx_fifo_mod_force_en = 1;
@@ -289,8 +310,8 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         i2s->timing.val = 0;
 
         // -- Allocate two DMA buffers
-        dmaBuffers[0] = allocateDMABuffer(32 * NUM_COLOR_CHANNELS * 4);
-        dmaBuffers[1] = allocateDMABuffer(32 * NUM_COLOR_CHANNELS * 4);
+        dmaBuffers[0] = allocateDMABuffer(32 * NUM_COLOR_CHANNELS * gPulsesPerBit);
+        dmaBuffers[1] = allocateDMABuffer(32 * NUM_COLOR_CHANNELS * gPulsesPerBit);
 
         // -- Arrange them as a circularly linked list
         dmaBuffers[0]->descriptor.qe.stqe_next = &(dmaBuffers[1]->descriptor);
@@ -324,7 +345,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         //    data. We need to make a copy because pixels is a local
         //    variable in the calling function, and this data structure
         //    needs to outlive this call to showPixels.]
-        copyPixelData(pixels);
+        (*mPixels) = pixels;
 
         // -- Keep track of the number of strips we've seen
         gNumStarted++;
@@ -335,9 +356,9 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         // -- The last call to showPixels is the one responsible for doing
         //    all of the actual work
         if (gNumStarted == gNumControllers) {
-            gCurPixel = 0;
             gCurBuffer = 0;
-            gPixelsSent = 0;
+            gDoneFilling = false;
+            gDoneSending = false;
 
             // -- Prefill both buffers
             fillBuffer();
@@ -363,6 +384,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
     //    Make a safe copy of the pixel data, so that the FastLED show
     //    function can continue to the next controller while the RMT
     //    device starts sending this data asynchronously.
+    /*
     virtual void copyPixelData(PixelController<RGB_ORDER> & pixels)
     {
         // -- Make sure we have a buffer of the right size
@@ -383,14 +405,10 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         //    storing the resulting raw pixel data in the buffer.
         int cur = 0;
         while (pixels.has(1)) {
-            mPixelData[0][cur] = pixels.loadAndScale0();
-            mPixelData[1][cur] = pixels.loadAndScale1();
-            mPixelData[2][cur] = pixels.loadAndScale2();
-            pixels.advanceData();
-            pixels.stepDithering();
             cur++;
         }
     }
+    */
 
     // -- Custom interrupt handler
     static IRAM_ATTR void interruptHandler(void *arg)
@@ -398,11 +416,12 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         if (i2s->int_st.out_eof) {
             i2s->int_clr.val = i2s->int_raw.val;
 
-            gPixelsSent++;
-            if (gCurPixel < gMaxPixels) {
+            if ( ! gDoneFilling) {
                 fillBuffer();
             } else {
-                if (gPixelsSent == gMaxPixels) {
+                if ( ! gDoneSending) {
+                    gDoneSending = true;
+                } else {
                     portBASE_TYPE HPTaskAwoken = 0;
                     xSemaphoreGiveFromISR(gTX_sem, &HPTaskAwoken);
                     if(HPTaskAwoken == pdTRUE) portYIELD_FROM_ISR();
@@ -413,9 +432,6 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
 
     static void fillBuffer()
     {
-        int pixel_num = gCurPixel;
-        gCurPixel++;
-
         volatile uint32_t * buf = (uint32_t *) dmaBuffers[gCurBuffer]->buffer;
         gCurBuffer = (gCurBuffer + 1) % NUM_DMA_BUFFERS;
         // Serial.print("Fill "); Serial.print((uint32_t)buf); Serial.println();
@@ -425,37 +441,52 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
 
         // -- Get the requested pixel from each controller. Store the
         //    data for each color channel in a separate array.
+        uint32_t has_data_mask = 0;
         for (int i = 0; i < gNumControllers; i++) {
-            for (int j = 0; j < NUM_COLOR_CHANNELS; j++) {
-                ClocklessController * pController = static_cast<ClocklessController*>(gControllers[i]);
-                pixels[j][23-i] = pController->mPixelData[j][pixel_num];
+            int bit_index = 23-i;
+            ClocklessController * pController = static_cast<ClocklessController*>(gControllers[i]);
+            if (pController->mPixels->has(1)) {
+                pixels[0][bit_index] = pController->mPixels->loadAndScale0();
+                pixels[1][bit_index] = pController->mPixels->loadAndScale1();
+                pixels[2][bit_index] = pController->mPixels->loadAndScale2();
+                pController->mPixels->advanceData();
+                pController->mPixels->stepDithering();
+
+                // -- Record that this controller still has data to send
+                has_data_mask |= (1 << bit_index);
+                /*
+                if (i == 0) {
+                    Serial.print("Pixel: "); 
+                    Serial.print(pixels[0][bit_index]); Serial.print(" ");
+                    Serial.print(pixels[1][bit_index]); Serial.print(" ");
+                    Serial.print(pixels[2][bit_index]);
+                    Serial.println();
+                }
+                */
             }
         }
 
+        if (has_data_mask == 0) {
+            gDoneFilling = true;
+            return;
+        }
+
         // -- Transpose and encode the pixel data for the DMA buffer
         uint8_t bits[NUM_COLOR_CHANNELS][8][4];
 
+        int buf_index = 0;
+
         for (int channel = 0; channel < NUM_COLOR_CHANNELS; channel++) {
+
             // -- Tranpose each array: all the bit 7's, then all the bit 6's, ...
             // transpose24x1_noinline(pixels[channel], bits[channel]);
             transpose32(pixels[channel], & (bits[channel][0][0]) );
 
-            // -- Create the bit pattern in the actual DMA buffer. Each
-            //    bit in the data turns into 4 bits in the output. Those
-            //    four bits encode the timing of the signal to the LED
-            //    strip. The I2S device is set up so that each pulse is
-            //    312.5ns. Therefore, we can form the zero and one bit
-            //    timing for the WS2812 with the following bit patterns:
-            //
-            //    Zero bit: T0H is around 300-400ns, so we send 1000 (high for 312.5, low for the rest)
-            //    One bit:  T1H is around 700-900ns, so we send 1110 (high for 937.5)
-        
-            // Serial.print("Channel: "); Serial.println(channel);
+            //Serial.print("Channel: "); Serial.print(channel); Serial.print(" ");
             for (int bitnum = 0; bitnum < 8; bitnum++) {
                 uint8_t * row = (uint8_t *) & (bits[channel][bitnum][0]);
                 uint32_t bit =  (row[0] << 24) | (row[1] << 16) | (row[2] << 8) | row[3];
-                // bit = bit >> 23;
-                // bit = bit << 1;
+
                 /*
                 Serial.print(bitnum); Serial.print(": ");
                 uint32_t bt = bit;
@@ -466,14 +497,24 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
                 }
                 Serial.println();
                 */
+
+                for (int pulse_num = 0; pulse_num < gPulsesPerBit; pulse_num++) {
+                    buf[buf_index++] = /*has_data_mask &*/ (bit & gOneBit[pulse_num]) | (~bit & gZeroBit[pulse_num]);
+                    //if (buf[buf_index-1] & 0x100) Serial.print("1");
+                    //else Serial.print("0");
+                }
+                //Serial.print(" ");
                 // -- Now form the four-bit pattern: we can do this by
                 //    duplicating the bit we computed, and adding a 1
                 //    at the front and a zero at the back: 1bb0
+                /*
                 buf[channel*32 + bitnum*4]   = 0xFFFFFFFF;
                 buf[channel*32 + bitnum*4+1] = bit;
                 buf[channel*32 + bitnum*4+2] = bit;
                 buf[channel*32 + bitnum*4+3] = 0x00000000;
+                */
             }
+            //Serial.println();
         }
     }
 

From 22881b310021e1e7674305b5cfaf539f5772ada5 Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Sat, 27 Apr 2019 22:08:08 -0400
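Subject: [PATCH 044/204] Trying to get the timing better.

Change the hard-coded I2S clock divider from 10 to 25 and the pulse
length from 125ns to 312.5ns.

Compute the number of high pulses for a one bit and for a zero bit by
rounding up to a whole pulse instead of truncating, and print the T1,
T2 and T3 values plus the target and resulting timings over Serial.

Make the temporary pixel and bit arrays in fillBuffer() static and
comment out the per-call memset of the pixel array.
---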
 platforms/esp/32/clockless_esp32.h | 28 +++++++++++++++++++---------
 1 file changed, 19 insertions(+), 9 deletions(-)

diff --git a/platforms/esp/32/clockless_esp32.h b/platforms/esp/32/clockless_esp32.h
index 31bff30eb4..a3688c9365 100644
--- a/platforms/esp/32/clockless_esp32.h
+++ b/platforms/esp/32/clockless_esp32.h
@@ -73,8 +73,8 @@ __attribute__ ((always_inline)) inline static uint32_t __clock_cycles() {
 
 // -- I2S bit encoding
 //    For now, this stuff is hard-coded
-#define FASTLED_I2S_CLOCK_DIVIDER     10  // 80MHz --> 8MHz
-#define FASTLED_I2S_NS_PER_PULSE     125  // == 125ns per cycle
+#define FASTLED_I2S_CLOCK_DIVIDER   25     //   10  // 80MHz --> 8MHz
+#define FASTLED_I2S_NS_PER_PULSE   312.5   //  125  // == 125ns per cycle
 
 // -- Array of all controllers
 static CLEDController * gControllers[FASTLED_I2S_MAX_CONTROLLERS];
@@ -176,12 +176,19 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         uint32_t T2ns = ESPCLKS_TO_NS(T2);
         uint32_t T3ns = ESPCLKS_TO_NS(T3);
 
+        Serial.print("T1 = "); Serial.print(T1); Serial.print(" ns "); Serial.println(T1ns);
+        Serial.print("T2 = "); Serial.print(T2); Serial.print(" ns "); Serial.println(T2ns);
+        Serial.print("T3 = "); Serial.print(T3); Serial.print(" ns "); Serial.println(T3ns);
+
         gPulsesPerBit = (T1ns + T2ns + T3ns)/FASTLED_I2S_NS_PER_PULSE;
 
         Serial.print("Pulses per bit: "); Serial.println(gPulsesPerBit);
 
-        int ones_for_one  = (T1ns + T2ns)/FASTLED_I2S_NS_PER_PULSE;
-        Serial.print("One bit:  "); Serial.print(ones_for_one); Serial.println(" 1 bits");
+        int ones_for_one  = ((T1ns + T2ns - 1)/FASTLED_I2S_NS_PER_PULSE) + 1;
+        Serial.print("One bit:  target "); 
+        Serial.print(T1ns+T2ns); Serial.print("ns --- "); 
+        Serial.print(ones_for_one); Serial.print(" 1 bits");
+        Serial.print(" = "); Serial.print(ones_for_one * FASTLED_I2S_NS_PER_PULSE); Serial.println("ns");
         int i = 0;
         while ( i < ones_for_one ) {
             gOneBit[i] = 0xFFFFFF00;
@@ -192,8 +199,11 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
             i++;
         }
 
-        int ones_for_zero = (T1ns)/FASTLED_I2S_NS_PER_PULSE;
-        Serial.print("Zero bit: "); Serial.print(ones_for_zero); Serial.println(" 1 bits");
+        int ones_for_zero = ((T1ns - 1)/FASTLED_I2S_NS_PER_PULSE) + 1;
+        Serial.print("Zero bit:  target "); 
+        Serial.print(T1ns); Serial.print("ns --- "); 
+        Serial.print(ones_for_zero); Serial.print(" 1 bits");
+        Serial.print(" = "); Serial.print(ones_for_zero * FASTLED_I2S_NS_PER_PULSE); Serial.println("ns");
         i = 0;
         while ( i < ones_for_zero ) {
             gZeroBit[i] = 0xFFFFFF00;
@@ -436,8 +446,8 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         gCurBuffer = (gCurBuffer + 1) % NUM_DMA_BUFFERS;
         // Serial.print("Fill "); Serial.print((uint32_t)buf); Serial.println();
 
-        uint8_t pixels[NUM_COLOR_CHANNELS][32];
-        memset(pixels, 0, NUM_COLOR_CHANNELS * 32);
+        static uint8_t pixels[NUM_COLOR_CHANNELS][32];
+        // memset(pixels, 0, NUM_COLOR_CHANNELS * 32);
 
         // -- Get the requested pixel from each controller. Store the
         //    data for each color channel in a separate array.
@@ -472,7 +482,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         }
 
         // -- Transpose and encode the pixel data for the DMA buffer
-        uint8_t bits[NUM_COLOR_CHANNELS][8][4];
+        static uint8_t bits[NUM_COLOR_CHANNELS][8][4];
 
         int buf_index = 0;
 

From e1f5e23426e789a373e3a3d816c10ec1a2aa8142 Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Sat, 27 Apr 2019 23:05:51 -0400
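Subject: [PATCH 045/204] This version seems pretty solid

Drop gDoneSending: once gDoneFilling is set, the interrupt handler now
gives gTX_sem on the next out_eof interrupt instead of waiting for one
more.

Move the temporary pixel and bit arrays out of fillBuffer() into the
file-scope gPixelRow and gPixelBits buffers, cleared with memset at the
end of the bit-pattern setup.

Set has_data_mask with 1 << (i+8) rather than 1 << bit_index, and apply
the mask to every word written to the DMA buffer (it was commented out
before).

Replace the inline I2S, DMA and FIFO reset sequences with calls to
i2sReset(), i2sReset_DMA() and i2sReset_FIFO(), adding the latter two
as helpers.

Remove the commented-out copyPixelData(), transpose24x1_noinline() and
commented-out debug printing.
---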
 platforms/esp/32/clockless_esp32.h | 246 +++++------------------------
 1 file changed, 41 insertions(+), 205 deletions(-)

diff --git a/platforms/esp/32/clockless_esp32.h b/platforms/esp/32/clockless_esp32.h
index a3688c9365..5e98ed4be7 100644
--- a/platforms/esp/32/clockless_esp32.h
+++ b/platforms/esp/32/clockless_esp32.h
@@ -120,7 +120,11 @@ static uint32_t gZeroBit[10]  = {0,0,0,0,0,0,0,0,0,0};
 // -- Counters to track progress
 static int gCurBuffer = 0;
 static bool gDoneFilling = false;
-static bool gDoneSending = false;
+
+// -- Temp buffers for pixels and bits being formatted for DMA
+static uint8_t gPixelRow[NUM_COLOR_CHANNELS][32];
+static uint8_t gPixelBits[NUM_COLOR_CHANNELS][8][4];
+
 
 template <int DATA_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = RGB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 5>
 class ClocklessController : public CPixelLEDController<RGB_ORDER>
@@ -213,6 +217,9 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
             gZeroBit[i] = 0x00000000;
             i++;
         }
+
+        memset(gPixelRow, 0, NUM_COLOR_CHANNELS * 32);
+        memset(gPixelBits, 0, NUM_COLOR_CHANNELS * 32);
     }
 
     static DMABuffer * allocateDMABuffer(int bytes)
@@ -258,23 +265,10 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
             i2s_base_pin_index = I2S1O_DATA_OUT0_IDX;
         }
 
-        // -- Reset i2s
-        i2s->conf.tx_reset = 1;
-        i2s->conf.tx_reset = 0;
-        i2s->conf.rx_reset = 1;
-        i2s->conf.rx_reset = 0;
-
-        // -- Reset DMA
-        i2s->lc_conf.in_rst = 1;
-        i2s->lc_conf.in_rst = 0;
-        i2s->lc_conf.out_rst = 1;
-        i2s->lc_conf.out_rst = 0;
-
-        // -- Reset FIFO (Do we need this?)
-        i2s->conf.rx_fifo_reset = 1;
-        i2s->conf.rx_fifo_reset = 0;
-        i2s->conf.tx_fifo_reset = 1;
-        i2s->conf.tx_fifo_reset = 0;
+        // -- Reset everything
+        i2sReset();
+        i2sReset_DMA();
+        i2sReset_FIFO();
 
         // -- Main configuration 
         i2s->conf.tx_msb_right = 1;
@@ -327,10 +321,10 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         dmaBuffers[0]->descriptor.qe.stqe_next = &(dmaBuffers[1]->descriptor);
         dmaBuffers[1]->descriptor.qe.stqe_next = &(dmaBuffers[0]->descriptor);
 
-        //allocate disabled i2s interrupt
+        // -- Allocate i2s interrupt
         SET_PERI_REG_BITS(I2S_INT_ENA_REG(I2S_DEVICE), I2S_OUT_EOF_INT_ENA_V, 1, I2S_OUT_EOF_INT_ENA_S);
-        esp_err_t e = esp_intr_alloc(interruptSource, 0, // ESP_INTR_FLAG_INTRDISABLED | ESP_INTR_FLAG_LEVEL3 | ESP_INTR_FLAG_IRAM,
-                       &interruptHandler, 0, &gI2S_intr_handle);
+        esp_err_t e = esp_intr_alloc(interruptSource, 0, // ESP_INTR_FLAG_INTRDISABLED | ESP_INTR_FLAG_LEVEL3,
+                                     &interruptHandler, 0, &gI2S_intr_handle);
 
         // -- Create a semaphore to block execution until all the controllers are done
         if (gTX_sem == NULL) {
@@ -354,7 +348,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         // -- Initialize the local state, save a pointer to the pixel
         //    data. We need to make a copy because pixels is a local
         //    variable in the calling function, and this data structure
-        //    needs to outlive this call to showPixels.]
+        //    needs to outlive this call to showPixels.
         (*mPixels) = pixels;
 
         // -- Keep track of the number of strips we've seen
@@ -368,7 +362,6 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         if (gNumStarted == gNumControllers) {
             gCurBuffer = 0;
             gDoneFilling = false;
-            gDoneSending = false;
 
             // -- Prefill both buffers
             fillBuffer();
@@ -383,43 +376,12 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
             xSemaphoreGive(gTX_sem);
 
             i2sStop();
-            // Serial.println("...done");
 
             // -- Reset the counters
             gNumStarted = 0;
         }
     }
 
-    // -- Copy pixel data
-    //    Make a safe copy of the pixel data, so that the FastLED show
-    //    function can continue to the next controller while the RMT
-    //    device starts sending this data asynchronously.
-    /*
-    virtual void copyPixelData(PixelController<RGB_ORDER> & pixels)
-    {
-        // -- Make sure we have a buffer of the right size
-        //    (3 bytes per pixel)
-        int size_needed = pixels.size();
-        if (size_needed > mSize) {
-            mSize = size_needed;
-            for (int i = 0; i < NUM_COLOR_CHANNELS; i++) {
-                if (mPixelData[i] != NULL) free(mPixelData[i]);
-                mPixelData[i] = (uint8_t *) malloc( mSize);
-            }
-
-            if (gMaxPixels < mSize)
-                gMaxPixels = mSize;
-        }
-
-        // -- Cycle through the R,G, and B values in the right order,
-        //    storing the resulting raw pixel data in the buffer.
-        int cur = 0;
-        while (pixels.has(1)) {
-            cur++;
-        }
-    }
-    */
-
     // -- Custom interrupt handler
     static IRAM_ATTR void interruptHandler(void *arg)
     {
@@ -429,13 +391,9 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
             if ( ! gDoneFilling) {
                 fillBuffer();
             } else {
-                if ( ! gDoneSending) {
-                    gDoneSending = true;
-                } else {
-                    portBASE_TYPE HPTaskAwoken = 0;
-                    xSemaphoreGiveFromISR(gTX_sem, &HPTaskAwoken);
-                    if(HPTaskAwoken == pdTRUE) portYIELD_FROM_ISR();
-                }
+                portBASE_TYPE HPTaskAwoken = 0;
+                xSemaphoreGiveFromISR(gTX_sem, &HPTaskAwoken);
+                if(HPTaskAwoken == pdTRUE) portYIELD_FROM_ISR();
             }
         }
     }
@@ -444,35 +402,25 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
     {
         volatile uint32_t * buf = (uint32_t *) dmaBuffers[gCurBuffer]->buffer;
         gCurBuffer = (gCurBuffer + 1) % NUM_DMA_BUFFERS;
-        // Serial.print("Fill "); Serial.print((uint32_t)buf); Serial.println();
-
-        static uint8_t pixels[NUM_COLOR_CHANNELS][32];
-        // memset(pixels, 0, NUM_COLOR_CHANNELS * 32);
 
         // -- Get the requested pixel from each controller. Store the
         //    data for each color channel in a separate array.
         uint32_t has_data_mask = 0;
         for (int i = 0; i < gNumControllers; i++) {
+            // -- Store the pixels in reverse controller order starting at index 23
+            //    This causes the bits to come out in the right position after we
+            //    transpose them.
             int bit_index = 23-i;
             ClocklessController * pController = static_cast<ClocklessController*>(gControllers[i]);
             if (pController->mPixels->has(1)) {
-                pixels[0][bit_index] = pController->mPixels->loadAndScale0();
-                pixels[1][bit_index] = pController->mPixels->loadAndScale1();
-                pixels[2][bit_index] = pController->mPixels->loadAndScale2();
+                gPixelRow[0][bit_index] = pController->mPixels->loadAndScale0();
+                gPixelRow[1][bit_index] = pController->mPixels->loadAndScale1();
+                gPixelRow[2][bit_index] = pController->mPixels->loadAndScale2();
                 pController->mPixels->advanceData();
                 pController->mPixels->stepDithering();
 
                 // -- Record that this controller still has data to send
-                has_data_mask |= (1 << bit_index);
-                /*
-                if (i == 0) {
-                    Serial.print("Pixel: "); 
-                    Serial.print(pixels[0][bit_index]); Serial.print(" ");
-                    Serial.print(pixels[1][bit_index]); Serial.print(" ");
-                    Serial.print(pixels[2][bit_index]);
-                    Serial.println();
-                }
-                */
+                has_data_mask |= (1 << (i+8));
             }
         }
 
@@ -482,20 +430,16 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         }
 
         // -- Transpose and encode the pixel data for the DMA buffer
-        static uint8_t bits[NUM_COLOR_CHANNELS][8][4];
-
         int buf_index = 0;
-
         for (int channel = 0; channel < NUM_COLOR_CHANNELS; channel++) {
 
             // -- Tranpose each array: all the bit 7's, then all the bit 6's, ...
-            // transpose24x1_noinline(pixels[channel], bits[channel]);
-            transpose32(pixels[channel], & (bits[channel][0][0]) );
+            transpose32(gPixelRow[channel], gPixelBits[channel][0] );
 
             //Serial.print("Channel: "); Serial.print(channel); Serial.print(" ");
             for (int bitnum = 0; bitnum < 8; bitnum++) {
-                uint8_t * row = (uint8_t *) & (bits[channel][bitnum][0]);
-                uint32_t bit =  (row[0] << 24) | (row[1] << 16) | (row[2] << 8) | row[3];
+                uint8_t * row = (uint8_t *) (gPixelBits[channel][bitnum]);
+                uint32_t bit = (row[0] << 24) | (row[1] << 16) | (row[2] << 8) | row[3];
 
                 /*
                 Serial.print(bitnum); Serial.print(": ");
@@ -509,22 +453,11 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
                 */
 
                 for (int pulse_num = 0; pulse_num < gPulsesPerBit; pulse_num++) {
-                    buf[buf_index++] = /*has_data_mask &*/ (bit & gOneBit[pulse_num]) | (~bit & gZeroBit[pulse_num]);
+                    buf[buf_index++] = has_data_mask & ( (bit & gOneBit[pulse_num]) | (~bit & gZeroBit[pulse_num]) );
                     //if (buf[buf_index-1] & 0x100) Serial.print("1");
                     //else Serial.print("0");
                 }
-                //Serial.print(" ");
-                // -- Now form the four-bit pattern: we can do this by
-                //    duplicating the bit we computed, and adding a 1
-                //    at the front and a zero at the back: 1bb0
-                /*
-                buf[channel*32 + bitnum*4]   = 0xFFFFFFFF;
-                buf[channel*32 + bitnum*4+1] = bit;
-                buf[channel*32 + bitnum*4+2] = bit;
-                buf[channel*32 + bitnum*4+3] = 0x00000000;
-                */
             }
-            //Serial.println();
         }
     }
 
@@ -534,34 +467,6 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         transpose8rS32(& pixels[8],  1, 4, & bits[1]);
         transpose8rS32(& pixels[16], 1, 4, & bits[2]);
         //transpose8rS32(& pixels[24], 1, 4, & bits[3]);
-        /*
-        Serial.println("Pixels:");
-        for (int m = 0; m < 24; m++) {
-            Serial.print(m); Serial.print(": ");
-            uint8_t bt = pixels[m];
-            for (int k = 0; k < 8; k++) {
-                if (bt & 0x80) Serial.print("1");
-                else Serial.print("0");
-                bt = bt << 1;
-            }
-            Serial.println();
-        }
-
-        Serial.println("Bits:");
-        for (int bitnum = 0; bitnum < 8; bitnum++) {
-            Serial.print(bitnum); Serial.print(": ");
-            for (int w = 0; w < 4; w++) {
-                uint8_t bt = bits[ bitnum*4 + w ];
-                for (int k = 0; k < 8; k++) {
-                    if (bt & 0x80) Serial.print("1");
-                    else Serial.print("0");
-                    bt = bt << 1;
-                }
-                Serial.print(" ");
-            }
-            Serial.println();
-        }
-        */
     }
 
     static void transpose8rS32(uint8_t * A, int m, int n, uint8_t * B) 
@@ -587,74 +492,6 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         B[4*n]=y>>24;  B[5*n]=y>>16;  B[6*n]=y>>8;  B[7*n]=y;
     }
 
-    /** Transpose 24 * 8 bits --> 8 * 24 bits
-     *
-     *  Important notes: the result is actually 8 * 32 bits, where
-     *  each set of bits only occupy the low 24 bits. As with other
-     *  transpose functions, the sets of bits are also in reverse
-     *  order from what we want -- that is, the least significant bit
-     *  (the bit we want to send first) is actually the last set
-     *  (index 7).
-     *
-     **/
-    static void transpose24x1_noinline(unsigned char *A, uint32_t *B) 
-    {
-        uint32_t  x, y, x1,y1,t,x2,y2;
-        
-        y = *(unsigned int*)(A);
-        x = *(unsigned int*)(A+4);
-        y1 = *(unsigned int*)(A+8);
-        x1 = *(unsigned int*)(A+12);
-        
-        y2 = *(unsigned int*)(A+16);
-        x2 = *(unsigned int*)(A+20);
-        
-        
-        // pre-transform x
-        t = (x ^ (x >> 7)) & 0x00AA00AA;  x = x ^ t ^ (t << 7);
-        t = (x ^ (x >>14)) & 0x0000CCCC;  x = x ^ t ^ (t <<14);
-        
-        t = (x1 ^ (x1 >> 7)) & 0x00AA00AA;  x1 = x1 ^ t ^ (t << 7);
-        t = (x1 ^ (x1 >>14)) & 0x0000CCCC;  x1 = x1 ^ t ^ (t <<14);
-        
-        t = (x2 ^ (x2 >> 7)) & 0x00AA00AA;  x2 = x2 ^ t ^ (t << 7);
-        t = (x2 ^ (x2 >>14)) & 0x0000CCCC;  x2 = x2 ^ t ^ (t <<14);
-        
-        // pre-transform y
-        t = (y ^ (y >> 7)) & 0x00AA00AA;  y = y ^ t ^ (t << 7);
-        t = (y ^ (y >>14)) & 0x0000CCCC;  y = y ^ t ^ (t <<14);
-        
-        t = (y1 ^ (y1 >> 7)) & 0x00AA00AA;  y1 = y1 ^ t ^ (t << 7);
-        t = (y1 ^ (y1 >>14)) & 0x0000CCCC;  y1 = y1 ^ t ^ (t <<14);
-        
-        t = (y2 ^ (y2 >> 7)) & 0x00AA00AA;  y2 = y2 ^ t ^ (t << 7);
-        t = (y2 ^ (y2 >>14)) & 0x0000CCCC;  y2 = y2 ^ t ^ (t <<14);
-        
-        // final transform
-        t = (x & 0xF0F0F0F0) | ((y >> 4) & 0x0F0F0F0F);
-        y = ((x << 4) & 0xF0F0F0F0) | (y & 0x0F0F0F0F);
-        x = t;
-        
-        t = (x1 & 0xF0F0F0F0) | ((y1 >> 4) & 0x0F0F0F0F);
-        y1 = ((x1 << 4) & 0xF0F0F0F0) | (y1 & 0x0F0F0F0F);
-        x1 = t;
-        
-        t = (x2 & 0xF0F0F0F0) | ((y2 >> 4) & 0x0F0F0F0F);
-        y2 = ((x2 << 4) & 0xF0F0F0F0) | (y2 & 0x0F0F0F0F);
-        x2 = t;
-        
-        *((uint32_t*)B)     = (uint32_t)(  (y &       0xff)       | ((y1 &       0xff) <<8)  | ((y2 &       0xff) <<16) );
-        *((uint32_t*)(B+1)) = (uint32_t)( ((y &     0xff00) >>8)  |  (y1 &     0xff00)       | ((y2 &     0xff00) <<8)  );
-        *((uint32_t*)(B+2)) = (uint32_t)( ((y &   0xff0000) >>16) | ((y1 &   0xff0000) >>8)  |  (y2 &   0xff0000)       );
-        *((uint32_t*)(B+3)) = (uint32_t)( ((y & 0xff000000) >>24) | ((y1 & 0xff000000) >>16) | ((y2 & 0xff000000) >> 8) );
-        
-        *((uint32_t*)(B+4)) = (uint32_t)(  (x &       0xff)       | ((x1 &       0xff) <<8)  | ((x2 &       0xff) <<16) );
-        *((uint32_t*)(B+5)) = (uint32_t)( ((x &     0xff00) >>8)  |  (x1 &     0xff00)       | ((x2 &     0xff00) <<8)  );
-        *((uint32_t*)(B+6)) = (uint32_t)( ((x &   0xff0000) >>16) | ((x1 &   0xff0000) >>8)  |  (x2 &   0xff0000)       );
-        *((uint32_t*)(B+7)) = (uint32_t)( ((x & 0xff000000) >>24) | ((x1 & 0xff000000) >>16) | ((x2 & 0xff000000) >> 8) );
-    }
-
-
     /** Start I2S transmission
      */
     static void i2sStart()
@@ -691,19 +528,18 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         const uint32_t conf_reset_flags = I2S_RX_RESET_M | I2S_RX_FIFO_RESET_M | I2S_TX_RESET_M | I2S_TX_FIFO_RESET_M;
         i2s->conf.val |= conf_reset_flags;
         i2s->conf.val &= ~conf_reset_flags;
-        //while (i2s->state.rx_fifo_reset_back)
-        //    ;
-        /*
-        static void dma_reset(i2s_dev_t *dev) {
-            dev->lc_conf.in_rst=1; dev->lc_conf.in_rst=0;
-            dev->lc_conf.out_rst=1; dev->lc_conf.out_rst=0;
-        }
+    }
 
-        static void fifo_reset(i2s_dev_t *dev) {
-            dev->conf.rx_fifo_reset=1; dev->conf.rx_fifo_reset=0;
-            dev->conf.tx_fifo_reset=1; dev->conf.tx_fifo_reset=0;
-        }
-        */
+    static void i2sReset_DMA()
+    {
+        i2s->lc_conf.in_rst=1; i2s->lc_conf.in_rst=0;
+        i2s->lc_conf.out_rst=1; i2s->lc_conf.out_rst=0;
+    }
+
+    static void i2sReset_FIFO()
+    {
+        i2s->conf.rx_fifo_reset=1; i2s->conf.rx_fifo_reset=0;
+        i2s->conf.tx_fifo_reset=1; i2s->conf.tx_fifo_reset=0;
     }
 
     static void i2sStop()

From dd97e80d57cf9832f97d8bb7e165f8a80f5f9bd6 Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Sun, 28 Apr 2019 13:48:55 -0400
Subject: [PATCH 046/204] Yves' very cool changes to improve performance and
 accuracy

---
 platforms/esp/32/clockless_esp32.h | 367 ++++++++++++++++++++---------
 1 file changed, 258 insertions(+), 109 deletions(-)

diff --git a/platforms/esp/32/clockless_esp32.h b/platforms/esp/32/clockless_esp32.h
index 5e98ed4be7..55e7c8b390 100644
--- a/platforms/esp/32/clockless_esp32.h
+++ b/platforms/esp/32/clockless_esp32.h
@@ -29,7 +29,7 @@ FASTLED_NAMESPACE_BEGIN
 #ifdef __cplusplus
 extern "C" {
 #endif
-
+    
 #include "esp_heap_caps.h"
 #include "soc/soc.h"
 #include "soc/gpio_sig_map.h"
@@ -41,15 +41,15 @@ extern "C" {
 #include "rom/lldesc.h"
 #include "esp_intr.h"
 #include "esp_log.h"
-
+    
 #ifdef __cplusplus
 }
 #endif
 
 __attribute__ ((always_inline)) inline static uint32_t __clock_cycles() {
-  uint32_t cyc;
-  __asm__ __volatile__ ("rsr %0,ccount":"=a" (cyc));
-  return cyc;
+    uint32_t cyc;
+    __asm__ __volatile__ ("rsr %0,ccount":"=a" (cyc));
+    return cyc;
 }
 
 #define FASTLED_HAS_CLOCKLESS 1
@@ -66,8 +66,9 @@ __attribute__ ((always_inline)) inline static uint32_t __clock_cycles() {
 #endif
 
 // -- I2S clock
-#define I2S_BASE_CLK (800000000L)
-
+#define I2S_BASE_CLK (80000000L)
+#define I2S_MAX_CLK (20000000L) //more tha a certain speed and the I2s looses some bits
+#define I2S_MAX_PULSE_PER_BIT 20 //put it higher to get more accuracy but it could decrease the refresh rate without real improvement
 // -- Convert ESP32 cycles back into nanoseconds
 #define ESPCLKS_TO_NS(_CLKS) (((long)(_CLKS) * 1000L) / F_CPU_MHZ)
 
@@ -114,17 +115,20 @@ static DMABuffer * dmaBuffers[NUM_DMA_BUFFERS];
 //    are global variables.
 
 static int      gPulsesPerBit = 0;
-static uint32_t gOneBit[10] = {0,0,0,0,0,0,0,0,0,0};
-static uint32_t gZeroBit[10]  = {0,0,0,0,0,0,0,0,0,0};
+static uint32_t gOneBit[40] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
+static uint32_t gZeroBit[40]  = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
 
 // -- Counters to track progress
 static int gCurBuffer = 0;
 static bool gDoneFilling = false;
-
+static int ones_for_one;
+static int ones_for_zero;
 // -- Temp buffers for pixels and bits being formatted for DMA
 static uint8_t gPixelRow[NUM_COLOR_CHANNELS][32];
 static uint8_t gPixelBits[NUM_COLOR_CHANNELS][8][4];
-
+static int CLOCK_DIVIDER_N;
+static int CLOCK_DIVIDER_A;
+static int CLOCK_DIVIDER_B;
 
 template <int DATA_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = RGB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 5>
 class ClocklessController : public CPixelLEDController<RGB_ORDER>
@@ -132,15 +136,16 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
     // -- The index of this controller in the global gControllers array
     int            m_index;
 
+    
     // -- Store the GPIO pin
     gpio_num_t     mPin;
-
+    
     // -- This instantiation forces a check on the pin choice
     FastPin<DATA_PIN> mFastPin;
-
+    
     // -- Save the pixel controller
     PixelController<RGB_ORDER> * mPixels;
-
+    
 public:
 
     void init()
@@ -150,11 +155,11 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         // -- Allocate space to save the pixel controller
         //    during parallel output
         mPixels = (PixelController<RGB_ORDER> *) malloc(sizeof(PixelController<RGB_ORDER>));
-
+        
         gControllers[gNumControllers] = this;
         m_index = gNumControllers;
         gNumControllers++;
-
+        
         // -- Set up the pin We have to do two things: configure the
         //    actual GPIO pin, and route the output from the default
         //    pin (determined by the I2S device) to the pin we
@@ -162,37 +167,160 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         //    controller in the array. This order is crucial because
         //    the bits must go into the DMA buffer in the same order.
         mPin = gpio_num_t(DATA_PIN);
-
+        
         PIN_FUNC_SELECT(GPIO_PIN_MUX_REG[DATA_PIN], PIN_FUNC_GPIO);
         gpio_set_direction(mPin, (gpio_mode_t)GPIO_MODE_DEF_OUTPUT);
         pinMode(mPin,OUTPUT);
         gpio_matrix_out(mPin, i2s_base_pin_index + m_index, false, false);
     }
-
+    
     virtual uint16_t getMaxRefreshRate() const { return 400; }
-
+    
 protected:
-
+   
+   static int pgcd(int smallest,int precision,int a,int b,int c)
+    {
+        int pgc_=1;
+        for( int i=smallest;i>0;i--)
+        {
+            
+            if( a%i<=precision && b%i<=precision && c%i<=precision)
+            {
+                pgc_=i;
+                break;
+            }
+        }
+        return pgc_;
+    }
+    
+    
+    
     static void initBitPatterns()
     {
+        
+        
+        
+
         // Precompute the bit patterns based on the I2S sample rate
         uint32_t T1ns = ESPCLKS_TO_NS(T1);
         uint32_t T2ns = ESPCLKS_TO_NS(T2);
         uint32_t T3ns = ESPCLKS_TO_NS(T3);
-
+        
         Serial.print("T1 = "); Serial.print(T1); Serial.print(" ns "); Serial.println(T1ns);
         Serial.print("T2 = "); Serial.print(T2); Serial.print(" ns "); Serial.println(T2ns);
         Serial.print("T3 = "); Serial.print(T3); Serial.print(" ns "); Serial.println(T3ns);
+        
+        /*
+         We calculate the best pgcd (greatest common divisor) of the timings
+         ie
+         WS2811 77 77 154 => 1  1 2 => nb pulses= 4
+         WS2812 60 150 90 => 2 5 3 => nb pulses=10
+         */
+        int smallest=0;
+        if (T1>T2)
+            smallest=T2;
+        else
+            smallest=T1;
+        if(smallest>T3)
+            smallest=T3;
+        double freq=(double)1/(double)(T1ns + T2ns + T3ns);
+        Serial.printf("chipset frequency:%f Khz\n", 1000000L*freq);
+       // Serial.printf("smallest %d\n",smallest);
+        int pgc_=1;
+        int precision=0;
+        pgc_=pgcd(smallest,precision,T1,T2,T3);
+//Serial.printf("%f\n",I2S_MAX_CLK/(1000000000L*freq));
+        while(pgc_==1 ||  (T1/pgc_ +T2/pgc_ +T3/pgc_)>I2S_MAX_PULSE_PER_BIT) //while(pgc_==1 ||  (T1/pgc_ +T2/pgc_ +T3/pgc_)>I2S_MAX_CLK/(1000000000L*freq))
+        {
+            precision++;
+            pgc_=pgcd(smallest,precision,T1,T2,T3);
+            //Serial.printf("%d %d\n",pgc_,(a+b+c)/pgc_);
+        }
+        pgc_=pgcd(smallest,precision,T1,T2,T3);
+        Serial.printf("pgcd %d precision:%d\n",pgc_,precision);
+        Serial.printf("nb pulse per bit:%d\n",T1/pgc_ +T2/pgc_ +T3/pgc_);
+        gPulsesPerBit=(int)T1/pgc_ +(int)T2/pgc_ +(int)T3/pgc_;
+        /*
+         We calculate the duration of one pulse and the base frequency of the LED
+         i.e. WS2812B F=1/(250+625+375)=800kHz or 1250ns
+         as we need 10 pulses each pulse is 125ns => frequency 800kHz*10=8MHz
+         WS2811 T=320+320+641=1281ns and we need 4 pulses => pulse duration 320.25ns => frequency 3.1225605MHz
+         
+         */
+        
 
-        gPulsesPerBit = (T1ns + T2ns + T3ns)/FASTLED_I2S_NS_PER_PULSE;
-
-        Serial.print("Pulses per bit: "); Serial.println(gPulsesPerBit);
-
-        int ones_for_one  = ((T1ns + T2ns - 1)/FASTLED_I2S_NS_PER_PULSE) + 1;
-        Serial.print("One bit:  target "); 
-        Serial.print(T1ns+T2ns); Serial.print("ns --- "); 
-        Serial.print(ones_for_one); Serial.print(" 1 bits");
-        Serial.print(" = "); Serial.print(ones_for_one * FASTLED_I2S_NS_PER_PULSE); Serial.println("ns");
+        freq=1000000000L*freq*gPulsesPerBit;
+        Serial.printf("needed frequency (nbpiulse per bit)*(chispset frequency):%f Mhz\n",freq/1000000);
+        
+        /*
+         We calculate the needed N, a and b,
+         since f = basefreq/(N + b/a);
+         as a is at most 63, the precision of the fractional part is 1/63
+         
+         */
+        
+         CLOCK_DIVIDER_N=(int)((double)I2S_BASE_CLK/freq);
+        double v=I2S_BASE_CLK/freq-CLOCK_DIVIDER_N;
+   
+         
+        
+        double prec=(double)1/63;
+        int a=1;
+       int b=0;
+        CLOCK_DIVIDER_A=1;
+        CLOCK_DIVIDER_B=0;
+        for(a=1;a<64;a++)
+        {
+            for(b=0;b<a;b++)
+            {
+                //printf("%d %d %f %f %f\n",b,a,v,(double)v*(double)a,fabsf(v-(double)b/a));
+                if(fabsf(v-(double)b/a) <= prec/2)
+                    break;
+            }
+            if(fabsf(v-(double)b/a) ==0)
+            {
+                CLOCK_DIVIDER_A=a;
+                CLOCK_DIVIDER_B=b;
+                break;
+            }
+            if(fabsf(v-(double)b/a) < prec/2)
+            {
+                if (fabsf(v-(double)b/a) <fabsf(v-(double)CLOCK_DIVIDER_B/CLOCK_DIVIDER_A))
+                {
+                    CLOCK_DIVIDER_A=a;
+                    CLOCK_DIVIDER_B=b;
+                }
+                
+            }
+        }
+        // to take care of an issue with double 0.9999999999: when b == a the fraction is 1, so carry it into N
+        if(CLOCK_DIVIDER_A==CLOCK_DIVIDER_B)
+        {
+            CLOCK_DIVIDER_A=1;
+            CLOCK_DIVIDER_B=0;
+            CLOCK_DIVIDER_N++;
+        }
+        
+        //printf("%d %d %f %f %d\n",CLOCK_DIVIDER_B,CLOCK_DIVIDER_A,(double)CLOCK_DIVIDER_B/CLOCK_DIVIDER_A,v,CLOCK_DIVIDER_N);
+        //Serial.printf("freq %f %f\n",freq,I2S_BASE_CLK/(CLOCK_DIVIDER_N+(double)CLOCK_DIVIDER_B/CLOCK_DIVIDER_A));
+        freq=1/(CLOCK_DIVIDER_N+(double)CLOCK_DIVIDER_B/CLOCK_DIVIDER_A);
+        freq=freq*I2S_BASE_CLK;
+        Serial.printf("calculted for i2s frequency:%f Mhz N:%d B:%d A:%d\n",freq/1000000,CLOCK_DIVIDER_N,CLOCK_DIVIDER_B,CLOCK_DIVIDER_A);
+        double pulseduration=1000000000/freq;
+        Serial.printf("Pulse duration: %f ns\n",pulseduration);
+       // gPulsesPerBit = (T1ns + T2ns + T3ns)/FASTLED_I2S_NS_PER_PULSE;
+        
+        //Serial.print("Pulses per bit: "); Serial.println(gPulsesPerBit);
+        
+        //int ones_for_one  = ((T1ns + T2ns - 1)/FASTLED_I2S_NS_PER_PULSE) + 1;
+        ones_for_one  = T1/pgc_ +T2/pgc_;
+        //Serial.print("One bit:  target ");
+        //Serial.print(T1ns+T2ns); Serial.print("ns --- ");
+        //Serial.print(ones_for_one); Serial.print(" 1 bits");
+        //Serial.print(" = "); Serial.print(ones_for_one * FASTLED_I2S_NS_PER_PULSE); Serial.println("ns");
+        Serial.printf("one bit : target %d  ns --- %d  pulses 1 bit = %f ns\n",T1ns+T2ns,ones_for_one ,ones_for_one*pulseduration);
+        
+        
         int i = 0;
         while ( i < ones_for_one ) {
             gOneBit[i] = 0xFFFFFF00;
@@ -202,12 +330,14 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
             gOneBit[i] = 0x00000000;
             i++;
         }
-
-        int ones_for_zero = ((T1ns - 1)/FASTLED_I2S_NS_PER_PULSE) + 1;
-        Serial.print("Zero bit:  target "); 
-        Serial.print(T1ns); Serial.print("ns --- "); 
-        Serial.print(ones_for_zero); Serial.print(" 1 bits");
-        Serial.print(" = "); Serial.print(ones_for_zero * FASTLED_I2S_NS_PER_PULSE); Serial.println("ns");
+        
+        //int ones_for_zero = ((T1ns - 1)/FASTLED_I2S_NS_PER_PULSE) + 1;
+        ones_for_zero =T1/pgc_  ;
+       // Serial.print("Zero bit:  target ");
+       // Serial.print(T1ns); Serial.print("ns --- ");
+        //Serial.print(ones_for_zero); Serial.print(" 1 bits");
+        //Serial.print(" = "); Serial.print(ones_for_zero * FASTLED_I2S_NS_PER_PULSE); Serial.println("ns");
+        Serial.printf("Zero bit : target %d ns --- %d pulses  1 bit =   %f ns\n",T1ns,ones_for_zero ,ones_for_zero*pulseduration);
         i = 0;
         while ( i < ones_for_zero ) {
             gZeroBit[i] = 0xFFFFFF00;
@@ -217,18 +347,18 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
             gZeroBit[i] = 0x00000000;
             i++;
         }
-
+        
         memset(gPixelRow, 0, NUM_COLOR_CHANNELS * 32);
         memset(gPixelBits, 0, NUM_COLOR_CHANNELS * 32);
     }
-
+    
     static DMABuffer * allocateDMABuffer(int bytes)
     {
         DMABuffer * b = (DMABuffer *)heap_caps_malloc(sizeof(DMABuffer), MALLOC_CAP_DMA);
-
+        
         b->buffer = (uint8_t *)heap_caps_malloc(bytes, MALLOC_CAP_DMA);
         memset(b->buffer, 0, bytes);
-
+        
         b->descriptor.length = bytes;
         b->descriptor.size = bytes;
         b->descriptor.owner = 1;
@@ -238,18 +368,18 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         b->descriptor.empty = 0;
         b->descriptor.eof = 1;
         b->descriptor.qe.stqe_next = 0;
-
+        
         return b;
     }
-
+    
     static void i2sInit()
     {
         // -- Only need to do this once
         if (gInitialized) return;
-
+        
         // -- Construct the bit patterns for ones and zeros
         initBitPatterns();
-
+        
         // -- Choose whether to use I2S device 0 or device 1
         //    Set up the various device-specific parameters
         int interruptSource;
@@ -264,78 +394,92 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
             interruptSource = ETS_I2S1_INTR_SOURCE;
             i2s_base_pin_index = I2S1O_DATA_OUT0_IDX;
         }
-
+        
         // -- Reset everything
         i2sReset();
         i2sReset_DMA();
         i2sReset_FIFO();
-
-        // -- Main configuration 
+        
+        // -- Main configuration
         i2s->conf.tx_msb_right = 1;
         i2s->conf.tx_mono = 0;
         i2s->conf.tx_short_sync = 0;
         i2s->conf.tx_msb_shift = 0;
         i2s->conf.tx_right_first = 1; // 0;//1;
         i2s->conf.tx_slave_mod = 0;
-
+        
         // -- Set parallel mode
         i2s->conf2.val = 0;
         i2s->conf2.lcd_en = 1;
         i2s->conf2.lcd_tx_wrx2_en = 0; // 0 for 16 or 32 parallel output
         i2s->conf2.lcd_tx_sdx2_en = 0; // HN
-
+        
         // -- Set up the clock rate and sampling
         i2s->sample_rate_conf.val = 0;
         i2s->sample_rate_conf.tx_bits_mod = 32; // Number of parallel bits/pins
         i2s->sample_rate_conf.tx_bck_div_num = 1;
         i2s->clkm_conf.val = 0;
         i2s->clkm_conf.clka_en = 0;
-
+        
         // -- Data clock is computed as Base/(div_num + (div_b/div_a))
         //    Base is 80Mhz, so 80/(10 + 0/1) = 8Mhz
         //    One cycle is 125ns
-        i2s->clkm_conf.clkm_div_a = 1;
-        i2s->clkm_conf.clkm_div_b = 0;
-        i2s->clkm_conf.clkm_div_num = FASTLED_I2S_CLOCK_DIVIDER;
-    
+        i2s->clkm_conf.clkm_div_a = CLOCK_DIVIDER_A;
+        i2s->clkm_conf.clkm_div_b = CLOCK_DIVIDER_B;
+        i2s->clkm_conf.clkm_div_num = CLOCK_DIVIDER_N;
+        
         i2s->fifo_conf.val = 0;
         i2s->fifo_conf.tx_fifo_mod_force_en = 1;
         i2s->fifo_conf.tx_fifo_mod = 3;  // 32-bit single channel data
         i2s->fifo_conf.tx_data_num = 32; // fifo length
         i2s->fifo_conf.dscr_en = 1;      // fifo will use dma
-
+        
         i2s->conf1.val = 0;
         i2s->conf1.tx_stop_en = 0;
         i2s->conf1.tx_pcm_bypass = 1;
-
+        
         i2s->conf_chan.val = 0;
         i2s->conf_chan.tx_chan_mod = 1; // Mono mode, with tx_msb_right = 1, everything goes to right-channel
-
+        
         i2s->timing.val = 0;
-
+        
         // -- Allocate two DMA buffers
         dmaBuffers[0] = allocateDMABuffer(32 * NUM_COLOR_CHANNELS * gPulsesPerBit);
         dmaBuffers[1] = allocateDMABuffer(32 * NUM_COLOR_CHANNELS * gPulsesPerBit);
-
+        
         // -- Arrange them as a circularly linked list
         dmaBuffers[0]->descriptor.qe.stqe_next = &(dmaBuffers[1]->descriptor);
         dmaBuffers[1]->descriptor.qe.stqe_next = &(dmaBuffers[0]->descriptor);
-
+       
         // -- Allocate i2s interrupt
         SET_PERI_REG_BITS(I2S_INT_ENA_REG(I2S_DEVICE), I2S_OUT_EOF_INT_ENA_V, 1, I2S_OUT_EOF_INT_ENA_S);
         esp_err_t e = esp_intr_alloc(interruptSource, 0, // ESP_INTR_FLAG_INTRDISABLED | ESP_INTR_FLAG_LEVEL3,
                                      &interruptHandler, 0, &gI2S_intr_handle);
-
+        
         // -- Create a semaphore to block execution until all the controllers are done
         if (gTX_sem == NULL) {
             gTX_sem = xSemaphoreCreateBinary();
             xSemaphoreGive(gTX_sem);
         }
-                
+        
         // Serial.println("Init I2S");
         gInitialized = true;
     }
-
+    
+    
+    static void empty( uint32_t *buf)
+    {
+        for(int i=0;i<8*NUM_COLOR_CHANNELS;i++)
+        {
+            int offset=gPulsesPerBit*i;
+            for(int j=0;j<ones_for_zero;j++)
+                buf[offset+j]=0xffffffff;
+            
+            for(int j=ones_for_one;j<gPulsesPerBit;j++)
+                buf[offset+j]=0;
+        }
+    }
+    
     // -- Show pixels
     //    This is the main entry point for the controller.
     virtual void showPixels(PixelController<RGB_ORDER> & pixels)
@@ -344,50 +488,52 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
             // -- First controller: make sure everything is set up
             xSemaphoreTake(gTX_sem, portMAX_DELAY);
         }
-
+        
         // -- Initialize the local state, save a pointer to the pixel
         //    data. We need to make a copy because pixels is a local
         //    variable in the calling function, and this data structure
         //    needs to outlive this call to showPixels.
         (*mPixels) = pixels;
-
+        
         // -- Keep track of the number of strips we've seen
         gNumStarted++;
 
         // Serial.print("Show pixels ");
         // Serial.println(gNumStarted);
-
+        
         // -- The last call to showPixels is the one responsible for doing
         //    all of the actual work
         if (gNumStarted == gNumControllers) {
+            empty((uint32_t*)dmaBuffers[0]->buffer);
+            empty((uint32_t*)dmaBuffers[1]->buffer);
             gCurBuffer = 0;
             gDoneFilling = false;
-
+            
             // -- Prefill both buffers
             fillBuffer();
             fillBuffer();
-
+            
             i2sStart();
-
+            
             // -- Wait here while the rest of the data is sent. The interrupt handler
             //    will keep refilling the RMT buffers until it is all sent; then it
             //    gives the semaphore back.
             xSemaphoreTake(gTX_sem, portMAX_DELAY);
             xSemaphoreGive(gTX_sem);
-
+            
             i2sStop();
-
+            
             // -- Reset the counters
             gNumStarted = 0;
         }
     }
-
+    
     // -- Custom interrupt handler
     static IRAM_ATTR void interruptHandler(void *arg)
     {
         if (i2s->int_st.out_eof) {
             i2s->int_clr.val = i2s->int_raw.val;
-
+            
             if ( ! gDoneFilling) {
                 fillBuffer();
             } else {
@@ -397,12 +543,12 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
             }
         }
     }
-
+    
     static void fillBuffer()
     {
         volatile uint32_t * buf = (uint32_t *) dmaBuffers[gCurBuffer]->buffer;
         gCurBuffer = (gCurBuffer + 1) % NUM_DMA_BUFFERS;
-
+        
         // -- Get the requested pixel from each controller. Store the
         //    data for each color channel in a separate array.
         uint32_t has_data_mask = 0;
@@ -418,49 +564,52 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
                 gPixelRow[2][bit_index] = pController->mPixels->loadAndScale2();
                 pController->mPixels->advanceData();
                 pController->mPixels->stepDithering();
-
+                
                 // -- Record that this controller still has data to send
                 has_data_mask |= (1 << (i+8));
             }
         }
-
+        
         if (has_data_mask == 0) {
             gDoneFilling = true;
             return;
         }
-
+        
         // -- Transpose and encode the pixel data for the DMA buffer
         int buf_index = 0;
         for (int channel = 0; channel < NUM_COLOR_CHANNELS; channel++) {
-
+            
             // -- Tranpose each array: all the bit 7's, then all the bit 6's, ...
             transpose32(gPixelRow[channel], gPixelBits[channel][0] );
-
+            
             //Serial.print("Channel: "); Serial.print(channel); Serial.print(" ");
             for (int bitnum = 0; bitnum < 8; bitnum++) {
                 uint8_t * row = (uint8_t *) (gPixelBits[channel][bitnum]);
                 uint32_t bit = (row[0] << 24) | (row[1] << 16) | (row[2] << 8) | row[3];
-
+                
                 /*
-                Serial.print(bitnum); Serial.print(": ");
-                uint32_t bt = bit;
-                for (int k = 0; k < 32; k++) {
-                    if (bt & 0x80000000) Serial.print("1");
-                    else Serial.print("0");
-                    bt = bt << 1;
-                }
-                Serial.println();
-                */
-
-                for (int pulse_num = 0; pulse_num < gPulsesPerBit; pulse_num++) {
-                    buf[buf_index++] = has_data_mask & ( (bit & gOneBit[pulse_num]) | (~bit & gZeroBit[pulse_num]) );
+                 Serial.print(bitnum); Serial.print(": ");
+                 uint32_t bt = bit;
+                 for (int k = 0; k < 32; k++) {
+                 if (bt & 0x80000000) Serial.print("1");
+                 else Serial.print("0");
+                 bt = bt << 1;
+                 }
+                 Serial.println();
+                 */
+                
+               /* for (int pulse_num = 0; pulse_num < gPulsesPerBit; pulse_num++) {
+                    buf[buf_index++] = has_data_mask & ( (bit & gOneBit[pulse_num]) | (~bit & gZeroBit[pulse_num]) );*/
+                // when the loop is too big => timing issues, hence I only fill the pulses that are high for a 1 bit
+                for(int pulse_num=ones_for_zero;pulse_num<ones_for_one;pulse_num++) {
+                    buf[bitnum*gPulsesPerBit+channel*8*gPulsesPerBit+pulse_num] = has_data_mask & bit;
                     //if (buf[buf_index-1] & 0x100) Serial.print("1");
                     //else Serial.print("0");
                 }
             }
         }
     }
-
+    
     static void transpose32(uint8_t * pixels, uint8_t * bits)
     {
         transpose8rS32(& pixels[0],  1, 4, & bits[0]);
@@ -468,30 +617,30 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         transpose8rS32(& pixels[16], 1, 4, & bits[2]);
         //transpose8rS32(& pixels[24], 1, 4, & bits[3]);
     }
-
-    static void transpose8rS32(uint8_t * A, int m, int n, uint8_t * B) 
+    
+    static void transpose8rS32(uint8_t * A, int m, int n, uint8_t * B)
     {
         uint32_t x, y, t;
-
+        
         // Load the array and pack it into x and y.
-
+        
         x = (A[0]<<24)   | (A[m]<<16)   | (A[2*m]<<8) | A[3*m];
         y = (A[4*m]<<24) | (A[5*m]<<16) | (A[6*m]<<8) | A[7*m];
-
+        
         t = (x ^ (x >> 7)) & 0x00AA00AA;  x = x ^ t ^ (t << 7);
         t = (y ^ (y >> 7)) & 0x00AA00AA;  y = y ^ t ^ (t << 7);
-
+        
         t = (x ^ (x >>14)) & 0x0000CCCC;  x = x ^ t ^ (t <<14);
         t = (y ^ (y >>14)) & 0x0000CCCC;  y = y ^ t ^ (t <<14);
-
+        
         t = (x & 0xF0F0F0F0) | ((y >> 4) & 0x0F0F0F0F);
         y = ((x << 4) & 0xF0F0F0F0) | (y & 0x0F0F0F0F);
         x = t;
-
+        
         B[0]=x>>24;    B[n]=x>>16;    B[2*n]=x>>8;  B[3*n]=x;
         B[4*n]=y>>24;  B[5*n]=y>>16;  B[6*n]=y>>8;  B[7*n]=y;
     }
-
+    
     /** Start I2S transmission
      */
     static void i2sStart()
@@ -513,35 +662,35 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         // //vTaskDelay(5);
         i2s->int_ena.val = 0;
         i2s->int_ena.out_eof = 1;
-
+        
         //start transmission
         i2s->conf.tx_start = 1;
     }
-
+    
     static void i2sReset()
     {
         // Serial.println("I2S reset");
         const unsigned long lc_conf_reset_flags = I2S_IN_RST_M | I2S_OUT_RST_M | I2S_AHBM_RST_M | I2S_AHBM_FIFO_RST_M;
         i2s->lc_conf.val |= lc_conf_reset_flags;
         i2s->lc_conf.val &= ~lc_conf_reset_flags;
-
+        
         const uint32_t conf_reset_flags = I2S_RX_RESET_M | I2S_RX_FIFO_RESET_M | I2S_TX_RESET_M | I2S_TX_FIFO_RESET_M;
         i2s->conf.val |= conf_reset_flags;
         i2s->conf.val &= ~conf_reset_flags;
     }
-
+    
     static void i2sReset_DMA()
     {
         i2s->lc_conf.in_rst=1; i2s->lc_conf.in_rst=0;
         i2s->lc_conf.out_rst=1; i2s->lc_conf.out_rst=0;
     }
-
+    
     static void i2sReset_FIFO()
     {
         i2s->conf.rx_fifo_reset=1; i2s->conf.rx_fifo_reset=0;
         i2s->conf.tx_fifo_reset=1; i2s->conf.tx_fifo_reset=0;
     }
-
+    
     static void i2sStop()
     {
         // Serial.println("I2S stop");

From 6197141e0aa6d66ae56e71096c46795d51d35125 Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Mon, 29 Apr 2019 22:12:03 -0400
Subject: [PATCH 047/204] First attempt at merging the two drivers

---
 platforms/esp/32/clockless_esp32.h | 640 +++++++++++++++++++++++++----
 1 file changed, 550 insertions(+), 90 deletions(-)

diff --git a/platforms/esp/32/clockless_esp32.h b/platforms/esp/32/clockless_esp32.h
index 55e7c8b390..91e8362571 100644
--- a/platforms/esp/32/clockless_esp32.h
+++ b/platforms/esp/32/clockless_esp32.h
@@ -1,7 +1,97 @@
 /*
+ * ESP32 driver for clockless LED chips
  *
+ * There are two different drivers available for the ESP32: RMT and I2S.
  *
+ * The RMT driver is the default. It uses the remote control
+ * peripheral (RMT) to drive up to 8 strips in parallel, queueing any
+ * additional strips, which are output as RMT channels become
+ * available. The reason it is the default is that it can handle any
+ * mix of strips with different chips, different timing, different
+ * lengths, etc.
+ *
+ * The I2S implementation can drive up to 24 strips in parallel, but
+ * with the following limitation: all the strips must have the same
+ * timing (i.e., they must all use the same chip).
+ *
+ * To enable the I2S driver, add the following line *before* including
+ * FastLED.h (no other changes are necessary):
+ *
+ * #define FASTLED_ESP32_I2S true
+ *
+ * === DETAILS ===
+ *
+ * RMT Integration into FastLED ClocklessController
+ * Copyright (c) 2018 Samuel Z. Guyer
+ * Copyright (c) 2017 Thomas Basler
+ * Copyright (c) 2017 Martin F. Falatic
+ *
+ * ESP32 support is provided using the RMT peripheral device -- a unit
+ * on the chip designed specifically for generating (and receiving)
+ * precisely-timed digital signals. Nominally for use in infrared
+ * remote controls, we use it to generate the signals for clockless
+ * LED strips. The main advantage of using the RMT device is that,
+ * once programmed, it generates the signal asynchronously, allowing
+ * the CPU to continue executing other code. It is also not vulnerable
+ * to interrupts or other timing problems that could disrupt the signal.
+ *
+ * The implementation strategy is borrowed from previous work and from
+ * the RMT support built into the ESP32 IDF. The RMT device has 8
+ * channels, which can be programmed independently to send sequences
+ * of high/low bits. Memory for each channel is limited, however, so
+ * in order to send a long sequence of bits, we need to continuously
+ * refill the buffer until all the data is sent. To do this, we fill
+ * half the buffer and then set an interrupt to go off when that half
+ * is sent. Then we refill that half while the second half is being
+ * sent. This strategy effectively overlaps computation (by the CPU)
+ * and communication (by the RMT).
+ *
+ * Since the RMT device only has 8 channels, we need a strategy to
+ * allow more than 8 LED controllers. Our driver assigns controllers
+ * to channels on the fly, queuing up controllers as necessary until a
+ * channel is free. The main showPixels routine just fires off the
+ * first 8 controllers; the interrupt handler starts new controllers
+ * asynchronously as previous ones finish. So, for example, it can
+ * send the data for 8 controllers simultaneously, but 16 controllers
+ * would take approximately twice as much time.
+ *
+ * There is a #define that allows a program to control the total
+ * number of channels that the driver is allowed to use. It defaults
+ * to 8 -- use all the channels. Setting it to 1, for example, results
+ * in fully serial output:
+ *
+ *     #define FASTLED_RMT_MAX_CHANNELS 1
+ *
+ * OTHER RMT APPLICATIONS
+ *
+ * The default FastLED driver takes over control of the RMT interrupt
+ * handler, making it hard to use the RMT device for other
+ * (non-FastLED) purposes. You can change its behavior to use the ESP
+ * core driver instead, allowing other RMT applications to
+ * co-exist. To switch to this mode, add the following directive
+ * before you include FastLED.h:
+ *
+ *      #define FASTLED_RMT_BUILTIN_DRIVER
+ *
+ * There may be a performance penalty for using this mode. We need to
+ * compute the RMT signal for the entire LED strip ahead of time,
+ * rather than overlapping it with communication. We also need a large
+ * buffer to hold the signal specification. Each bit of pixel data is
+ * represented by a 32-bit pulse specification, so it is a 32X blow-up
+ * in memory use.
+ *
+ *
+ * Based on public domain code created 19 Nov 2016 by Chris Osborn <fozztexx@fozztexx.com>
+ * http://insentricity.com *
+ *
+ * I2S Integration
+ * Copyright (c) 2019 Yves Bazin
+ * Copyright (c) 2019 Samuel Z. Guyer
+ *
+ * 
  */
+
+
 /*
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -29,19 +119,27 @@ FASTLED_NAMESPACE_BEGIN
 #ifdef __cplusplus
 extern "C" {
 #endif
-    
+
+#include "esp_intr.h"
+#include "driver/gpio.h"
+#include "esp_log.h"
+#include "driver/periph_ctrl.h"
+
+#ifdef FASTLED_ESP32_I2S
 #include "esp_heap_caps.h"
 #include "soc/soc.h"
 #include "soc/gpio_sig_map.h"
 #include "soc/i2s_reg.h"
 #include "soc/i2s_struct.h"
 #include "soc/io_mux_reg.h"
-#include "driver/gpio.h"
-#include "driver/periph_ctrl.h"
 #include "rom/lldesc.h"
-#include "esp_intr.h"
-#include "esp_log.h"
-    
+#else
+#include "esp32-hal.h"
+#include "driver/rmt.h"
+#include "freertos/semphr.h"
+#include "soc/rmt_struct.h"
+#endif
+
 #ifdef __cplusplus
 }
 #endif
@@ -52,33 +150,22 @@ __attribute__ ((always_inline)) inline static uint32_t __clock_cycles() {
     return cyc;
 }
 
+
+// === Common stuff ==========================================
+
 #define FASTLED_HAS_CLOCKLESS 1
 #define NUM_COLOR_CHANNELS 3
 
-// -- Choose which I2S device to use
-#ifndef I2S_DEVICE
-#define I2S_DEVICE 0
-#endif
-
 // -- Max number of controllers we can support
-#ifndef FASTLED_I2S_MAX_CONTROLLERS
-#define FASTLED_I2S_MAX_CONTROLLERS 24
+#ifndef FASTLED_ESP32_MAX_CONTROLLERS
+#define FASTLED_ESP32_MAX_CONTROLLERS 24
 #endif
 
-// -- I2S clock
-#define I2S_BASE_CLK (80000000L)
-#define I2S_MAX_CLK (20000000L) //more tha a certain speed and the I2s looses some bits
-#define I2S_MAX_PULSE_PER_BIT 20 //put it higher to get more accuracy but it could decrease the refresh rate without real improvement
 // -- Convert ESP32 cycles back into nanoseconds
 #define ESPCLKS_TO_NS(_CLKS) (((long)(_CLKS) * 1000L) / F_CPU_MHZ)
 
-// -- I2S bit encoding
-//    For now, this stuff is hard-coded
-#define FASTLED_I2S_CLOCK_DIVIDER   25     //   10  // 80MHz --> 8MHz
-#define FASTLED_I2S_NS_PER_PULSE   312.5   //  125  // == 125ns per cycle
-
 // -- Array of all controllers
-static CLEDController * gControllers[FASTLED_I2S_MAX_CONTROLLERS];
+static CLEDController * gControllers[FASTLED_ESP32_MAX_CONTROLLERS];
 static int gNumControllers = 0;
 static int gNumStarted = 0;
 
@@ -86,9 +173,21 @@ static int gNumStarted = 0;
 //    Semaphore is not given until all data has been sent
 static xSemaphoreHandle gTX_sem = NULL;
 
-// -- I2S global configuration stuff
+// -- One-time initialization (both I2S and RMT)
 static bool gInitialized = false;
 
+// === I2S specific stuff ====================================
+
+// -- Choose which I2S device to use
+#ifndef I2S_DEVICE
+#define I2S_DEVICE 0
+#endif
+
+// -- I2S clock
+#define I2S_BASE_CLK (80000000L)
+#define I2S_MAX_CLK (20000000L) //more tha a certain speed and the I2s looses some bits
+#define I2S_MAX_PULSE_PER_BIT 20 //put it higher to get more accuracy but it could decrease the refresh rate without real improvement
+
 static intr_handle_t gI2S_intr_handle = NULL;
 
 static i2s_dev_t * i2s;          // A pointer to the memory-mapped structure: I2S0 or I2S1
@@ -100,9 +199,14 @@ struct DMABuffer {
     uint8_t * buffer;
 };
 
+// -- We use two DMA buffers: one is being sent while we fill the other
 #define NUM_DMA_BUFFERS 2
 static DMABuffer * dmaBuffers[NUM_DMA_BUFFERS];
 
+// -- Counters to track progress
+static int gCurBuffer = 0;
+static bool gDoneFilling = false;
+
 // -- Bit patterns
 //    We configure the I2S data clock so that each pulse is
 //    125ns. Depending on the kind of LED we compute a pattern of
@@ -118,9 +222,6 @@ static int      gPulsesPerBit = 0;
 static uint32_t gOneBit[40] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
 static uint32_t gZeroBit[40]  = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
 
-// -- Counters to track progress
-static int gCurBuffer = 0;
-static bool gDoneFilling = false;
 static int ones_for_one;
 static int ones_for_zero;
 // -- Temp buffers for pixels and bits being formatted for DMA
@@ -130,13 +231,52 @@ static int CLOCK_DIVIDER_N;
 static int CLOCK_DIVIDER_A;
 static int CLOCK_DIVIDER_B;
 
+#endif
+
+// === RMT specific stuff ====================================
+
+// -- Configuration constants
+#define DIVIDER             2 /* 4, 8 still seem to work, but timings become marginal */
+#define MAX_PULSES         32 /* A channel has a 64 "pulse" buffer - we use half per pass */
+
+// -- Convert nanoseconds into RMT cycles
+#define F_CPU_RMT       (  80000000L)
+#define NS_PER_SEC      (1000000000L)
+#define CYCLES_PER_SEC  (F_CPU_RMT/DIVIDER)
+#define NS_PER_CYCLE    ( NS_PER_SEC / CYCLES_PER_SEC )
+#define NS_TO_CYCLES(n) ( (n) / NS_PER_CYCLE )
+
+// -- Convert ESP32 cycles to RMT cycles
+#define TO_RMT_CYCLES(_CLKS) NS_TO_CYCLES(ESPCLKS_TO_NS(_CLKS))    
+
+// -- Number of cycles to signal the strip to latch
+#define RMT_RESET_DURATION NS_TO_CYCLES(50000)
+
+// -- Core or custom driver
+#ifndef FASTLED_RMT_BUILTIN_DRIVER
+#define FASTLED_RMT_BUILTIN_DRIVER false
+#endif
+
+// -- Number of RMT channels to use (up to 8)
+//    Redefine this value to 1 to force serial output
+#ifndef FASTLED_RMT_MAX_CHANNELS
+#define FASTLED_RMT_MAX_CHANNELS 8
+#endif
+
+// -- Current set of active controllers, indexed by the RMT
+//    channel assigned to them.
+static CLEDController * gOnChannel[FASTLED_RMT_MAX_CHANNELS];
+
+static int gNumDone = 0;
+static int gNext = 0;
+
+static intr_handle_t gRMT_intr_handle = NULL;
+
+// =====================================================================
+
 template <int DATA_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = RGB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 5>
 class ClocklessController : public CPixelLEDController<RGB_ORDER>
 {
-    // -- The index of this controller in the global gControllers array
-    int            m_index;
-
-    
     // -- Store the GPIO pin
     gpio_num_t     mPin;
     
@@ -145,43 +285,145 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
     
     // -- Save the pixel controller
     PixelController<RGB_ORDER> * mPixels;
+
+    // -- RMT has 8 channels, numbered 0 to 7
+    rmt_channel_t  mRMT_channel;
     
+    // -- Timing values for zero and one bits, derived from T1, T2, and T3
+    rmt_item32_t   mZero;
+    rmt_item32_t   mOne;
+
+    // -- State information for keeping track of where we are in the pixel data
+    uint8_t *      mPixelData = NULL;
+    int            mSize = 0;
+    int            mCurByte;
+    uint16_t       mCurPulse;
+
+    // -- Buffer to hold all of the pulses. For the version that uses
+    //    the RMT driver built into the ESP core.
+    rmt_item32_t * mBuffer;
+    uint16_t       mBufferSize;
+
 public:
 
     void init()
     {
-        i2sInit();
-        
         // -- Allocate space to save the pixel controller
         //    during parallel output
         mPixels = (PixelController<RGB_ORDER> *) malloc(sizeof(PixelController<RGB_ORDER>));
         
         gControllers[gNumControllers] = this;
-        m_index = gNumControllers;
+        int my_index = gNumControllers;
         gNumControllers++;
-        
-        // -- Set up the pin We have to do two things: configure the
-        //    actual GPIO pin, and route the output from the default
-        //    pin (determined by the I2S device) to the pin we
-        //    want. We compute the default pin using the index of this
-        //    controller in the array. This order is crucial because
-        //    the bits must go into the DMA buffer in the same order.
+
+        // -- Store the pin
         mPin = gpio_num_t(DATA_PIN);
+
+        if (FASTLED_ESP32_I2S) {
+            // -- One-time initialization of I2S system
+            i2sInit();
+        
+            // -- Set up the pin We have to do two things: configure the
+            //    actual GPIO pin, and route the output from the default
+            //    pin (determined by the I2S device) to the pin we
+            //    want. We compute the default pin using the index of this
+            //    controller in the array. This order is crucial because
+            //    the bits must go into the DMA buffer in the same order.
         
-        PIN_FUNC_SELECT(GPIO_PIN_MUX_REG[DATA_PIN], PIN_FUNC_GPIO);
-        gpio_set_direction(mPin, (gpio_mode_t)GPIO_MODE_DEF_OUTPUT);
-        pinMode(mPin,OUTPUT);
-        gpio_matrix_out(mPin, i2s_base_pin_index + m_index, false, false);
+            PIN_FUNC_SELECT(GPIO_PIN_MUX_REG[DATA_PIN], PIN_FUNC_GPIO);
+            gpio_set_direction(mPin, (gpio_mode_t)GPIO_MODE_DEF_OUTPUT);
+            pinMode(mPin,OUTPUT);
+            gpio_matrix_out(mPin, i2s_base_pin_index + my_index, false, false);
+        } else {
+            // -- One-time initialization of RMT system
+            rmtInit();
+
+            // -- RMT: Precompute rmt items corresponding to a zero bit and a one bit
+            //         according to the timing values given in the template instantiation
+            // T1H
+            mOne.level0 = 1;
+            mOne.duration0 = TO_RMT_CYCLES(T1+T2);
+            // T1L
+            mOne.level1 = 0;
+            mOne.duration1 = TO_RMT_CYCLES(T3);
+
+            // T0H
+            mZero.level0 = 1;
+            mZero.duration0 = TO_RMT_CYCLES(T1);
+            // T0L
+            mZero.level1 = 0;
+            mZero.duration1 = TO_RMT_CYCLES(T2 + T3);
+        }
     }
     
     virtual uint16_t getMaxRefreshRate() const { return 400; }
     
 protected:
    
-   static int pgcd(int smallest,int precision,int a,int b,int c)
+    // ----------------------------------------------------------------------
+    //  RMT Initialization
+    // ----------------------------------------------------------------------
+
+    static void rmtInit()
+    {
+        // -- Only need to do this once
+        if (gInitialized) return;
+
+        for (int i = 0; i < FASTLED_ESP32_MAX_CHANNELS; i++) {
+            gOnChannel[i] = NULL;
+
+            // -- RMT configuration for transmission
+            rmt_config_t rmt_tx;
+            rmt_tx.channel = rmt_channel_t(i);
+            rmt_tx.rmt_mode = RMT_MODE_TX;
+            rmt_tx.gpio_num = mPin;  // The particular pin will be assigned later
+            rmt_tx.mem_block_num = 1;
+            rmt_tx.clk_div = DIVIDER;
+            rmt_tx.tx_config.loop_en = false;
+            rmt_tx.tx_config.carrier_level = RMT_CARRIER_LEVEL_LOW;
+            rmt_tx.tx_config.carrier_en = false;
+            rmt_tx.tx_config.idle_level = RMT_IDLE_LEVEL_LOW;
+            rmt_tx.tx_config.idle_output_en = true;
+                
+            // -- Apply the configuration
+            rmt_config(&rmt_tx);
+
+            if (FASTLED_RMT_BUILTIN_DRIVER) {
+                rmt_driver_install(rmt_channel_t(i), 0, 0);
+            } else {
+                // -- Set up the RMT to send 1/2 of the pulse buffer and then
+                //    generate an interrupt. When we get this interrupt we
+                //    fill the other half in preparation (kind of like double-buffering)
+                rmt_set_tx_thr_intr_en(rmt_channel_t(i), true, MAX_PULSES);
+            }
+        }
+
+        // -- Create a semaphore to block execution until all the controllers are done
+        if (gTX_sem == NULL) {
+            gTX_sem = xSemaphoreCreateBinary();
+            xSemaphoreGive(gTX_sem);
+        }
+                
+        if ( ! FASTLED_RMT_BUILTIN_DRIVER) {
+            // -- Allocate the interrupt if we have not done so yet. This
+            //    interrupt handler must work for all different kinds of
+            //    strips, so it delegates to the refill function for each
+            //    specific instantiation of ClocklessController.
+            if (gRMT_intr_handle == NULL)
+                esp_intr_alloc(ETS_RMT_INTR_SOURCE, 0, rmtInterruptHandler, 0, &gRMT_intr_handle);
+        }
+
+        gInitialized = true;
+    }
+
+    // ----------------------------------------------------------------------
+    //  I2S Initialization
+    // ----------------------------------------------------------------------
+    
+    static int pgcd(int smallest,int precision,int a,int b,int c)
     {
         int pgc_=1;
-        for( int i=smallest;i>0;i--)
+        for( int i=smallest;i>0;i--) 
         {
             
             if( a%i<=precision && b%i<=precision && c%i<=precision)
@@ -193,14 +435,8 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         return pgc_;
     }
     
-    
-    
     static void initBitPatterns()
     {
-        
-        
-        
-
         // Precompute the bit patterns based on the I2S sample rate
         uint32_t T1ns = ESPCLKS_TO_NS(T1);
         uint32_t T2ns = ESPCLKS_TO_NS(T2);
@@ -225,11 +461,11 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
             smallest=T3;
         double freq=(double)1/(double)(T1ns + T2ns + T3ns);
         Serial.printf("chipset frequency:%f Khz\n", 1000000L*freq);
-       // Serial.printf("smallest %d\n",smallest);
+        // Serial.printf("smallest %d\n",smallest);
         int pgc_=1;
         int precision=0;
         pgc_=pgcd(smallest,precision,T1,T2,T3);
-//Serial.printf("%f\n",I2S_MAX_CLK/(1000000000L*freq));
+        //Serial.printf("%f\n",I2S_MAX_CLK/(1000000000L*freq));
         while(pgc_==1 ||  (T1/pgc_ +T2/pgc_ +T3/pgc_)>I2S_MAX_PULSE_PER_BIT) //while(pgc_==1 ||  (T1/pgc_ +T2/pgc_ +T3/pgc_)>I2S_MAX_CLK/(1000000000L*freq))
         {
             precision++;
@@ -247,7 +483,6 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
          WS2811 T=320+320+641=1281ns qnd we need 4 pulses => pulse duration 320.25ns =>frequency 3.1225605Mhz
          
          */
-        
 
         freq=1000000000L*freq*gPulsesPerBit;
         Serial.printf("needed frequency (nbpiulse per bit)*(chispset frequency):%f Mhz\n",freq/1000000);
@@ -255,18 +490,15 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         /*
          we do calculate the needed N a and b
          as f=basefred/(N+b/a);
-         as a is max 63 the precision for the decimal is 1/63
-         
+         as a is max 63 the precision for the decimal is 1/63 
          */
         
-         CLOCK_DIVIDER_N=(int)((double)I2S_BASE_CLK/freq);
+        CLOCK_DIVIDER_N=(int)((double)I2S_BASE_CLK/freq);
         double v=I2S_BASE_CLK/freq-CLOCK_DIVIDER_N;
-   
-         
-        
+
         double prec=(double)1/63;
         int a=1;
-       int b=0;
+        int b=0;
         CLOCK_DIVIDER_A=1;
         CLOCK_DIVIDER_B=0;
         for(a=1;a<64;a++)
@@ -308,7 +540,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         Serial.printf("calculted for i2s frequency:%f Mhz N:%d B:%d A:%d\n",freq/1000000,CLOCK_DIVIDER_N,CLOCK_DIVIDER_B,CLOCK_DIVIDER_A);
         double pulseduration=1000000000/freq;
         Serial.printf("Pulse duration: %f ns\n",pulseduration);
-       // gPulsesPerBit = (T1ns + T2ns + T3ns)/FASTLED_I2S_NS_PER_PULSE;
+        // gPulsesPerBit = (T1ns + T2ns + T3ns)/FASTLED_I2S_NS_PER_PULSE;
         
         //Serial.print("Pulses per bit: "); Serial.println(gPulsesPerBit);
         
@@ -320,7 +552,6 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         //Serial.print(" = "); Serial.print(ones_for_one * FASTLED_I2S_NS_PER_PULSE); Serial.println("ns");
         Serial.printf("one bit : target %d  ns --- %d  pulses 1 bit = %f ns\n",T1ns+T2ns,ones_for_one ,ones_for_one*pulseduration);
         
-        
         int i = 0;
         while ( i < ones_for_one ) {
             gOneBit[i] = 0xFFFFFF00;
@@ -333,8 +564,8 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         
         //int ones_for_zero = ((T1ns - 1)/FASTLED_I2S_NS_PER_PULSE) + 1;
         ones_for_zero =T1/pgc_  ;
-       // Serial.print("Zero bit:  target ");
-       // Serial.print(T1ns); Serial.print("ns --- ");
+        // Serial.print("Zero bit:  target ");
+        // Serial.print(T1ns); Serial.print("ns --- ");
         //Serial.print(ones_for_zero); Serial.print(" 1 bits");
         //Serial.print(" = "); Serial.print(ones_for_zero * FASTLED_I2S_NS_PER_PULSE); Serial.println("ns");
         Serial.printf("Zero bit : target %d ns --- %d pulses  1 bit =   %f ns\n",T1ns,ones_for_zero ,ones_for_zero*pulseduration);
@@ -454,7 +685,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         // -- Allocate i2s interrupt
         SET_PERI_REG_BITS(I2S_INT_ENA_REG(I2S_DEVICE), I2S_OUT_EOF_INT_ENA_V, 1, I2S_OUT_EOF_INT_ENA_S);
         esp_err_t e = esp_intr_alloc(interruptSource, 0, // ESP_INTR_FLAG_INTRDISABLED | ESP_INTR_FLAG_LEVEL3,
-                                     &interruptHandler, 0, &gI2S_intr_handle);
+                                     &i2sInterruptHandler, 0, &gI2S_intr_handle);
         
         // -- Create a semaphore to block execution until all the controllers are done
         if (gTX_sem == NULL) {
@@ -466,7 +697,10 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         gInitialized = true;
     }
     
-    
+    // -- Clear the I2S DMA buffer 
+    //    Yves' trick: fill in all the bits we know ahead of time;
+    //    i.e., the parts of the zero-bit and one-bit signals where
+    //    they are *both* high or *both* low.
     static void empty( uint32_t *buf)
     {
         for(int i=0;i<8*NUM_COLOR_CHANNELS;i++)
@@ -480,6 +714,10 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         }
     }
     
+    // ----------------------------------------------------------------------
+    //  Common entry point
+    // ----------------------------------------------------------------------
+
     // -- Show pixels
     //    This is the main entry point for the controller.
     virtual void showPixels(PixelController<RGB_ORDER> & pixels)
@@ -495,6 +733,9 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         //    needs to outlive this call to showPixels.
         (*mPixels) = pixels;
         
+        if (FASTLED_RMT_BUILTIN_DRIVER)
+            convertAllPixelData(pixels);
+
         // -- Keep track of the number of strips we've seen
         gNumStarted++;
 
@@ -504,16 +745,27 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         // -- The last call to showPixels is the one responsible for doing
         //    all of the actual work
         if (gNumStarted == gNumControllers) {
-            empty((uint32_t*)dmaBuffers[0]->buffer);
-            empty((uint32_t*)dmaBuffers[1]->buffer);
-            gCurBuffer = 0;
-            gDoneFilling = false;
+            if (FASTLED_ESP32_I2S) {
+                empty((uint32_t*)dmaBuffers[0]->buffer);
+                empty((uint32_t*)dmaBuffers[1]->buffer);
+                gCurBuffer = 0;
+                gDoneFilling = false;
             
-            // -- Prefill both buffers
-            fillBuffer();
-            fillBuffer();
+                // -- Prefill both buffers
+                fillBuffer();
+                fillBuffer();
             
-            i2sStart();
+                i2sStart();
+            } else {
+                // -- First, fill all the available channels
+                gNext = 0;
+                int channel = 0;
+                while (channel < FASTLED_RMT_MAX_CHANNELS && gNext < gNumControllers) {
+                    startNext(channel);
+                    channel++;
+                }
+            }
+                
             
             // -- Wait here while the rest of the data is sent. The interrupt handler
             //    will keep refilling the RMT buffers until it is all sent; then it
@@ -521,15 +773,234 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
             xSemaphoreTake(gTX_sem, portMAX_DELAY);
             xSemaphoreGive(gTX_sem);
             
-            i2sStop();
+            if (FASTLED_ESP32_I2S)
+                i2sStop();
             
             // -- Reset the counters
             gNumStarted = 0;
+            gNumDone = 0;
+            gNext = 0;
+        }
+    }
+    
+    // ----------------------------------------------------------------------
+    //  RMT Methods
+    // ----------------------------------------------------------------------
+
+    // -- Convert all pixels to RMT pulses
+    //    This function is only used when the user chooses to use the
+    //    built-in RMT driver, which needs all of the RMT pulses
+    //    up-front.
+    virtual void convertAllPixelData(PixelController<RGB_ORDER> & pixels)
+    {
+        // -- Compute the pulse values for the whole strip at once.
+        //    Requires a large buffer
+        mBufferSize = pixels.size() * 3 * 8;
+
+        if (mBuffer == NULL) {
+            mBuffer = (rmt_item32_t *) calloc( mBufferSize, sizeof(rmt_item32_t));
+        }
+
+        // -- Cycle through the R,G, and B values in the right order,
+        //    storing the pulses in the big buffer
+        mCurPulse = 0;
+        int cur = 0;
+        uint32_t byteval;
+        while (pixels.has(1)) {
+            byteval = pixels.loadAndScale0();
+            convertByte(byteval);
+            byteval = pixels.loadAndScale1();
+            convertByte(byteval);
+            byteval = pixels.loadAndScale2();
+            convertByte(byteval);
+            pixels.advanceData();
+            pixels.stepDithering();
+        }
+
+        mBuffer[mCurPulse-1].duration1 = RMT_RESET_DURATION;
+        assert(mCurPulse == mBufferSize);
+    }
+
+    void convertByte(uint32_t byteval)
+    {
+        // -- Write one byte's worth of RMT pulses to the big buffer
+        byteval <<= 24;
+        for (register uint32_t j = 0; j < 8; j++) {
+            mBuffer[mCurPulse] = (byteval & 0x80000000L) ? mOne : mZero;
+            byteval <<= 1;
+            mCurPulse++;
+        }
+    }
+
+    // -- Start up the next controller
+    //    This method is static so that it can dispatch to the
+    //    appropriate startOnChannel method of the given controller.
+    static void startNext(int channel)
+    {
+        if (gNext < gNumControllers) {
+            ClocklessController * pController = static_cast<ClocklessController*>(gControllers[gNext]);
+            pController->startOnChannel(channel);
+            gNext++;
+        }
+    }
+
+    // -- Start this controller on the given channel
+    //    This function just initiates the RMT write; it does not wait
+    //    for it to finish.
+    void startOnChannel(int channel)
+    {
+        // -- Assign this channel and configure the RMT
+        mRMT_channel = rmt_channel_t(channel);
+
+        // -- Store a reference to this controller, so we can get it
+        //    inside the interrupt handler
+        gOnChannel[channel] = this;
+
+        // -- Assign the pin to this channel
+        rmt_set_pin(mRMT_channel, RMT_MODE_TX, mPin);
+
+        if (FASTLED_RMT_BUILTIN_DRIVER) {
+            // -- Use the built-in RMT driver to send all the data in one shot
+            rmt_register_tx_end_callback(doneOnChannel, 0);
+            rmt_write_items(mRMT_channel, mBuffer, mBufferSize, false);
+        } else {
+            // -- Use our custom driver to send the data incrementally
+
+            // -- Turn on the interrupts
+            rmt_set_tx_intr_en(mRMT_channel, true);
+        
+            // -- Initialize the counters that keep track of where we are in
+            //    the pixel data.
+            mCurPulse = 0;
+            mCurByte = 0;
+
+            // -- Fill both halves of the buffer
+            fillHalfRMTBuffer();
+            fillHalfRMTBuffer();
+
+            // -- Turn on the interrupts
+            rmt_set_tx_intr_en(mRMT_channel, true);
+            
+            // -- Start the RMT TX operation
+            rmt_tx_start(mRMT_channel, true);
+        }
+    }
+
+    // -- A controller is done 
+    //    This function is called when a controller finishes writing
+    //    its data. It is called either by the custom interrupt
+    //    handler (below), or as a callback from the built-in
+    //    interrupt handler. It is static because we don't know which
+    //    controller is done until we look it up.
+    static void doneOnChannel(rmt_channel_t channel, void * arg)
+    {
+        ClocklessController * controller = static_cast<ClocklessController*>(gOnChannel[channel]);
+        portBASE_TYPE HPTaskAwoken = 0;
+
+        // -- Turn off output on the pin
+        gpio_matrix_out(controller->mPin, 0x100, 0, 0);
+
+        gOnChannel[channel] = NULL;
+        gNumDone++;
+
+        if (gNumDone == gNumControllers) {
+            // -- If this is the last controller, signal that we are all done
+            xSemaphoreGiveFromISR(gTX_sem, &HPTaskAwoken);
+            if(HPTaskAwoken == pdTRUE) portYIELD_FROM_ISR();
+        } else {
+            // -- Otherwise, if there are still controllers waiting, then
+            //    start the next one on this channel
+            if (gNext < gNumControllers)
+                startNext(channel);
         }
     }
     
     // -- Custom interrupt handler
-    static IRAM_ATTR void interruptHandler(void *arg)
+    //    This interrupt handler handles two cases: a controller is
+    //    done writing its data, or a controller needs to fill the
+    //    next half of the RMT buffer with data.
+    static IRAM_ATTR void rmtInterruptHandler(void *arg)
+    {
+        // -- The basic structure of this code is borrowed from the
+        //    interrupt handler in esp-idf/components/driver/rmt.c
+        uint32_t intr_st = RMT.int_st.val;
+        uint8_t channel;
+
+        for (channel = 0; channel < FASTLED_RMT_MAX_CHANNELS; channel++) {
+            int tx_done_bit = channel * 3;
+            int tx_next_bit = channel + 24;
+
+            if (gOnChannel[channel] != NULL) {
+
+                // -- More to send on this channel
+                if (intr_st & BIT(tx_next_bit)) {
+                    RMT.int_clr.val |= BIT(tx_next_bit);
+                    
+                    // -- Refill the half of the buffer that we just finished,
+                    //    allowing the other half to proceed.
+                    ClocklessController * controller = static_cast<ClocklessController*>(gOnChannel[channel]);
+                    controller->fillHalfRMTBuffer();
+                } else {
+                    // -- Transmission is complete on this channel
+                    if (intr_st & BIT(tx_done_bit)) {
+                        RMT.int_clr.val |= BIT(tx_done_bit);
+                        doneOnChannel(rmt_channel_t(channel), 0);
+                    }
+                }
+            }
+        }
+    }
+
+    // -- Fill the RMT buffer
+    //    This function fills the next 32 slots in the RMT write
+    //    buffer with pixel data. It also handles the case where the
+    //    pixel data is exhausted, so we need to fill the RMT buffer
+    //    with zeros to signal that it's done.
+    void fillHalfRMTBuffer()
+    {
+        uint32_t one_val = mOne.val;
+        uint32_t zero_val = mZero.val;
+
+        // -- Convert (up to) 32 bits of the raw pixel data
+        //    into RMT pulses that encode the zeros and ones.
+        int pulses = 0;
+        uint32_t byteval;
+        while (pulses < 32 && mCurByte < mSize) {
+            // -- Get one byte
+            byteval = mPixelData[mCurByte++];
+            byteval <<= 24;
+            // Shift bits out, MSB first, setting RMTMEM.chan[n].data32[x] to the 
+            // rmt_item32_t value corresponding to the buffered bit value
+            for (register uint32_t j = 0; j < 8; j++) {
+                uint32_t val = (byteval & 0x80000000L) ? one_val : zero_val;
+                RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = val;
+                byteval <<= 1;
+                mCurPulse++;
+            }
+            pulses += 8;
+        }
+
+        // -- When we reach the end of the pixel data, fill the rest of the
+        //    RMT buffer with 0's, which signals to the device that we're done.
+        if (mCurByte == mSize) {
+            while (pulses < 32) {
+                RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = 0;
+                mCurPulse++;
+                pulses++;
+            }
+        }
+        
+        // -- When we have filled the back half of the buffer, reset the position to the first half
+        if (mCurPulse >= MAX_PULSES*2)
+            mCurPulse = 0;
+    }
+
+    // ----------------------------------------------------------------------
+    //  I2S Methods
+    // ----------------------------------------------------------------------
+
+    // -- Custom interrupt handler
+    static IRAM_ATTR void i2sInterruptHandler(void *arg)
     {
         if (i2s->int_st.out_eof) {
             i2s->int_clr.val = i2s->int_raw.val;
@@ -587,17 +1058,6 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
                 uint8_t * row = (uint8_t *) (gPixelBits[channel][bitnum]);
                 uint32_t bit = (row[0] << 24) | (row[1] << 16) | (row[2] << 8) | row[3];
                 
-                /*
-                 Serial.print(bitnum); Serial.print(": ");
-                 uint32_t bt = bit;
-                 for (int k = 0; k < 32; k++) {
-                 if (bt & 0x80000000) Serial.print("1");
-                 else Serial.print("0");
-                 bt = bt << 1;
-                 }
-                 Serial.println();
-                 */
-                
                /* for (int pulse_num = 0; pulse_num < gPulsesPerBit; pulse_num++) {
                     buf[buf_index++] = has_data_mask & ( (bit & gOneBit[pulse_num]) | (~bit & gZeroBit[pulse_num]) );*/
                 //when the loop is too big  => issues in timing hence i only fill the the 1

From 52f64aca8709306cfcfda2c381f6bd8aec58a7b3 Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Tue, 30 Apr 2019 12:01:22 -0400
Subject: [PATCH 048/204] Complete I2S implementation, with switch to choose it
 over the RMT

---
 platforms/esp/32/clockless_i2s_esp32.h | 767 +++++++++++++++++++++++++
 platforms/esp/32/clockless_rmt_esp32.h | 567 ++++++++++++++++++
 platforms/esp/32/fastled_esp32.h       |   8 +-
 3 files changed, 1341 insertions(+), 1 deletion(-)
 create mode 100644 platforms/esp/32/clockless_i2s_esp32.h
 create mode 100644 platforms/esp/32/clockless_rmt_esp32.h

diff --git a/platforms/esp/32/clockless_i2s_esp32.h b/platforms/esp/32/clockless_i2s_esp32.h
new file mode 100644
index 0000000000..07b2ed5d67
--- /dev/null
+++ b/platforms/esp/32/clockless_i2s_esp32.h
@@ -0,0 +1,767 @@
+/*
+ * I2S Driver
+ *
+ * Copyright (c) 2019 Yves Bazin
+ * Copyright (c) 2019 Samuel Z. Guyer
+ * Derived from lots of code examples from other people.
+ *
+ * The I2S implementation can drive up to 24 strips in parallel, but
+ * with the following limitation: all the strips must have the same
+ * timing (i.e., they must all use the same chip).
+ *
+ * To enable the I2S driver, add the following line *before* including
+ * FastLED.h (no other changes are necessary):
+ *
+ * #define FASTLED_ESP32_I2S true
+ *
+ * The overall strategy is to use the parallel mode of the I2S "audio"
+ * peripheral to send up to 24 bits in parallel to 24 different pins.
+ * Unlike the RMT peripheral the I2S system cannot send bits of
+ * different lengths. Instead, we set the I2S data clock fairly high
+ * and then encode a signal as a series of bits. 
+ *
+ * For example, with a clock divider of 10 the data clock will be
+ * 8MHz, so each bit is 125ns. The WS2812 expects a "1" bit to be
+ * encoded as a HIGH signal for around 875ns, followed by LOW for
+ * 375ns. Sending the following pattern results in the right shape
+ * signal:
+ *
+ *    1111111000        WS2812 "1" bit encoded as 10 125ns pulses
+ *
+ * The I2S peripheral expects the bits for all 24 outputs to be packed
+ * into a single 32-bit word. The complete signal is a series of these
+ * 32-bit values -- one for each bit for each strip. The pixel data,
+ * however, is stored "serially" as a series of RGB values separately
+ * for each strip. To prepare the data we need to do three things: (1)
+ * take 1 pixel from each strip, (2) transpose the bits so that they
+ * are in the parallel form, and (3) translate each data bit into the
+ * bit pattern that encodes the signal for that bit. This code is in
+ * the fillBuffer() method:
+ *
+ *   1. Read 1 pixel from each strip into an array; store this data by
+ *      color channel (e.g., all the red bytes, then all the green
+ *      bytes, then all the blue bytes). For three color channels, the
+ *      array is 3 X 24 X 8 bits.
+ *
+ *   2. Transpose the array so that it is 3 X 8 X 24 bits. The hardware
+ *      wants the data in 32-bit chunks, so the actual form is 3 X 8 X
+ *      32, with the low 8 bits unused.
+ *
+ *   3. Take each group of 24 parallel bits and "expand" them into a
+ *      pattern according to the encoding. For example, with an 8MHz
+ *      data clock, each data bit turns into 10 I2S pulses, so 24
+ *      parallel data bits turn into 10 X 24 pulses.
+ *
+ * We send data to the I2S peripheral using the DMA interface. We use
+ * two DMA buffers, so that we can fill one buffer while the other
+ * buffer is being sent. Each DMA buffer holds the fully-expanded
+ * pulse pattern for one pixel on up to 24 strips. The exact amount of
+ * memory required depends on the number of color channels and the
+ * number of pulses used to encode each bit.
+ *
+ * We get an interrupt each time a buffer is sent; we then fill that
+ * buffer while the next one is being sent. The DMA interface allows
+ * us to configure the buffers as a circularly linked list, so that it
+ * can automatically start on the next buffer.
+ */
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#pragma once
+
+#pragma message "NOTE: ESP32 support using I2S parallel driver. All strips must use the same chipset"
+
+FASTLED_NAMESPACE_BEGIN
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+    
+#include "esp_heap_caps.h"
+#include "soc/soc.h"
+#include "soc/gpio_sig_map.h"
+#include "soc/i2s_reg.h"
+#include "soc/i2s_struct.h"
+#include "soc/io_mux_reg.h"
+#include "driver/gpio.h"
+#include "driver/periph_ctrl.h"
+#include "rom/lldesc.h"
+#include "esp_intr.h"
+#include "esp_log.h"
+    
+#ifdef __cplusplus
+}
+#endif
+
+__attribute__ ((always_inline)) inline static uint32_t __clock_cycles() {
+    uint32_t cyc;
+    __asm__ __volatile__ ("rsr %0,ccount":"=a" (cyc));
+    return cyc;
+}
+
+#define FASTLED_HAS_CLOCKLESS 1
+#define NUM_COLOR_CHANNELS 3
+
+// -- Choose which I2S device to use
+#ifndef I2S_DEVICE
+#define I2S_DEVICE 0
+#endif
+
+// -- Max number of controllers we can support
+#ifndef FASTLED_I2S_MAX_CONTROLLERS
+#define FASTLED_I2S_MAX_CONTROLLERS 24
+#endif
+
+// -- I2S clock
+#define I2S_BASE_CLK (80000000L)
+#define I2S_MAX_CLK (20000000L) // above a certain speed the I2S loses some bits
+#define I2S_MAX_PULSE_PER_BIT 20 //put it higher to get more accuracy but it could decrease the refresh rate without real improvement
+// -- Convert ESP32 cycles back into nanoseconds
+#define ESPCLKS_TO_NS(_CLKS) (((long)(_CLKS) * 1000L) / F_CPU_MHZ)
+
+// -- Array of all controllers
+static CLEDController * gControllers[FASTLED_I2S_MAX_CONTROLLERS];
+static int gNumControllers = 0;
+static int gNumStarted = 0;
+
+// -- Global semaphore for the whole show process
+//    Semaphore is not given until all data has been sent
+static xSemaphoreHandle gTX_sem = NULL;
+
+// -- One-time I2S initialization
+static bool gInitialized = false;
+
+// -- Interrupt handler
+static intr_handle_t gI2S_intr_handle = NULL;
+
+// -- A pointer to the memory-mapped structure: I2S0 or I2S1
+static i2s_dev_t * i2s;
+
+// -- I2S goes to these pins until we remap them using the GPIO matrix
+static int i2s_base_pin_index;
+
+// --- I2S DMA buffers
+struct DMABuffer {
+    lldesc_t descriptor;
+    uint8_t * buffer;
+};
+
+#define NUM_DMA_BUFFERS 2
+static DMABuffer * dmaBuffers[NUM_DMA_BUFFERS];
+
+// -- Bit patterns
+//    For now, we require all strips to be the same chipset, so these
+//    are global variables.
+
+static int      gPulsesPerBit = 0;
+static uint32_t gOneBit[40] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
+static uint32_t gZeroBit[40]  = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
+
+// -- Counters to track progress
+static int gCurBuffer = 0;
+static bool gDoneFilling = false;
+static int ones_for_one;
+static int ones_for_zero;
+
+// -- Temp buffers for pixels and bits being formatted for DMA
+static uint8_t gPixelRow[NUM_COLOR_CHANNELS][32];
+static uint8_t gPixelBits[NUM_COLOR_CHANNELS][8][4];
+static int CLOCK_DIVIDER_N;
+static int CLOCK_DIVIDER_A;
+static int CLOCK_DIVIDER_B;
+
+template <int DATA_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = RGB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 5>
+class ClocklessController : public CPixelLEDController<RGB_ORDER>
+{
+    // -- Store the GPIO pin
+    gpio_num_t     mPin;
+    
+    // -- This instantiation forces a check on the pin choice
+    FastPin<DATA_PIN> mFastPin;
+    
+    // -- Save the pixel controller
+    PixelController<RGB_ORDER> * mPixels;
+    
+public:
+
+    void init()
+    {
+        // -- Bring up the shared I2S device (returns at once after the first call)
+        i2sInit();
+
+        // -- Reserve room for this controller's private copy of the
+        //    pixel controller, used during parallel output
+        mPixels = static_cast<PixelController<RGB_ORDER> *>(malloc(sizeof(PixelController<RGB_ORDER>)));
+
+        // -- Register in the global table, remembering which slot we got
+        const int slot = gNumControllers++;
+        gControllers[slot] = this;
+
+        // -- Pin setup takes two steps: configure the GPIO itself, then
+        //    route the I2S output signal to it through the GPIO matrix.
+        //    The signal index is the device's base index plus our slot,
+        //    so the slot order is crucial: the bits must go into the DMA
+        //    buffer in the same order.
+        mPin = gpio_num_t(DATA_PIN);
+
+        PIN_FUNC_SELECT(GPIO_PIN_MUX_REG[DATA_PIN], PIN_FUNC_GPIO);
+        gpio_set_direction(mPin, (gpio_mode_t)GPIO_MODE_DEF_OUTPUT);
+        pinMode(mPin, OUTPUT);
+        gpio_matrix_out(mPin, i2s_base_pin_index + slot, false, false);
+    }
+    
+    virtual uint16_t getMaxRefreshRate() const { return 400; } // always 400; presumably refreshes per second -- TODO confirm
+    
+protected:
+   
+   static int pgcd(int smallest,int precision,int a,int b,int c)
+    {
+        // -- Approximate common divisor: scan candidates downward from
+        //    'smallest' and return the first one that leaves a remainder
+        //    of at most 'precision' for each of a, b and c.
+        int candidate = smallest;
+        while (candidate > 0) {
+            const bool fits = (a % candidate <= precision) &&
+                              (b % candidate <= precision) &&
+                              (c % candidate <= precision);
+            if (fits) return candidate;
+            candidate--;
+        }
+        return 1;   // nothing qualified (only possible when smallest <= 0)
+    }
+    
+    /** Compute pulses/bit patterns
+     *
+     *  This is Yves Bazin's mad code for computing the pulse pattern
+     *  and clock timing given the target signal given by T1, T2, and
+     *  T3. In general, these parameters are interpreted as follows:
+     *
+     *  a "1" bit is encoded by setting the pin HIGH for T1+T2 ns, then LOW for T3 ns
+     *  a "0" bit is encoded by setting the pin HIGH for T1 ns, then LOW for T2+T3 ns
+     *
+     */
+    static void initBitPatterns()
+    {
+        // Precompute the bit patterns based on the I2S sample rate
+        Serial.println("Setting up fastled using I2S");
+
+        // -- First, convert back to ns from CPU clocks
+        uint32_t T1ns = ESPCLKS_TO_NS(T1);
+        uint32_t T2ns = ESPCLKS_TO_NS(T2);
+        uint32_t T3ns = ESPCLKS_TO_NS(T3);
+        
+        Serial.print("T1 = "); Serial.print(T1); Serial.print(" ns "); Serial.println(T1ns);
+        Serial.print("T2 = "); Serial.print(T2); Serial.print(" ns "); Serial.println(T2ns);
+        Serial.print("T3 = "); Serial.print(T3); Serial.print(" ns "); Serial.println(T3ns);
+        
+        /*
+         We calculate the best pgcd (approximate common divisor) for the timing
+         i.e.
+         WS2811 77 77 154 => 1  1 2 => nb pulses= 4
+         WS2812 60 150 90 => 2 5 3 => nb pulses=10
+         */
+        int smallest=0;
+        if (T1>T2)
+            smallest=T2;
+        else
+            smallest=T1;
+        if(smallest>T3)
+            smallest=T3;
+        double freq=(double)1/(double)(T1ns + T2ns + T3ns);
+        Serial.printf("chipset frequency:%f Khz\n", 1000000L*freq);
+       // Serial.printf("smallest %d\n",smallest);
+        int pgc_=1;
+        int precision=0;
+        pgc_=pgcd(smallest,precision,T1,T2,T3);
+        //Serial.printf("%f\n",I2S_MAX_CLK/(1000000000L*freq));
+        while(pgc_==1 ||  (T1/pgc_ +T2/pgc_ +T3/pgc_)>I2S_MAX_PULSE_PER_BIT) //while(pgc_==1 ||  (T1/pgc_ +T2/pgc_ +T3/pgc_)>I2S_MAX_CLK/(1000000000L*freq))
+        {
+            precision++;
+            pgc_=pgcd(smallest,precision,T1,T2,T3);
+            //Serial.printf("%d %d\n",pgc_,(a+b+c)/pgc_);
+        }
+        pgc_=pgcd(smallest,precision,T1,T2,T3);
+        Serial.printf("pgcd %d precision:%d\n",pgc_,precision);
+        Serial.printf("nb pulse per bit:%d\n",T1/pgc_ +T2/pgc_ +T3/pgc_);
+        gPulsesPerBit=(int)T1/pgc_ +(int)T2/pgc_ +(int)T3/pgc_;
+        /*
+         We calculate the duration of one pulse and the base frequency of the LED
+         i.e. WS2812B F=1/(250+625+375)=800kHz or 1250ns
+         as we need 10 pulses each pulse is 125ns => frequency 800Khz*10=8MHz
+         WS2811 T=320+320+641=1281ns and we need 4 pulses => pulse duration 320.25ns =>frequency 3.1225605Mhz
+         
+         */
+
+        freq=1000000000L*freq*gPulsesPerBit;
+        Serial.printf("needed frequency (nbpiulse per bit)*(chispset frequency):%f Mhz\n",freq/1000000);
+        
+        /*
+         We calculate the needed N, a and b
+         such that f = base frequency/(N+b/a);
+         as a is at most 63, the precision for the fractional part is 1/63
+         
+         */
+        
+         CLOCK_DIVIDER_N=(int)((double)I2S_BASE_CLK/freq);
+        double v=I2S_BASE_CLK/freq-CLOCK_DIVIDER_N;
+
+        double prec=(double)1/63;
+        int a=1;
+        int b=0;
+        CLOCK_DIVIDER_A=1;
+        CLOCK_DIVIDER_B=0;
+        for(a=1;a<64;a++)
+        {
+            for(b=0;b<a;b++)
+            {
+                //printf("%d %d %f %f %f\n",b,a,v,(double)v*(double)a,fabsf(v-(double)b/a));
+                if(fabsf(v-(double)b/a) <= prec/2)
+                    break;
+            }
+            if(fabsf(v-(double)b/a) ==0)
+            {
+                CLOCK_DIVIDER_A=a;
+                CLOCK_DIVIDER_B=b;
+                break;
+            }
+            if(fabsf(v-(double)b/a) < prec/2)
+            {
+                if (fabsf(v-(double)b/a) <fabsf(v-(double)CLOCK_DIVIDER_B/CLOCK_DIVIDER_A))
+                {
+                    CLOCK_DIVIDER_A=a;
+                    CLOCK_DIVIDER_B=b;
+                }
+                
+            }
+        }
+        // to take care of an issue with double 0.9999999999 (b ran up to a: treat the fraction as 1)
+        if(CLOCK_DIVIDER_A==CLOCK_DIVIDER_B)
+        {
+            CLOCK_DIVIDER_A=1;
+            CLOCK_DIVIDER_B=0;
+            CLOCK_DIVIDER_N++;
+        }
+        
+        //printf("%d %d %f %f %d\n",CLOCK_DIVIDER_B,CLOCK_DIVIDER_A,(double)CLOCK_DIVIDER_B/CLOCK_DIVIDER_A,v,CLOCK_DIVIDER_N);
+        //Serial.printf("freq %f %f\n",freq,I2S_BASE_CLK/(CLOCK_DIVIDER_N+(double)CLOCK_DIVIDER_B/CLOCK_DIVIDER_A));
+        freq=1/(CLOCK_DIVIDER_N+(double)CLOCK_DIVIDER_B/CLOCK_DIVIDER_A);
+        freq=freq*I2S_BASE_CLK;
+        Serial.printf("calculted for i2s frequency:%f Mhz N:%d B:%d A:%d\n",freq/1000000,CLOCK_DIVIDER_N,CLOCK_DIVIDER_B,CLOCK_DIVIDER_A);
+        double pulseduration=1000000000/freq;
+        Serial.printf("Pulse duration: %f ns\n",pulseduration);
+        // gPulsesPerBit = (T1ns + T2ns + T3ns)/FASTLED_I2S_NS_PER_PULSE;
+        
+        //Serial.print("Pulses per bit: "); Serial.println(gPulsesPerBit);
+        
+        //int ones_for_one  = ((T1ns + T2ns - 1)/FASTLED_I2S_NS_PER_PULSE) + 1;
+        ones_for_one  = T1/pgc_ +T2/pgc_;
+        //Serial.print("One bit:  target ");
+        //Serial.print(T1ns+T2ns); Serial.print("ns --- ");
+        //Serial.print(ones_for_one); Serial.print(" 1 bits");
+        //Serial.print(" = "); Serial.print(ones_for_one * FASTLED_I2S_NS_PER_PULSE); Serial.println("ns");
+        Serial.printf("one bit : target %d  ns --- %d  pulses 1 bit = %f ns\n",T1ns+T2ns,ones_for_one ,ones_for_one*pulseduration);
+        
+        
+        int i = 0;
+        while ( i < ones_for_one ) {
+            gOneBit[i] = 0xFFFFFF00;
+            i++;
+        }
+        while ( i < gPulsesPerBit ) {
+            gOneBit[i] = 0x00000000;
+            i++;
+        }
+        
+        //int ones_for_zero = ((T1ns - 1)/FASTLED_I2S_NS_PER_PULSE) + 1;
+        ones_for_zero =T1/pgc_  ;
+       // Serial.print("Zero bit:  target ");
+       // Serial.print(T1ns); Serial.print("ns --- ");
+        //Serial.print(ones_for_zero); Serial.print(" 1 bits");
+        //Serial.print(" = "); Serial.print(ones_for_zero * FASTLED_I2S_NS_PER_PULSE); Serial.println("ns");
+        Serial.printf("Zero bit : target %d ns --- %d pulses  1 bit =   %f ns\n",T1ns,ones_for_zero ,ones_for_zero*pulseduration);
+        i = 0;
+        while ( i < ones_for_zero ) {
+            gZeroBit[i] = 0xFFFFFF00;
+            i++;
+        }
+        while ( i < gPulsesPerBit ) {
+            gZeroBit[i] = 0x00000000;
+            i++;
+        }
+        
+        memset(gPixelRow, 0, NUM_COLOR_CHANNELS * 32);
+        memset(gPixelBits, 0, NUM_COLOR_CHANNELS * 32);
+    }
+    
+    static DMABuffer * allocateDMABuffer(int bytes)
+    {
+        // -- Descriptor wrapper and payload are both requested with MALLOC_CAP_DMA
+        DMABuffer * dma = (DMABuffer *)heap_caps_malloc(sizeof(DMABuffer), MALLOC_CAP_DMA);
+        dma->buffer = (uint8_t *)heap_caps_malloc(bytes, MALLOC_CAP_DMA);
+        memset(dma->buffer, 0, bytes);
+        // -- Describe the zeroed payload; the next-link is left empty for the caller
+        lldesc_t & desc = dma->descriptor;
+        desc.length = bytes;
+        desc.size = bytes;
+        desc.owner = 1;
+        desc.sosf = 1;
+        desc.buf = dma->buffer;
+        desc.offset = 0;
+        desc.empty = 0;
+        desc.eof = 1;
+        desc.qe.stqe_next = 0;
+        return dma;
+    }
+    
+    static void i2sInit()
+    {
+        // -- Only need to do this once (gInitialized is set at the end)
+        if (gInitialized) return;
+        
+        // -- Construct the bit patterns for ones and zeros
+        initBitPatterns();
+        
+        // -- Choose whether to use I2S device 0 or device 1
+        //    Set up the various device-specific parameters
+        int interruptSource;
+        if (I2S_DEVICE == 0) {
+            i2s = &I2S0;
+            periph_module_enable(PERIPH_I2S0_MODULE);
+            interruptSource = ETS_I2S0_INTR_SOURCE;
+            i2s_base_pin_index = I2S0O_DATA_OUT0_IDX;
+        } else {
+            i2s = &I2S1;
+            periph_module_enable(PERIPH_I2S1_MODULE);
+            interruptSource = ETS_I2S1_INTR_SOURCE;
+            i2s_base_pin_index = I2S1O_DATA_OUT0_IDX;
+        }
+        
+        // -- Reset everything
+        i2sReset();
+        i2sReset_DMA();
+        i2sReset_FIFO();
+        
+        // -- Main configuration
+        i2s->conf.tx_msb_right = 1;
+        i2s->conf.tx_mono = 0;
+        i2s->conf.tx_short_sync = 0;
+        i2s->conf.tx_msb_shift = 0;
+        i2s->conf.tx_right_first = 1; // 0;//1;
+        i2s->conf.tx_slave_mod = 0;
+        
+        // -- Set parallel mode
+        i2s->conf2.val = 0;
+        i2s->conf2.lcd_en = 1;
+        i2s->conf2.lcd_tx_wrx2_en = 0; // 0 for 16 or 32 parallel output
+        i2s->conf2.lcd_tx_sdx2_en = 0; // HN
+        
+        // -- Set up the clock rate and sampling
+        i2s->sample_rate_conf.val = 0;
+        i2s->sample_rate_conf.tx_bits_mod = 32; // Number of parallel bits/pins
+        i2s->sample_rate_conf.tx_bck_div_num = 1;
+        i2s->clkm_conf.val = 0;
+        i2s->clkm_conf.clka_en = 0;
+        
+        // -- Data clock is computed as Base/(div_num + (div_b/div_a))
+        //    Example: Base is 80Mhz, so 80/(10 + 0/1) = 8Mhz, one cycle is 125ns.
+        //    The actual divider values were computed by initBitPatterns() above.
+        i2s->clkm_conf.clkm_div_a = CLOCK_DIVIDER_A;
+        i2s->clkm_conf.clkm_div_b = CLOCK_DIVIDER_B;
+        i2s->clkm_conf.clkm_div_num = CLOCK_DIVIDER_N;
+        
+        i2s->fifo_conf.val = 0;
+        i2s->fifo_conf.tx_fifo_mod_force_en = 1;
+        i2s->fifo_conf.tx_fifo_mod = 3;  // 32-bit single channel data
+        i2s->fifo_conf.tx_data_num = 32; // fifo length
+        i2s->fifo_conf.dscr_en = 1;      // fifo will use dma
+        
+        i2s->conf1.val = 0;
+        i2s->conf1.tx_stop_en = 0;
+        i2s->conf1.tx_pcm_bypass = 1;
+        
+        i2s->conf_chan.val = 0;
+        i2s->conf_chan.tx_chan_mod = 1; // Mono mode, with tx_msb_right = 1, everything goes to right-channel
+        
+        i2s->timing.val = 0;
+        
+        // -- Allocate two DMA buffers; each holds 8 * NUM_COLOR_CHANNELS * gPulsesPerBit 32-bit words
+        dmaBuffers[0] = allocateDMABuffer(32 * NUM_COLOR_CHANNELS * gPulsesPerBit);
+        dmaBuffers[1] = allocateDMABuffer(32 * NUM_COLOR_CHANNELS * gPulsesPerBit);
+        
+        // -- Arrange them as a circularly linked list
+        dmaBuffers[0]->descriptor.qe.stqe_next = &(dmaBuffers[1]->descriptor);
+        dmaBuffers[1]->descriptor.qe.stqe_next = &(dmaBuffers[0]->descriptor);
+       
+        // -- Enable the out-EOF interrupt bit and register interruptHandler for this device
+        SET_PERI_REG_BITS(I2S_INT_ENA_REG(I2S_DEVICE), I2S_OUT_EOF_INT_ENA_V, 1, I2S_OUT_EOF_INT_ENA_S);
+        esp_err_t e = esp_intr_alloc(interruptSource, 0, // ESP_INTR_FLAG_INTRDISABLED | ESP_INTR_FLAG_LEVEL3,
+                                     &interruptHandler, 0, &gI2S_intr_handle); // NOTE(review): 'e' is never checked
+        
+        // -- Create a semaphore to block execution until all the controllers are done
+        if (gTX_sem == NULL) {
+            gTX_sem = xSemaphoreCreateBinary();
+            xSemaphoreGive(gTX_sem);
+        }
+        
+        // Serial.println("Init I2S");
+        gInitialized = true;
+    }
+    
+    /** Reset a DMA buffer to its bit-independent state
+     *
+     *  Yves' clever trick: inside every bit slot the leading pulses
+     *  are 1 and the trailing pulses are 0 regardless of the bit value
+     *  being encoded, so only those words are written here.
+     */
+    static void empty( uint32_t *buf)
+    {
+        const int numSlots = 8*NUM_COLOR_CHANNELS;
+        for (int s = 0; s < numSlots; s++) {
+            // -- Each slot spans gPulsesPerBit consecutive words
+            uint32_t * slot = buf + gPulsesPerBit*s;
+            int p = 0;
+            for ( ; p < ones_for_zero; p++) slot[p] = 0xffffffff;
+            for (p = ones_for_one; p < gPulsesPerBit; p++) slot[p] = 0;
+        }
+    }
+    
+    // -- Show pixels
+    //    This is the main entry point for the controller.
+    virtual void showPixels(PixelController<RGB_ORDER> & pixels)
+    {
+        if (gNumStarted == 0) {
+            // -- First controller of this round: take the global semaphore
+            xSemaphoreTake(gTX_sem, portMAX_DELAY);
+        }
+        
+        // -- Initialize the local state, save a pointer to the pixel
+        //    data. We need to make a copy because pixels is a local
+        //    variable in the calling function, and this data structure
+        //    needs to outlive this call to showPixels.
+        (*mPixels) = pixels;
+        
+        // -- Keep track of the number of strips we've seen
+        gNumStarted++;
+
+        // Serial.print("Show pixels ");
+        // Serial.println(gNumStarted);
+        
+        // -- The last call to showPixels is the one responsible for doing
+        //    all of the actual work
+        if (gNumStarted == gNumControllers) {
+            empty((uint32_t*)dmaBuffers[0]->buffer);
+            empty((uint32_t*)dmaBuffers[1]->buffer);
+            gCurBuffer = 0;
+            gDoneFilling = false;
+            
+            // -- Prefill both buffers
+            fillBuffer();
+            fillBuffer();
+            
+            i2sStart();
+            
+            // -- Wait here while the rest of the data is sent. The interrupt handler
+            //    will keep refilling the DMA buffers until it is all sent; then it
+            //    gives the semaphore back.
+            xSemaphoreTake(gTX_sem, portMAX_DELAY);
+            xSemaphoreGive(gTX_sem); // hand it straight back so the next round can take it
+            
+            i2sStop();
+            
+            // -- Reset the counters
+            gNumStarted = 0;
+        }
+    }
+    
+    // -- Custom interrupt handler (acts only on the out-EOF event)
+    static IRAM_ATTR void interruptHandler(void *arg)
+    {
+        if ( ! i2s->int_st.out_eof) return;
+
+        // -- Acknowledge every interrupt flag that is currently raised
+        i2s->int_clr.val = i2s->int_raw.val;
+
+        if ( ! gDoneFilling) { fillBuffer(); return; }
+
+        // -- No more data to queue: release the task blocked in showPixels
+        portBASE_TYPE woken = 0;
+        xSemaphoreGiveFromISR(gTX_sem, &woken);
+        if (woken == pdTRUE) portYIELD_FROM_ISR();
+    }
+    
+    /** Fill DMA buffer
+     *
+     *  This is where the real work happens: take a row of pixels (one
+     *  from each strip), transpose and encode the bits, and store them
+     *  in the next DMA buffer. Sets gDoneFilling when no strip has data.
+     */
+    static void fillBuffer()
+    {
+        // -- Alternate between buffers
+        volatile uint32_t * buf = (uint32_t *) dmaBuffers[gCurBuffer]->buffer;
+        gCurBuffer = (gCurBuffer + 1) % NUM_DMA_BUFFERS;
+        
+        // -- Get the requested pixel from each controller. Store the
+        //    data for each color channel in a separate array.
+        uint32_t has_data_mask = 0;
+        for (int i = 0; i < gNumControllers; i++) {
+            // -- Store the pixels in reverse controller order starting at index 23
+            //    This causes the bits to come out in the right position after we
+            //    transpose them.
+            int bit_index = 23-i;
+            ClocklessController * pController = static_cast<ClocklessController*>(gControllers[i]);
+            if (pController->mPixels->has(1)) {
+                gPixelRow[0][bit_index] = pController->mPixels->loadAndScale0();
+                gPixelRow[1][bit_index] = pController->mPixels->loadAndScale1();
+                gPixelRow[2][bit_index] = pController->mPixels->loadAndScale2();
+                pController->mPixels->advanceData();
+                pController->mPixels->stepDithering();
+                
+                // -- Record that this controller still has data to send. The shift
+                //    is done on an unsigned 1 because i+8 reaches bit 31 for i == 23.
+                has_data_mask |= (uint32_t(1) << (i+8));
+            }
+        }
+        
+        // -- None of the strips has data? We are done.
+        if (has_data_mask == 0) {
+            gDoneFilling = true;
+            return;
+        }
+        
+        // -- Transpose and encode the pixel data for the DMA buffer.
+        //    Each (channel, bit) pair owns gPulsesPerBit consecutive words of buf.
+        for (int channel = 0; channel < NUM_COLOR_CHANNELS; channel++) {
+            
+            // -- Tranpose each array: all the bit 7's, then all the bit 6's, ...
+            transpose32(gPixelRow[channel], gPixelBits[channel][0] );
+            
+            // -- Pack the 4 transposed bytes of each bit row, widening before the shift
+            for (int bitnum = 0; bitnum < 8; bitnum++) {
+                uint8_t * row = (uint8_t *) (gPixelBits[channel][bitnum]);
+                uint32_t bit = ((uint32_t)row[0] << 24) | ((uint32_t)row[1] << 16) | ((uint32_t)row[2] << 8) | row[3];
+                
+               /* SZG: More general, but too slow:
+                    for (int pulse_num = 0; pulse_num < gPulsesPerBit; pulse_num++) {
+                        buf[buf_index++] = has_data_mask & ( (bit & gOneBit[pulse_num]) | (~bit & gZeroBit[pulse_num]) );
+                     }
+               */
+
+                // -- Only fill in the pulses that are different between the "0" and "1" encodings
+                for(int pulse_num = ones_for_zero; pulse_num < ones_for_one; pulse_num++) {
+                    buf[bitnum*gPulsesPerBit+channel*8*gPulsesPerBit+pulse_num] = has_data_mask & bit;
+                }
+            }
+        }
+    }
+    
+    static void transpose32(uint8_t * pixels, uint8_t * bits)
+    {
+        // -- Three 8x8 transposes cover pixels[0..23]; output rows are 4
+        //    bytes apart and the fourth byte column is never written
+        for (int group = 0; group < 3; group++)
+            transpose8rS32(& pixels[8*group], 1, 4, & bits[group]);
+    }
+    
+    /** Transpose 8x8 bit matrix (from Hacker's Delight)
+     *
+     *  A holds 8 input rows m bytes apart; B receives 8 output rows n bytes apart.
+     *  Bit k (MSB first) of input row r becomes bit r (MSB first) of output row k.
+     */
+    static void transpose8rS32(uint8_t * A, int m, int n, uint8_t * B)
+    {
+        // -- Pack rows 0-3 into x and rows 4-7 into y. Each byte is widened to
+        //    uint32_t first so that a set top bit is never shifted into the
+        //    sign bit of a promoted int.
+        uint32_t x = ((uint32_t)A[0]<<24)   | ((uint32_t)A[m]<<16)   | ((uint32_t)A[2*m]<<8) | A[3*m];
+        uint32_t y = ((uint32_t)A[4*m]<<24) | ((uint32_t)A[5*m]<<16) | ((uint32_t)A[6*m]<<8) | A[7*m];
+        uint32_t t;
+        // -- Swap within 2x2 bit blocks, then swap 2x2 blocks within 4x4 blocks
+        t = (x ^ (x >> 7)) & 0x00AA00AA;  x = x ^ t ^ (t << 7);
+        t = (y ^ (y >> 7)) & 0x00AA00AA;  y = y ^ t ^ (t << 7);
+        t = (x ^ (x >>14)) & 0x0000CCCC;  x = x ^ t ^ (t <<14);
+        t = (y ^ (y >>14)) & 0x0000CCCC;  y = y ^ t ^ (t <<14);
+        // -- Exchange the off-diagonal 4x4 blocks between x and y
+        t = (x & 0xF0F0F0F0) | ((y >> 4) & 0x0F0F0F0F);
+        y = ((x << 4) & 0xF0F0F0F0) | (y & 0x0F0F0F0F);
+        x = t;
+        B[0]=x>>24;    B[n]=x>>16;    B[2*n]=x>>8;  B[3*n]=x;
+        B[4*n]=y>>24;  B[5*n]=y>>16;  B[6*n]=y>>8;  B[7*n]=y;
+    }
+    
+    /** Start I2S transmission: reset the device, point the DMA at dmaBuffers[0],
+     *  leave only the out-EOF interrupt enabled, then set tx_start. */
+    static void i2sStart()
+    {
+        // esp_intr_disable(gI2S_intr_handle);
+        // Serial.println("I2S start");
+        i2sReset();
+        //Serial.println(dmaBuffers[0]->sampleCount());
+        i2s->lc_conf.val=I2S_OUT_DATA_BURST_EN | I2S_OUTDSCR_BURST_EN | I2S_OUT_DATA_BURST_EN; // NOTE(review): I2S_OUT_DATA_BURST_EN is OR-ed in twice
+        i2s->out_link.addr = (uint32_t) & (dmaBuffers[0]->descriptor);
+        i2s->out_link.start = 1;
+        ////vTaskDelay(5);
+        i2s->int_clr.val = i2s->int_raw.val; // clear whatever interrupt flags are currently raised
+        // //vTaskDelay(5);
+        i2s->int_ena.out_dscr_err = 1; // NOTE(review): overwritten by int_ena.val = 0 below
+        //enable interrupt
+        ////vTaskDelay(5);
+        esp_intr_enable(gI2S_intr_handle);
+        // //vTaskDelay(5);
+        i2s->int_ena.val = 0;
+        i2s->int_ena.out_eof = 1; // out_eof is what interruptHandler() checks for
+        
+        //start transmission
+        i2s->conf.tx_start = 1;
+    }
+    
+    static void i2sReset()
+    {
+        // -- Pulse (set, then clear) the reset bits in lc_conf ...
+        const unsigned long linkResetBits = I2S_IN_RST_M | I2S_OUT_RST_M | I2S_AHBM_RST_M | I2S_AHBM_FIFO_RST_M;
+        i2s->lc_conf.val |= linkResetBits;
+        i2s->lc_conf.val &= ~linkResetBits;
+        // -- ... and then the RX/TX and FIFO reset bits in conf
+        const uint32_t confResetBits = I2S_RX_RESET_M | I2S_RX_FIFO_RESET_M | I2S_TX_RESET_M | I2S_TX_FIFO_RESET_M;
+        i2s->conf.val |= confResetBits;
+        i2s->conf.val &= ~confResetBits;
+    }
+    
+    static void i2sReset_DMA()
+    {
+        i2s->lc_conf.in_rst=1; i2s->lc_conf.in_rst=0;   // pulse in_rst: set, then clear
+        i2s->lc_conf.out_rst=1; i2s->lc_conf.out_rst=0; // pulse out_rst: set, then clear
+    }
+    
+    static void i2sReset_FIFO()
+    {
+        i2s->conf.rx_fifo_reset=1; i2s->conf.rx_fifo_reset=0; // pulse rx_fifo_reset: set, then clear
+        i2s->conf.tx_fifo_reset=1; i2s->conf.tx_fifo_reset=0; // pulse tx_fifo_reset: set, then clear
+    }
+    
+    static void i2sStop()
+    {
+        // Serial.println("I2S stop");
+        esp_intr_disable(gI2S_intr_handle); // undo the esp_intr_enable done in i2sStart()
+        i2sReset();
+        i2s->conf.rx_start = 0;
+        i2s->conf.tx_start = 0;             // clear the start bit set by i2sStart()
+    }
+};
+
+FASTLED_NAMESPACE_END
diff --git a/platforms/esp/32/clockless_rmt_esp32.h b/platforms/esp/32/clockless_rmt_esp32.h
new file mode 100644
index 0000000000..248325ad3b
--- /dev/null
+++ b/platforms/esp/32/clockless_rmt_esp32.h
@@ -0,0 +1,567 @@
+/*
+ * Integration into FastLED ClocklessController
+ * Copyright (c) 2018 Samuel Z. Guyer
+ * Copyright (c) 2017 Thomas Basler
+ * Copyright (c) 2017 Martin F. Falatic
+ *
+ * ESP32 support is provided using the RMT peripheral device -- a unit
+ * on the chip designed specifically for generating (and receiving)
+ * precisely-timed digital signals. Nominally for use in infrared
+ * remote controls, we use it to generate the signals for clockless
+ * LED strips. The main advantage of using the RMT device is that,
+ * once programmed, it generates the signal asynchronously, allowing
+ * the CPU to continue executing other code. It is also not vulnerable
+ * to interrupts or other timing problems that could disrupt the signal.
+ *
+ * The implementation strategy is borrowed from previous work and from
+ * the RMT support built into the ESP32 IDF. The RMT device has 8
+ * channels, which can be programmed independently to send sequences
+ * of high/low bits. Memory for each channel is limited, however, so
+ * in order to send a long sequence of bits, we need to continuously
+ * refill the buffer until all the data is sent. To do this, we fill
+ * half the buffer and then set an interrupt to go off when that half
+ * is sent. Then we refill that half while the second half is being
+ * sent. This strategy effectively overlaps computation (by the CPU)
+ * and communication (by the RMT).
+ *
+ * Since the RMT device only has 8 channels, we need a strategy to
+ * allow more than 8 LED controllers. Our driver assigns controllers
+ * to channels on the fly, queuing up controllers as necessary until a
+ * channel is free. The main showPixels routine just fires off the
+ * first 8 controllers; the interrupt handler starts new controllers
+ * asynchronously as previous ones finish. So, for example, it can
+ * send the data for 8 controllers simultaneously, but 16 controllers
+ * would take approximately twice as much time.
+ *
+ * There is a #define that allows a program to control the total
+ * number of channels that the driver is allowed to use. It defaults
+ * to 8 -- use all the channels. Setting it to 1, for example, results
+ * in fully serial output:
+ *
+ *     #define FASTLED_RMT_MAX_CHANNELS 1
+ *
+ * OTHER RMT APPLICATIONS
+ *
+ * The default FastLED driver takes over control of the RMT interrupt
+ * handler, making it hard to use the RMT device for other
+ * (non-FastLED) purposes. You can change its behavior to use the ESP
+ * core driver instead, allowing other RMT applications to co-exist.
+ * To switch to this mode, add the following directive (with a value;
+ * the code tests it as a boolean) before you include FastLED.h:
+ *
+ *      #define FASTLED_RMT_BUILTIN_DRIVER true
+ *
+ * There may be a performance penalty for using this mode. We need to
+ * compute the RMT signal for the entire LED strip ahead of time,
+ * rather than overlapping it with communication. We also need a large
+ * buffer to hold the signal specification. Each bit of pixel data is
+ * represented by a 32-bit pulse specification, so it is a 32X blow-up
+ * in memory use.
+ *
+ *
+ * Based on public domain code created 19 Nov 2016 by Chris Osborn <fozztexx@fozztexx.com>
+ * http://insentricity.com *
+ *
+ */
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#pragma once
+
+FASTLED_NAMESPACE_BEGIN
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "esp32-hal.h"
+#include "esp_intr.h"
+#include "driver/gpio.h"
+#include "driver/rmt.h"
+#include "driver/periph_ctrl.h"
+#include "freertos/semphr.h"
+#include "soc/rmt_struct.h"
+
+#include "esp_log.h"
+
+#ifdef __cplusplus
+}
+#endif
+
+__attribute__ ((always_inline)) inline static uint32_t __clock_cycles() {
+  uint32_t cyc;
+  __asm__ __volatile__ ("rsr %0,ccount":"=a" (cyc)); // read the 'ccount' special register into cyc
+  return cyc;
+}
+
+#define FASTLED_HAS_CLOCKLESS 1
+
+// -- Configuration constants
+#define DIVIDER             2 /* 4, 8 still seem to work, but timings become marginal */
+#define MAX_PULSES         32 /* A channel has a 64 "pulse" buffer - we use half per pass */
+
+// -- Convert ESP32 cycles back into nanoseconds
+#define ESPCLKS_TO_NS(_CLKS) (((long)(_CLKS) * 1000L) / F_CPU_MHZ)
+
+// -- Convert nanoseconds into RMT cycles (integer division; 25 ns per cycle with DIVIDER 2)
+#define F_CPU_RMT       (  80000000L)
+#define NS_PER_SEC      (1000000000L)
+#define CYCLES_PER_SEC  (F_CPU_RMT/DIVIDER)
+#define NS_PER_CYCLE    ( NS_PER_SEC / CYCLES_PER_SEC )
+#define NS_TO_CYCLES(n) ( (n) / NS_PER_CYCLE )
+
+// -- Convert ESP32 cycles to RMT cycles
+#define TO_RMT_CYCLES(_CLKS) NS_TO_CYCLES(ESPCLKS_TO_NS(_CLKS))    
+
+// -- Number of cycles to signal the strip to latch (50,000 ns)
+#define RMT_RESET_DURATION NS_TO_CYCLES(50000)
+
+// -- Core or custom driver. Must expand to a boolean: it is tested with if (...)
+#ifndef FASTLED_RMT_BUILTIN_DRIVER
+#define FASTLED_RMT_BUILTIN_DRIVER false
+#endif
+
+// -- Max number of controllers we can support
+#ifndef FASTLED_RMT_MAX_CONTROLLERS
+#define FASTLED_RMT_MAX_CONTROLLERS 32
+#endif
+
+// -- Number of RMT channels to use (up to 8)
+//    Redefine this value to 1 to force serial output
+#ifndef FASTLED_RMT_MAX_CHANNELS
+#define FASTLED_RMT_MAX_CHANNELS 8
+#endif
+
+// -- Array of all controllers, in the order their init() was called
+static CLEDController * gControllers[FASTLED_RMT_MAX_CONTROLLERS];
+
+// -- Current set of active controllers, indexed by the RMT
+//    channel assigned to them.
+static CLEDController * gOnChannel[FASTLED_RMT_MAX_CHANNELS];
+
+static int gNumControllers = 0;
+static int gNumStarted = 0;
+static int gNumDone = 0;
+static int gNext = 0;
+
+static intr_handle_t gRMT_intr_handle = NULL;
+
+// -- Global semaphore for the whole show process
+//    Semaphore is not given until all data has been sent
+static xSemaphoreHandle gTX_sem = NULL;
+
+static bool gInitialized = false;
+
+template <int DATA_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = RGB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 5>
+class ClocklessController : public CPixelLEDController<RGB_ORDER>
+{
+    // -- RMT has 8 channels, numbered 0 to 7
+    rmt_channel_t  mRMT_channel;
+
+    // -- Store the GPIO pin
+    gpio_num_t     mPin;
+
+    // -- This instantiation forces a check on the pin choice
+    FastPin<DATA_PIN> mFastPin;
+
+    // -- Timing values for zero and one bits, derived from T1, T2, and T3
+    rmt_item32_t   mZero;
+    rmt_item32_t   mOne;
+
+    // -- State information for keeping track of where we are in the pixel data
+    uint8_t *      mPixelData = NULL;
+    int            mSize = 0;
+    int            mCurByte;
+    uint16_t       mCurPulse;
+
+    // -- Buffer to hold all of the pulses. For the version that uses
+    //    the RMT driver built into the ESP core.
+    rmt_item32_t * mBuffer;
+    uint16_t       mBufferSize;
+
+public:
+
+    void init()
+    {
+        // -- Precompute the RMT items for a zero bit and a one bit from
+        //    the timing values given in the template instantiation.
+        //    Every item is a HIGH phase (level0) then a LOW phase (level1).
+
+        // T0H, T0L
+        mZero.level0 = 1;
+        mZero.duration0 = TO_RMT_CYCLES(T1);
+        mZero.level1 = 0;
+        mZero.duration1 = TO_RMT_CYCLES(T2 + T3);
+
+        // T1H, T1L
+        mOne.level0 = 1;
+        mOne.duration0 = TO_RMT_CYCLES(T1+T2);
+        mOne.level1 = 0;
+        mOne.duration1 = TO_RMT_CYCLES(T3);
+
+        // -- Remember the output pin and register this controller
+        mPin = gpio_num_t(DATA_PIN);
+        gControllers[gNumControllers] = this;
+        gNumControllers++;
+    }
+
+    virtual uint16_t getMaxRefreshRate() const { return 400; } // always 400; presumably refreshes per second -- TODO confirm
+
+protected:
+
+    void initRMT()
+    {
+        // -- One-time setup of every RMT channel; later calls return at once
+        if (gInitialized) return;
+
+        for (int i = 0; i < FASTLED_RMT_MAX_CHANNELS; i++) {
+            gOnChannel[i] = NULL;
+
+            // -- RMT configuration for transmission. Start from an all-zero
+            //    struct so fields not assigned below are never indeterminate.
+            rmt_config_t rmt_tx = {};
+            rmt_tx.channel = rmt_channel_t(i);
+            rmt_tx.rmt_mode = RMT_MODE_TX;
+            rmt_tx.gpio_num = mPin;  // The particular pin will be assigned later
+            rmt_tx.mem_block_num = 1;
+            rmt_tx.clk_div = DIVIDER;
+            rmt_tx.tx_config.loop_en = false;
+            rmt_tx.tx_config.carrier_level = RMT_CARRIER_LEVEL_LOW;
+            rmt_tx.tx_config.carrier_en = false;
+            rmt_tx.tx_config.idle_level = RMT_IDLE_LEVEL_LOW;
+            rmt_tx.tx_config.idle_output_en = true;
+            // -- Apply the configuration
+            rmt_config(&rmt_tx);
+
+            if (FASTLED_RMT_BUILTIN_DRIVER) {
+                rmt_driver_install(rmt_channel_t(i), 0, 0);
+            } else {
+                // -- Set up the RMT to send 1/2 of the pulse buffer and then
+                //    generate an interrupt. When we get this interrupt we
+                //    fill the other half in preparation (kind of like double-buffering)
+                rmt_set_tx_thr_intr_en(rmt_channel_t(i), true, MAX_PULSES);
+            }
+        }
+
+        // -- Create a semaphore to block execution until all the controllers are done
+        if (gTX_sem == NULL) {
+            gTX_sem = xSemaphoreCreateBinary();
+            xSemaphoreGive(gTX_sem);
+        }
+                
+        if ( ! FASTLED_RMT_BUILTIN_DRIVER) {
+            // -- Allocate the interrupt if we have not done so yet. This
+            //    interrupt handler must work for all different kinds of
+            //    strips, so it delegates to the refill function for each
+            //    specific instantiation of ClocklessController.
+            if (gRMT_intr_handle == NULL)
+                esp_intr_alloc(ETS_RMT_INTR_SOURCE, 0, interruptHandler, 0, &gRMT_intr_handle);
+        }
+
+        gInitialized = true;
+    }
+
+    // -- Show pixels
+    //    This is the main entry point for the controller.
+    virtual void showPixels(PixelController<RGB_ORDER> & pixels)
+    {
+        if (gNumStarted == 0) {
+            // -- First controller: make sure everything is set up
+            initRMT();
+            xSemaphoreTake(gTX_sem, portMAX_DELAY);
+        }
+
+        // -- Initialize the local state, save a pointer to the pixel
+        //    data. We need to make a copy because pixels is a local
+        //    variable in the calling function, and this data structure
+        //    needs to outlive this call to showPixels.
+
+        //if (mPixels != NULL) delete mPixels;
+        //mPixels = new PixelController<RGB_ORDER>(pixels);
+        if (FASTLED_RMT_BUILTIN_DRIVER)
+            convertAllPixelData(pixels);
+        else
+            copyPixelData(pixels);
+
+        // -- Keep track of the number of strips we've seen
+        gNumStarted++;
+
+        // -- The last call to showPixels is the one responsible for doing
+        //    all of the actual worl
+        if (gNumStarted == gNumControllers) {
+            gNext = 0;
+
+            // -- First, fill all the available channels
+            int channel = 0;
+            while (channel < FASTLED_RMT_MAX_CHANNELS && gNext < gNumControllers) {
+                startNext(channel);
+                channel++;
+            }
+
+            // -- Wait here while the rest of the data is sent. The interrupt handler
+            //    will keep refilling the RMT buffers until it is all sent; then it
+            //    gives the semaphore back.
+            xSemaphoreTake(gTX_sem, portMAX_DELAY);
+            xSemaphoreGive(gTX_sem);
+
+            // -- Reset the counters
+            gNumStarted = 0;
+            gNumDone = 0;
+            gNext = 0;
+        }
+    }
+
+    // -- Copy pixel data
+    //    Make a safe copy of the pixel data, so that the FastLED show
+    //    function can continue to the next controller while the RMT
+    //    device starts sending this data asynchronously.
+    virtual void copyPixelData(PixelController<RGB_ORDER> & pixels)
+    {
+        // -- Make sure we have a buffer of the right size
+        //    (3 bytes per pixel)
+        int size_needed = pixels.size() * 3;
+        if (size_needed > mSize) {
+            if (mPixelData != NULL) free(mPixelData);
+            mSize = size_needed;
+            mPixelData = (uint8_t *) malloc( mSize);
+        }
+
+        // -- Cycle through the R,G, and B values in the right order,
+        //    storing the resulting raw pixel data in the buffer.
+        int cur = 0;
+        while (pixels.has(1)) {
+            mPixelData[cur++] = pixels.loadAndScale0();
+            mPixelData[cur++] = pixels.loadAndScale1();
+            mPixelData[cur++] = pixels.loadAndScale2();
+            pixels.advanceData();
+            pixels.stepDithering();
+        }
+    }
+
+    // -- Convert all pixels to RMT pulses
+    //    This function is only used when the user chooses to use the
+    //    built-in RMT driver, which needs all of the RMT pulses
+    //    up-front.
+    virtual void convertAllPixelData(PixelController<RGB_ORDER> & pixels)
+    {
+        // -- Compute the pulse values for the whole strip at once.
+        //    Requires a large buffer
+        mBufferSize = pixels.size() * 3 * 8;
+
+        if (mBuffer == NULL) {
+            mBuffer = (rmt_item32_t *) calloc( mBufferSize, sizeof(rmt_item32_t));
+        }
+
+        // -- Cycle through the R,G, and B values in the right order,
+        //    storing the pulses in the big buffer
+        mCurPulse = 0;
+        int cur = 0;
+        uint32_t byteval;
+        while (pixels.has(1)) {
+            byteval = pixels.loadAndScale0();
+            convertByte(byteval);
+            byteval = pixels.loadAndScale1();
+            convertByte(byteval);
+            byteval = pixels.loadAndScale2();
+            convertByte(byteval);
+            pixels.advanceData();
+            pixels.stepDithering();
+        }
+
+        mBuffer[mCurPulse-1].duration1 = RMT_RESET_DURATION;
+        assert(mCurPulse == mBufferSize);
+    }
+
+    void convertByte(uint32_t byteval)
+    {
+        // -- Write one byte's worth of RMT pulses to the big buffer
+        byteval <<= 24;
+        for (register uint32_t j = 0; j < 8; j++) {
+            mBuffer[mCurPulse] = (byteval & 0x80000000L) ? mOne : mZero;
+            byteval <<= 1;
+            mCurPulse++;
+        }
+    }
+
+    // -- Start up the next controller
+    //    This method is static so that it can dispatch to the
+    //    appropriate startOnChannel method of the given controller.
+    static void startNext(int channel)
+    {
+        if (gNext < gNumControllers) {
+            ClocklessController * pController = static_cast<ClocklessController*>(gControllers[gNext]);
+            pController->startOnChannel(channel);
+            gNext++;
+        }
+    }
+
+    // -- Start this controller on the given channel
+    //    This function just initiates the RMT write; it does not wait
+    //    for it to finish.
+    void startOnChannel(int channel)
+    {
+        // -- Assign this channel and configure the RMT
+        mRMT_channel = rmt_channel_t(channel);
+
+        // -- Store a reference to this controller, so we can get it
+        //    inside the interrupt handler
+        gOnChannel[channel] = this;
+
+        // -- Assign the pin to this channel
+        rmt_set_pin(mRMT_channel, RMT_MODE_TX, mPin);
+
+        if (FASTLED_RMT_BUILTIN_DRIVER) {
+            // -- Use the built-in RMT driver to send all the data in one shot
+            rmt_register_tx_end_callback(doneOnChannel, 0);
+            rmt_write_items(mRMT_channel, mBuffer, mBufferSize, false);
+        } else {
+            // -- Use our custom driver to send the data incrementally
+
+            // -- Turn on the interrupts
+            rmt_set_tx_intr_en(mRMT_channel, true);
+        
+            // -- Initialize the counters that keep track of where we are in
+            //    the pixel data.
+            mCurPulse = 0;
+            mCurByte = 0;
+
+            // -- Fill both halves of the buffer
+            fillHalfRMTBuffer();
+            fillHalfRMTBuffer();
+
+            // -- Turn on the interrupts
+            rmt_set_tx_intr_en(mRMT_channel, true);
+            
+            // -- Start the RMT TX operation
+            rmt_tx_start(mRMT_channel, true);
+        }
+    }
+
+    // -- A controller is done 
+    //    This function is called when a controller finishes writing
+    //    its data. It is called either by the custom interrupt
+    //    handler (below), or as a callback from the built-in
+    //    interrupt handler. It is static because we don't know which
+    //    controller is done until we look it up.
+    static void doneOnChannel(rmt_channel_t channel, void * arg)
+    {
+        ClocklessController * controller = static_cast<ClocklessController*>(gOnChannel[channel]);
+        portBASE_TYPE HPTaskAwoken = 0;
+
+        // -- Turn off output on the pin
+        gpio_matrix_out(controller->mPin, 0x100, 0, 0);
+
+        gOnChannel[channel] = NULL;
+        gNumDone++;
+
+        if (gNumDone == gNumControllers) {
+            // -- If this is the last controller, signal that we are all done
+            xSemaphoreGiveFromISR(gTX_sem, &HPTaskAwoken);
+            if(HPTaskAwoken == pdTRUE) portYIELD_FROM_ISR();
+        } else {
+            // -- Otherwise, if there are still controllers waiting, then
+            //    start the next one on this channel
+            if (gNext < gNumControllers)
+                startNext(channel);
+        }
+    }
+    
+    // -- Custom interrupt handler
+    //    This interrupt handler handles two cases: a controller is
+    //    done writing its data, or a controller needs to fill the
+    //    next half of the RMT buffer with data.
+    static IRAM_ATTR void interruptHandler(void *arg)
+    {
+        // -- The basic structure of this code is borrowed from the
+        //    interrupt handler in esp-idf/components/driver/rmt.c
+        uint32_t intr_st = RMT.int_st.val;
+        uint8_t channel;
+
+        for (channel = 0; channel < FASTLED_RMT_MAX_CHANNELS; channel++) {
+            int tx_done_bit = channel * 3;
+            int tx_next_bit = channel + 24;
+
+            if (gOnChannel[channel] != NULL) {
+
+                // -- More to send on this channel
+                if (intr_st & BIT(tx_next_bit)) {
+                    RMT.int_clr.val |= BIT(tx_next_bit);
+                    
+                    // -- Refill the half of the buffer that we just finished,
+                    //    allowing the other half to proceed.
+                    ClocklessController * controller = static_cast<ClocklessController*>(gOnChannel[channel]);
+                    controller->fillHalfRMTBuffer();
+                } else {
+                    // -- Transmission is complete on this channel
+                    if (intr_st & BIT(tx_done_bit)) {
+                        RMT.int_clr.val |= BIT(tx_done_bit);
+                        doneOnChannel(rmt_channel_t(channel), 0);
+                    }
+                }
+            }
+        }
+    }
+
+    // -- Fill the RMT buffer
+    //    This function fills the next 32 slots in the RMT write
+    //    buffer with pixel data. It also handles the case where the
+    //    pixel data is exhausted, so we need to fill the RMT buffer
+    //    with zeros to signal that it's done.
+    void fillHalfRMTBuffer()
+    {
+        uint32_t one_val = mOne.val;
+        uint32_t zero_val = mZero.val;
+
+        // -- Convert (up to) 32 bits of the raw pixel data into
+        //    into RMT pulses that encode the zeros and ones.
+        int pulses = 0;
+        uint32_t byteval;
+        while (pulses < 32 && mCurByte < mSize) {
+            // -- Get one byte
+            byteval = mPixelData[mCurByte++];
+            byteval <<= 24;
+            // Shift bits out, MSB first, setting RMTMEM.chan[n].data32[x] to the 
+            // rmt_item32_t value corresponding to the buffered bit value
+            for (register uint32_t j = 0; j < 8; j++) {
+                uint32_t val = (byteval & 0x80000000L) ? one_val : zero_val;
+                RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = val;
+                byteval <<= 1;
+                mCurPulse++;
+            }
+            pulses += 8;
+        }
+
+        // -- When we reach the end of the pixel data, fill the rest of the
+        //    RMT buffer with 0's, which signals to the device that we're done.
+        if (mCurByte == mSize) {
+            while (pulses < 32) {
+                RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = 0;
+                mCurPulse++;
+                pulses++;
+            }
+        }
+        
+        // -- When we have filled the back half the buffer, reset the position to the first half
+        if (mCurPulse >= MAX_PULSES*2)
+            mCurPulse = 0;
+    }
+};
+
+FASTLED_NAMESPACE_END
diff --git a/platforms/esp/32/fastled_esp32.h b/platforms/esp/32/fastled_esp32.h
index fabbfeda3d..edf27e7d99 100644
--- a/platforms/esp/32/fastled_esp32.h
+++ b/platforms/esp/32/fastled_esp32.h
@@ -1,5 +1,11 @@
 #pragma once
 
 #include "fastpin_esp32.h"
-#include "clockless_esp32.h"
+
+#ifdef FASTLED_ESP32_I2S
+#include "clockless_i2s_esp32.h"
+#else
+#include "clockless_rmt_esp32.h"
+#endif
+
 // #include "clockless_block_esp32.h"

From 6ab845cda246a36cd1117fb99910cb5a3e73a3d0 Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Tue, 30 Apr 2019 12:02:04 -0400
Subject: [PATCH 049/204] Removed the old header

---
 platforms/esp/32/clockless_esp32.h | 1164 ----------------------------
 1 file changed, 1164 deletions(-)
 delete mode 100644 platforms/esp/32/clockless_esp32.h

diff --git a/platforms/esp/32/clockless_esp32.h b/platforms/esp/32/clockless_esp32.h
deleted file mode 100644
index 91e8362571..0000000000
--- a/platforms/esp/32/clockless_esp32.h
+++ /dev/null
@@ -1,1164 +0,0 @@
-/*
- * ESP32 driver for clockless LED chips
- *
- * There are two different drivers available for the ESP32: RMT and I2S.
- *
- * The RMT driver is the default. It uses the remote control
- * peripheral (RMT) to drive up to 8 strips in parallel, queueing any
- * additional strips, which are output as RMT channels become
- * available. The reason it is the default is that it can handle any
- * mix of strips with different chips, different timing, different
- * lengths, etc.
- *
- * The I2S implementation can drive up to 24 strips in parallel, but
- * with the following limitation: all the strips must have the same
- * timing (i.e., they must all use the same chip).
- *
- * To enable the I2S driver, add the following line *before* including
- * FastLED.h (no other changes are necessary):
- *
- * #define FASTLED_ESP32_I2S true
- *
- * === DETAILS ===
- *
- * RMT Integration into FastLED ClocklessController
- * Copyright (c) 2018 Samuel Z. Guyer
- * Copyright (c) 2017 Thomas Basler
- * Copyright (c) 2017 Martin F. Falatic
- *
- * ESP32 support is provided using the RMT peripheral device -- a unit
- * on the chip designed specifically for generating (and receiving)
- * precisely-timed digital signals. Nominally for use in infrared
- * remote controls, we use it to generate the signals for clockless
- * LED strips. The main advantage of using the RMT device is that,
- * once programmed, it generates the signal asynchronously, allowing
- * the CPU to continue executing other code. It is also not vulnerable
- * to interrupts or other timing problems that could disrupt the signal.
- *
- * The implementation strategy is borrowed from previous work and from
- * the RMT support built into the ESP32 IDF. The RMT device has 8
- * channels, which can be programmed independently to send sequences
- * of high/low bits. Memory for each channel is limited, however, so
- * in order to send a long sequence of bits, we need to continuously
- * refill the buffer until all the data is sent. To do this, we fill
- * half the buffer and then set an interrupt to go off when that half
- * is sent. Then we refill that half while the second half is being
- * sent. This strategy effectively overlaps computation (by the CPU)
- * and communication (by the RMT).
- *
- * Since the RMT device only has 8 channels, we need a strategy to
- * allow more than 8 LED controllers. Our driver assigns controllers
- * to channels on the fly, queuing up controllers as necessary until a
- * channel is free. The main showPixels routine just fires off the
- * first 8 controllers; the interrupt handler starts new controllers
- * asynchronously as previous ones finish. So, for example, it can
- * send the data for 8 controllers simultaneously, but 16 controllers
- * would take approximately twice as much time.
- *
- * There is a #define that allows a program to control the total
- * number of channels that the driver is allowed to use. It defaults
- * to 8 -- use all the channels. Setting it to 1, for example, results
- * in fully serial output:
- *
- *     #define FASTLED_RMT_MAX_CHANNELS 1
- *
- * OTHER RMT APPLICATIONS
- *
- * The default FastLED driver takes over control of the RMT interrupt
- * handler, making it hard to use the RMT device for other
- * (non-FastLED) purposes. You can change it's behavior to use the ESP
- * core driver instead, allowing other RMT applications to
- * co-exist. To switch to this mode, add the following directive
- * before you include FastLED.h:
- *
- *      #define FASTLED_RMT_BUILTIN_DRIVER
- *
- * There may be a performance penalty for using this mode. We need to
- * compute the RMT signal for the entire LED strip ahead of time,
- * rather than overlapping it with communication. We also need a large
- * buffer to hold the signal specification. Each bit of pixel data is
- * represented by a 32-bit pulse specification, so it is a 32X blow-up
- * in memory use.
- *
- *
- * Based on public domain code created 19 Nov 2016 by Chris Osborn <fozztexx@fozztexx.com>
- * http://insentricity.com *
- *
- * I2S Integration
- * Copyright (c) 2019 Yves Bazin
- * Copyright (c) 2019 Samuel Z. Guyer
- *
- * 
- */
-
-
-/*
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#pragma once
-
-FASTLED_NAMESPACE_BEGIN
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "esp_intr.h"
-#include "driver/gpio.h"
-#include "esp_log.h"
-#include "driver/periph_ctrl.h"
-
-#ifdef FASTLED_ESP32_I2S
-#include "esp_heap_caps.h"
-#include "soc/soc.h"
-#include "soc/gpio_sig_map.h"
-#include "soc/i2s_reg.h"
-#include "soc/i2s_struct.h"
-#include "soc/io_mux_reg.h"
-#include "rom/lldesc.h"
-#else
-#include "esp32-hal.h"
-#include "driver/rmt.h"
-#include "freertos/semphr.h"
-#include "soc/rmt_struct.h"
-#endif
-
-#ifdef __cplusplus
-}
-#endif
-
-__attribute__ ((always_inline)) inline static uint32_t __clock_cycles() {
-    uint32_t cyc;
-    __asm__ __volatile__ ("rsr %0,ccount":"=a" (cyc));
-    return cyc;
-}
-
-
-// === Common stuff ==========================================
-
-#define FASTLED_HAS_CLOCKLESS 1
-#define NUM_COLOR_CHANNELS 3
-
-// -- Max number of controllers we can support
-#ifndef FASTLED_ESP32_MAX_CONTROLLERS
-#define FASTLED_ESP32_MAX_CONTROLLERS 24
-#endif
-
-// -- Convert ESP32 cycles back into nanoseconds
-#define ESPCLKS_TO_NS(_CLKS) (((long)(_CLKS) * 1000L) / F_CPU_MHZ)
-
-// -- Array of all controllers
-static CLEDController * gControllers[FASTLED_ESP32_MAX_CONTROLLERS];
-static int gNumControllers = 0;
-static int gNumStarted = 0;
-
-// -- Global semaphore for the whole show process
-//    Semaphore is not given until all data has been sent
-static xSemaphoreHandle gTX_sem = NULL;
-
-// -- One-time initialiation (both I2S and RMT)
-static bool gInitialized = false;
-
-// === I2S specific stuff ====================================
-
-// -- Choose which I2S device to use
-#ifndef I2S_DEVICE
-#define I2S_DEVICE 0
-#endif
-
-// -- I2S clock
-#define I2S_BASE_CLK (80000000L)
-#define I2S_MAX_CLK (20000000L) //more tha a certain speed and the I2s looses some bits
-#define I2S_MAX_PULSE_PER_BIT 20 //put it higher to get more accuracy but it could decrease the refresh rate without real improvement
-
-static intr_handle_t gI2S_intr_handle = NULL;
-
-static i2s_dev_t * i2s;          // A pointer to the memory-mapped structure: I2S0 or I2S1
-static int i2s_base_pin_index;   // I2S goes to these pins until we remap them using the GPIO matrix
-
-// --- I2S DMA buffers
-struct DMABuffer {
-    lldesc_t descriptor;
-    uint8_t * buffer;
-};
-
-// -- We use two DMA buffers: one is being sent while we fill the other
-#define NUM_DMA_BUFFERS 2
-static DMABuffer * dmaBuffers[NUM_DMA_BUFFERS];
-
-// -- Counters to track progress
-static int gCurBuffer = 0;
-static bool gDoneFilling = false;
-
-// -- Bit patterns
-//    We configure the I2S data clock so that each pulse is
-//    125ns. Depending on the kind of LED we compute a pattern of
-//    pulses that match the timing. For example, a "1" bit for the
-//    WS2812 consists of 700-900ns high, followed by 300-500ns
-//    low. Using 125ns per pulse, we can send a "1" bit using this
-//    pattern: 1111111000 (a total of 10 bits, or 1250ns)
-//
-//    For now, we require all strips to be the same chipset, so these
-//    are global variables.
-
-static int      gPulsesPerBit = 0;
-static uint32_t gOneBit[40] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
-static uint32_t gZeroBit[40]  = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
-
-static int ones_for_one;
-static int ones_for_zero;
-// -- Temp buffers for pixels and bits being formatted for DMA
-static uint8_t gPixelRow[NUM_COLOR_CHANNELS][32];
-static uint8_t gPixelBits[NUM_COLOR_CHANNELS][8][4];
-static int CLOCK_DIVIDER_N;
-static int CLOCK_DIVIDER_A;
-static int CLOCK_DIVIDER_B;
-
-#endif
-
-// === RMT specific stuff ====================================
-
-// -- Configuration constants
-#define DIVIDER             2 /* 4, 8 still seem to work, but timings become marginal */
-#define MAX_PULSES         32 /* A channel has a 64 "pulse" buffer - we use half per pass */
-
-// -- Convert nanoseconds into RMT cycles
-#define F_CPU_RMT       (  80000000L)
-#define NS_PER_SEC      (1000000000L)
-#define CYCLES_PER_SEC  (F_CPU_RMT/DIVIDER)
-#define NS_PER_CYCLE    ( NS_PER_SEC / CYCLES_PER_SEC )
-#define NS_TO_CYCLES(n) ( (n) / NS_PER_CYCLE )
-
-// -- Convert ESP32 cycles to RMT cycles
-#define TO_RMT_CYCLES(_CLKS) NS_TO_CYCLES(ESPCLKS_TO_NS(_CLKS))    
-
-// -- Number of cycles to signal the strip to latch
-#define RMT_RESET_DURATION NS_TO_CYCLES(50000)
-
-// -- Core or custom driver
-#ifndef FASTLED_RMT_BUILTIN_DRIVER
-#define FASTLED_RMT_BUILTIN_DRIVER false
-#endif
-
-// -- Number of RMT channels to use (up to 8)
-//    Redefine this value to 1 to force serial output
-#ifndef FASTLED_RMT_MAX_CHANNELS
-#define FASTLED_RMT_MAX_CHANNELS 8
-#endif
-
-// -- Current set of active controllers, indexed by the RMT
-//    channel assigned to them.
-static CLEDController * gOnChannel[FASTLED_RMT_MAX_CHANNELS];
-
-static int gNumDone = 0;
-static int gNext = 0;
-
-static intr_handle_t gRMT_intr_handle = NULL;
-
-// =====================================================================
-
-template <int DATA_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = RGB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 5>
-class ClocklessController : public CPixelLEDController<RGB_ORDER>
-{
-    // -- Store the GPIO pin
-    gpio_num_t     mPin;
-    
-    // -- This instantiation forces a check on the pin choice
-    FastPin<DATA_PIN> mFastPin;
-    
-    // -- Save the pixel controller
-    PixelController<RGB_ORDER> * mPixels;
-
-    // -- RMT has 8 channels, numbered 0 to 7
-    rmt_channel_t  mRMT_channel;
-    
-    // -- Timing values for zero and one bits, derived from T1, T2, and T3
-    rmt_item32_t   mZero;
-    rmt_item32_t   mOne;
-
-    // -- State information for keeping track of where we are in the pixel data
-    uint8_t *      mPixelData = NULL;
-    int            mSize = 0;
-    int            mCurByte;
-    uint16_t       mCurPulse;
-
-    // -- Buffer to hold all of the pulses. For the version that uses
-    //    the RMT driver built into the ESP core.
-    rmt_item32_t * mBuffer;
-    uint16_t       mBufferSize;
-
-public:
-
-    void init()
-    {
-        // -- Allocate space to save the pixel controller
-        //    during parallel output
-        mPixels = (PixelController<RGB_ORDER> *) malloc(sizeof(PixelController<RGB_ORDER>));
-        
-        gControllers[gNumControllers] = this;
-        int my_index = gNumControllers;
-        gNumControllers++;
-
-        // -- Store the pin
-        mPin = gpio_num_t(DATA_PIN);
-
-        if (FASTLED_ESP32_I2S) {
-            // -- One-time initialization of I2S system
-            i2sInit();
-        
-            // -- Set up the pin We have to do two things: configure the
-            //    actual GPIO pin, and route the output from the default
-            //    pin (determined by the I2S device) to the pin we
-            //    want. We compute the default pin using the index of this
-            //    controller in the array. This order is crucial because
-            //    the bits must go into the DMA buffer in the same order.
-        
-            PIN_FUNC_SELECT(GPIO_PIN_MUX_REG[DATA_PIN], PIN_FUNC_GPIO);
-            gpio_set_direction(mPin, (gpio_mode_t)GPIO_MODE_DEF_OUTPUT);
-            pinMode(mPin,OUTPUT);
-            gpio_matrix_out(mPin, i2s_base_pin_index + my_index, false, false);
-        } else {
-            // -- One-time initialization of RMT system
-            rmtInit();
-
-            // -- RMT: Precompute rmt items corresponding to a zero bit and a one bit
-            //         according to the timing values given in the template instantiation
-            // T1H
-            mOne.level0 = 1;
-            mOne.duration0 = TO_RMT_CYCLES(T1+T2);
-            // T1L
-            mOne.level1 = 0;
-            mOne.duration1 = TO_RMT_CYCLES(T3);
-
-            // T0H
-            mZero.level0 = 1;
-            mZero.duration0 = TO_RMT_CYCLES(T1);
-            // T0L
-            mZero.level1 = 0;
-            mZero.duration1 = TO_RMT_CYCLES(T2 + T3);
-        }
-    }
-    
-    virtual uint16_t getMaxRefreshRate() const { return 400; }
-    
-protected:
-   
-    // ----------------------------------------------------------------------
-    //  RMT Initialization
-    // ----------------------------------------------------------------------
-
-    static void rmtInit()
-    {
-        // -- Only need to do this once
-        if (gInitialized) return;
-
-        for (int i = 0; i < FASTLED_ESP32_MAX_CHANNELS; i++) {
-            gOnChannel[i] = NULL;
-
-            // -- RMT configuration for transmission
-            rmt_config_t rmt_tx;
-            rmt_tx.channel = rmt_channel_t(i);
-            rmt_tx.rmt_mode = RMT_MODE_TX;
-            rmt_tx.gpio_num = mPin;  // The particular pin will be assigned later
-            rmt_tx.mem_block_num = 1;
-            rmt_tx.clk_div = DIVIDER;
-            rmt_tx.tx_config.loop_en = false;
-            rmt_tx.tx_config.carrier_level = RMT_CARRIER_LEVEL_LOW;
-            rmt_tx.tx_config.carrier_en = false;
-            rmt_tx.tx_config.idle_level = RMT_IDLE_LEVEL_LOW;
-            rmt_tx.tx_config.idle_output_en = true;
-                
-            // -- Apply the configuration
-            rmt_config(&rmt_tx);
-
-            if (FASTLED_RMT_BUILTIN_DRIVER) {
-                rmt_driver_install(rmt_channel_t(i), 0, 0);
-            } else {
-                // -- Set up the RMT to send 1/2 of the pulse buffer and then
-                //    generate an interrupt. When we get this interrupt we
-                //    fill the other half in preparation (kind of like double-buffering)
-                rmt_set_tx_thr_intr_en(rmt_channel_t(i), true, MAX_PULSES);
-            }
-        }
-
-        // -- Create a semaphore to block execution until all the controllers are done
-        if (gTX_sem == NULL) {
-            gTX_sem = xSemaphoreCreateBinary();
-            xSemaphoreGive(gTX_sem);
-        }
-                
-        if ( ! FASTLED_RMT_BUILTIN_DRIVER) {
-            // -- Allocate the interrupt if we have not done so yet. This
-            //    interrupt handler must work for all different kinds of
-            //    strips, so it delegates to the refill function for each
-            //    specific instantiation of ClocklessController.
-            if (gRMT_intr_handle == NULL)
-                esp_intr_alloc(ETS_RMT_INTR_SOURCE, 0, rmtInterruptHandler, 0, &gRMT_intr_handle);
-        }
-
-        gInitialized = true;
-    }
-
-    // ----------------------------------------------------------------------
-    //  I2S Initialization
-    // ----------------------------------------------------------------------
-    
-    static int pgcd(int smallest,int precision,int a,int b,int c)
-    {
-        int pgc_=1;
-        for( int i=smallest;i>0;i--) 
-        {
-            
-            if( a%i<=precision && b%i<=precision && c%i<=precision)
-            {
-                pgc_=i;
-                break;
-            }
-        }
-        return pgc_;
-    }
-    
-    static void initBitPatterns()
-    {
-        // Precompute the bit patterns based on the I2S sample rate
-        uint32_t T1ns = ESPCLKS_TO_NS(T1);
-        uint32_t T2ns = ESPCLKS_TO_NS(T2);
-        uint32_t T3ns = ESPCLKS_TO_NS(T3);
-        
-        Serial.print("T1 = "); Serial.print(T1); Serial.print(" ns "); Serial.println(T1ns);
-        Serial.print("T2 = "); Serial.print(T2); Serial.print(" ns "); Serial.println(T2ns);
-        Serial.print("T3 = "); Serial.print(T3); Serial.print(" ns "); Serial.println(T3ns);
-        
-        /*
-         We calculate the best pcgd to the timing
-         ie
-         WS2811 77 77 154 => 1  1 2 => nb pulses= 4
-         WS2812 60 150 90 => 2 5 3 => nb pulses=10
-         */
-        int smallest=0;
-        if (T1>T2)
-            smallest=T2;
-        else
-            smallest=T1;
-        if(smallest>T3)
-            smallest=T3;
-        double freq=(double)1/(double)(T1ns + T2ns + T3ns);
-        Serial.printf("chipset frequency:%f Khz\n", 1000000L*freq);
-        // Serial.printf("smallest %d\n",smallest);
-        int pgc_=1;
-        int precision=0;
-        pgc_=pgcd(smallest,precision,T1,T2,T3);
-        //Serial.printf("%f\n",I2S_MAX_CLK/(1000000000L*freq));
-        while(pgc_==1 ||  (T1/pgc_ +T2/pgc_ +T3/pgc_)>I2S_MAX_PULSE_PER_BIT) //while(pgc_==1 ||  (T1/pgc_ +T2/pgc_ +T3/pgc_)>I2S_MAX_CLK/(1000000000L*freq))
-        {
-            precision++;
-            pgc_=pgcd(smallest,precision,T1,T2,T3);
-            //Serial.printf("%d %d\n",pgc_,(a+b+c)/pgc_);
-        }
-        pgc_=pgcd(smallest,precision,T1,T2,T3);
-        Serial.printf("pgcd %d precision:%d\n",pgc_,precision);
-        Serial.printf("nb pulse per bit:%d\n",T1/pgc_ +T2/pgc_ +T3/pgc_);
-        gPulsesPerBit=(int)T1/pgc_ +(int)T2/pgc_ +(int)T3/pgc_;
-        /*
-         we calculate the duration of one pulse nd htre base frequency of the led
-         ie WS2812B F=1/(250+625+375)=800kHz or 1250ns
-         as we need 10 pulses each pulse is 125ns => frequency 800Khz*10=8MHz
-         WS2811 T=320+320+641=1281ns qnd we need 4 pulses => pulse duration 320.25ns =>frequency 3.1225605Mhz
-         
-         */
-
-        freq=1000000000L*freq*gPulsesPerBit;
-        Serial.printf("needed frequency (nbpiulse per bit)*(chispset frequency):%f Mhz\n",freq/1000000);
-        
-        /*
-         we do calculate the needed N a and b
-         as f=basefred/(N+b/a);
-         as a is max 63 the precision for the decimal is 1/63 
-         */
-        
-        CLOCK_DIVIDER_N=(int)((double)I2S_BASE_CLK/freq);
-        double v=I2S_BASE_CLK/freq-CLOCK_DIVIDER_N;
-
-        double prec=(double)1/63;
-        int a=1;
-        int b=0;
-        CLOCK_DIVIDER_A=1;
-        CLOCK_DIVIDER_B=0;
-        for(a=1;a<64;a++)
-        {
-            for(b=0;b<a;b++)
-            {
-                //printf("%d %d %f %f %f\n",b,a,v,(double)v*(double)a,fabsf(v-(double)b/a));
-                if(fabsf(v-(double)b/a) <= prec/2)
-                    break;
-            }
-            if(fabsf(v-(double)b/a) ==0)
-            {
-                CLOCK_DIVIDER_A=a;
-                CLOCK_DIVIDER_B=b;
-                break;
-            }
-            if(fabsf(v-(double)b/a) < prec/2)
-            {
-                if (fabsf(v-(double)b/a) <fabsf(v-(double)CLOCK_DIVIDER_B/CLOCK_DIVIDER_A))
-                {
-                    CLOCK_DIVIDER_A=a;
-                    CLOCK_DIVIDER_B=b;
-                }
-                
-            }
-        }
-        //top take care of an issue with double 0.9999999999
-        if(CLOCK_DIVIDER_A==CLOCK_DIVIDER_B)
-        {
-            CLOCK_DIVIDER_A=1;
-            CLOCK_DIVIDER_B=0;
-            CLOCK_DIVIDER_N++;
-        }
-        
-        //printf("%d %d %f %f %d\n",CLOCK_DIVIDER_B,CLOCK_DIVIDER_A,(double)CLOCK_DIVIDER_B/CLOCK_DIVIDER_A,v,CLOCK_DIVIDER_N);
-        //Serial.printf("freq %f %f\n",freq,I2S_BASE_CLK/(CLOCK_DIVIDER_N+(double)CLOCK_DIVIDER_B/CLOCK_DIVIDER_A));
-        freq=1/(CLOCK_DIVIDER_N+(double)CLOCK_DIVIDER_B/CLOCK_DIVIDER_A);
-        freq=freq*I2S_BASE_CLK;
-        Serial.printf("calculted for i2s frequency:%f Mhz N:%d B:%d A:%d\n",freq/1000000,CLOCK_DIVIDER_N,CLOCK_DIVIDER_B,CLOCK_DIVIDER_A);
-        double pulseduration=1000000000/freq;
-        Serial.printf("Pulse duration: %f ns\n",pulseduration);
-        // gPulsesPerBit = (T1ns + T2ns + T3ns)/FASTLED_I2S_NS_PER_PULSE;
-        
-        //Serial.print("Pulses per bit: "); Serial.println(gPulsesPerBit);
-        
-        //int ones_for_one  = ((T1ns + T2ns - 1)/FASTLED_I2S_NS_PER_PULSE) + 1;
-        ones_for_one  = T1/pgc_ +T2/pgc_;
-        //Serial.print("One bit:  target ");
-        //Serial.print(T1ns+T2ns); Serial.print("ns --- ");
-        //Serial.print(ones_for_one); Serial.print(" 1 bits");
-        //Serial.print(" = "); Serial.print(ones_for_one * FASTLED_I2S_NS_PER_PULSE); Serial.println("ns");
-        Serial.printf("one bit : target %d  ns --- %d  pulses 1 bit = %f ns\n",T1ns+T2ns,ones_for_one ,ones_for_one*pulseduration);
-        
-        int i = 0;
-        while ( i < ones_for_one ) {
-            gOneBit[i] = 0xFFFFFF00;
-            i++;
-        }
-        while ( i < gPulsesPerBit ) {
-            gOneBit[i] = 0x00000000;
-            i++;
-        }
-        
-        //int ones_for_zero = ((T1ns - 1)/FASTLED_I2S_NS_PER_PULSE) + 1;
-        ones_for_zero =T1/pgc_  ;
-        // Serial.print("Zero bit:  target ");
-        // Serial.print(T1ns); Serial.print("ns --- ");
-        //Serial.print(ones_for_zero); Serial.print(" 1 bits");
-        //Serial.print(" = "); Serial.print(ones_for_zero * FASTLED_I2S_NS_PER_PULSE); Serial.println("ns");
-        Serial.printf("Zero bit : target %d ns --- %d pulses  1 bit =   %f ns\n",T1ns,ones_for_zero ,ones_for_zero*pulseduration);
-        i = 0;
-        while ( i < ones_for_zero ) {
-            gZeroBit[i] = 0xFFFFFF00;
-            i++;
-        }
-        while ( i < gPulsesPerBit ) {
-            gZeroBit[i] = 0x00000000;
-            i++;
-        }
-        
-        memset(gPixelRow, 0, NUM_COLOR_CHANNELS * 32);
-        memset(gPixelBits, 0, NUM_COLOR_CHANNELS * 32);
-    }
-    
-    static DMABuffer * allocateDMABuffer(int bytes)
-    {
-        DMABuffer * b = (DMABuffer *)heap_caps_malloc(sizeof(DMABuffer), MALLOC_CAP_DMA);
-        
-        b->buffer = (uint8_t *)heap_caps_malloc(bytes, MALLOC_CAP_DMA);
-        memset(b->buffer, 0, bytes);
-        
-        b->descriptor.length = bytes;
-        b->descriptor.size = bytes;
-        b->descriptor.owner = 1;
-        b->descriptor.sosf = 1;
-        b->descriptor.buf = b->buffer;
-        b->descriptor.offset = 0;
-        b->descriptor.empty = 0;
-        b->descriptor.eof = 1;
-        b->descriptor.qe.stqe_next = 0;
-        
-        return b;
-    }
-    
-    static void i2sInit()
-    {
-        // -- Only need to do this once
-        if (gInitialized) return;
-        
-        // -- Construct the bit patterns for ones and zeros
-        initBitPatterns();
-        
-        // -- Choose whether to use I2S device 0 or device 1
-        //    Set up the various device-specific parameters
-        int interruptSource;
-        if (I2S_DEVICE == 0) {
-            i2s = &I2S0;
-            periph_module_enable(PERIPH_I2S0_MODULE);
-            interruptSource = ETS_I2S0_INTR_SOURCE;
-            i2s_base_pin_index = I2S0O_DATA_OUT0_IDX;
-        } else {
-            i2s = &I2S1;
-            periph_module_enable(PERIPH_I2S1_MODULE);
-            interruptSource = ETS_I2S1_INTR_SOURCE;
-            i2s_base_pin_index = I2S1O_DATA_OUT0_IDX;
-        }
-        
-        // -- Reset everything
-        i2sReset();
-        i2sReset_DMA();
-        i2sReset_FIFO();
-        
-        // -- Main configuration
-        i2s->conf.tx_msb_right = 1;
-        i2s->conf.tx_mono = 0;
-        i2s->conf.tx_short_sync = 0;
-        i2s->conf.tx_msb_shift = 0;
-        i2s->conf.tx_right_first = 1; // 0;//1;
-        i2s->conf.tx_slave_mod = 0;
-        
-        // -- Set parallel mode
-        i2s->conf2.val = 0;
-        i2s->conf2.lcd_en = 1;
-        i2s->conf2.lcd_tx_wrx2_en = 0; // 0 for 16 or 32 parallel output
-        i2s->conf2.lcd_tx_sdx2_en = 0; // HN
-        
-        // -- Set up the clock rate and sampling
-        i2s->sample_rate_conf.val = 0;
-        i2s->sample_rate_conf.tx_bits_mod = 32; // Number of parallel bits/pins
-        i2s->sample_rate_conf.tx_bck_div_num = 1;
-        i2s->clkm_conf.val = 0;
-        i2s->clkm_conf.clka_en = 0;
-        
-        // -- Data clock is computed as Base/(div_num + (div_b/div_a))
-        //    Base is 80Mhz, so 80/(10 + 0/1) = 8Mhz
-        //    One cycle is 125ns
-        i2s->clkm_conf.clkm_div_a = CLOCK_DIVIDER_A;
-        i2s->clkm_conf.clkm_div_b = CLOCK_DIVIDER_B;
-        i2s->clkm_conf.clkm_div_num = CLOCK_DIVIDER_N;
-        
-        i2s->fifo_conf.val = 0;
-        i2s->fifo_conf.tx_fifo_mod_force_en = 1;
-        i2s->fifo_conf.tx_fifo_mod = 3;  // 32-bit single channel data
-        i2s->fifo_conf.tx_data_num = 32; // fifo length
-        i2s->fifo_conf.dscr_en = 1;      // fifo will use dma
-        
-        i2s->conf1.val = 0;
-        i2s->conf1.tx_stop_en = 0;
-        i2s->conf1.tx_pcm_bypass = 1;
-        
-        i2s->conf_chan.val = 0;
-        i2s->conf_chan.tx_chan_mod = 1; // Mono mode, with tx_msb_right = 1, everything goes to right-channel
-        
-        i2s->timing.val = 0;
-        
-        // -- Allocate two DMA buffers
-        dmaBuffers[0] = allocateDMABuffer(32 * NUM_COLOR_CHANNELS * gPulsesPerBit);
-        dmaBuffers[1] = allocateDMABuffer(32 * NUM_COLOR_CHANNELS * gPulsesPerBit);
-        
-        // -- Arrange them as a circularly linked list
-        dmaBuffers[0]->descriptor.qe.stqe_next = &(dmaBuffers[1]->descriptor);
-        dmaBuffers[1]->descriptor.qe.stqe_next = &(dmaBuffers[0]->descriptor);
-       
-        // -- Allocate i2s interrupt
-        SET_PERI_REG_BITS(I2S_INT_ENA_REG(I2S_DEVICE), I2S_OUT_EOF_INT_ENA_V, 1, I2S_OUT_EOF_INT_ENA_S);
-        esp_err_t e = esp_intr_alloc(interruptSource, 0, // ESP_INTR_FLAG_INTRDISABLED | ESP_INTR_FLAG_LEVEL3,
-                                     &i2sInterruptHandler, 0, &gI2S_intr_handle);
-        
-        // -- Create a semaphore to block execution until all the controllers are done
-        if (gTX_sem == NULL) {
-            gTX_sem = xSemaphoreCreateBinary();
-            xSemaphoreGive(gTX_sem);
-        }
-        
-        // Serial.println("Init I2S");
-        gInitialized = true;
-    }
-    
-    // -- Clear the I2S DMA buffer 
-    //    Yves' trick: fill in all the bits we know ahead of time;
-    //    i.e., the parts of the zero-bit and one-bit signals where
-    //    they are *both* high or *both* low.
-    static void empty( uint32_t *buf)
-    {
-        for(int i=0;i<8*NUM_COLOR_CHANNELS;i++)
-        {
-            int offset=gPulsesPerBit*i;
-            for(int j=0;j<ones_for_zero;j++)
-                buf[offset+j]=0xffffffff;
-            
-            for(int j=ones_for_one;j<gPulsesPerBit;j++)
-                buf[offset+j]=0;
-        }
-    }
-    
-    // ----------------------------------------------------------------------
-    //  Common entry point
-    // ----------------------------------------------------------------------
-
-    // -- Show pixels
-    //    This is the main entry point for the controller.
-    virtual void showPixels(PixelController<RGB_ORDER> & pixels)
-    {
-        if (gNumStarted == 0) {
-            // -- First controller: make sure everything is set up
-            xSemaphoreTake(gTX_sem, portMAX_DELAY);
-        }
-        
-        // -- Initialize the local state, save a pointer to the pixel
-        //    data. We need to make a copy because pixels is a local
-        //    variable in the calling function, and this data structure
-        //    needs to outlive this call to showPixels.
-        (*mPixels) = pixels;
-        
-        if (FASTLED_RMT_BUILTIN_DRIVER)
-            convertAllPixelData(pixels);
-
-        // -- Keep track of the number of strips we've seen
-        gNumStarted++;
-
-        // Serial.print("Show pixels ");
-        // Serial.println(gNumStarted);
-        
-        // -- The last call to showPixels is the one responsible for doing
-        //    all of the actual work
-        if (gNumStarted == gNumControllers) {
-            if (FASTLED_ESP32_I2S) {
-                empty((uint32_t*)dmaBuffers[0]->buffer);
-                empty((uint32_t*)dmaBuffers[1]->buffer);
-                gCurBuffer = 0;
-                gDoneFilling = false;
-            
-                // -- Prefill both buffers
-                fillBuffer();
-                fillBuffer();
-            
-                i2sStart();
-            } else {
-                // -- First, fill all the available channels
-                gNext = 0;
-                int channel = 0;
-                while (channel < FASTLED_RMT_MAX_CHANNELS && gNext < gNumControllers) {
-                    startNext(channel);
-                    channel++;
-                }
-            }
-                
-            
-            // -- Wait here while the rest of the data is sent. The interrupt handler
-            //    will keep refilling the RMT buffers until it is all sent; then it
-            //    gives the semaphore back.
-            xSemaphoreTake(gTX_sem, portMAX_DELAY);
-            xSemaphoreGive(gTX_sem);
-            
-            if (FASTLED_ESP32_I2S)
-                i2sStop();
-            
-            // -- Reset the counters
-            gNumStarted = 0;
-            gNumDone = 0;
-            gNext = 0;
-        }
-    }
-    
-    // ----------------------------------------------------------------------
-    //  RMT Methods
-    // ----------------------------------------------------------------------
-
-    // -- Convert all pixels to RMT pulses
-    //    This function is only used when the user chooses to use the
-    //    built-in RMT driver, which needs all of the RMT pulses
-    //    up-front.
-    virtual void convertAllPixelData(PixelController<RGB_ORDER> & pixels)
-    {
-        // -- Compute the pulse values for the whole strip at once.
-        //    Requires a large buffer
-        mBufferSize = pixels.size() * 3 * 8;
-
-        if (mBuffer == NULL) {
-            mBuffer = (rmt_item32_t *) calloc( mBufferSize, sizeof(rmt_item32_t));
-        }
-
-        // -- Cycle through the R,G, and B values in the right order,
-        //    storing the pulses in the big buffer
-        mCurPulse = 0;
-        int cur = 0;
-        uint32_t byteval;
-        while (pixels.has(1)) {
-            byteval = pixels.loadAndScale0();
-            convertByte(byteval);
-            byteval = pixels.loadAndScale1();
-            convertByte(byteval);
-            byteval = pixels.loadAndScale2();
-            convertByte(byteval);
-            pixels.advanceData();
-            pixels.stepDithering();
-        }
-
-        mBuffer[mCurPulse-1].duration1 = RMT_RESET_DURATION;
-        assert(mCurPulse == mBufferSize);
-    }
-
-    void convertByte(uint32_t byteval)
-    {
-        // -- Write one byte's worth of RMT pulses to the big buffer
-        byteval <<= 24;
-        for (register uint32_t j = 0; j < 8; j++) {
-            mBuffer[mCurPulse] = (byteval & 0x80000000L) ? mOne : mZero;
-            byteval <<= 1;
-            mCurPulse++;
-        }
-    }
-
-    // -- Start up the next controller
-    //    This method is static so that it can dispatch to the
-    //    appropriate startOnChannel method of the given controller.
-    static void startNext(int channel)
-    {
-        if (gNext < gNumControllers) {
-            ClocklessController * pController = static_cast<ClocklessController*>(gControllers[gNext]);
-            pController->startOnChannel(channel);
-            gNext++;
-        }
-    }
-
-    // -- Start this controller on the given channel
-    //    This function just initiates the RMT write; it does not wait
-    //    for it to finish.
-    void startOnChannel(int channel)
-    {
-        // -- Assign this channel and configure the RMT
-        mRMT_channel = rmt_channel_t(channel);
-
-        // -- Store a reference to this controller, so we can get it
-        //    inside the interrupt handler
-        gOnChannel[channel] = this;
-
-        // -- Assign the pin to this channel
-        rmt_set_pin(mRMT_channel, RMT_MODE_TX, mPin);
-
-        if (FASTLED_RMT_BUILTIN_DRIVER) {
-            // -- Use the built-in RMT driver to send all the data in one shot
-            rmt_register_tx_end_callback(doneOnChannel, 0);
-            rmt_write_items(mRMT_channel, mBuffer, mBufferSize, false);
-        } else {
-            // -- Use our custom driver to send the data incrementally
-
-            // -- Turn on the interrupts
-            rmt_set_tx_intr_en(mRMT_channel, true);
-        
-            // -- Initialize the counters that keep track of where we are in
-            //    the pixel data.
-            mCurPulse = 0;
-            mCurByte = 0;
-
-            // -- Fill both halves of the buffer
-            fillHalfRMTBuffer();
-            fillHalfRMTBuffer();
-
-            // -- Turn on the interrupts
-            rmt_set_tx_intr_en(mRMT_channel, true);
-            
-            // -- Start the RMT TX operation
-            rmt_tx_start(mRMT_channel, true);
-        }
-    }
-
-    // -- A controller is done 
-    //    This function is called when a controller finishes writing
-    //    its data. It is called either by the custom interrupt
-    //    handler (below), or as a callback from the built-in
-    //    interrupt handler. It is static because we don't know which
-    //    controller is done until we look it up.
-    static void doneOnChannel(rmt_channel_t channel, void * arg)
-    {
-        ClocklessController * controller = static_cast<ClocklessController*>(gOnChannel[channel]);
-        portBASE_TYPE HPTaskAwoken = 0;
-
-        // -- Turn off output on the pin
-        gpio_matrix_out(controller->mPin, 0x100, 0, 0);
-
-        gOnChannel[channel] = NULL;
-        gNumDone++;
-
-        if (gNumDone == gNumControllers) {
-            // -- If this is the last controller, signal that we are all done
-            xSemaphoreGiveFromISR(gTX_sem, &HPTaskAwoken);
-            if(HPTaskAwoken == pdTRUE) portYIELD_FROM_ISR();
-        } else {
-            // -- Otherwise, if there are still controllers waiting, then
-            //    start the next one on this channel
-            if (gNext < gNumControllers)
-                startNext(channel);
-        }
-    }
-    
-    // -- Custom interrupt handler
-    //    This interrupt handler handles two cases: a controller is
-    //    done writing its data, or a controller needs to fill the
-    //    next half of the RMT buffer with data.
-    static IRAM_ATTR void rmtInterruptHandler(void *arg)
-    {
-        // -- The basic structure of this code is borrowed from the
-        //    interrupt handler in esp-idf/components/driver/rmt.c
-        uint32_t intr_st = RMT.int_st.val;
-        uint8_t channel;
-
-        for (channel = 0; channel < FASTLED_RMT_MAX_CHANNELS; channel++) {
-            int tx_done_bit = channel * 3;
-            int tx_next_bit = channel + 24;
-
-            if (gOnChannel[channel] != NULL) {
-
-                // -- More to send on this channel
-                if (intr_st & BIT(tx_next_bit)) {
-                    RMT.int_clr.val |= BIT(tx_next_bit);
-                    
-                    // -- Refill the half of the buffer that we just finished,
-                    //    allowing the other half to proceed.
-                    ClocklessController * controller = static_cast<ClocklessController*>(gOnChannel[channel]);
-                    controller->fillHalfRMTBuffer();
-                } else {
-                    // -- Transmission is complete on this channel
-                    if (intr_st & BIT(tx_done_bit)) {
-                        RMT.int_clr.val |= BIT(tx_done_bit);
-                        doneOnChannel(rmt_channel_t(channel), 0);
-                    }
-                }
-            }
-        }
-    }
-
-    // -- Fill the RMT buffer
-    //    This function fills the next 32 slots in the RMT write
-    //    buffer with pixel data. It also handles the case where the
-    //    pixel data is exhausted, so we need to fill the RMT buffer
-    //    with zeros to signal that it's done.
-    void fillHalfRMTBuffer()
-    {
-        uint32_t one_val = mOne.val;
-        uint32_t zero_val = mZero.val;
-
-        // -- Convert (up to) 32 bits of the raw pixel data into
-        //    into RMT pulses that encode the zeros and ones.
-        int pulses = 0;
-        uint32_t byteval;
-        while (pulses < 32 && mCurByte < mSize) {
-            // -- Get one byte
-            byteval = mPixelData[mCurByte++];
-            byteval <<= 24;
-            // Shift bits out, MSB first, setting RMTMEM.chan[n].data32[x] to the 
-            // rmt_item32_t value corresponding to the buffered bit value
-            for (register uint32_t j = 0; j < 8; j++) {
-                uint32_t val = (byteval & 0x80000000L) ? one_val : zero_val;
-                RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = val;
-                byteval <<= 1;
-                mCurPulse++;
-            }
-            pulses += 8;
-        }
-
-        // -- When we reach the end of the pixel data, fill the rest of the
-        //    RMT buffer with 0's, which signals to the device that we're done.
-        if (mCurByte == mSize) {
-            while (pulses < 32) {
-                RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = 0;
-                mCurPulse++;
-                pulses++;
-            }
-        }
-        
-        // -- When we have filled the back half the buffer, reset the position to the first half
-        if (mCurPulse >= MAX_PULSES*2)
-            mCurPulse = 0;
-    }
-
-    // ----------------------------------------------------------------------
-    //  I2S Methods
-    // ----------------------------------------------------------------------
-
-    // -- Custom interrupt handler
-    static IRAM_ATTR void i2sInterruptHandler(void *arg)
-    {
-        if (i2s->int_st.out_eof) {
-            i2s->int_clr.val = i2s->int_raw.val;
-            
-            if ( ! gDoneFilling) {
-                fillBuffer();
-            } else {
-                portBASE_TYPE HPTaskAwoken = 0;
-                xSemaphoreGiveFromISR(gTX_sem, &HPTaskAwoken);
-                if(HPTaskAwoken == pdTRUE) portYIELD_FROM_ISR();
-            }
-        }
-    }
-    
-    static void fillBuffer()
-    {
-        volatile uint32_t * buf = (uint32_t *) dmaBuffers[gCurBuffer]->buffer;
-        gCurBuffer = (gCurBuffer + 1) % NUM_DMA_BUFFERS;
-        
-        // -- Get the requested pixel from each controller. Store the
-        //    data for each color channel in a separate array.
-        uint32_t has_data_mask = 0;
-        for (int i = 0; i < gNumControllers; i++) {
-            // -- Store the pixels in reverse controller order starting at index 23
-            //    This causes the bits to come out in the right position after we
-            //    transpose them.
-            int bit_index = 23-i;
-            ClocklessController * pController = static_cast<ClocklessController*>(gControllers[i]);
-            if (pController->mPixels->has(1)) {
-                gPixelRow[0][bit_index] = pController->mPixels->loadAndScale0();
-                gPixelRow[1][bit_index] = pController->mPixels->loadAndScale1();
-                gPixelRow[2][bit_index] = pController->mPixels->loadAndScale2();
-                pController->mPixels->advanceData();
-                pController->mPixels->stepDithering();
-                
-                // -- Record that this controller still has data to send
-                has_data_mask |= (1 << (i+8));
-            }
-        }
-        
-        if (has_data_mask == 0) {
-            gDoneFilling = true;
-            return;
-        }
-        
-        // -- Transpose and encode the pixel data for the DMA buffer
-        int buf_index = 0;
-        for (int channel = 0; channel < NUM_COLOR_CHANNELS; channel++) {
-            
-            // -- Tranpose each array: all the bit 7's, then all the bit 6's, ...
-            transpose32(gPixelRow[channel], gPixelBits[channel][0] );
-            
-            //Serial.print("Channel: "); Serial.print(channel); Serial.print(" ");
-            for (int bitnum = 0; bitnum < 8; bitnum++) {
-                uint8_t * row = (uint8_t *) (gPixelBits[channel][bitnum]);
-                uint32_t bit = (row[0] << 24) | (row[1] << 16) | (row[2] << 8) | row[3];
-                
-               /* for (int pulse_num = 0; pulse_num < gPulsesPerBit; pulse_num++) {
-                    buf[buf_index++] = has_data_mask & ( (bit & gOneBit[pulse_num]) | (~bit & gZeroBit[pulse_num]) );*/
-                //when the loop is too big  => issues in timing hence i only fill the the 1
-                for(int pulse_num=ones_for_zero;pulse_num<ones_for_one;pulse_num++) {
-                    buf[bitnum*gPulsesPerBit+channel*8*gPulsesPerBit+pulse_num] = has_data_mask & bit;
-                    //if (buf[buf_index-1] & 0x100) Serial.print("1");
-                    //else Serial.print("0");
-                }
-            }
-        }
-    }
-    
-    static void transpose32(uint8_t * pixels, uint8_t * bits)
-    {
-        transpose8rS32(& pixels[0],  1, 4, & bits[0]);
-        transpose8rS32(& pixels[8],  1, 4, & bits[1]);
-        transpose8rS32(& pixels[16], 1, 4, & bits[2]);
-        //transpose8rS32(& pixels[24], 1, 4, & bits[3]);
-    }
-    
-    static void transpose8rS32(uint8_t * A, int m, int n, uint8_t * B)
-    {
-        uint32_t x, y, t;
-        
-        // Load the array and pack it into x and y.
-        
-        x = (A[0]<<24)   | (A[m]<<16)   | (A[2*m]<<8) | A[3*m];
-        y = (A[4*m]<<24) | (A[5*m]<<16) | (A[6*m]<<8) | A[7*m];
-        
-        t = (x ^ (x >> 7)) & 0x00AA00AA;  x = x ^ t ^ (t << 7);
-        t = (y ^ (y >> 7)) & 0x00AA00AA;  y = y ^ t ^ (t << 7);
-        
-        t = (x ^ (x >>14)) & 0x0000CCCC;  x = x ^ t ^ (t <<14);
-        t = (y ^ (y >>14)) & 0x0000CCCC;  y = y ^ t ^ (t <<14);
-        
-        t = (x & 0xF0F0F0F0) | ((y >> 4) & 0x0F0F0F0F);
-        y = ((x << 4) & 0xF0F0F0F0) | (y & 0x0F0F0F0F);
-        x = t;
-        
-        B[0]=x>>24;    B[n]=x>>16;    B[2*n]=x>>8;  B[3*n]=x;
-        B[4*n]=y>>24;  B[5*n]=y>>16;  B[6*n]=y>>8;  B[7*n]=y;
-    }
-    
-    /** Start I2S transmission
-     */
-    static void i2sStart()
-    {
-        // esp_intr_disable(gI2S_intr_handle);
-        // Serial.println("I2S start");
-        i2sReset();
-        //Serial.println(dmaBuffers[0]->sampleCount());
-        i2s->lc_conf.val=I2S_OUT_DATA_BURST_EN | I2S_OUTDSCR_BURST_EN | I2S_OUT_DATA_BURST_EN;
-        i2s->out_link.addr = (uint32_t) & (dmaBuffers[0]->descriptor);
-        i2s->out_link.start = 1;
-        ////vTaskDelay(5);
-        i2s->int_clr.val = i2s->int_raw.val;
-        // //vTaskDelay(5);
-        i2s->int_ena.out_dscr_err = 1;
-        //enable interrupt
-        ////vTaskDelay(5);
-        esp_intr_enable(gI2S_intr_handle);
-        // //vTaskDelay(5);
-        i2s->int_ena.val = 0;
-        i2s->int_ena.out_eof = 1;
-        
-        //start transmission
-        i2s->conf.tx_start = 1;
-    }
-    
-    static void i2sReset()
-    {
-        // Serial.println("I2S reset");
-        const unsigned long lc_conf_reset_flags = I2S_IN_RST_M | I2S_OUT_RST_M | I2S_AHBM_RST_M | I2S_AHBM_FIFO_RST_M;
-        i2s->lc_conf.val |= lc_conf_reset_flags;
-        i2s->lc_conf.val &= ~lc_conf_reset_flags;
-        
-        const uint32_t conf_reset_flags = I2S_RX_RESET_M | I2S_RX_FIFO_RESET_M | I2S_TX_RESET_M | I2S_TX_FIFO_RESET_M;
-        i2s->conf.val |= conf_reset_flags;
-        i2s->conf.val &= ~conf_reset_flags;
-    }
-    
-    static void i2sReset_DMA()
-    {
-        i2s->lc_conf.in_rst=1; i2s->lc_conf.in_rst=0;
-        i2s->lc_conf.out_rst=1; i2s->lc_conf.out_rst=0;
-    }
-    
-    static void i2sReset_FIFO()
-    {
-        i2s->conf.rx_fifo_reset=1; i2s->conf.rx_fifo_reset=0;
-        i2s->conf.tx_fifo_reset=1; i2s->conf.tx_fifo_reset=0;
-    }
-    
-    static void i2sStop()
-    {
-        // Serial.println("I2S stop");
-        esp_intr_disable(gI2S_intr_handle);
-        i2sReset();
-        i2s->conf.rx_start = 0;
-        i2s->conf.tx_start = 0;
-    }
-};
-
-FASTLED_NAMESPACE_END

From e5a68f195371a626c75da4006b87492c2e930dff Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Tue, 30 Apr 2019 12:03:09 -0400
Subject: [PATCH 050/204] This was added by accident

---
 platforms/esp/32/clockless_esp32.h.orig | 786 ------------------------
 1 file changed, 786 deletions(-)
 delete mode 100644 platforms/esp/32/clockless_esp32.h.orig

diff --git a/platforms/esp/32/clockless_esp32.h.orig b/platforms/esp/32/clockless_esp32.h.orig
deleted file mode 100644
index e0cd00dae9..0000000000
--- a/platforms/esp/32/clockless_esp32.h.orig
+++ /dev/null
@@ -1,786 +0,0 @@
-/*
- * Integration into FastLED ClocklessController 2017 Thomas Basler
- *
- * Modifications Copyright (c) 2017 Martin F. Falatic
- *
- * Modifications Copyright (c) 2018 Samuel Z. Guyer
- *
- * ESP32 support is provided using the RMT peripheral device -- a unit
- * on the chip designed specifically for generating (and receiving)
- * precisely-timed digital signals. Nominally for use in infrared
- * remote controls, we use it to generate the signals for clockless
- * LED strips. The main advantage of using the RMT device is that,
- * once programmed, it generates the signal asynchronously, allowing
- * the CPU to continue executing other code. It is also not vulnerable
- * to interrupts or other timing problems that could disrupt the signal.
- *
- * The implementation strategy is borrowed from previous work and from
- * the RMT support built into the ESP32 IDF. The RMT device has 8
- * channels, which can be programmed independently to send sequences
- * of high/low bits. Memory for each channel is limited, however, so
- * in order to send a long sequence of bits, we need to continuously
- * refill the buffer until all the data is sent. To do this, we fill
- * half the buffer and then set an interrupt to go off when that half
- * is sent. Then we refill that half while the second half is being
- * sent. This strategy effectively overlaps computation (by the CPU)
- * and communication (by the RMT).
- *
- * Since the RMT device only has 8 channels, we need a strategy to
- * allow more than 8 LED controllers. Our driver assigns controllers
- * to channels on the fly, queuing up controllers as necessary until a
- * channel is free. The main showPixels routine just fires off the
- * first 8 controllers; the interrupt handler starts new controllers
- * asynchronously as previous ones finish. So, for example, it can
- * send the data for 8 controllers simultaneously, but 16 controllers
- * would take approximately twice as much time.
- *
- * There is a #define that allows a program to control the total
- * number of channels that the driver is allowed to use. It defaults
- * to 8 -- use all the channels. Setting it to 1, for example, results
- * in fully serial output:
- *
- *     #define FASTLED_RMT_MAX_CHANNELS 1
- *
- * OTHER RMT APPLICATIONS
- *
- * The default FastLED driver takes over control of the RMT interrupt
- * handler, making it hard to use the RMT device for other
- * (non-FastLED) purposes. You can change it's behavior to use the ESP
- * core driver instead, allowing other RMT applications to
- * co-exist. To switch to this mode, add the following directive
- * before you include FastLED.h:
- *
- *      #define FASTLED_RMT_BUILTIN_DRIVER
- *
- * There may be a performance penalty for using this mode. We need to
- * compute the RMT signal for the entire LED strip ahead of time,
- * rather than overlapping it with communication. We also need a large
- * buffer to hold the signal specification. Each bit of pixel data is
- * represented by a 32-bit pulse specification, so it is a 32X blow-up
- * in memory use.
- *
- *
- * Based on public domain code created 19 Nov 2016 by Chris Osborn <fozztexx@fozztexx.com>
- * http://insentricity.com *
- *
- */
-/*
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#pragma once
-
-FASTLED_NAMESPACE_BEGIN
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "esp32-hal.h"
-#include "esp_intr.h"
-#include "driver/gpio.h"
-#include "driver/rmt.h"
-#include "driver/periph_ctrl.h"
-#include "freertos/semphr.h"
-#include "soc/rmt_struct.h"
-
-#include "esp_log.h"
-
-#ifdef __cplusplus
-}
-#endif
-
-__attribute__ ((always_inline)) inline static uint32_t __clock_cycles() {
-  uint32_t cyc;
-  __asm__ __volatile__ ("rsr %0,ccount":"=a" (cyc));
-  return cyc;
-}
-
-#define FASTLED_HAS_CLOCKLESS 1
-
-// -- Configuration constants
-#define DIVIDER             2 /* 4, 8 still seem to work, but timings become marginal */
-#define MAX_PULSES         32 /* A channel has a 64 "pulse" buffer - we use half per pass */
-
-// -- Convert ESP32 cycles back into nanoseconds
-#define ESPCLKS_TO_NS(_CLKS) (((long)(_CLKS) * 1000L) / F_CPU_MHZ)
-
-// -- Convert nanoseconds into RMT cycles
-#define F_CPU_RMT       (  80000000L)
-#define NS_PER_SEC      (1000000000L)
-#define CYCLES_PER_SEC  (F_CPU_RMT/DIVIDER)
-#define NS_PER_CYCLE    ( NS_PER_SEC / CYCLES_PER_SEC )
-#define NS_TO_CYCLES(n) ( (n) / NS_PER_CYCLE )
-
-// -- Convert ESP32 cycles to RMT cycles
-#define TO_RMT_CYCLES(_CLKS) NS_TO_CYCLES(ESPCLKS_TO_NS(_CLKS))    
-
-// -- Number of cycles to signal the strip to latch
-#define RMT_RESET_DURATION NS_TO_CYCLES(50000)
-
-// -- Core or custom driver
-#ifndef FASTLED_RMT_BUILTIN_DRIVER
-#define FASTLED_RMT_BUILTIN_DRIVER false
-#endif
-
-// -- Max number of controllers we can support
-#ifndef FASTLED_RMT_MAX_CONTROLLERS
-#define FASTLED_RMT_MAX_CONTROLLERS 32
-#endif
-
-// -- Number of RMT channels to use (up to 8)
-//    Redefine this value to 1 to force serial output
-#ifndef FASTLED_RMT_MAX_CHANNELS
-#define FASTLED_RMT_MAX_CHANNELS 8
-#endif
-
-// -- Array of all controllers
-static CLEDController * gControllers[FASTLED_RMT_MAX_CONTROLLERS];
-
-// -- Current set of active controllers, indexed by the RMT
-//    channel assigned to them.
-static CLEDController * gOnChannel[FASTLED_RMT_MAX_CHANNELS];
-
-static int gNumControllers = 0;
-static int gNumStarted = 0;
-static int gNumDone = 0;
-static int gNext = 0;
-
-static intr_handle_t gRMT_intr_handle = NULL;
-
-// -- Global semaphore for the whole show process
-//    Semaphore is not given until all data has been sent
-static xSemaphoreHandle gTX_sem = NULL;
-
-static bool gInitialized = false;
-
-template <int DATA_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = RGB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 5>
-class ClocklessController : public CPixelLEDController<RGB_ORDER>
-{
-    // -- RMT has 8 channels, numbered 0 to 7
-    rmt_channel_t  mRMT_channel;
-
-    // -- Store the GPIO pin
-    gpio_num_t     mPin;
-<<<<<<< HEAD
-
-    // -- This instantiation forces a check on the pin choice
-    FastPin<DATA_PIN> mFastPin;
-
-    // -- Timing values for zero and one bits, derived from T1, T2, and T3
-    rmt_item32_t   mZero;
-    rmt_item32_t   mOne;
-
-=======
-
-    // -- Timing values for zero and one bits, derived from T1, T2, and T3
-    rmt_item32_t   mZero;
-    rmt_item32_t   mOne;
-
->>>>>>> upstream/master
-    // -- State information for keeping track of where we are in the pixel data
-    PixelController<RGB_ORDER> * mPixels = NULL;
-    void *         mPixelSpace = NULL;
-    uint8_t        mRGB_channel;
-    uint16_t       mCurPulse;
-
-    // -- Buffer to hold all of the pulses. For the version that uses
-    //    the RMT driver built into the ESP core.
-    rmt_item32_t * mBuffer;
-    uint16_t       mBufferSize;
-
-public:
-
-    virtual void init()
-    {
-        // -- Precompute rmt items corresponding to a zero bit and a one bit
-        //    according to the timing values given in the template instantiation
-        // T1H
-        mOne.level0 = 1;
-        mOne.duration0 = TO_RMT_CYCLES(T1+T2);
-        // T1L
-        mOne.level1 = 0;
-        mOne.duration1 = TO_RMT_CYCLES(T3);
-
-        // T0H
-        mZero.level0 = 1;
-        mZero.duration0 = TO_RMT_CYCLES(T1);
-        // T0L
-        mZero.level1 = 0;
-        mZero.duration1 = TO_RMT_CYCLES(T2 + T3);
-
-<<<<<<< HEAD
-        gControllers[gNumControllers] = this;
-        gNumControllers++;
-
-        mPin = gpio_num_t(DATA_PIN);
-=======
-	gControllers[gNumControllers] = this;
-        gNumControllers++;
-
-	mPin = gpio_num_t(DATA_PIN);
->>>>>>> upstream/master
-    }
-
-    virtual uint16_t getMaxRefreshRate() const { return 400; }
-
-protected:
-
-    void initRMT()
-    {
-<<<<<<< HEAD
-        // -- Only need to do this once
-        if (gInitialized) return;
-
-        for (int i = 0; i < FASTLED_RMT_MAX_CHANNELS; i++) {
-            gOnChannel[i] = NULL;
-
-            // -- RMT configuration for transmission
-            rmt_config_t rmt_tx;
-            rmt_tx.channel = rmt_channel_t(i);
-            rmt_tx.rmt_mode = RMT_MODE_TX;
-            rmt_tx.gpio_num = mPin;  // The particular pin will be assigned later
-            rmt_tx.mem_block_num = 1;
-            rmt_tx.clk_div = DIVIDER;
-            rmt_tx.tx_config.loop_en = false;
-            rmt_tx.tx_config.carrier_level = RMT_CARRIER_LEVEL_LOW;
-            rmt_tx.tx_config.carrier_en = false;
-            rmt_tx.tx_config.idle_level = RMT_IDLE_LEVEL_LOW;
-            rmt_tx.tx_config.idle_output_en = true;
-                
-            // -- Apply the configuration
-            rmt_config(&rmt_tx);
-
-            if (FASTLED_RMT_BUILTIN_DRIVER) {
-                rmt_driver_install(rmt_channel_t(i), 0, 0);
-            } else {
-                // -- Set up the RMT to send 1/2 of the pulse buffer and then
-                //    generate an interrupt. When we get this interrupt we
-                //    fill the other half in preparation (kind of like double-buffering)
-                rmt_set_tx_thr_intr_en(rmt_channel_t(i), true, MAX_PULSES);
-            }
-        }
-
-        // -- Create a semaphore to block execution until all the controllers are done
-        if (gTX_sem == NULL) {
-            gTX_sem = xSemaphoreCreateBinary();
-            xSemaphoreGive(gTX_sem);
-        }
-                
-        if ( ! FASTLED_RMT_BUILTIN_DRIVER) {
-            // -- Allocate the interrupt if we have not done so yet. This
-            //    interrupt handler must work for all different kinds of
-            //    strips, so it delegates to the refill function for each
-            //    specific instantiation of ClocklessController.
-            if (gRMT_intr_handle == NULL)
-                esp_intr_alloc(ETS_RMT_INTR_SOURCE, 0, interruptHandler, 0, &gRMT_intr_handle);
-        }
-
-        gInitialized = true;
-    }
-
-    virtual void showPixels(PixelController<RGB_ORDER> & pixels)
-    {
-        if (gNumStarted == 0) {
-            // -- First controller: make sure everything is set up
-            initRMT();
-            xSemaphoreTake(gTX_sem, portMAX_DELAY);
-        }
-
-        // -- Initialize the local state, save a pointer to the pixel
-        //    data. We need to make a copy because pixels is a local
-        //    variable in the calling function, and this data structure
-        //    needs to outlive this call to showPixels.
-
-        if (mPixels != NULL) delete mPixels;
-        mPixels = new PixelController<RGB_ORDER>(pixels);
-        
-        // -- Keep track of the number of strips we've seen
-        gNumStarted++;
-
-        // -- The last call to showPixels is the one responsible for doing
-        //    all of the actual worl
-        if (gNumStarted == gNumControllers) {
-            gNext = 0;
-
-            // -- First, fill all the available channels
-            int channel = 0;
-            while (channel < FASTLED_RMT_MAX_CHANNELS && gNext < gNumControllers) {
-                startNext(channel);
-                channel++;
-            }
-
-            // -- Wait here while the rest of the data is sent. The interrupt handler
-            //    will keep refilling the RMT buffers until it is all sent; then it
-            //    gives the semaphore back.
-            xSemaphoreTake(gTX_sem, portMAX_DELAY);
-            xSemaphoreGive(gTX_sem);
-
-            // -- Reset the counters
-            gNumStarted = 0;
-            gNumDone = 0;
-            gNext = 0;
-        }
-    }
-
-    // -- Start up the next controller
-    //    This method is static so that it can dispatch to the appropriate
-    //    startOnChannel method of the given controller.
-    static void startNext(int channel)
-    {
-        if (gNext < gNumControllers) {
-            ClocklessController * pController = static_cast<ClocklessController*>(gControllers[gNext]);
-            pController->startOnChannel(channel);
-            gNext++;
-        }
-    }
-
-    virtual void startOnChannel(int channel)
-    {
-        // -- Assign this channel and configure the RMT
-        mRMT_channel = rmt_channel_t(channel);
-
-        // -- Store a reference to this controller, so we can get it
-        //    inside the interrupt handler
-        gOnChannel[channel] = this;
-
-        // -- Assign the pin to this channel
-        rmt_set_pin(mRMT_channel, RMT_MODE_TX, mPin);
-
-        if (FASTLED_RMT_BUILTIN_DRIVER) {
-            // -- Use the built-in RMT driver to send all the data in one shot
-            rmt_register_tx_end_callback(doneOnChannel, 0);
-            writeAllRMTItems();
-        } else {
-            // -- Use our custom driver to send the data incrementally
-
-            // -- Turn on the interrupts
-            rmt_set_tx_intr_en(mRMT_channel, true);
-        
-            // -- Initialize the counters that keep track of where we are in
-            //    the pixel data.
-            mCurPulse = 0;
-            mRGB_channel = 0;
-
-            // -- Fill both halves of the buffer
-            fillHalfRMTBuffer();
-            fillHalfRMTBuffer();
-
-            // -- Turn on the interrupts
-            rmt_set_tx_intr_en(mRMT_channel, true);
-            
-            // -- Start the RMT TX operation
-            rmt_tx_start(mRMT_channel, true);
-        }
-    }
-
-    static void doneOnChannel(rmt_channel_t channel, void * arg)
-    {
-        ClocklessController * controller = static_cast<ClocklessController*>(gOnChannel[channel]);
-        portBASE_TYPE HPTaskAwoken = 0;
-
-        // -- Turn off output on the pin
-        gpio_matrix_out(controller->mPin, 0x100, 0, 0);
-
-        gOnChannel[channel] = NULL;
-        gNumDone++;
-
-        if (gNumDone == gNumControllers) {
-            // -- If this is the last controller, signal that we are all done
-            xSemaphoreGiveFromISR(gTX_sem, &HPTaskAwoken);
-            if(HPTaskAwoken == pdTRUE) portYIELD_FROM_ISR();
-        } else {
-            // -- Otherwise, if there are still controllers waiting, then
-            //    start the next one on this channel
-            if (gNext < gNumControllers)
-                startNext(channel);
-        }
-=======
-	// -- Only need to do this once
-	if (gInitialized) return;
-
-	for (int i = 0; i < FASTLED_RMT_MAX_CHANNELS; i++) {
-	    gOnChannel[i] = NULL;
-
-	    // -- RMT configuration for transmission
-	    rmt_config_t rmt_tx;
-	    rmt_tx.channel = rmt_channel_t(i);
-	    rmt_tx.rmt_mode = RMT_MODE_TX;
-	    rmt_tx.gpio_num = mPin;  // The particular pin will be assigned later
-	    rmt_tx.mem_block_num = 1;
-	    rmt_tx.clk_div = DIVIDER;
-	    rmt_tx.tx_config.loop_en = false;
-	    rmt_tx.tx_config.carrier_level = RMT_CARRIER_LEVEL_LOW;
-	    rmt_tx.tx_config.carrier_en = false;
-	    rmt_tx.tx_config.idle_level = RMT_IDLE_LEVEL_LOW;
-	    rmt_tx.tx_config.idle_output_en = true;
-		
-	    // -- Apply the configuration
-	    rmt_config(&rmt_tx);
-
-	    if (FASTLED_RMT_BUILTIN_DRIVER) {
-		rmt_driver_install(rmt_channel_t(i), 0, 0);
-	    } else {
-		// -- Set up the RMT to send 1/2 of the pulse buffer and then
-		//    generate an interrupt. When we get this interrupt we
-		//    fill the other half in preparation (kind of like double-buffering)
-		rmt_set_tx_thr_intr_en(rmt_channel_t(i), true, MAX_PULSES);
-	    }
-	}
-
-	// -- Create a semaphore to block execution until all the controllers are done
-	if (gTX_sem == NULL) {
-	    gTX_sem = xSemaphoreCreateBinary();
-	    xSemaphoreGive(gTX_sem);
-	}
-		
-	if ( ! FASTLED_RMT_BUILTIN_DRIVER) {
-	    // -- Allocate the interrupt if we have not done so yet. This
-	    //    interrupt handler must work for all different kinds of
-	    //    strips, so it delegates to the refill function for each
-	    //    specific instantiation of ClocklessController.
-	    if (gRMT_intr_handle == NULL)
-		esp_intr_alloc(ETS_RMT_INTR_SOURCE, 0, interruptHandler, 0, &gRMT_intr_handle);
-	}
-
-	gInitialized = true;
-    }
-
-    virtual void showPixels(PixelController<RGB_ORDER> & pixels)
-    {
-	if (gNumStarted == 0) {
-	    // -- First controller: make sure everything is set up
-	    initRMT();
-	    xSemaphoreTake(gTX_sem, portMAX_DELAY);
-	}
-
-	// -- Initialize the local state, save a pointer to the pixel
-	//    data. We need to make a copy because pixels is a local
-	//    variable in the calling function, and this data structure
-	//    needs to outlive this call to showPixels.
-
-	if (mPixels != NULL) delete mPixels;
-	mPixels = new PixelController<RGB_ORDER>(pixels);
-	
-	// -- Keep track of the number of strips we've seen
-	gNumStarted++;
-
-	// -- The last call to showPixels is the one responsible for doing
-	//    all of the actual worl
-	if (gNumStarted == gNumControllers) {
-	    gNext = 0;
-
-	    // -- First, fill all the available channels
-	    int channel = 0;
-	    while (channel < FASTLED_RMT_MAX_CHANNELS && gNext < gNumControllers) {
-		startNext(channel);
-		channel++;
-	    }
-
-	    // -- Wait here while the rest of the data is sent. The interrupt handler
-	    //    will keep refilling the RMT buffers until it is all sent; then it
-	    //    gives the semaphore back.
-	    xSemaphoreTake(gTX_sem, portMAX_DELAY);
-	    xSemaphoreGive(gTX_sem);
-
-	    // -- Reset the counters
-	    gNumStarted = 0;
-	    gNumDone = 0;
-	    gNext = 0;
-	}
-    }
-
-    // -- Start up the next controller
-    //    This method is static so that it can dispatch to the appropriate
-    //    startOnChannel method of the given controller.
-    static void startNext(int channel)
-    {
-	if (gNext < gNumControllers) {
-	    ClocklessController * pController = static_cast<ClocklessController*>(gControllers[gNext]);
-	    pController->startOnChannel(channel);
-	    gNext++;
-	}
-    }
-
-    virtual void startOnChannel(int channel)
-    {
-	// -- Assign this channel and configure the RMT
-	mRMT_channel = rmt_channel_t(channel);
-
-	// -- Store a reference to this controller, so we can get it
-	//    inside the interrupt handler
-	gOnChannel[channel] = this;
-
-	// -- Assign the pin to this channel
-	rmt_set_pin(mRMT_channel, RMT_MODE_TX, mPin);
-
-	if (FASTLED_RMT_BUILTIN_DRIVER) {
-	    // -- Use the built-in RMT driver to send all the data in one shot
-	    rmt_register_tx_end_callback(doneOnChannel, 0);
-	    writeAllRMTItems();
-	} else {
-	    // -- Use our custom driver to send the data incrementally
-
-	    // -- Turn on the interrupts
-	    rmt_set_tx_intr_en(mRMT_channel, true);
-	
-	    // -- Initialize the counters that keep track of where we are in
-	    //    the pixel data.
-	    mCurPulse = 0;
-	    mRGB_channel = 0;
-
-	    // -- Fill both halves of the buffer
-	    fillHalfRMTBuffer();
-	    fillHalfRMTBuffer();
-
-	    // -- Turn on the interrupts
-	    rmt_set_tx_intr_en(mRMT_channel, true);
-	    
-	    // -- Start the RMT TX operation
-	    rmt_tx_start(mRMT_channel, true);
-	}
-    }
-
-    static void doneOnChannel(rmt_channel_t channel, void * arg)
-    {
-	ClocklessController * controller = static_cast<ClocklessController*>(gOnChannel[channel]);
-        portBASE_TYPE HPTaskAwoken = 0;
-
-	// -- Turn off output on the pin
-	gpio_matrix_out(controller->mPin, 0x100, 0, 0);
-
-	gOnChannel[channel] = NULL;
-	gNumDone++;
-
-	if (gNumDone == gNumControllers) {
-	    // -- If this is the last controller, signal that we are all done
-	    xSemaphoreGiveFromISR(gTX_sem, &HPTaskAwoken);
-	    if(HPTaskAwoken == pdTRUE) portYIELD_FROM_ISR();
-	} else {
-	    // -- Otherwise, if there are still controllers waiting, then
-	    //    start the next one on this channel
-	    if (gNext < gNumControllers)
-		startNext(channel);
-	}
->>>>>>> upstream/master
-    }
-    
-    static IRAM_ATTR void interruptHandler(void *arg)
-    {
-        // -- The basic structure of this code is borrowed from the
-        //    interrupt handler in esp-idf/components/driver/rmt.c
-        uint32_t intr_st = RMT.int_st.val;
-        uint8_t channel;
-
-        for (channel = 0; channel < FASTLED_RMT_MAX_CHANNELS; channel++) {
-            int tx_done_bit = channel * 3;
-            int tx_next_bit = channel + 24;
-
-            if (gOnChannel[channel] != NULL) {
-
-<<<<<<< HEAD
-                ClocklessController * controller = static_cast<ClocklessController*>(gOnChannel[channel]);
-
-                // -- More to send on this channel
-                if (intr_st & BIT(tx_next_bit)) {
-                    RMT.int_clr.val |= BIT(tx_next_bit);
-
-                    // -- Refill the half of the buffer that we just finished,
-                    //    allowing the other half to proceed.
-                    controller->fillHalfRMTBuffer();
-                }
-
-                // -- Transmission is complete on this channel
-                if (intr_st & BIT(tx_done_bit)) {
-                    RMT.int_clr.val |= BIT(tx_done_bit);
-                    doneOnChannel(rmt_channel_t(channel), 0);
-=======
-		ClocklessController * controller = static_cast<ClocklessController*>(gOnChannel[channel]);
-
-		// -- More to send on this channel
-                if (intr_st & BIT(tx_next_bit)) {
-		    RMT.int_clr.val |= BIT(tx_next_bit);
-
-                    // -- Refill the half of the buffer that we just finished,
-                    //    allowing the other half to proceed.
-		    controller->fillHalfRMTBuffer();
-                }
-
-		// -- Transmission is complete on this channel
-                if (intr_st & BIT(tx_done_bit)) {
-                    RMT.int_clr.val |= BIT(tx_done_bit);
-		    doneOnChannel(rmt_channel_t(channel), 0);
->>>>>>> upstream/master
-                }
-            }
-        }
-    }
-
-    virtual void fillHalfRMTBuffer()
-    {
-        // -- Fill half of the RMT pulse buffer
-
-        //    The buffer holds 64 total pulse items, so this loop converts
-        //    as many pixels as can fit in half of the buffer (MAX_PULSES =
-        //    32 items). In our case, each pixel consists of three bytes,
-        //    each bit turns into one pulse item -- 24 items per pixel. So,
-        //    each half of the buffer can hold 1 and 1/3 of a pixel.
-
-        //    The member variable mCurPulse keeps track of which of the 64
-        //    items we are writing. During the first call to this method it
-        //    fills 0-31; in the second call it fills 32-63, and then wraps
-        //    back around to zero.
-
-        //    When we run out of pixel data, just fill the remaining items
-        //    with zero pulses.
-
-        uint16_t pulse_count = 0; // Ranges from 0-31 (half a buffer)
-        uint32_t byteval = 0;
-        uint32_t one_val = mOne.val;
-        uint32_t zero_val = mZero.val;
-        bool done_strip = false;
-
-        while (pulse_count < MAX_PULSES) {
-            if (! mPixels->has(1)) {
-<<<<<<< HEAD
-                if (mCurPulse > 0) {
-                    // -- Extend the last pulse to force the strip to latch. Honestly, I'm not
-                    //    sure if this is really necessary.
-                    // RMTMEM.chan[mRMT_channel].data32[mCurPulse-1].duration1 = RMT_RESET_DURATION;
-                }
-=======
->>>>>>> upstream/master
-                done_strip = true;
-                break;
-            }
-
-            // -- Cycle through the R,G, and B values in the right order
-            switch (mRGB_channel) {
-            case 0:
-                byteval = mPixels->loadAndScale0();
-                mRGB_channel = 1;
-                break;
-            case 1:
-                byteval = mPixels->loadAndScale1();
-                mRGB_channel = 2;
-                break;
-            case 2:
-                byteval = mPixels->loadAndScale2();
-                mPixels->advanceData();
-                mPixels->stepDithering();
-                mRGB_channel = 0;
-                break;
-            default:
-                break;
-            }
-
-            byteval <<= 24;
-            // Shift bits out, MSB first, setting RMTMEM.chan[n].data32[x] to the 
-            // rmt_item32_t value corresponding to the buffered bit value
-            for (register uint32_t j = 0; j < 8; j++) {
-                uint32_t val = (byteval & 0x80000000L) ? one_val : zero_val;
-                RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = val;
-                byteval <<= 1;
-                mCurPulse++;
-                pulse_count++;
-            }
-<<<<<<< HEAD
-=======
-
-	    if (done_strip)
-		RMTMEM.chan[mRMT_channel].data32[mCurPulse-1].duration1 = RMT_RESET_DURATION;
->>>>>>> upstream/master
-        }
-        
-        if (done_strip) {
-            // -- And fill the remaining items with zero pulses. The zero values triggers
-            //    the tx_done interrupt.
-            while (pulse_count < MAX_PULSES) {
-                RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = 0;
-                mCurPulse++;
-                pulse_count++;
-            }
-        }
-
-        // -- When we have filled the back half the buffer, reset the position to the first half
-        if (mCurPulse >= MAX_PULSES*2)
-            mCurPulse = 0;
-    }
-
-    virtual void writeAllRMTItems()
-    {
-        // -- Compute the pulse values for the whole strip at once.
-        //    Requires a large buffer
-<<<<<<< HEAD
-        mBufferSize = mPixels->size() * 3 * 8;
-=======
-	mBufferSize = mPixels->size() * 3 * 8;
->>>>>>> upstream/master
-
-        // TODO: need a specific number here
-        if (mBuffer == NULL) {
-            mBuffer = (rmt_item32_t *) calloc( mBufferSize, sizeof(rmt_item32_t));
-        }
-
-        mCurPulse = 0;
-        mRGB_channel = 0;
-        uint32_t byteval = 0;
-        while (mPixels->has(1)) {
-            // -- Cycle through the R,G, and B values in the right order
-            switch (mRGB_channel) {
-            case 0:
-                byteval = mPixels->loadAndScale0();
-                mRGB_channel = 1;
-                break;
-            case 1:
-                byteval = mPixels->loadAndScale1();
-                mRGB_channel = 2;
-                break;
-            case 2:
-                byteval = mPixels->loadAndScale2();
-                mPixels->advanceData();
-                mPixels->stepDithering();
-                mRGB_channel = 0;
-                break;
-            default:
-                break;
-            }
-
-            byteval <<= 24;
-            // Shift bits out, MSB first, setting RMTMEM.chan[n].data32[x] to the 
-            // rmt_item32_t value corresponding to the buffered bit value
-            for (register uint32_t j = 0; j < 8; j++) {
-                mBuffer[mCurPulse] = (byteval & 0x80000000L) ? mOne : mZero;
-                byteval <<= 1;
-                mCurPulse++;
-            }
-        }
-
-        mBuffer[mCurPulse-1].duration1 = RMT_RESET_DURATION;
-        assert(mCurPulse == mBufferSize);
-
-<<<<<<< HEAD
-        rmt_write_items(mRMT_channel, mBuffer, mBufferSize, false);
-=======
-	rmt_write_items(mRMT_channel, mBuffer, mBufferSize, false);
->>>>>>> upstream/master
-    }
-};
-
-FASTLED_NAMESPACE_END

From 1c2d03fcd521f5d6c659460940894cb3fe46bd10 Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Thu, 2 May 2019 14:49:25 -0400
Subject: [PATCH 051/204] Changed the RMT driver so that it no longer needs to
 copy all the pixel data up front, which was slowing it down and using a lot
 of extra memory

---
 platforms/esp/32/clockless_rmt_esp32.h | 90 +++++++++++++-------------
 1 file changed, 46 insertions(+), 44 deletions(-)

diff --git a/platforms/esp/32/clockless_rmt_esp32.h b/platforms/esp/32/clockless_rmt_esp32.h
index 248325ad3b..d010b242b8 100644
--- a/platforms/esp/32/clockless_rmt_esp32.h
+++ b/platforms/esp/32/clockless_rmt_esp32.h
@@ -112,6 +112,7 @@ __attribute__ ((always_inline)) inline static uint32_t __clock_cycles() {
 }
 
 #define FASTLED_HAS_CLOCKLESS 1
+#define NUM_COLOR_CHANNELS 3
 
 // -- Configuration constants
 #define DIVIDER             2 /* 4, 8 still seem to work, but timings become marginal */
@@ -185,10 +186,9 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
     rmt_item32_t   mZero;
     rmt_item32_t   mOne;
 
-    // -- State information for keeping track of where we are in the pixel data
-    uint8_t *      mPixelData = NULL;
-    int            mSize = 0;
-    int            mCurByte;
+    // -- Save the pixel controller
+    PixelController<RGB_ORDER> * mPixels;
+    int            mCurColor;
     uint16_t       mCurPulse;
 
     // -- Buffer to hold all of the pulses. For the version that uses
@@ -200,6 +200,10 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
 
     void init()
     {
+        // -- Allocate space to save the pixel controller
+        //    during parallel output
+        mPixels = (PixelController<RGB_ORDER> *) malloc(sizeof(PixelController<RGB_ORDER>));
+        
         // -- Precompute rmt items corresponding to a zero bit and a one bit
         //    according to the timing values given in the template instantiation
         // T1H
@@ -288,17 +292,15 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
             xSemaphoreTake(gTX_sem, portMAX_DELAY);
         }
 
-        // -- Initialize the local state, save a pointer to the pixel
-        //    data. We need to make a copy because pixels is a local
-        //    variable in the calling function, and this data structure
-        //    needs to outlive this call to showPixels.
-
-        //if (mPixels != NULL) delete mPixels;
-        //mPixels = new PixelController<RGB_ORDER>(pixels);
         if (FASTLED_RMT_BUILTIN_DRIVER)
             convertAllPixelData(pixels);
-        else
-            copyPixelData(pixels);
+        else {
+            // -- Initialize the local state, save a pointer to the pixel
+            //    data. We need to make a copy because pixels is a local
+            //    variable in the calling function, and this data structure
+            //    needs to outlive this call to showPixels.
+            (*mPixels) = pixels;
+        }        
 
         // -- Keep track of the number of strips we've seen
         gNumStarted++;
@@ -328,33 +330,6 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         }
     }
 
-    // -- Copy pixel data
-    //    Make a safe copy of the pixel data, so that the FastLED show
-    //    function can continue to the next controller while the RMT
-    //    device starts sending this data asynchronously.
-    virtual void copyPixelData(PixelController<RGB_ORDER> & pixels)
-    {
-        // -- Make sure we have a buffer of the right size
-        //    (3 bytes per pixel)
-        int size_needed = pixels.size() * 3;
-        if (size_needed > mSize) {
-            if (mPixelData != NULL) free(mPixelData);
-            mSize = size_needed;
-            mPixelData = (uint8_t *) malloc( mSize);
-        }
-
-        // -- Cycle through the R,G, and B values in the right order,
-        //    storing the resulting raw pixel data in the buffer.
-        int cur = 0;
-        while (pixels.has(1)) {
-            mPixelData[cur++] = pixels.loadAndScale0();
-            mPixelData[cur++] = pixels.loadAndScale1();
-            mPixelData[cur++] = pixels.loadAndScale2();
-            pixels.advanceData();
-            pixels.stepDithering();
-        }
-    }
-
     // -- Convert all pixels to RMT pulses
     //    This function is only used when the user chooses to use the
     //    built-in RMT driver, which needs all of the RMT pulses
@@ -440,7 +415,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
             // -- Initialize the counters that keep track of where we are in
             //    the pixel data.
             mCurPulse = 0;
-            mCurByte = 0;
+            mCurColor = 0;
 
             // -- Fill both halves of the buffer
             fillHalfRMTBuffer();
@@ -519,6 +494,33 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         }
     }
 
+    uint8_t getNextByte()
+    {
+        uint8_t byte;
+
+        // -- Cycle through the color channels
+        switch (mCurColor) {
+        case 0: 
+            byte = mPixels->loadAndScale0();
+            break;
+        case 1: 
+            byte = mPixels->loadAndScale0();
+            break;
+        case 2: 
+            byte = mPixels->loadAndScale0();
+            mPixels->advanceData();
+            mPixels->stepDithering();
+            break;
+        default:
+            // -- This is bad!
+            byte = 0;
+        }
+
+        mCurColor = (mCurColor + 1) % NUM_COLOR_CHANNELS;
+
+        return byte;
+    }
+
     // -- Fill the RMT buffer
     //    This function fills the next 32 slots in the RMT write
     //    buffer with pixel data. It also handles the case where the
@@ -533,9 +535,9 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         //    into RMT pulses that encode the zeros and ones.
         int pulses = 0;
         uint32_t byteval;
-        while (pulses < 32 && mCurByte < mSize) {
+        while (pulses < 32 && mPixels->has(1)) {
             // -- Get one byte
-            byteval = mPixelData[mCurByte++];
+            byteval = getNextByte();
             byteval <<= 24;
             // Shift bits out, MSB first, setting RMTMEM.chan[n].data32[x] to the 
             // rmt_item32_t value corresponding to the buffered bit value
@@ -550,7 +552,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
 
         // -- When we reach the end of the pixel data, fill the rest of the
         //    RMT buffer with 0's, which signals to the device that we're done.
-        if (mCurByte == mSize) {
+        if ( ! mPixels->has(1) ) {
             while (pulses < 32) {
                 RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = 0;
                 mCurPulse++;

From c4b0202406618a6d9757d35f06fce86395bcbd86 Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Tue, 7 May 2019 12:25:27 -0400
Subject: [PATCH 052/204] Fixed a typo: make sure to load a different channel
 each time

---
 platforms/esp/32/clockless_rmt_esp32.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/platforms/esp/32/clockless_rmt_esp32.h b/platforms/esp/32/clockless_rmt_esp32.h
index d010b242b8..caefa141a8 100644
--- a/platforms/esp/32/clockless_rmt_esp32.h
+++ b/platforms/esp/32/clockless_rmt_esp32.h
@@ -504,10 +504,10 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
             byte = mPixels->loadAndScale0();
             break;
         case 1: 
-            byte = mPixels->loadAndScale0();
+            byte = mPixels->loadAndScale1();
             break;
         case 2: 
-            byte = mPixels->loadAndScale0();
+            byte = mPixels->loadAndScale2();
             mPixels->advanceData();
             mPixels->stepDithering();
             break;

From 408e36d6353104dd8136dde539ffa7da8dc319d2 Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Tue, 14 May 2019 11:33:06 -0400
Subject: [PATCH 053/204] Commented out all the Serial.print output

---
 platforms/esp/32/clockless_i2s_esp32.h | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/platforms/esp/32/clockless_i2s_esp32.h b/platforms/esp/32/clockless_i2s_esp32.h
index 07b2ed5d67..a4d15ba750 100644
--- a/platforms/esp/32/clockless_i2s_esp32.h
+++ b/platforms/esp/32/clockless_i2s_esp32.h
@@ -259,16 +259,16 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
     static void initBitPatterns()
     {
         // Precompute the bit patterns based on the I2S sample rate
-        Serial.println("Setting up fastled using I2S");
+        // Serial.println("Setting up fastled using I2S");
 
         // -- First, convert back to ns from CPU clocks
         uint32_t T1ns = ESPCLKS_TO_NS(T1);
         uint32_t T2ns = ESPCLKS_TO_NS(T2);
         uint32_t T3ns = ESPCLKS_TO_NS(T3);
         
-        Serial.print("T1 = "); Serial.print(T1); Serial.print(" ns "); Serial.println(T1ns);
-        Serial.print("T2 = "); Serial.print(T2); Serial.print(" ns "); Serial.println(T2ns);
-        Serial.print("T3 = "); Serial.print(T3); Serial.print(" ns "); Serial.println(T3ns);
+        // Serial.print("T1 = "); Serial.print(T1); Serial.print(" ns "); Serial.println(T1ns);
+        // Serial.print("T2 = "); Serial.print(T2); Serial.print(" ns "); Serial.println(T2ns);
+        // Serial.print("T3 = "); Serial.print(T3); Serial.print(" ns "); Serial.println(T3ns);
         
         /*
          We calculate the best pcgd to the timing
@@ -284,7 +284,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         if(smallest>T3)
             smallest=T3;
         double freq=(double)1/(double)(T1ns + T2ns + T3ns);
-        Serial.printf("chipset frequency:%f Khz\n", 1000000L*freq);
+        // Serial.printf("chipset frequency:%f Khz\n", 1000000L*freq);
        // Serial.printf("smallest %d\n",smallest);
         int pgc_=1;
         int precision=0;
@@ -297,8 +297,8 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
             //Serial.printf("%d %d\n",pgc_,(a+b+c)/pgc_);
         }
         pgc_=pgcd(smallest,precision,T1,T2,T3);
-        Serial.printf("pgcd %d precision:%d\n",pgc_,precision);
-        Serial.printf("nb pulse per bit:%d\n",T1/pgc_ +T2/pgc_ +T3/pgc_);
+        // Serial.printf("pgcd %d precision:%d\n",pgc_,precision);
+        // Serial.printf("nb pulse per bit:%d\n",T1/pgc_ +T2/pgc_ +T3/pgc_);
         gPulsesPerBit=(int)T1/pgc_ +(int)T2/pgc_ +(int)T3/pgc_;
         /*
          we calculate the duration of one pulse nd htre base frequency of the led
@@ -309,7 +309,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
          */
 
         freq=1000000000L*freq*gPulsesPerBit;
-        Serial.printf("needed frequency (nbpiulse per bit)*(chispset frequency):%f Mhz\n",freq/1000000);
+        // Serial.printf("needed frequency (nbpiulse per bit)*(chispset frequency):%f Mhz\n",freq/1000000);
         
         /*
          we do calculate the needed N a and b
@@ -362,9 +362,9 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         //Serial.printf("freq %f %f\n",freq,I2S_BASE_CLK/(CLOCK_DIVIDER_N+(double)CLOCK_DIVIDER_B/CLOCK_DIVIDER_A));
         freq=1/(CLOCK_DIVIDER_N+(double)CLOCK_DIVIDER_B/CLOCK_DIVIDER_A);
         freq=freq*I2S_BASE_CLK;
-        Serial.printf("calculted for i2s frequency:%f Mhz N:%d B:%d A:%d\n",freq/1000000,CLOCK_DIVIDER_N,CLOCK_DIVIDER_B,CLOCK_DIVIDER_A);
+        // Serial.printf("calculted for i2s frequency:%f Mhz N:%d B:%d A:%d\n",freq/1000000,CLOCK_DIVIDER_N,CLOCK_DIVIDER_B,CLOCK_DIVIDER_A);
         double pulseduration=1000000000/freq;
-        Serial.printf("Pulse duration: %f ns\n",pulseduration);
+        // Serial.printf("Pulse duration: %f ns\n",pulseduration);
         // gPulsesPerBit = (T1ns + T2ns + T3ns)/FASTLED_I2S_NS_PER_PULSE;
         
         //Serial.print("Pulses per bit: "); Serial.println(gPulsesPerBit);
@@ -375,7 +375,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         //Serial.print(T1ns+T2ns); Serial.print("ns --- ");
         //Serial.print(ones_for_one); Serial.print(" 1 bits");
         //Serial.print(" = "); Serial.print(ones_for_one * FASTLED_I2S_NS_PER_PULSE); Serial.println("ns");
-        Serial.printf("one bit : target %d  ns --- %d  pulses 1 bit = %f ns\n",T1ns+T2ns,ones_for_one ,ones_for_one*pulseduration);
+        // Serial.printf("one bit : target %d  ns --- %d  pulses 1 bit = %f ns\n",T1ns+T2ns,ones_for_one ,ones_for_one*pulseduration);
         
         
         int i = 0;
@@ -394,7 +394,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
        // Serial.print(T1ns); Serial.print("ns --- ");
         //Serial.print(ones_for_zero); Serial.print(" 1 bits");
         //Serial.print(" = "); Serial.print(ones_for_zero * FASTLED_I2S_NS_PER_PULSE); Serial.println("ns");
-        Serial.printf("Zero bit : target %d ns --- %d pulses  1 bit =   %f ns\n",T1ns,ones_for_zero ,ones_for_zero*pulseduration);
+        // Serial.printf("Zero bit : target %d ns --- %d pulses  1 bit =   %f ns\n",T1ns,ones_for_zero ,ones_for_zero*pulseduration);
         i = 0;
         while ( i < ones_for_zero ) {
             gZeroBit[i] = 0xFFFFFF00;

From 6b6c9c1c35f235e846334d9cd8984b4b4399f471 Mon Sep 17 00:00:00 2001
From: Mark Kriegsman <1334634+kriegsman@users.noreply.github.com>
Date: Wed, 15 May 2019 06:46:59 -0400
Subject: [PATCH 054/204] Removed conflicting (and unneeded) typedef of
 'boolean', fixes #790

---
 platforms/esp/32/led_sysdefs_esp32.h     | 2 +-
 platforms/esp/8266/led_sysdefs_esp8266.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/platforms/esp/32/led_sysdefs_esp32.h b/platforms/esp/32/led_sysdefs_esp32.h
index 68e782398e..5cd374e2f4 100644
--- a/platforms/esp/32/led_sysdefs_esp32.h
+++ b/platforms/esp/32/led_sysdefs_esp32.h
@@ -12,7 +12,7 @@
 typedef volatile uint32_t RoReg;
 typedef volatile uint32_t RwReg;
 typedef unsigned long prog_uint32_t;
-typedef bool boolean;
+
 
 // Default to NOT using PROGMEM here
 #ifndef FASTLED_USE_PROGMEM
diff --git a/platforms/esp/8266/led_sysdefs_esp8266.h b/platforms/esp/8266/led_sysdefs_esp8266.h
index a3f6ac3237..26dffdcf52 100644
--- a/platforms/esp/8266/led_sysdefs_esp8266.h
+++ b/platforms/esp/8266/led_sysdefs_esp8266.h
@@ -12,7 +12,7 @@
 typedef volatile uint32_t RoReg;
 typedef volatile uint32_t RwReg;
 typedef uint32_t prog_uint32_t;
-typedef uint8_t boolean;
+
 
 // Default to NOT using PROGMEM here
 #ifndef FASTLED_USE_PROGMEM

From e86f850e932e792772c158bc8538b57c5a411eb4 Mon Sep 17 00:00:00 2001
From: starkatt <50758044+starkatt@users.noreply.github.com>
Date: Fri, 17 May 2019 19:47:41 -0700
Subject: [PATCH 055/204] Update fastpin_arm_d21 add ItsyBitsy M0 (#794)

Add support for Adafruit ItsyBitsy M0 including onboard APA102 ("Dotstar") LED
---
 platforms/arm/d21/fastpin_arm_d21.h | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/platforms/arm/d21/fastpin_arm_d21.h b/platforms/arm/d21/fastpin_arm_d21.h
index e35b4cd8e0..1606d65084 100644
--- a/platforms/arm/d21/fastpin_arm_d21.h
+++ b/platforms/arm/d21/fastpin_arm_d21.h
@@ -188,8 +188,24 @@ _DEFPIN_ARM( 3, 0, 7); _DEFPIN_ARM( 4, 0, 6); _DEFPIN_ARM( 7, 0, 0); _DEFPIN_ARM
 
 #define HAS_HARDWARE_PIN_SUPPORT 1
 
-#endif
+#elif defined(ADAFRUIT_ITSYBITSY_M0)
+
+#define MAX_PIN 16
+_DEFPIN_ARM( 2, 0, 14); _DEFPIN_ARM( 3, 0, 9); _DEFPIN_ARM( 4, 0, 8);
+_DEFPIN_ARM( 5, 0, 15); _DEFPIN_ARM( 6, 0, 20); _DEFPIN_ARM( 7, 0, 21);
+_DEFPIN_ARM( 8, 0, 6); _DEFPIN_ARM( 9, 0, 7); _DEFPIN_ARM( 10, 0, 18);
+_DEFPIN_ARM( 11, 0, 16); _DEFPIN_ARM( 12, 0, 19); _DEFPIN_ARM( 13, 0, 17);
+_DEFPIN_ARM( 29, 0, 10); // MOSI
+_DEFPIN_ARM( 30, 0, 11); // SCK
+_DEFPIN_ARM( 40, 0, 0); //APA102 Clock
+_DEFPIN_ARM( 41, 0, 1) //APA102 Data
+
+#define SPI_DATA  29
+#define SPI_CLOCK 30
 
+#define HAS_HARDWARE_PIN_SUPPORT 1
+
+#endif
 
 
 #endif // FASTLED_FORCE_SOFTWARE_PINS

From fe94a0ce3b71f368a81eed5685f6082df80894c3 Mon Sep 17 00:00:00 2001
From: devoh747 <dvohwinkel@nc.rr.com>
Date: Thu, 6 Jun 2019 23:59:05 -0400
Subject: [PATCH 056/204] Added in manitou48's fixes to FastLED for ItsyBitsy
 M4 (SAMD51 boards) (#803)

* Files fixed by manitou48 m4 feather express update

I have not been able to find user manitou48 so I forked his changes and am trying to make a pull request to get it into FastLED.

* delete led_sysdefs.h

Move led_sysdefs.h to root of FastLED where it belongs.

* Delete platforms.h

Move platforms.h to root of FastLED where it belongs.

* move files to root of FastLED

The led_sysdefs.h and platforms.h files belong in the root of the FastLED directory, not in platforms/arm/d51
---
 led_sysdefs.h                         |   2 +-
 platforms.h                           |   2 +-
 platforms/arm/d51/README.txt          |   4 +
 platforms/arm/d51/clockless_arm_d51.h | 140 +++++++++++++++++++-------
 platforms/arm/d51/fastpin_arm_d51.h   |  23 ++++-
 5 files changed, 131 insertions(+), 40 deletions(-)
 create mode 100644 platforms/arm/d51/README.txt

diff --git a/led_sysdefs.h b/led_sysdefs.h
index 13fbf4c32a..ea8c14f443 100644
--- a/led_sysdefs.h
+++ b/led_sysdefs.h
@@ -21,7 +21,7 @@
 #include "platforms/arm/sam/led_sysdefs_arm_sam.h"
 #elif defined(STM32F10X_MD) || defined(__STM32F1__)
 #include "platforms/arm/stm32/led_sysdefs_arm_stm32.h"
-#elif defined(__SAMD21G18A__) || defined(__SAMD21J18A__) || defined(__SAMD21E17A__) || defined(__SAMD21E18A__) || defined(__SAMD51G19A__)
+#elif defined(__SAMD21G18A__) || defined(__SAMD21J18A__) || defined(__SAMD21E17A__) || defined(__SAMD21E18A__) || defined(__SAMD51G19A__) || defined(__SAMD51J19A__)
 #include "platforms/arm/d21/led_sysdefs_arm_d21.h"
 #elif defined(ESP8266)
 #include "platforms/esp/8266/led_sysdefs_esp8266.h"
diff --git a/platforms.h b/platforms.h
index 88bf462d4e..625791bc27 100644
--- a/platforms.h
+++ b/platforms.h
@@ -23,7 +23,7 @@
 #include "platforms/arm/stm32/fastled_arm_stm32.h"
 #elif defined(__SAMD21G18A__) || defined(__SAMD21J18A__) || defined(__SAMD21E17A__) || defined(__SAMD21E18A__)
 #include "platforms/arm/d21/fastled_arm_d21.h"
-#elif defined(__SAMD51G19A__)
+#elif defined(__SAMD51G19A__) || defined(__SAMD51J19A__)
 #include "platforms/arm/d51/fastled_arm_d51.h"
 #elif defined(ESP8266)
 #include "platforms/esp/8266/fastled_esp8266.h"
diff --git a/platforms/arm/d51/README.txt b/platforms/arm/d51/README.txt
new file mode 100644
index 0000000000..b00fb670af
--- /dev/null
+++ b/platforms/arm/d51/README.txt
@@ -0,0 +1,4 @@
+FastLED updates for Adafruit FEATHER M4 and fixes to ITSYBITSY M4 compiles
+  SAMD51
+
+only tested on FEATHER M4 with DOTSTAR and neopixel strips
diff --git a/platforms/arm/d51/clockless_arm_d51.h b/platforms/arm/d51/clockless_arm_d51.h
index a543ec18ab..0c3f6d4dac 100644
--- a/platforms/arm/d51/clockless_arm_d51.h
+++ b/platforms/arm/d51/clockless_arm_d51.h
@@ -1,62 +1,128 @@
 #ifndef __INC_CLOCKLESS_ARM_D51
 #define __INC_CLOCKLESS_ARM_D51
 
-// D51 is an M4 chip, however the M0 clockless logic seems to work.
-#include "../common/m0clockless.h"
 FASTLED_NAMESPACE_BEGIN
+
+// Definition for a single channel clockless controller for SAMD51
+// See clockless.h for detailed info on how the template parameters are used.
+#define ARM_DEMCR               (*(volatile uint32_t *)0xE000EDFC) // Debug Exception and Monitor Control
+#define ARM_DEMCR_TRCENA                (1 << 24)        // Enable debugging & monitoring blocks
+#define ARM_DWT_CTRL            (*(volatile uint32_t *)0xE0001000) // DWT control register
+#define ARM_DWT_CTRL_CYCCNTENA          (1 << 0)                // Enable cycle count
+#define ARM_DWT_CYCCNT          (*(volatile uint32_t *)0xE0001004) // Cycle count register
+
+
 #define FASTLED_HAS_CLOCKLESS 1
 
-template <uint8_t DATA_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = RGB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 50>
+template <int DATA_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = RGB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 50>
 class ClocklessController : public CPixelLEDController<RGB_ORDER> {
-  typedef typename FastPinBB<DATA_PIN>::port_ptr_t data_ptr_t;
-  typedef typename FastPinBB<DATA_PIN>::port_t data_t;
+	typedef typename FastPin<DATA_PIN>::port_ptr_t data_ptr_t;
+	typedef typename FastPin<DATA_PIN>::port_t data_t;
 
-  data_t mPinMask;
-  data_ptr_t mPort;
-  CMinWait<WAIT_TIME> mWait;
+	data_t mPinMask;
+	data_ptr_t mPort;
+	CMinWait<WAIT_TIME> mWait;
 public:
-  virtual void init() {
-    FastPinBB<DATA_PIN>::setOutput();
-    mPinMask = FastPinBB<DATA_PIN>::mask();
-    mPort = FastPinBB<DATA_PIN>::port();
-  }
+	virtual void init() {
+		FastPin<DATA_PIN>::setOutput();
+		mPinMask = FastPin<DATA_PIN>::mask();
+		mPort = FastPin<DATA_PIN>::port();
+	}
 
 	virtual uint16_t getMaxRefreshRate() const { return 400; }
 
-  virtual void showPixels(PixelController<RGB_ORDER> & pixels) {
+protected:
+
+	virtual void showPixels(PixelController<RGB_ORDER> & pixels) {
     mWait.wait();
-    cli();
-    if(!showRGBInternal(pixels)) {
+		if(!showRGBInternal(pixels)) {
       sei(); delayMicroseconds(WAIT_TIME); cli();
       showRGBInternal(pixels);
     }
-    sei();
     mWait.mark();
   }
 
-  // This method is made static to force making register Y available to use for data on AVR - if the method is non-static, then
-  // gcc will use register Y for the this pointer.
-  static uint32_t showRGBInternal(PixelController<RGB_ORDER> pixels) {
-    struct M0ClocklessData data;
-    data.d[0] = pixels.d[0];
-    data.d[1] = pixels.d[1];
-    data.d[2] = pixels.d[2];
-    data.s[0] = pixels.mScale[0];
-    data.s[1] = pixels.mScale[1];
-    data.s[2] = pixels.mScale[2];
-    data.e[0] = pixels.e[0];
-    data.e[1] = pixels.e[1];
-    data.e[2] = pixels.e[2];
-    data.adj = pixels.mAdvance;
-
-    typename FastPin<DATA_PIN>::port_ptr_t portBase = FastPin<DATA_PIN>::port();
-    return showLedData<8,4,T1,T2,T3,RGB_ORDER, WAIT_TIME>(portBase, FastPin<DATA_PIN>::mask(), pixels.mData, pixels.mLen, &data);
-  }
+	template<int BITS> __attribute__ ((always_inline)) inline static void writeBits(register uint32_t & next_mark, register data_ptr_t port, register data_t hi, register data_t lo, register uint8_t & b)  {
+		for(register uint32_t i = BITS-1; i > 0; i--) {
+			while(ARM_DWT_CYCCNT < next_mark);
+			next_mark = ARM_DWT_CYCCNT + (T1+T2+T3);
+			FastPin<DATA_PIN>::fastset(port, hi);
+			if(b&0x80) {
+				while((next_mark - ARM_DWT_CYCCNT) > (T3+(2*(F_CPU/24000000))));
+				FastPin<DATA_PIN>::fastset(port, lo);
+			} else {
+				while((next_mark - ARM_DWT_CYCCNT) > (T2+T3+(2*(F_CPU/24000000))));
+				FastPin<DATA_PIN>::fastset(port, lo);
+			}
+			b <<= 1;
+		}
+
+		while(ARM_DWT_CYCCNT < next_mark);
+		next_mark = ARM_DWT_CYCCNT + (T1+T2+T3);
+		FastPin<DATA_PIN>::fastset(port, hi);
+
+		if(b&0x80) {
+			while((next_mark - ARM_DWT_CYCCNT) > (T3+(2*(F_CPU/24000000))));
+			FastPin<DATA_PIN>::fastset(port, lo);
+		} else {
+			while((next_mark - ARM_DWT_CYCCNT) > (T2+T3+(2*(F_CPU/24000000))));
+			FastPin<DATA_PIN>::fastset(port, lo);
+		}
+	}
 
+	// This method is made static to force making register Y available to use for data on AVR - if the method is non-static, then
+	// gcc will use register Y for the this pointer.
+	static uint32_t showRGBInternal(PixelController<RGB_ORDER> pixels) {
+	    // Get access to the clock
+		ARM_DEMCR    |= ARM_DEMCR_TRCENA;
+		ARM_DWT_CTRL |= ARM_DWT_CTRL_CYCCNTENA;
+		ARM_DWT_CYCCNT = 0;
 
+		register data_ptr_t port = FastPin<DATA_PIN>::port();
+		register data_t hi = *port | FastPin<DATA_PIN>::mask();;
+		register data_t lo = *port & ~FastPin<DATA_PIN>::mask();;
+		*port = lo;
+
+		// Setup the pixel controller and load/scale the first byte
+		pixels.preStepFirstByteDithering();
+		register uint8_t b = pixels.loadAndScale0();
+
+		cli();
+		uint32_t next_mark = ARM_DWT_CYCCNT + (T1+T2+T3);
+
+		while(pixels.has(1)) {
+			pixels.stepDithering();
+			#if (FASTLED_ALLOW_INTERRUPTS == 1)
+			cli();
+			// if interrupts took longer than 45µs, punt on the current frame
+			if(ARM_DWT_CYCCNT > next_mark) {
+				if((ARM_DWT_CYCCNT-next_mark) > ((WAIT_TIME-INTERRUPT_THRESHOLD)*CLKS_PER_US)) { sei(); return 0; }
+			}
+
+			hi = *port | FastPin<DATA_PIN>::mask();
+			lo = *port & ~FastPin<DATA_PIN>::mask();
+			#endif
+			// Write first byte, read next byte
+			writeBits<8+XTRA0>(next_mark, port, hi, lo, b);
+			b = pixels.loadAndScale1();
+
+			// Write second byte, read 3rd byte
+			writeBits<8+XTRA0>(next_mark, port, hi, lo, b);
+			b = pixels.loadAndScale2();
+
+			// Write third byte, read 1st byte of next pixel
+			writeBits<8+XTRA0>(next_mark, port, hi, lo, b);
+			b = pixels.advanceAndLoadAndScale0();
+			#if (FASTLED_ALLOW_INTERRUPTS == 1)
+			sei();
+			#endif
+		};
+
+		sei();
+		return ARM_DWT_CYCCNT;
+	}
 };
 
 FASTLED_NAMESPACE_END
 
-
-#endif // __INC_CLOCKLESS_ARM_D51
+#endif
diff --git a/platforms/arm/d51/fastpin_arm_d51.h b/platforms/arm/d51/fastpin_arm_d51.h
index 5562d846f4..6d14c633ab 100644
--- a/platforms/arm/d51/fastpin_arm_d51.h
+++ b/platforms/arm/d51/fastpin_arm_d51.h
@@ -75,7 +75,8 @@ _DEFPIN_ARM(14, 0,  2); _DEFPIN_ARM(15, 0,  5); _DEFPIN_ARM(16, 1,  8); _DEFPIN_
 _DEFPIN_ARM(18, 0,  4); _DEFPIN_ARM(19, 0,  6); /* A6 is present in variant.h but couldn't find it on the schematic */
 // SDA/SCL
 _DEFPIN_ARM(21, 0, 12); _DEFPIN_ARM(22, 0, 13);
-// MISO/SCK/MOSI
+
+// 23..25  MISO/SCK/MOSI
 _DEFPIN_ARM(23, 1, 23); _DEFPIN_ARM(24, 0,  1); _DEFPIN_ARM(25, 0,  0);
 
 #define SPI_DATA 25
@@ -83,6 +84,26 @@ _DEFPIN_ARM(23, 1, 23); _DEFPIN_ARM(24, 0,  1); _DEFPIN_ARM(25, 0,  0);
 
 #define HAS_HARDWARE_PIN_SUPPORT 1
 
+#elif defined(ADAFRUIT_FEATHER_M4_EXPRESS)
+
+#define MAX_PIN 19
+// D0-D13, including D8 (neopixel)  no pins 2 3
+_DEFPIN_ARM( 0, 1, 17); _DEFPIN_ARM( 1, 1, 16);
+_DEFPIN_ARM( 4, 0, 14); _DEFPIN_ARM( 5, 0, 16); _DEFPIN_ARM( 6, 0,  18);
+_DEFPIN_ARM( 8, 1,  3); _DEFPIN_ARM( 9, 0, 19); _DEFPIN_ARM(10, 0, 20); _DEFPIN_ARM(11, 0, 21);
+_DEFPIN_ARM(12, 0, 22); _DEFPIN_ARM(13, 0, 23);
+// A0-A5
+_DEFPIN_ARM(14, 0,  2); _DEFPIN_ARM(15, 0,  5); _DEFPIN_ARM(16, 1,  8); _DEFPIN_ARM(17, 1,  9);
+_DEFPIN_ARM(18, 0,  4); _DEFPIN_ARM(19, 0,  6); /* A6 is present in variant.h but couldn't find it on the schematic */
+// SDA/SCL
+_DEFPIN_ARM(21, 0, 12); _DEFPIN_ARM(22, 0, 13);
+// 23..25  MISO/MOSI/SCK
+_DEFPIN_ARM(23, 1, 22); _DEFPIN_ARM(24, 1,  23); _DEFPIN_ARM(25, 0,  17);
+
+#define SPI_DATA 24
+#define SPI_CLOCK 25
+
+#define HAS_HARDWARE_PIN_SUPPORT 1
 #endif
 
 

From 3698f8390e1de5628ed6025db133bbc54c14576d Mon Sep 17 00:00:00 2001
From: Henry Gabryjelski <henrygab@users.noreply.github.com>
Date: Thu, 6 Jun 2019 21:03:24 -0700
Subject: [PATCH 057/204] Enable support for nRF52 chipset. (#802)

LED strings for clockless are temporarily limited to 144 LEDs,
adjustable via led_sysdefs.h #define.
---
 chipsets.h                                    |  69 ++-
 fastspi.h                                     |   5 +
 led_sysdefs.h                                 |   2 +
 platforms.cpp                                 |  40 ++
 platforms.h                                   |   2 +
 platforms/arm/nrf52/arbiter_nrf52.h           | 115 ++++
 platforms/arm/nrf52/clockless_arm_nrf52.h     | 371 +++++++++++
 platforms/arm/nrf52/fastled_arm_nrf52.h       |  11 +
 platforms/arm/nrf52/fastpin_arm_nrf52.h       | 328 ++++++++++
 .../arm/nrf52/fastpin_arm_nrf52_variants.h    | 579 ++++++++++++++++++
 platforms/arm/nrf52/fastspi_arm_nrf52.h       | 341 +++++++++++
 platforms/arm/nrf52/led_sysdefs_arm_nrf52.h   |  58 ++
 12 files changed, 1897 insertions(+), 24 deletions(-)
 create mode 100644 platforms.cpp
 create mode 100644 platforms/arm/nrf52/arbiter_nrf52.h
 create mode 100644 platforms/arm/nrf52/clockless_arm_nrf52.h
 create mode 100644 platforms/arm/nrf52/fastled_arm_nrf52.h
 create mode 100644 platforms/arm/nrf52/fastpin_arm_nrf52.h
 create mode 100644 platforms/arm/nrf52/fastpin_arm_nrf52_variants.h
 create mode 100644 platforms/arm/nrf52/fastspi_arm_nrf52.h
 create mode 100644 platforms/arm/nrf52/led_sysdefs_arm_nrf52.h

diff --git a/chipsets.h b/chipsets.h
index 97ddc947bc..5651d2803c 100644
--- a/chipsets.h
+++ b/chipsets.h
@@ -428,6 +428,16 @@ class SM16716Controller : public CPixelLEDController<RGB_ORDER> {
 //
 // Clockless template instantiations - see clockless.h for how the timing values are used
 //
+// Base template for clockless controllers.  These controllers have 3 control points in their cycle for each bit.
+// At T=0        : the line is raised hi to start a bit
+// At T=T1       : the line is dropped low to transmit a zero bit
+// At T=T1+T2    : the line is dropped low to transmit a one bit
+// At T=T1+T2+T3 : the cycle is concluded (next bit can be sent)
+//
+// The units used for T1, T2, and T3 are nanoseconds.
+// For 8MHz/16MHz/24MHz frequencies, these values are also guaranteed
+// to be integral multiples of an 8MHz clock (125ns increments).
+//
 //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
 #ifdef FASTLED_HAS_CLOCKLESS
@@ -435,10 +445,16 @@ class SM16716Controller : public CPixelLEDController<RGB_ORDER> {
 /// Provides timing definitions for the variety of clockless controllers supplied by the library.
 /// @{
 
+// Allow clock that clockless controller is based on to have different
+// frequency than the CPU.
+#if !defined(CLOCKLESS_FREQUENCY)
+    #define CLOCKLESS_FREQUENCY F_CPU
+#endif
+
 // We want to force all avr's to use the Trinket controller when running at 8Mhz, because even the 328's at 8Mhz
 // need the more tightly defined timeframes.
-#if (F_CPU == 8000000 || F_CPU == 16000000 || F_CPU == 24000000) //  || F_CPU == 48000000 || F_CPU == 96000000) // 125ns/clock
-#define FMUL (F_CPU/8000000)
+#if (CLOCKLESS_FREQUENCY == 8000000 || CLOCKLESS_FREQUENCY == 16000000 || CLOCKLESS_FREQUENCY == 24000000) //  || CLOCKLESS_FREQUENCY == 48000000 || CLOCKLESS_FREQUENCY == 96000000) // 125ns/clock
+#define FMUL (CLOCKLESS_FREQUENCY/8000000)
 
 // GE8822
 template <uint8_t DATA_PIN, EOrder RGB_ORDER = RGB>
@@ -493,7 +509,7 @@ template <uint8_t DATA_PIN, EOrder RGB_ORDER = RGB>
 class TM1803Controller400Khz : public ClocklessController<DATA_PIN, 6 * FMUL, 9 * FMUL, 6 * FMUL, RGB_ORDER> {};
 
 template <uint8_t DATA_PIN, EOrder RGB_ORDER = RGB>
-class TM1829Controller800Khz : public ClocklessController<DATA_PIN, 2 * FMUL, 5 * FMUL, 3 * FMUL, RGB_ORDER> {};
+class TM1829Controller800Khz : public ClocklessController<DATA_PIN, 2 * FMUL, 5 * FMUL, 3 * FMUL, RGB_ORDER, 0, true, 500> {};
 
 template <uint8_t DATA_PIN, EOrder RGB_ORDER = RGB>
 class GW6205Controller400Khz : public ClocklessController<DATA_PIN, 6 * FMUL, 7 * FMUL, 6 * FMUL, RGB_ORDER, 4> {};
@@ -505,82 +521,87 @@ template <uint8_t DATA_PIN, EOrder RGB_ORDER = RGB>
 class PL9823Controller : public ClocklessController<DATA_PIN, 3 * FMUL, 8 * FMUL, 3 * FMUL, RGB_ORDER> {};
 
 #else
+    
+// Like NS(), but counts cycles of the clockless clock (CLOCKLESS_FREQUENCY, which may differ
+// from F_CPU): converts _NS nanoseconds to clock cycles, rounding up (+999 before /1000).
+#define C_NS(_NS) ((((_NS) * (CLOCKLESS_FREQUENCY / 1000000L)) + 999) / 1000)
+
 // GE8822 - 350ns 660ns 350ns
 template <uint8_t DATA_PIN, EOrder RGB_ORDER = RGB>
-class GE8822Controller800Khz : public ClocklessController<DATA_PIN, NS(350), NS(660), NS(350), RGB_ORDER, 4> {};
+class GE8822Controller800Khz : public ClocklessController<DATA_PIN, C_NS(350), C_NS(660), C_NS(350), RGB_ORDER, 4> {};
 
 // GW6205@400khz - 800ns, 800ns, 800ns
 template <uint8_t DATA_PIN, EOrder RGB_ORDER = RGB>
-class GW6205Controller400Khz : public ClocklessController<DATA_PIN, NS(800), NS(800), NS(800), RGB_ORDER, 4> {};
+class GW6205Controller400Khz : public ClocklessController<DATA_PIN, C_NS(800), C_NS(800), C_NS(800), RGB_ORDER, 4> {};
 
 // GW6205@400khz - 400ns, 400ns, 400ns
 template <uint8_t DATA_PIN, EOrder RGB_ORDER = RGB>
-class GW6205Controller800Khz : public ClocklessController<DATA_PIN, NS(400), NS(400), NS(400), RGB_ORDER, 4> {};
+class GW6205Controller800Khz : public ClocklessController<DATA_PIN, C_NS(400), C_NS(400), C_NS(400), RGB_ORDER, 4> {};
 
 // UCS1903 - 500ns, 1500ns, 500ns
 template <uint8_t DATA_PIN, EOrder RGB_ORDER = RGB>
-class UCS1903Controller400Khz : public ClocklessController<DATA_PIN, NS(500), NS(1500), NS(500), RGB_ORDER> {};
+class UCS1903Controller400Khz : public ClocklessController<DATA_PIN, C_NS(500), C_NS(1500), C_NS(500), RGB_ORDER> {};
 
 // UCS1903B - 400ns, 450ns, 450ns
 template <uint8_t DATA_PIN, EOrder RGB_ORDER = RGB>
-class UCS1903BController800Khz : public ClocklessController<DATA_PIN, NS(400), NS(450), NS(450), RGB_ORDER> {};
+class UCS1903BController800Khz : public ClocklessController<DATA_PIN, C_NS(400), C_NS(450), C_NS(450), RGB_ORDER> {};
 
 // UCS1904 - 400ns, 400ns, 450ns
 template <uint8_t DATA_PIN, EOrder RGB_ORDER = RGB>
-class UCS1904Controller800Khz : public ClocklessController<DATA_PIN, NS(400), NS(400), NS(450), RGB_ORDER> {};
+class UCS1904Controller800Khz : public ClocklessController<DATA_PIN, C_NS(400), C_NS(400), C_NS(450), RGB_ORDER> {};
 
 // UCS2903 - 250ns, 750ns, 250ns
 template <uint8_t DATA_PIN, EOrder RGB_ORDER = RGB>
-class UCS2903Controller : public ClocklessController<DATA_PIN, NS(250), NS(750), NS(250), RGB_ORDER> {};
+class UCS2903Controller : public ClocklessController<DATA_PIN, C_NS(250), C_NS(750), C_NS(250), RGB_ORDER> {};
 
 // TM1809 - 350ns, 350ns, 550ns
 template <uint8_t DATA_PIN, EOrder RGB_ORDER = RGB>
-class TM1809Controller800Khz : public ClocklessController<DATA_PIN, NS(350), NS(350), NS(450), RGB_ORDER> {};
+class TM1809Controller800Khz : public ClocklessController<DATA_PIN, C_NS(350), C_NS(350), C_NS(450), RGB_ORDER> {};
 
 // WS2811 - 320ns, 320ns, 640ns
 template <uint8_t DATA_PIN, EOrder RGB_ORDER = RGB>
-class WS2811Controller800Khz : public ClocklessController<DATA_PIN, NS(320), NS(320), NS(640), RGB_ORDER> {};
+class WS2811Controller800Khz : public ClocklessController<DATA_PIN, C_NS(320), C_NS(320), C_NS(640), RGB_ORDER> {};
 
 // WS2813 - 320ns, 320ns, 640ns
 template <uint8_t DATA_PIN, EOrder RGB_ORDER = RGB>
-class WS2813Controller : public ClocklessController<DATA_PIN, NS(320), NS(320), NS(640), RGB_ORDER> {};
+class WS2813Controller : public ClocklessController<DATA_PIN, C_NS(320), C_NS(320), C_NS(640), RGB_ORDER> {};
 
 // WS2812 - 250ns, 625ns, 375ns
 template <uint8_t DATA_PIN, EOrder RGB_ORDER = RGB>
-class WS2812Controller800Khz : public ClocklessController<DATA_PIN, NS(250), NS(625), NS(375), RGB_ORDER> {};
+class WS2812Controller800Khz : public ClocklessController<DATA_PIN, C_NS(250), C_NS(625), C_NS(375), RGB_ORDER> {};
 
 // WS2811@400khz - 800ns, 800ns, 900ns
 template <uint8_t DATA_PIN, EOrder RGB_ORDER = RGB>
-class WS2811Controller400Khz : public ClocklessController<DATA_PIN, NS(800), NS(800), NS(900), RGB_ORDER> {};
+class WS2811Controller400Khz : public ClocklessController<DATA_PIN, C_NS(800), C_NS(800), C_NS(900), RGB_ORDER> {};
 
 // 750NS, 750NS, 750NS
 template <uint8_t DATA_PIN, EOrder RGB_ORDER = RGB>
-class TM1803Controller400Khz : public ClocklessController<DATA_PIN, NS(700), NS(1100), NS(700), RGB_ORDER> {};
+class TM1803Controller400Khz : public ClocklessController<DATA_PIN, C_NS(700), C_NS(1100), C_NS(700), RGB_ORDER> {};
 
 template <uint8_t DATA_PIN, EOrder RGB_ORDER = RGB>
-class TM1829Controller800Khz : public ClocklessController<DATA_PIN, NS(340), NS(340), NS(550), RGB_ORDER, 0, true, 500> {};
+class TM1829Controller800Khz : public ClocklessController<DATA_PIN, C_NS(340), C_NS(340), C_NS(550), RGB_ORDER, 0, true, 500> {};
 
 template <uint8_t DATA_PIN, EOrder RGB_ORDER = RGB>
-class TM1829Controller1600Khz : public ClocklessController<DATA_PIN, NS(100), NS(300), NS(200), RGB_ORDER, 0, true, 500> {};
+class TM1829Controller1600Khz : public ClocklessController<DATA_PIN, C_NS(100), C_NS(300), C_NS(200), RGB_ORDER, 0, true, 500> {};
 
 template <uint8_t DATA_PIN, EOrder RGB_ORDER = RGB>
-class LPD1886Controller1250Khz : public ClocklessController<DATA_PIN, NS(200), NS(400), NS(200), RGB_ORDER, 4> {};
+class LPD1886Controller1250Khz : public ClocklessController<DATA_PIN, C_NS(200), C_NS(400), C_NS(200), RGB_ORDER, 4> {};
 
 template <uint8_t DATA_PIN, EOrder RGB_ORDER = RGB>
-class LPD1886Controller1250Khz_8bit : public ClocklessController<DATA_PIN, NS(200), NS(400), NS(200), RGB_ORDER> {};
+class LPD1886Controller1250Khz_8bit : public ClocklessController<DATA_PIN, C_NS(200), C_NS(400), C_NS(200), RGB_ORDER> {};
 
 
 template <uint8_t DATA_PIN, EOrder RGB_ORDER = RGB>
-class SK6822Controller : public ClocklessController<DATA_PIN, NS(375), NS(1000), NS(375), RGB_ORDER> {};
+class SK6822Controller : public ClocklessController<DATA_PIN, C_NS(375), C_NS(1000), C_NS(375), RGB_ORDER> {};
 
 template <uint8_t DATA_PIN, EOrder RGB_ORDER = RGB>
-class SK6812Controller : public ClocklessController<DATA_PIN, NS(300), NS(300), NS(600), RGB_ORDER> {};
+class SK6812Controller : public ClocklessController<DATA_PIN, C_NS(300), C_NS(300), C_NS(600), RGB_ORDER> {};
 
 template <uint8_t DATA_PIN, EOrder RGB_ORDER = RGB>
-class SM16703Controller : public ClocklessController<DATA_PIN, NS(300), NS(600), NS(300), RGB_ORDER> {};
+class SM16703Controller : public ClocklessController<DATA_PIN, C_NS(300), C_NS(600), C_NS(300), RGB_ORDER> {};
 
 template <uint8_t DATA_PIN, EOrder RGB_ORDER = RGB>
-class PL9823Controller : public ClocklessController<DATA_PIN, NS(350), NS(1010), NS(350), RGB_ORDER> {};
+class PL9823Controller : public ClocklessController<DATA_PIN, C_NS(350), C_NS(1010), C_NS(350), RGB_ORDER> {};
 #endif
 ///@}
 
diff --git a/fastspi.h b/fastspi.h
index 8e2a593be5..fc0843be7a 100644
--- a/fastspi.h
+++ b/fastspi.h
@@ -40,6 +40,11 @@ template<uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint8_t _SPI_CLOCK_DIVIDER>
 class SPIOutput : public NRF51SPIOutput<_DATA_PIN, _CLOCK_PIN, _SPI_CLOCK_DIVIDER> {};
 #endif
 
+#if defined(NRF52_SERIES) && defined(FASTLED_ALL_PINS_HARDWARE_SPI)
+template<uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint8_t _SPI_CLOCK_DIVIDER>
+class SPIOutput : public NRF52SPIOutput<_DATA_PIN, _CLOCK_PIN, _SPI_CLOCK_DIVIDER> {}; // nRF52: any data/clock pin pair is forwarded to NRF52SPIOutput
+#endif // NRF52_SERIES && FASTLED_ALL_PINS_HARDWARE_SPI
+
 #if defined(SPI_DATA) && defined(SPI_CLOCK)
 
 #if defined(FASTLED_TEENSY3) && defined(ARM_HARDWARE_SPI)
diff --git a/led_sysdefs.h b/led_sysdefs.h
index ea8c14f443..7abcd15e49 100644
--- a/led_sysdefs.h
+++ b/led_sysdefs.h
@@ -7,6 +7,8 @@
 
 #if defined(NRF51) || defined(__RFduino__) || defined (__Simblee__)
 #include "platforms/arm/nrf51/led_sysdefs_arm_nrf51.h"
+#elif defined(NRF52_SERIES)
+#include "platforms/arm/nrf52/led_sysdefs_arm_nrf52.h"
 #elif defined(__MK20DX128__) || defined(__MK20DX256__)
 // Include k20/T3 headers
 #include "platforms/arm/k20/led_sysdefs_arm_k20.h"
diff --git a/platforms.cpp b/platforms.cpp
new file mode 100644
index 0000000000..47a0088314
--- /dev/null
+++ b/platforms.cpp
@@ -0,0 +1,40 @@
+#define FASTLED_INTERNAL
+
+
+// Interrupt handlers cannot be defined in the header.
+// They must be defined as C functions, or they won't
+// be found (due to name mangling), and thus won't
+// override any default weak definition.
+#if defined(NRF52_SERIES)
+
+    #include "platforms/arm/nrf52/led_sysdefs_arm_nrf52.h"
+    #include "platforms/arm/nrf52/arbiter_nrf52.h"
+
+    uint32_t isrCount; // running count of PWM interrupts; incremented by every handler below
+
+    #ifdef __cplusplus
+        extern "C" {
+    #endif
+            // NOTE: Keep these handlers in step with the FASTLED_NRF52_ENABLE_PWM_INSTANCE[n] macros
+            #if defined(FASTLED_NRF52_ENABLE_PWM_INSTANCE0)
+                void PWM0_IRQHandler(void) { isrCount++; PWM_Arbiter<0>::isr_handler(); } // count, then forward to arbiter 0
+            #endif
+            #if defined(FASTLED_NRF52_ENABLE_PWM_INSTANCE1)
+                void PWM1_IRQHandler(void) { isrCount++; PWM_Arbiter<1>::isr_handler(); } // count, then forward to arbiter 1
+            #endif
+            #if defined(FASTLED_NRF52_ENABLE_PWM_INSTANCE2)
+                void PWM2_IRQHandler(void) { isrCount++; PWM_Arbiter<2>::isr_handler(); } // count, then forward to arbiter 2
+            #endif
+            #if defined(FASTLED_NRF52_ENABLE_PWM_INSTANCE3)
+                void PWM3_IRQHandler(void) { isrCount++; PWM_Arbiter<3>::isr_handler(); } // count, then forward to arbiter 3
+            #endif
+    #ifdef __cplusplus
+        }
+    #endif
+
+#endif // defined(NRF52_SERIES)
+
+
+
+// FASTLED_NAMESPACE_BEGIN
+// FASTLED_NAMESPACE_END
diff --git a/platforms.h b/platforms.h
index 625791bc27..82d7d99385 100644
--- a/platforms.h
+++ b/platforms.h
@@ -7,6 +7,8 @@
 
 #if defined(NRF51)
 #include "platforms/arm/nrf51/fastled_arm_nrf51.h"
+#elif defined(NRF52_SERIES)
+#include "platforms/arm/nrf52/fastled_arm_nrf52.h"
 #elif defined(__MK20DX128__) || defined(__MK20DX256__)
 // Include k20/T3 headers
 #include "platforms/arm/k20/fastled_arm_k20.h"
diff --git a/platforms/arm/nrf52/arbiter_nrf52.h b/platforms/arm/nrf52/arbiter_nrf52.h
new file mode 100644
index 0000000000..5a6aa92a67
--- /dev/null
+++ b/platforms/arm/nrf52/arbiter_nrf52.h
@@ -0,0 +1,115 @@
+#ifndef __INC_ARBITER_NRF52
+#define __INC_ARBITER_NRF52
+
+#if defined(NRF52_SERIES)
+
+#include "led_sysdefs_arm_nrf52.h"
+
+//FASTLED_NAMESPACE_BEGIN
+
+typedef void (*FASTLED_NRF52_PWM_INTERRUPT_HANDLER)(); // callback type that PWM_Arbiter stores on acquire and invokes from its isr_handler()
+
+// a trick learned from other embedded projects .. 
+// use the enum as an index to a statically-allocated array
+// to store unique information for that instance.
+// also provides a count of how many instances were enabled.
+//
+// See led_sysdefs_arm_nrf52.h for selection....
+//
+typedef enum _FASTLED_NRF52_ENABLED_PWM_INSTANCE { // one enumerator per enabled PWM instance, numbered densely from 0
+#if defined(FASTLED_NRF52_ENABLE_PWM_INSTANCE0)
+    FASTLED_NRF52_PWM0_INSTANCE_IDX,
+#endif
+#if defined(FASTLED_NRF52_ENABLE_PWM_INSTANCE1)
+    FASTLED_NRF52_PWM1_INSTANCE_IDX,
+#endif
+#if defined(FASTLED_NRF52_ENABLE_PWM_INSTANCE2)
+    FASTLED_NRF52_PWM2_INSTANCE_IDX,
+#endif
+#if defined(FASTLED_NRF52_ENABLE_PWM_INSTANCE3)
+    FASTLED_NRF52_PWM3_INSTANCE_IDX,
+#endif
+    FASTLED_NRF52_PWM_INSTANCE_COUNT // always last: equals the number of enabled instances
+} FASTLED_NRF52_ENABLED_PWM_INSTANCES;
+
+static_assert(FASTLED_NRF52_PWM_INSTANCE_COUNT > 0, "Instance count must be greater than zero -- define FASTLED_NRF52_ENABLE_PWM_INSTANCE[n] (replace `[n]` with digit)");
+
+template <uint32_t _PWM_ID>
+class PWM_Arbiter {
+    // Grants exclusive use of the PWM instance selected by _PWM_ID and relays its interrupt to the current owner.
+private:
+    static_assert(_PWM_ID < 32, "PWM_ID over 31 breaks current arbitration bitmask");
+    // Only bit 0 of s_PwmInUse serves as the "held" flag; the per-ID _ACQUIRE_MASK / _CLEAR_MASK
+    // scheme, (1u << _PWM_ID) and its complement, is not in use.
+    static uint32_t                              s_PwmInUse;
+    static NRF_PWM_Type * const                  s_PWM;
+    static IRQn_Type      const                           s_PWM_IRQ;
+    static FASTLED_NRF52_PWM_INTERRUPT_HANDLER volatile   s_Isr;
+
+public:
+    // Interrupt entry point: calls the handler stored by the most recent successful tryAcquire().
+    static void isr_handler() {
+        s_Isr();
+    }
+    FASTLED_NRF52_INLINE_ATTRIBUTE static bool            isAcquired() {
+        return ((s_PwmInUse & 1u) != 0u);
+    }
+    FASTLED_NRF52_INLINE_ATTRIBUTE static void            acquire(FASTLED_NRF52_PWM_INTERRUPT_HANDLER isr) {
+        while (!tryAcquire(isr)) { /* spin until the instance is free */ }
+    }
+    // Atomically sets the held flag; on success records `isr` and returns true, otherwise returns false.
+    FASTLED_NRF52_INLINE_ATTRIBUTE static bool            tryAcquire(FASTLED_NRF52_PWM_INTERRUPT_HANDLER isr) {
+        const uint32_t previous = __sync_fetch_and_or(&s_PwmInUse, 1u);
+        const bool wasFree = ((previous & 1u) == 0u);
+        if (wasFree) {
+            s_Isr = isr;
+        }
+        return wasFree;
+    }
+    // Atomically clears the held flag so the instance can be acquired again.
+    FASTLED_NRF52_INLINE_ATTRIBUTE static void            releaseFromIsr() {
+        const uint32_t previous = __sync_fetch_and_and(&s_PwmInUse, ~1u);
+        // TODO: a clear bit 0 in `previous` should never happen... indicates was not held.
+        // Assert here?
+        (void)previous;
+    }
+    FASTLED_NRF52_INLINE_ATTRIBUTE static NRF_PWM_Type *  getPWM() {
+        return s_PWM;
+    }
+    FASTLED_NRF52_INLINE_ATTRIBUTE static IRQn_Type       getIRQn() { return s_PWM_IRQ; }
+};
+template <uint32_t _PWM_ID> NRF_PWM_Type * const PWM_Arbiter<_PWM_ID>::s_PWM           = // peripheral for _PWM_ID, picked from the enabled instances by a nested ternary
+    #if defined(FASTLED_NRF52_ENABLE_PWM_INSTANCE0)
+        (_PWM_ID == 0 ? NRF_PWM0 :
+    #endif
+    #if defined(FASTLED_NRF52_ENABLE_PWM_INSTANCE1)
+        (_PWM_ID == 1 ? NRF_PWM1 :
+    #endif
+    #if defined(FASTLED_NRF52_ENABLE_PWM_INSTANCE2)
+        (_PWM_ID == 2 ? NRF_PWM2 :
+    #endif
+    #if defined(FASTLED_NRF52_ENABLE_PWM_INSTANCE3)
+        (_PWM_ID == 3 ? NRF_PWM3 :
+    #endif
+        (NRF_PWM_Type*)-1 // fallback when _PWM_ID matches no enabled instance
+    #if defined(FASTLED_NRF52_ENABLE_PWM_INSTANCE0)
+        ) // each block below closes the parenthesis opened by the matching block above
+    #endif
+    #if defined(FASTLED_NRF52_ENABLE_PWM_INSTANCE1)
+        )
+    #endif
+    #if defined(FASTLED_NRF52_ENABLE_PWM_INSTANCE2)
+        )
+    #endif
+    #if defined(FASTLED_NRF52_ENABLE_PWM_INSTANCE3)
+        )
+    #endif
+    ;
+template <uint32_t _PWM_ID> IRQn_Type    const                            PWM_Arbiter<_PWM_ID>::s_PWM_IRQ   = ((IRQn_Type)((uint8_t)((uint32_t)(s_PWM) >> 12))); // bits 12..19 of the peripheral address; NOTE(review): presumably relies on peripheral ID == IRQ number -- confirm
+template <uint32_t _PWM_ID> uint32_t                                      PWM_Arbiter<_PWM_ID>::s_PwmInUse  = 0; // starts out not acquired
+template <uint32_t _PWM_ID> FASTLED_NRF52_PWM_INTERRUPT_HANDLER volatile  PWM_Arbiter<_PWM_ID>::s_Isr       = NULL; // no handler until tryAcquire() stores one
+
+//FASTLED_NAMESPACE_END
+
+#endif // NRF52_SERIES
+#endif // __INC_ARBITER_NRF52
\ No newline at end of file
diff --git a/platforms/arm/nrf52/clockless_arm_nrf52.h b/platforms/arm/nrf52/clockless_arm_nrf52.h
new file mode 100644
index 0000000000..d8a5da9885
--- /dev/null
+++ b/platforms/arm/nrf52/clockless_arm_nrf52.h
@@ -0,0 +1,371 @@
+#ifndef __INC_CLOCKLESS_ARM_NRF52
+#define __INC_CLOCKLESS_ARM_NRF52
+
+#if defined(NRF52_SERIES)
+
+
+//FASTLED_NAMESPACE_BEGIN
+
+#define FASTLED_HAS_CLOCKLESS 1
+#define FASTLED_NRF52_MAXIMUM_PIXELS_PER_STRING 144 // TODO: Figure out how to safely let this be caller-defined....
+
+// nRF52810 has a single PWM peripheral (PWM0)
+// nRF52832 has three PWM peripherals (PWM0, PWM1, PWM2)
+// nRF52840 has four PWM peripherals (PWM0, PWM1, PWM2, PWM3)
+// NOTE: Update platforms.cpp in root of FastLED library if this changes
+#define FASTLED_NRF52_PWM_ID 0 // PWM instance used by every ClocklessController (template argument to PWM_Arbiter<>)
+
+
+extern uint32_t isrCount; // defined in platforms.cpp; incremented by the PWM IRQ handlers there
+
+
+template <uint8_t _DATA_PIN, int _T1, int _T2, int _T3, EOrder _RGB_ORDER = RGB, int _XTRA0 = 0, bool _FLIP = false, int _WAIT_TIME_MICROSECONDS = 10>
+class ClocklessController : public CPixelLEDController<_RGB_ORDER> { // nRF52 clockless output: each protocol bit becomes one 16-bit PWM compare value
+    static_assert(FASTLED_NRF52_MAXIMUM_PIXELS_PER_STRING > 0, "Maximum string length must be positive value (FASTLED_NRF52_MAXIMUM_PIXELS_PER_STRING)");
+    static_assert(_T1         >             0 , "negative values are not allowed");
+    static_assert(_T2         >             0 , "negative values are not allowed");
+    static_assert(_T3         >             0 , "negative values are not allowed");
+    static_assert(_T1         <  (0x8000u-2u), "_T1 must fit in 15 bits");
+    static_assert(_T2         <  (0x8000u-2u), "_T2 must fit in 15 bits");
+    static_assert(_T3         <  (0x8000u-2u), "_T3 must fit in 15 bits");
+    static_assert(_T1         <  (0x8000u-2u), "_T0H must fit in 15 bits");
+    static_assert(_T1+_T2     <  (0x8000u-2u), "_T1H must fit in 15 bits");
+    static_assert(_T1+_T2+_T3 <  (0x8000u-2u), "_TOP must fit in 15 bits");
+    static_assert(_T1+_T2+_T3 <= PWM_COUNTERTOP_COUNTERTOP_Msk, "_TOP too large for peripheral");
+    // _T1/_T2/_T3 are used directly as PWM counter values (see _T0H/_T1H/_TOP below).
+private:
+    static const bool     _INITIALIZE_PIN_HIGH = (_FLIP ? 1 : 0);
+    static const uint16_t _POLARITY_BIT        = (_FLIP ? 0 : 0x8000); // bit 15 of every sequence entry; set unless _FLIP
+    // The timing constants are 16 bits wide: the asserts above admit 15-bit values, which uint8_t would silently truncate.
+    static const uint8_t  _BITS_PER_PIXEL   = (8 + _XTRA0) * 3; // NOTE: 3 means RGB only...
+    static const uint16_t _PWM_BUFFER_COUNT = (_BITS_PER_PIXEL * FASTLED_NRF52_MAXIMUM_PIXELS_PER_STRING);
+    static const uint16_t _T0H = ((uint16_t)(_T1        )); // compare value written for a 0 bit
+    static const uint16_t _T1H = ((uint16_t)(_T1+_T2    )); // compare value written for a 1 bit
+    static const uint16_t _TOP = ((uint16_t)(_T1+_T2+_T3)); // counter top handed to nrf_pwm_configure()
+
+    // may as well be static, as can only attach one LED string per _DATA_PIN....
+    static uint16_t s_SequenceBuffer[_PWM_BUFFER_COUNT];
+    static uint16_t s_SequenceBufferValidElements;
+    static uint32_t s_SequenceBufferInUse;
+    static CMinWait<_WAIT_TIME_MICROSECONDS> mWait;  // ensure data has time to latch
+    // Make the data pin an output at its starting level: high when _FLIP, low otherwise.
+    FASTLED_NRF52_INLINE_ATTRIBUTE static void startPwmPlayback_InitializePinState() {
+        FastPin<_DATA_PIN>::setOutput();
+        if (_INITIALIZE_PIN_HIGH) {
+            FastPin<_DATA_PIN>::hi();
+        } else {
+            FastPin<_DATA_PIN>::lo();
+        }
+    }
+    FASTLED_NRF52_INLINE_ATTRIBUTE static void startPwmPlayback_InitializePwmInstance(NRF_PWM_Type * pwm) {
+        // Route output 0 to the data pin, enable the PWM, then configure it: 16MHz clock, up-counting, top == _TOP.
+        // Pins must be set before enabling the peripheral
+        pwm->PSEL.OUT[0] = FastPin<_DATA_PIN>::nrf_pin();
+        pwm->PSEL.OUT[1] = NRF_PWM_PIN_NOT_CONNECTED;
+        pwm->PSEL.OUT[2] = NRF_PWM_PIN_NOT_CONNECTED;
+        pwm->PSEL.OUT[3] = NRF_PWM_PIN_NOT_CONNECTED;
+        nrf_pwm_enable(pwm);
+        nrf_pwm_configure(pwm, NRF_PWM_CLK_16MHz, NRF_PWM_MODE_UP, _TOP);
+        nrf_pwm_decoder_set(pwm, NRF_PWM_LOAD_COMMON, NRF_PWM_STEP_AUTO);
+
+        // clear any prior shorts / interrupt enable bits
+        nrf_pwm_shorts_set(pwm, 0);
+        nrf_pwm_int_set(pwm, 0);
+        // clear all prior events
+        nrf_pwm_event_clear(pwm, NRF_PWM_EVENT_STOPPED);
+        nrf_pwm_event_clear(pwm, NRF_PWM_EVENT_SEQSTARTED0);
+        nrf_pwm_event_clear(pwm, NRF_PWM_EVENT_SEQSTARTED1);
+        nrf_pwm_event_clear(pwm, NRF_PWM_EVENT_SEQEND0);
+        nrf_pwm_event_clear(pwm, NRF_PWM_EVENT_SEQEND1);
+        nrf_pwm_event_clear(pwm, NRF_PWM_EVENT_PWMPERIODEND);
+        nrf_pwm_event_clear(pwm, NRF_PWM_EVENT_LOOPSDONE);
+    }
+    FASTLED_NRF52_INLINE_ATTRIBUTE static void startPwmPlayback_ConfigurePwmSequence(NRF_PWM_Type * pwm) {
+        // config is easy, using SEQ0, no loops... (the same descriptor is also written to SEQ1)
+        nrf_pwm_sequence_t sequenceConfig;
+        sequenceConfig.values.p_common = &(s_SequenceBuffer[0]);
+        sequenceConfig.length          = s_SequenceBufferValidElements;
+        sequenceConfig.repeats         = 0; // send the data once, and only once
+        sequenceConfig.end_delay       = 0; // no extra delay at the end of SEQ[0] / SEQ[1]
+        nrf_pwm_sequence_set(pwm, 0, &sequenceConfig);
+        nrf_pwm_sequence_set(pwm, 1, &sequenceConfig);
+        nrf_pwm_loop_set(pwm, 0);
+
+    }
+    FASTLED_NRF52_INLINE_ATTRIBUTE static void startPwmPlayback_EnableInterruptsAndShortcuts(NRF_PWM_Type * pwm) {
+        IRQn_Type irqn = PWM_Arbiter<FASTLED_NRF52_PWM_ID>::getIRQn();
+        // TODO: check API results...
+        uint32_t result;
+
+        result = sd_nvic_SetPriority(irqn, configMAX_SYSCALL_INTERRUPT_PRIORITY);
+        (void)result;
+        result = sd_nvic_EnableIRQ(irqn);
+        (void)result;
+
+        // shortcuts prevent (up to) 4-cycle delay from interrupt handler to next action
+        uint32_t shortsToEnable = 0;
+        shortsToEnable |= NRF_PWM_SHORT_SEQEND0_STOP_MASK;        ///< SEQEND[0] --> STOP task.
+        shortsToEnable |= NRF_PWM_SHORT_SEQEND1_STOP_MASK;        ///< SEQEND[1] --> STOP task.
+        //shortsToEnable |= NRF_PWM_SHORT_LOOPSDONE_SEQSTART0_MASK; ///< LOOPSDONE --> SEQSTART[0] task.
+        //shortsToEnable |= NRF_PWM_SHORT_LOOPSDONE_SEQSTART1_MASK; ///< LOOPSDONE --> SEQSTART[1] task.
+        shortsToEnable |= NRF_PWM_SHORT_LOOPSDONE_STOP_MASK;      ///< LOOPSDONE --> STOP task.
+        nrf_pwm_shorts_set(pwm, shortsToEnable);
+
+        // mark which events should cause interrupts...
+        uint32_t interruptsToEnable = 0;
+        interruptsToEnable |= NRF_PWM_INT_SEQEND0_MASK;
+        interruptsToEnable |= NRF_PWM_INT_SEQEND1_MASK;
+        interruptsToEnable |= NRF_PWM_INT_LOOPSDONE_MASK;
+        interruptsToEnable |= NRF_PWM_INT_STOPPED_MASK;
+        nrf_pwm_int_set(pwm, interruptsToEnable);
+
+    }
+    FASTLED_NRF52_INLINE_ATTRIBUTE static void startPwmPlayback_StartTask(NRF_PWM_Type * pwm) {
+        nrf_pwm_task_trigger(pwm, NRF_PWM_TASK_SEQSTART0);
+    }
+
+public:
+    static void isr_handler() { // handed to PWM_Arbiter::acquire() by startPwmPlayback(); the arbiter calls it from the PWM interrupt
+        NRF_PWM_Type * pwm = PWM_Arbiter<FASTLED_NRF52_PWM_ID>::getPWM();
+        IRQn_Type irqn = PWM_Arbiter<FASTLED_NRF52_PWM_ID>::getIRQn();
+
+        // Only SEQ0 is started, and SEQEND0/SEQEND1/LOOPSDONE are all shorted to the
+        // STOP task, so STOPPED is the only event that needs handling here.
+        if (nrf_pwm_event_check(pwm,NRF_PWM_EVENT_STOPPED)) {
+            nrf_pwm_event_clear(pwm,NRF_PWM_EVENT_STOPPED);
+
+            // mark the sequence as no longer in use (atomic AND with 0 clears the flag)
+            __sync_fetch_and_and(&s_SequenceBufferInUse, 0);
+            // prevent further interrupts from PWM events
+            nrf_pwm_int_set(pwm, 0);
+            // disable PWM interrupts - None of the PWM IRQs are shared
+            // with other peripherals, avoiding complexity of shared IRQs.
+            sd_nvic_DisableIRQ(irqn);
+            // disable the PWM instance
+            nrf_pwm_disable(pwm);
+            // may take up to 4 cycles for writes to propagate (APB bus @ 16MHz)
+            asm __volatile__ ( "NOP; NOP; NOP; NOP;" );
+            // release the PWM arbiter to be re-used by another LED string
+            PWM_Arbiter<FASTLED_NRF52_PWM_ID>::releaseFromIsr();
+        }
+    }
+
+    // Reports the derived timing constants through FASTLED_NRF52_DEBUGPRINT; touches no hardware.
+    virtual void init() {
+        FASTLED_NRF52_DEBUGPRINT("Clockless Timings:\n");
+        FASTLED_NRF52_DEBUGPRINT("    T0H == %d", _T0H);
+        FASTLED_NRF52_DEBUGPRINT("    T1H == %d", _T1H);
+        FASTLED_NRF52_DEBUGPRINT("    TOP == %d\n", _TOP);
+    }
+    virtual uint16_t getMaxRefreshRate() const { return 800; }
+    // Render `pixels` into the sequence buffer and start an asynchronous PWM playback of it.
+    virtual void showPixels(PixelController<_RGB_ORDER> & pixels) {
+        // wait for the only sequence buffer to become available; atomic load so the ISR's clear is always observed
+        while (__atomic_load_n(&s_SequenceBufferInUse, __ATOMIC_SEQ_CST) != 0);
+        prepareSequenceBuffers(pixels);
+        if (s_SequenceBufferValidElements == 0) { return; } // nothing prepared (empty or oversize string): never start an empty sequence
+        mWait.wait(); // ensure min time between updates
+        startPwmPlayback(s_SequenceBufferValidElements);
+    }
+    // Store the PWM value for bit _BIT of `byte` into *e: _T0H when the bit is clear, _T1H when set, OR'ed with _POLARITY_BIT.
+    template<uint8_t _BIT>
+    FASTLED_NRF52_INLINE_ATTRIBUTE static void WriteBitToSequence(uint8_t byte, uint16_t * e) {
+        *e = _POLARITY_BIT | (((byte & (1u << _BIT)) == 0) ? _T0H : _T1H);
+    }
+    FASTLED_NRF52_INLINE_ATTRIBUTE static void prepareSequenceBuffers(PixelController<_RGB_ORDER> & pixels) {
+        s_SequenceBufferValidElements = 0;
+        int32_t    remainingSequenceElements = _PWM_BUFFER_COUNT;
+        uint16_t * e = s_SequenceBuffer;
+        uint32_t size_needed = pixels.size(); // count of pixels
+        size_needed *= (8 + _XTRA0);          // bits per pixel
+        size_needed *= 2;                     // each bit takes two bytes
+        // NOTE(review): size_needed is bytes for one channel, _PWM_BUFFER_COUNT is elements for three; trips only above 1.5x the max pixel count.
+        if (size_needed > _PWM_BUFFER_COUNT) {
+            // TODO: assert()?
+            return;
+        }
+        // Emit MSB-first entries for each of the three channel bytes; the loop itself never writes past the buffer.
+        while (pixels.has(1) && (remainingSequenceElements >= _BITS_PER_PIXEL)) {
+            uint8_t b0 = pixels.loadAndScale0();
+            WriteBitToSequence<7>(b0, e); e++;
+            WriteBitToSequence<6>(b0, e); e++;
+            WriteBitToSequence<5>(b0, e); e++;
+            WriteBitToSequence<4>(b0, e); e++;
+            WriteBitToSequence<3>(b0, e); e++;
+            WriteBitToSequence<2>(b0, e); e++;
+            WriteBitToSequence<1>(b0, e); e++;
+            WriteBitToSequence<0>(b0, e); e++;
+            if (_XTRA0 > 0) {
+                for (int i = 0; i < _XTRA0; i++) {
+                    WriteBitToSequence<0>(0,e); e++;
+                }
+            }
+            uint8_t b1 = pixels.loadAndScale1();
+            WriteBitToSequence<7>(b1, e); e++;
+            WriteBitToSequence<6>(b1, e); e++;
+            WriteBitToSequence<5>(b1, e); e++;
+            WriteBitToSequence<4>(b1, e); e++;
+            WriteBitToSequence<3>(b1, e); e++;
+            WriteBitToSequence<2>(b1, e); e++;
+            WriteBitToSequence<1>(b1, e); e++;
+            WriteBitToSequence<0>(b1, e); e++;
+            if (_XTRA0 > 0) {
+                for (int i = 0; i < _XTRA0; i++) {
+                    WriteBitToSequence<0>(0,e); e++;
+                }
+            }
+            uint8_t b2 = pixels.loadAndScale2();
+            WriteBitToSequence<7>(b2, e); e++;
+            WriteBitToSequence<6>(b2, e); e++;
+            WriteBitToSequence<5>(b2, e); e++;
+            WriteBitToSequence<4>(b2, e); e++;
+            WriteBitToSequence<3>(b2, e); e++;
+            WriteBitToSequence<2>(b2, e); e++;
+            WriteBitToSequence<1>(b2, e); e++;
+            WriteBitToSequence<0>(b2, e); e++;
+            if (_XTRA0 > 0) {
+                for (int i = 0; i < _XTRA0; i++) {
+                    WriteBitToSequence<0>(0,e); e++;
+                }
+            }
+
+            // advance pixel and sequence pointers
+            s_SequenceBufferValidElements += _BITS_PER_PIXEL;
+            remainingSequenceElements     -= _BITS_PER_PIXEL;
+            pixels.advanceData();
+            pixels.stepDithering();
+        }
+    }
+
+    // Claim the PWM instance, flag the buffer busy and start playback. NOTE: bytesToSend is unused; the length comes from s_SequenceBufferValidElements.
+    FASTLED_NRF52_INLINE_ATTRIBUTE static void startPwmPlayback(uint16_t bytesToSend) {
+        PWM_Arbiter<FASTLED_NRF52_PWM_ID>::acquire(isr_handler);
+        NRF_PWM_Type * pwm = PWM_Arbiter<FASTLED_NRF52_PWM_ID>::getPWM();
+
+        // mark the sequence as being in-use
+        __sync_fetch_and_or(&s_SequenceBufferInUse, 1);
+
+        startPwmPlayback_InitializePinState();
+        startPwmPlayback_InitializePwmInstance(pwm);
+        startPwmPlayback_ConfigurePwmSequence(pwm);
+        startPwmPlayback_EnableInterruptsAndShortcuts(pwm);
+        startPwmPlayback_StartTask(pwm);
+        return;
+    }
+
+
+#if 0
+    FASTLED_NRF52_INLINE_ATTRIBUTE static uint16_t* getRawSequenceBuffer() { return s_SequenceBuffer; }
+    FASTLED_NRF52_INLINE_ATTRIBUTE static uint16_t getRawSequenceBufferSize() { return _PWM_BUFFER_COUNT; }
+    FASTLED_NRF52_INLINE_ATTRIBUTE static uint16_t getSequenceBufferInUse() { return s_SequenceBufferInUse; }
+    FASTLED_NRF52_INLINE_ATTRIBUTE static void sendRawSequenceBuffer(uint16_t bytesToSend) {
+        mWait.wait(); // ensure min time between updates
+        startPwmPlayback(bytesToSend);
+    }
+    FASTLED_NRF52_INLINE_ATTRIBUTE static void sendRawBytes(uint8_t * arrayOfBytes, uint16_t bytesToSend) {
+        // wait for sequence buffer to be available
+        while (s_SequenceBufferInUse != 0);
+
+        s_SequenceBufferValidElements = 0;
+        int32_t    remainingSequenceElements = _PWM_BUFFER_COUNT;
+        uint16_t * e           = s_SequenceBuffer;
+        uint8_t  * nextByte    = arrayOfBytes;
+        for (uint16_t bytesRemain = bytesToSend;
+            (remainingSequenceElements >= 8) && (bytesRemain > 0);
+            bytesRemain--,
+            remainingSequenceElements     -= 8,
+            s_SequenceBufferValidElements += 8
+            ) {
+            uint8_t b = *nextByte;
+            WriteBitToSequence<7,false>(b, e); e++;
+            WriteBitToSequence<6,false>(b, e); e++;
+            WriteBitToSequence<5,false>(b, e); e++;
+            WriteBitToSequence<4,false>(b, e); e++;
+            WriteBitToSequence<3,false>(b, e); e++;
+            WriteBitToSequence<2,false>(b, e); e++;
+            WriteBitToSequence<1,false>(b, e); e++;
+            WriteBitToSequence<0,false>(b, e); e++;
+            if (_XTRA0 > 0) {
+                for (int i = 0; i < _XTRA0; i++) {
+                    WriteBitToSequence<0,_FLIP>(0,e); e++;
+                }
+            }
+        }
+        mWait.wait(); // ensure min time between updates
+
+        startPwmPlayback(s_SequenceBufferValidElements);
+    }
+#endif // 0
+
+};
+
+template <uint8_t _DATA_PIN, int _T1, int _T2, int _T3, EOrder _RGB_ORDER, int _XTRA0, bool _FLIP, int _WAIT_TIME_MICROSECONDS>
+uint16_t ClocklessController<_DATA_PIN, _T1, _T2, _T3, _RGB_ORDER, _XTRA0, _FLIP, _WAIT_TIME_MICROSECONDS>::s_SequenceBufferValidElements = 0; // entries filled by the last prepareSequenceBuffers()
+template <uint8_t _DATA_PIN, int _T1, int _T2, int _T3, EOrder _RGB_ORDER, int _XTRA0, bool _FLIP, int _WAIT_TIME_MICROSECONDS>
+uint32_t ClocklessController<_DATA_PIN, _T1, _T2, _T3, _RGB_ORDER, _XTRA0, _FLIP, _WAIT_TIME_MICROSECONDS>::s_SequenceBufferInUse = 0; // set by startPwmPlayback(), cleared in isr_handler()
+template <uint8_t _DATA_PIN, int _T1, int _T2, int _T3, EOrder _RGB_ORDER, int _XTRA0, bool _FLIP, int _WAIT_TIME_MICROSECONDS>
+uint16_t ClocklessController<_DATA_PIN, _T1, _T2, _T3, _RGB_ORDER, _XTRA0, _FLIP, _WAIT_TIME_MICROSECONDS>::s_SequenceBuffer[_PWM_BUFFER_COUNT]; // one 16-bit PWM value per protocol bit
+template <uint8_t _DATA_PIN, int _T1, int _T2, int _T3, EOrder _RGB_ORDER, int _XTRA0, bool _FLIP, int _WAIT_TIME_MICROSECONDS>
+CMinWait<_WAIT_TIME_MICROSECONDS> ClocklessController<_DATA_PIN, _T1, _T2, _T3, _RGB_ORDER, _XTRA0, _FLIP, _WAIT_TIME_MICROSECONDS>::mWait; // waited on in showPixels() before each playback
+
+/* nrf_pwm solution
+// 
+// When the nRF52 softdevice (e.g., BLE) is enabled, the CPU can be pre-empted
+// at any time for radio interrupts.  These interrupts cannot be disabled.
+// The problem is, even simple BLE advertising interrupts may take **`348μs`**
+// (per softdevice 1.40, see http://infocenter.nordicsemi.com/pdf/S140_SDS_v1.3.pdf)
+// 
+// The nRF52 chips have a decent Easy-DMA-enabled PWM peripheral.
+//
+// The major downside:
+// [] The PWM peripheral has a fixed input buffer size at 16 bits per clock cycle.
+//    (each clockless protocol bit == 2 bytes)
+//
+// The major upsides include:
+// [] Fully asynchronous, freeing CPU for other tasks
+// [] Softdevice interrupts do not affect PWM clocked output (reliable clocking)
+//
+// The initial solution generally does the following for showPixels():
+// [] wait for a sequence buffer to become available
+// [] prepare the entire LED string's sequence (see `prepareSequenceBuffers()`)
+// [] ensure minimum wait time from prior sequence's end, then start the PWM playback
+//
+// Options after initial solution working:
+// [] 
+
+// TODO: Double-buffers, so one can be doing DMA while the second
+//       buffer is being prepared.
+// TODO: Pool of buffers, so can keep N-1 active in DMA, while
+//       preparing data in the final buffer?
+//       Write another class similar to PWM_Arbiter, only for
+//       tracking use of sequence buffers?
+// TODO: Use volatile variable to track buffers that the
+//       prior DMA operation is finished with, so can fill
+//       in those buffers with newly-prepared data...
+// apis to send the pre-generated buffer.  This would be essentially asynchronous,
+// and result in efficient run time if the pixels are either (a) static, or
+// (b) cycle through a limited number of options whose converted results can
+// be cached and re-used.  While simple, this method takes lots of extra RAM...
+// 16 bits for every full clock (high/low) cycle.
+//
+// Clockless chips typically send 24 bits (3x 8-bit) per pixel.
+// One odd clockless chip sends 36 bits (3x 12-bit) per pixel.
+// Each bit requires a 16-bit sequence entry for the PWM peripheral.
+// This gives approximately:
+//                 24 bpp           36 bpp
+// ==========================================
+//  1 pixel        48 bytes        72 bytes          
+// 32 pixels    1,536 bytes     2,304 bytes
+// 64 pixels    3,072 bytes     4,608 bytes
+//
+//
+// UPDATE: this is the method I'm choosing, to get _SOMETHING_
+//         clockless working...  3k RAM for 64 pixels is acceptable
+//         for a first release, as it allows re-use of FASTLED
+//         color correction, dithering, etc. ....
+*/
+
+//FASTLED_NAMESPACE_END
+
+#endif // NRF52_SERIES
+#endif // __INC_CLOCKLESS_ARM_NRF52
\ No newline at end of file
diff --git a/platforms/arm/nrf52/fastled_arm_nrf52.h b/platforms/arm/nrf52/fastled_arm_nrf52.h
new file mode 100644
index 0000000000..453003068e
--- /dev/null
+++ b/platforms/arm/nrf52/fastled_arm_nrf52.h
@@ -0,0 +1,11 @@
+#ifndef __INC_FASTLED_ARM_NRF52_H
+#define __INC_FASTLED_ARM_NRF52_H
+
+#include "led_sysdefs_arm_nrf52.h"
+#include "arbiter_nrf52.h"
+#include "fastpin_arm_nrf52.h"
+#include "fastspi_arm_nrf52.h"
+#include "clockless_arm_nrf52.h"
+
+#endif // #ifndef __INC_FASTLED_ARM_NRF52_H
+
diff --git a/platforms/arm/nrf52/fastpin_arm_nrf52.h b/platforms/arm/nrf52/fastpin_arm_nrf52.h
new file mode 100644
index 0000000000..a8684665c6
--- /dev/null
+++ b/platforms/arm/nrf52/fastpin_arm_nrf52.h
@@ -0,0 +1,328 @@
+#ifndef __FASTPIN_ARM_NRF52_H
+#define __FASTPIN_ARM_NRF52_H
+
+    
+/*
+//
+// Background:
+// ===========
+// Some nRF52 parts (e.g., the nRF52840) have more than 32 GPIO pins,
+// and thus must support two distinct GPIO port registers.
+//
+// For the nRF52 series, the structure to control the port is
+// `NRF_GPIO_Type`, with separate addresses mapped for set, clear, etc.
+// The two ports are defined as NRF_P0 and NRF_P1.
+// An example declaration for the ports is:
+//     #define NRF_P0_BASE   0x50000000UL
+//     #define NRF_P1_BASE   0x50000300UL
+//     #define NRF_P0        ((NRF_GPIO_Type*)NRF_P0_BASE)
+//     #define NRF_P1        ((NRF_GPIO_Type*)NRF_P1_BASE)
+//
+// Therefore, ideally, the _DEFPIN_ARM() macro would simply
+// conditionally pass either NRF_P0 or NRF_P1 to the underlying
+// FastPin<> template class.
+//
+// The "pin" provided to the FastLED<> template (and which
+// the _DEFPIN_ARM() macro specializes for valid pins) is NOT
+// the microcontroller port.pin, but the Arduino digital pin.
+// Some boards have an identity mapping (e.g., nRF52832 Feather)
+// but most do not.  Therefore, the _DEFPIN_ARM() macro
+// must translate the Arduino pin to the mcu port.pin.
+//
+// 
+// Difficulties:
+// =============
+// The goal is to avoid any such lookups, using compile-time
+// optimized functions for speed, in line with FastLED's
+// overall design goals. This means constexpr, compile-time
+// and aggressive inlining of functions....
+//
+// Right away, this precludes the use of g_ADigitalPinMap,
+// which is not constexpr, and thus not available for
+// preprocessor/compile-time optimizations.  Therefore,
+// we have to specialize FastPin<uint8_t PIN>, given a
+// compile-time value for PIN, into at least a PORT and
+// a BITMASK for the port.
+//
+// Arduino compiles using C++11 for at least Feather nRF52840 Express.
+// C++11 is very restrictive about template parameters.
+// Template parameters can only be:
+// 1. a type (as most people expect)
+// 2. a template
+// 3. a constexpr native integer type
+//
+// Therefore, attempts to use `NRF_GPIO_Type *` as a
+// template parameter will fail....
+//
+// Solution:
+// =========
+// The solution chosen is to define a unique structure for each port,
+// whose SOLE purpose is to have a static inline function that
+// returns the `NRF_GPIO_Type *` that is needed.
+//
+// Thus, while it's illegal to pass `NRF_P0` as a template
+// parameter, it's perfectly legal to pass `__generated_struct_NRF_P0`,
+// and have the template call a well-known `static inline` function
+// that returns `NRF_P0` ... which is itself a compile-time constant.
+//
+// Note that additional magic can be applied that will automatically
+// generate the structures.  If you want to add that to this platform,
+// check out the KL26 platform files for a starting point.
+//
+*/
+
+// One tag struct per GPIO port, defined manually to avoid fighting with preprocessor macros; r() returns that port's NRF_GPIO_Type* (NRF_P0 / NRF_P1).
+struct __generated_struct_NRF_P0 {
+    FASTLED_NRF52_INLINE_ATTRIBUTE constexpr static NRF_GPIO_Type * r() {
+        return NRF_P0;
+    }
+};
+struct __generated_struct_NRF_P1 {
+    FASTLED_NRF52_INLINE_ATTRIBUTE constexpr static NRF_GPIO_Type * r() {
+        return NRF_P1;
+    }
+};
+
+
+// The actual class template can then use a typename, for what is essentially a constexpr NRF_GPIO_Type*
+template <uint32_t _MASK, typename _PORT, uint8_t _PORT_NUMBER, uint8_t _PIN_NUMBER> class _ARMPIN  {
+public:
+  typedef volatile uint32_t * port_ptr_t;
+  typedef uint32_t port_t;
+
+  FASTLED_NRF52_INLINE_ATTRIBUTE static void       setOutput() {
+    // OK for this to be more than one instruction, as unusual to quickly switch input/output modes
+    nrf_gpio_cfg(
+        nrf_pin(),
+        NRF_GPIO_PIN_DIR_OUTPUT,        // set pin as output
+        NRF_GPIO_PIN_INPUT_DISCONNECT,  // disconnect the input buffering
+        NRF_GPIO_PIN_NOPULL,            // neither pull-up nor pull-down resistors enabled
+        NRF_GPIO_PIN_H0H1,              // high drive mode required for faster speeds
+        NRF_GPIO_PIN_NOSENSE            // pin sense level disabled
+        );
+  }
+  FASTLED_NRF52_INLINE_ATTRIBUTE static void       setInput()  {
+    // OK for this to be more than one instruction, as unusual to quickly switch input/output modes
+    nrf_gpio_cfg(
+        nrf_pin(),
+        NRF_GPIO_PIN_DIR_INPUT,         // set pin as input
+        NRF_GPIO_PIN_INPUT_DISCONNECT,  // disconnect the input buffering
+        NRF_GPIO_PIN_NOPULL,            // neither pull-up nor pull-down resistors enabled
+        NRF_GPIO_PIN_H0H1,              // high drive mode required for faster speeds
+        NRF_GPIO_PIN_NOSENSE            // pin sense level disabled
+        );
+  }
+  FASTLED_NRF52_INLINE_ATTRIBUTE static void       hi()        { _PORT::r()->OUTSET = _MASK;            } // sets _MASK in the SET   OUTPUT register (output set high)
+  FASTLED_NRF52_INLINE_ATTRIBUTE static void       lo()        { _PORT::r()->OUTCLR = _MASK;            } // sets _MASK in the CLEAR OUTPUT register (output set low)
+  FASTLED_NRF52_INLINE_ATTRIBUTE static void       toggle()    { _PORT::r()->OUT ^= _MASK;              } // toggles _MASK bits in the OUTPUT GPIO port directly
+  FASTLED_NRF52_INLINE_ATTRIBUTE static void       strobe()    { toggle();     toggle();                } // BUGBUG -- Is this used by FastLED?  Without knowing (for example) SPI Speed?
+  FASTLED_NRF52_INLINE_ATTRIBUTE static port_t     hival()     { return _PORT::r()->OUT | _MASK;        } // returns the current OUT value with the _MASK bit(s) set   (does not write the port)
+  FASTLED_NRF52_INLINE_ATTRIBUTE static port_t     loval()     { return _PORT::r()->OUT & ~_MASK;       } // returns the current OUT value with the _MASK bit(s) clear (does not write the port)
+  FASTLED_NRF52_INLINE_ATTRIBUTE static port_ptr_t port()      { return &(_PORT::r()->OUT);             } // gets raw pointer to OUTPUT          GPIO port
+  FASTLED_NRF52_INLINE_ATTRIBUTE static port_ptr_t cport()     { return &(_PORT::r()->OUTCLR);          } // gets raw pointer to the OUTCLR (CLEAR OUTPUT) GPIO register
+  FASTLED_NRF52_INLINE_ATTRIBUTE static port_ptr_t sport()     { return &(_PORT::r()->OUTSET);          } // gets raw pointer to the OUTSET (SET   OUTPUT) GPIO register
+  FASTLED_NRF52_INLINE_ATTRIBUTE static port_t     mask()      { return _MASK;                          } // gets the value of _MASK
+  FASTLED_NRF52_INLINE_ATTRIBUTE static void hi (register port_ptr_t port) { hi();                      } // sets _MASK in the SET   OUTPUT register (output set high)
+  FASTLED_NRF52_INLINE_ATTRIBUTE static void lo (register port_ptr_t port) { lo();                      } // sets _MASK in the CLEAR OUTPUT register (output set low)
+  FASTLED_NRF52_INLINE_ATTRIBUTE static void set(register port_t     val ) { _PORT::r()->OUT = val;     } // sets entire port's value (optimization used by FastLED)
+  FASTLED_NRF52_INLINE_ATTRIBUTE static void fastset(register port_ptr_t port, register port_t val) { *port = val; }
+  constexpr                      static uint32_t   nrf_pin2() { return NRF_GPIO_PIN_MAP(_PORT_NUMBER, _PIN_NUMBER); }
+  constexpr                      static bool       LowSpeedOnlyRecommended() {
+    // only allow one function body.
+    #undef _FASTLED_NRF52_LOW_SPEED_ONLY_BOARD_DETECT
+
+    // unique cases for each board / processor package / module?
+    #if defined(NRF52810_XXAA) && defined(NRF52810_PACKAGE_QFN48)
+        #if defined(_FASTLED_NRF52_LOW_SPEED_ONLY_BOARD_DETECT)
+            #error "Multiple board match"
+        #endif
+        #define _FASTLED_NRF52_LOW_SPEED_ONLY_BOARD_DETECT 1
+        static_assert(_PORT_NUMBER == 0, "nRF52810 only has one port");
+        return (
+            (_PIN_NUMBER == 25) ||
+            (_PIN_NUMBER == 26) ||
+            (_PIN_NUMBER == 27) ||
+            (_PIN_NUMBER == 28) ||
+            (_PIN_NUMBER == 29)
+            );
+    #endif
+    #if defined(NRF52810_XXAA) && defined(NRF52810_PACKAGE_QFN32)
+        #if defined(_FASTLED_NRF52_LOW_SPEED_ONLY_BOARD_DETECT)
+            #error "Multiple board match"
+        #endif
+        #define _FASTLED_NRF52_LOW_SPEED_ONLY_BOARD_DETECT 1
+        static_assert(_PORT_NUMBER == 0, "nRF52810 only has one port");
+        if (_PORT_NUMBER == 0) {
+            if (
+                (_PIN_NUMBER == 26) ||
+                (_PIN_NUMBER == 27)
+                ) {
+                return true;
+            }
+        }
+        return false;
+    #endif
+    #if defined(NRF52832_XXAA) || defined(NRF52832_XXAB)
+        #if defined(_FASTLED_NRF52_LOW_SPEED_ONLY_BOARD_DETECT)
+            #error "Multiple board match"
+        #endif
+        #define _FASTLED_NRF52_LOW_SPEED_ONLY_BOARD_DETECT 1
+        static_assert(_PORT_NUMBER == 0, "nRF52832 only has one port");
+        // data sheets shows the same pins in both QFN48 and WLCSP package
+        // are recommended as low-speed only:
+        return (
+            (_PIN_NUMBER == 22) ||
+            (_PIN_NUMBER == 23) ||
+            (_PIN_NUMBER == 24) ||
+            (_PIN_NUMBER == 25) ||
+            (_PIN_NUMBER == 26) ||
+            (_PIN_NUMBER == 27) ||
+            (_PIN_NUMBER == 28) ||
+            (_PIN_NUMBER == 29) ||
+            (_PIN_NUMBER == 30) ||
+            (_PIN_NUMBER == 31)
+            );
+    #endif
+    #if defined(NRF52840_XXAA) && defined(NRF52840_PACKAGE_aQFN73)
+        #if defined(_FASTLED_NRF52_LOW_SPEED_ONLY_BOARD_DETECT)
+            #error "Multiple board match"
+        #endif
+        #define _FASTLED_NRF52_LOW_SPEED_ONLY_BOARD_DETECT 1
+        static_assert(_PORT_NUMBER == 0 || _PORT_NUMBER == 1, "nRF52840 only has two ports");
+        return
+            (
+                (
+                    (_PORT_NUMBER == 0) &&
+                    (
+                        (_PIN_NUMBER ==  2) ||
+                        (_PIN_NUMBER ==  3) ||
+                        (_PIN_NUMBER ==  9) ||
+                        (_PIN_NUMBER == 10) ||
+                        (_PIN_NUMBER == 11) ||
+                        (_PIN_NUMBER == 12) ||
+                        (_PIN_NUMBER == 14) ||
+                        (_PIN_NUMBER == 28) ||
+                        (_PIN_NUMBER == 29) ||
+                        (_PIN_NUMBER == 30) ||
+                        (_PIN_NUMBER == 31)
+                    )
+                )
+                ||
+                (
+                    (_PORT_NUMBER == 1) &&
+                    (
+                        (_PIN_NUMBER ==  2) ||
+                        (_PIN_NUMBER ==  3) ||
+                        (_PIN_NUMBER ==  4) ||
+                        (_PIN_NUMBER ==  5) ||
+                        (_PIN_NUMBER ==  6) ||
+                        (_PIN_NUMBER ==  7) ||
+                        (_PIN_NUMBER == 10) ||
+                        (_PIN_NUMBER == 13) ||
+                        (_PIN_NUMBER == 15)
+                    )
+                )
+            );
+    #endif
+    #if false && defined(NRF52840_XXAA) && (defined(NRF52840_PACKAGE_aQFN73) || defined(ARDUINO_NRF52840_FEATHER))
+        // Adafruit nRF52840 feather uses RAYTAC MDBT50Q module, which is aQFN73
+        // See https://cdn-learn.adafruit.com/assets/assets/000/068/544/original/Raytac_MDBT50Q.pdf
+        #if defined(_FASTLED_NRF52_LOW_SPEED_ONLY_BOARD_DETECT)
+            #error "Multiple board match"
+        #endif
+        #define _FASTLED_NRF52_LOW_SPEED_ONLY_BOARD_DETECT 1
+        static_assert(_PORT_NUMBER == 0 || _PORT_NUMBER == 1, "nRF52840 only has two ports");
+        return
+            (
+                (
+                    (_PORT_NUMBER == 0) &&
+                    (
+                        (_PIN_NUMBER ==  2) ||
+                        (_PIN_NUMBER ==  3) ||
+                        (_PIN_NUMBER ==  9) ||
+                        (_PIN_NUMBER == 10) ||
+                        (_PIN_NUMBER == 28) ||
+                        (_PIN_NUMBER == 29) ||
+                        (_PIN_NUMBER == 30) ||
+                        (_PIN_NUMBER == 31)
+                    )
+                )
+                ||
+                (
+                    (_PORT_NUMBER == 1) &&
+                    (
+                        (_PIN_NUMBER ==  1) ||
+                        (_PIN_NUMBER ==  2) ||
+                        (_PIN_NUMBER ==  3) ||
+                        (_PIN_NUMBER ==  4) ||
+                        (_PIN_NUMBER ==  5) ||
+                        (_PIN_NUMBER ==  6) ||
+                        (_PIN_NUMBER ==  7) ||
+                        (_PIN_NUMBER == 10) ||
+                        (_PIN_NUMBER == 11) ||
+                        (_PIN_NUMBER == 12) ||
+                        (_PIN_NUMBER == 13) ||
+                        (_PIN_NUMBER == 14) ||
+                        (_PIN_NUMBER == 15)
+                    )
+                )
+            );
+    #endif
+    #if !defined(_FASTLED_NRF52_LOW_SPEED_ONLY_BOARD_DETECT)
+        #warning "Unknown board / package, ... caller must pins support high-speed"
+        return false; // choosing default to be FALSE, to allow users to ATTEMPT to use high-speed on pins where support is not known
+    #endif
+  }
+  // Expose the nrf pin (port/pin combined), port, and pin as properties (e.g., for setting up SPI)
+
+  FASTLED_NRF52_INLINE_ATTRIBUTE static uint32_t   nrf_pin()  { return NRF_GPIO_PIN_MAP(_PORT_NUMBER, _PIN_NUMBER); }
+};
+
+//
+// BOARD_PIN can be either the pin portion of a port.pin, or the combined NRF_GPIO_PIN_MAP() number;
+// the port number passed to _ARMPIN always comes from BOARD_PORT, so both forms are equivalent.
+// For example both the following two defines refer to P1.15 (pin 47) as Arduino pin 3:
+//     _DEFPIN_ARM(3, 1, 15);
+//     _DEFPIN_ARM(3, 1, 47);
+// Similarly, the following defines are all equivalent:
+//     _DEFPIN_ARM_IDENTITY_P1(47);
+//     _DEFPIN_ARM(47, 1, 15);
+//     _DEFPIN_ARM(47, 1, 47);
+//
+
+#define _DEFPIN_ARM(ARDUINO_PIN, BOARD_PORT, BOARD_PIN)  \
+    template<> class FastPin<ARDUINO_PIN> :              \
+    public _ARMPIN<                                      \
+        1u << ((BOARD_PIN) & 31u),                       \
+        __generated_struct_NRF_P ## BOARD_PORT,          \
+        (BOARD_PORT), /* not BOARD_PIN / 32 */           \
+        (BOARD_PIN) & 31u                                \
+        >                                                \
+    {}
+
+#define _DEFPIN_ARM_IDENTITY_P0(ARDUINO_PIN)      \
+    template<> class FastPin<ARDUINO_PIN>         \
+        : public _ARMPIN<                         \
+              1u << (ARDUINO_PIN & 31u),          \
+              __generated_struct_NRF_P0,          \
+              0, /* port 0 */                     \
+              (ARDUINO_PIN & 31u)                 \
+          >                                       \
+    {}
+
+#define _DEFPIN_ARM_IDENTITY_P1(ARDUINO_PIN)      \
+    template<> class FastPin<ARDUINO_PIN> :       \
+    public _ARMPIN<                               \
+        1u << (ARDUINO_PIN & 31u),                \
+        __generated_struct_NRF_P1,                \
+        1, /* port 1 */                           \
+        (ARDUINO_PIN & 31u) /* index within P1 */ \
+        >                                         \
+    {}
+
+// The actual pin definitions are in a separate header file...
+#include "fastpin_arm_nrf52_variants.h"
+
+#define HAS_HARDWARE_PIN_SUPPORT
+
+#endif // #ifndef __FASTPIN_ARM_NRF52_H
diff --git a/platforms/arm/nrf52/fastpin_arm_nrf52_variants.h b/platforms/arm/nrf52/fastpin_arm_nrf52_variants.h
new file mode 100644
index 0000000000..b3b9ff9935
--- /dev/null
+++ b/platforms/arm/nrf52/fastpin_arm_nrf52_variants.h
@@ -0,0 +1,579 @@
+#ifndef __FASTPIN_ARM_NRF52_VARIANTS_H
+#define __FASTPIN_ARM_NRF52_VARIANTS_H
+
+// use this to determine if found variant or not (avoid multiple boards at once)
+#undef __FASTPIN_ARM_NRF52_VARIANT_FOUND
+
+// Adafruit Bluefruit nRF52832 Feather
+// From https://www.adafruit.com/package_adafruit_index.json
+#if defined (ARDUINO_NRF52832_FEATHER) 
+    #if defined(__FASTPIN_ARM_NRF52_VARIANT_FOUND)
+        #error "Cannot define more than one board at a time"
+    #else
+        #define __FASTPIN_ARM_NRF52_VARIANT_FOUND
+    #endif
+    #warning "Adafruit Bluefruit nRF52832 Feather is an untested board -- test and let use know your results via https://github.com/FastLED/FastLED/issues"
+    _DEFPIN_ARM_IDENTITY_P0( 0); // xtal 1
+    _DEFPIN_ARM_IDENTITY_P0( 1); // xtal 2
+    _DEFPIN_ARM_IDENTITY_P0( 2); // a0
+    _DEFPIN_ARM_IDENTITY_P0( 3); // a1
+    _DEFPIN_ARM_IDENTITY_P0( 4); // a2
+    _DEFPIN_ARM_IDENTITY_P0( 5); // a3
+    _DEFPIN_ARM_IDENTITY_P0( 6); // TXD
+    _DEFPIN_ARM_IDENTITY_P0( 7); // GPIO #7
+    _DEFPIN_ARM_IDENTITY_P0( 8); // RXD
+    _DEFPIN_ARM_IDENTITY_P0( 9); // NFC1
+    _DEFPIN_ARM_IDENTITY_P0(10); // NFC2
+    _DEFPIN_ARM_IDENTITY_P0(11); // GPIO #11
+    _DEFPIN_ARM_IDENTITY_P0(12); // SCK
+    _DEFPIN_ARM_IDENTITY_P0(13); // MOSI
+    _DEFPIN_ARM_IDENTITY_P0(14); // MISO
+    _DEFPIN_ARM_IDENTITY_P0(15); // GPIO #15
+    _DEFPIN_ARM_IDENTITY_P0(16); // GPIO #16
+    _DEFPIN_ARM_IDENTITY_P0(17); // LED #1 (red)
+    _DEFPIN_ARM_IDENTITY_P0(18); // SWO
+    _DEFPIN_ARM_IDENTITY_P0(19); // LED #2 (blue)
+    _DEFPIN_ARM_IDENTITY_P0(20); // DFU
+    // _DEFPIN_ARM_IDENTITY_P0(21); // Reset -- not valid to use for FastLED?
+    // _DEFPIN_ARM_IDENTITY_P0(22); // Factory Reset -- not valid to use for FastLED?
+    // _DEFPIN_ARM_IDENTITY_P0(23); // N/A
+    // _DEFPIN_ARM_IDENTITY_P0(24); // N/A
+    _DEFPIN_ARM_IDENTITY_P0(25); // SDA
+    _DEFPIN_ARM_IDENTITY_P0(26); // SCL
+    _DEFPIN_ARM_IDENTITY_P0(27); // GPIO #27
+    _DEFPIN_ARM_IDENTITY_P0(28); // A4
+    _DEFPIN_ARM_IDENTITY_P0(29); // A5
+    _DEFPIN_ARM_IDENTITY_P0(30); // A6
+    _DEFPIN_ARM_IDENTITY_P0(31); // A7
+#endif // defined (ARDUINO_NRF52832_FEATHER) 
+
+// Adafruit Bluefruit nRF52840 Feather Express
+// From https://www.adafruit.com/package_adafruit_index.json
+#if defined (ARDUINO_NRF52840_FEATHER)
+    #if defined(__FASTPIN_ARM_NRF52_VARIANT_FOUND)
+        #error "Cannot define more than one board at a time"
+    #else
+        #define __FASTPIN_ARM_NRF52_VARIANT_FOUND
+    #endif
+
+    #define MAX_PIN (33u) // 34 if wanting to use NFC1 test point
+
+    // Arduino pins 0..7
+    _DEFPIN_ARM( 0, 0, 25); // D0  is P0.25 -- UART TX
+    //_DEFPIN_ARM( 1, 0, 24); // D1  is P0.24 -- UART RX
+    _DEFPIN_ARM( 2, 0, 10); // D2  is P0.10 -- NFC2
+    _DEFPIN_ARM( 3, 1, 47); // D3  is P1.15 -- PIN_LED1 (red)
+    _DEFPIN_ARM( 4, 1, 42); // D4  is P1.10 -- PIN_LED2 (blue)
+    _DEFPIN_ARM( 5, 1, 40); // D5  is P1.08 -- SPI/SS
+    _DEFPIN_ARM( 6, 0,  7); // D6  is P0.07
+    _DEFPIN_ARM( 7, 1, 34); // D7  is P1.02 -- PIN_DFU (Button)
+    
+    // Arduino pins 8..15
+    _DEFPIN_ARM( 8, 0, 16); // D8  is P0.16 -- PIN_NEOPIXEL
+    _DEFPIN_ARM( 9, 0, 26); // D9  is P0.26
+    _DEFPIN_ARM(10, 0, 27); // D10 is P0.27
+    _DEFPIN_ARM(11, 0,  6); // D11 is P0.06
+    _DEFPIN_ARM(12, 0,  8); // D12 is P0.08
+    _DEFPIN_ARM(13, 1, 41); // D13 is P1.09
+    _DEFPIN_ARM(14, 0,  4); // D14 is P0.04 -- A0
+    _DEFPIN_ARM(15, 0,  5); // D15 is P0.05 -- A1
+
+    // Arduino pins 16..23
+    _DEFPIN_ARM(16, 0, 30); // D16 is P0.30 -- A2
+    _DEFPIN_ARM(17, 0, 28); // D17 is P0.28 -- A3
+    _DEFPIN_ARM(18, 0,  2); // D18 is P0.02 -- A4
+    _DEFPIN_ARM(19, 0,  3); // D19 is P0.03 -- A5
+    //_DEFPIN_ARM(20, 0, 29); // D20 is P0.29 -- A6 -- Connected to battery!
+    //_DEFPIN_ARM(21, 0, 31); // D21 is P0.31 -- A7 -- AREF
+    _DEFPIN_ARM(22, 0, 12); // D22 is P0.12 -- SDA
+    _DEFPIN_ARM(23, 0, 11); // D23 is P0.11 -- SCL
+
+    // Arduino pins 24..31
+    _DEFPIN_ARM(24, 0, 15); // D24 is P0.15 -- PIN_SPI_MISO
+    _DEFPIN_ARM(25, 0, 13); // D25 is P0.13 -- PIN_SPI_MOSI
+    _DEFPIN_ARM(26, 0, 14); // D26 is P0.14 -- PIN_SPI_SCK
+    //_DEFPIN_ARM(27, 0, 19); // D27 is P0.19 -- PIN_QSPI_SCK
+    //_DEFPIN_ARM(28, 0, 20); // D28 is P0.20 -- PIN_QSPI_CS
+    //_DEFPIN_ARM(29, 0, 17); // D29 is P0.17 -- PIN_QSPI_DATA0
+    //_DEFPIN_ARM(30, 0, 22); // D30 is P0.22 -- PIN_QSPI_DATA1
+    //_DEFPIN_ARM(31, 0, 23); // D31 is P0.23 -- PIN_QSPI_DATA2
+
+    // Arduino pins 32..34
+    //_DEFPIN_ARM(32, 0, 21); // D32 is P0.21 -- PIN_QSPI_DATA3
+    //_DEFPIN_ARM(33, 0,  9); // D33 is NFC1, only accessible via test point
+#endif // defined (ARDUINO_NRF52840_FEATHER)
+
+// Adafruit Bluefruit nRF52840 Metro Express
+// From https://www.adafruit.com/package_adafruit_index.json
+#if defined (ARDUINO_NRF52840_METRO)
+    #if defined(__FASTPIN_ARM_NRF52_VARIANT_FOUND)
+        #error "Cannot define more than one board at a time"
+    #else
+        #define __FASTPIN_ARM_NRF52_VARIANT_FOUND
+    #endif
+    #warning "Adafruit Bluefruit nRF52840 Metro Express is an untested board -- test and let use know your results via https://github.com/FastLED/FastLED/issues"
+
+    _DEFPIN_ARM( 0, 0, 25); // D0  is P0.25 (UART TX)
+    _DEFPIN_ARM( 1, 0, 24); // D1  is P0.24 (UART RX)
+    _DEFPIN_ARM( 2, 1, 10); // D2  is P1.10 
+    _DEFPIN_ARM( 3, 1,  4); // D3  is P1.04 
+    _DEFPIN_ARM( 4, 1, 11); // D4  is P1.11 
+    _DEFPIN_ARM( 5, 1, 12); // D5  is P1.12 
+    _DEFPIN_ARM( 6, 1, 14); // D6  is P1.14
+    _DEFPIN_ARM( 7, 0, 26); // D7  is P0.26
+    _DEFPIN_ARM( 8, 0, 27); // D8  is P0.27
+    _DEFPIN_ARM( 9, 0, 12); // D9  is P0.12
+    _DEFPIN_ARM(10, 0,  6); // D10 is P0.06 
+    _DEFPIN_ARM(11, 0,  8); // D11 is P0.08 
+    _DEFPIN_ARM(12, 1,  9); // D12 is P1.09 
+    _DEFPIN_ARM(13, 0, 14); // D13 is P0.14 
+    _DEFPIN_ARM(14, 0,  4); // D14 is P0.04 (A0)
+    _DEFPIN_ARM(15, 0,  5); // D15 is P0.05 (A1)
+    _DEFPIN_ARM(16, 0, 28); // D16 is P0.28 (A2)
+    _DEFPIN_ARM(17, 0, 30); // D17 is P0.30 (A3)
+    _DEFPIN_ARM(18, 0,  2); // D18 is P0.02 (A4)
+    _DEFPIN_ARM(19, 0,  3); // D19 is P0.03 (A5)
+    _DEFPIN_ARM(20, 0, 29); // D20 is P0.29 (A6, battery)
+    _DEFPIN_ARM(21, 0, 31); // D21 is P0.31 (A7, ARef)
+    _DEFPIN_ARM(22, 0, 15); // D22 is P0.15 (SDA)
+    _DEFPIN_ARM(23, 0, 16); // D23 is P0.16 (SCL)
+    _DEFPIN_ARM(24, 0, 11); // D24 is P0.11 (SPI MISO)
+    _DEFPIN_ARM(25, 1,  8); // D25 is P1.08 (SPI MOSI)
+    _DEFPIN_ARM(26, 0,  7); // D26 is P0.07 (SPI SCK )
+    //_DEFPIN_ARM(27, 0, 19); // D27 is P0.19 (QSPI CLK   )
+    //_DEFPIN_ARM(28, 0, 20); // D28 is P0.20 (QSPI CS    )
+    //_DEFPIN_ARM(29, 0, 17); // D29 is P0.17 (QSPI Data 0)
+    //_DEFPIN_ARM(30, 0, 23); // D30 is P0.23 (QSPI Data 1)
+    //_DEFPIN_ARM(31, 0, 22); // D31 is P0.22 (QSPI Data 2)
+    //_DEFPIN_ARM(32, 0, 21); // D32 is P0.21 (QSPI Data 3)
+    _DEFPIN_ARM(33, 1, 13); // D33 is P1.13 LED1
+    _DEFPIN_ARM(34, 1, 15); // D34 is P1.15 LED2
+    _DEFPIN_ARM(35, 0, 13); // D35 is P0.13 NeoPixel
+    _DEFPIN_ARM(36, 1,  2); // D36 is P1.02 Switch
+    _DEFPIN_ARM(37, 1,  0); // D37 is P1.00 SWO/DFU
+    _DEFPIN_ARM(38, 0,  9); // D38 is P0.09 NFC1
+    _DEFPIN_ARM(39, 0, 10); // D39 is P0.10 NFC2
+#endif // defined (ARDUINO_NRF52840_METRO)
+
+// Adafruit Bluefruit on nRF52840DK PCA10056
+// From https://www.adafruit.com/package_adafruit_index.json
+#if defined (ARDUINO_NRF52840_PCA10056)
+    #if defined(__FASTPIN_ARM_NRF52_VARIANT_FOUND)
+        #error "Cannot define more than one board at a time"
+    #else
+        #define __FASTPIN_ARM_NRF52_VARIANT_FOUND
+    #endif
+    #warning "Adafruit Bluefruit on nRF52840DK PCA10056 is an untested board -- test and let use know your results via https://github.com/FastLED/FastLED/issues"
+    
+    #if defined(USE_ARDUINO_PIN_NUMBERING)
+        /* pca10056_schematic_and_pcb.pdf
+           Page 3 shows the Arduino Pin to GPIO Px.xx mapping
+        */
+        _DEFPIN_ARM( 0, 1,  1); // D0  is P1.01 
+        _DEFPIN_ARM( 1, 1,  2); // D1  is P1.02 
+        _DEFPIN_ARM( 2, 1,  3); // D2  is P1.03
+        _DEFPIN_ARM( 3, 1,  4); // D3  is P1.04 
+        _DEFPIN_ARM( 4, 1,  5); // D4  is P1.05 
+        _DEFPIN_ARM( 5, 1,  6); // D5  is P1.06 
+        _DEFPIN_ARM( 6, 1,  7); // D6  is P1.07 (BUTTON1 option)
+        _DEFPIN_ARM( 7, 1,  8); // D7  is P1.08 (BUTTON2 option)
+        _DEFPIN_ARM( 8, 1, 10); // D8  is P1.10 
+        _DEFPIN_ARM( 9, 1, 11); // D9  is P1.11 
+        _DEFPIN_ARM(10, 1, 12); // D10 is P1.12 
+        _DEFPIN_ARM(11, 1, 13); // D11 is P1.13 
+        _DEFPIN_ARM(12, 1, 14); // D12 is P1.14
+        _DEFPIN_ARM(13, 1, 15); // D13 is P1.15 
+        _DEFPIN_ARM(14, 0,  0); // D14 is P0.00 (if SB4 bridged)
+        _DEFPIN_ARM(15, 0,  1); // D15 is P0.01 (if SB3 bridged)
+        _DEFPIN_ARM(16, 0,  5); // D16 is P0.05 (aka AIN3, aka UART RTS)
+        _DEFPIN_ARM(17, 0,  6); // D17 is P0.06 (UART TxD)
+        _DEFPIN_ARM(18, 0,  7); // D18 is P0.07 (UART CTS default)
+        _DEFPIN_ARM(19, 0,  8); // D19 is P0.08 (UART RxD)
+        _DEFPIN_ARM(20, 0,  9); // D20 is P0.09 (NFC1)
+        _DEFPIN_ARM(21, 0, 10); // D21 is P0.10 (NFC2)
+        _DEFPIN_ARM(22, 0, 11); // D22 is P0.11 (TRACEDATA2 / BUTTON1 default)
+        _DEFPIN_ARM(23, 0, 12); // D23 is P0.12 (TRACEDATA1 / BUTTON2 default)
+        _DEFPIN_ARM(24, 0, 13); // D24 is P0.13 (LED1)
+        _DEFPIN_ARM(25, 0, 14); // D25 is P0.14 (LED2)
+        _DEFPIN_ARM(26, 0, 15); // D26 is P0.15 (LED3)
+        _DEFPIN_ARM(27, 0, 16); // D27 is P0.16 (LED4)
+        _DEFPIN_ARM(28, 0, 17); // D28 is P0.17 (QSPI !CS , unless SB13 cut)
+        // _DEFPIN_ARM(29, 0, 18); // D29 is P0.18 (RESET)
+        _DEFPIN_ARM(30, 0, 19); // D30 is P0.19 (QSPI CLK , unless SB11 cut)
+        _DEFPIN_ARM(31, 0, 20); // D31 is P0.20 (QSPI DIO0, unless SB12 cut)
+        _DEFPIN_ARM(32, 0, 21); // D32 is P0.21 (QSPI DIO1, unless SB14 cut)
+        _DEFPIN_ARM(33, 0, 22); // D33 is P0.22 (QSPI DIO2, unless SB15 cut)
+        _DEFPIN_ARM(34, 0, 23); // D34 is P0.23 (QSPI DIO3, unless SB10 cut)
+        _DEFPIN_ARM(35, 0, 24); // D35 is P0.24 (BUTTON3)
+        _DEFPIN_ARM(36, 0, 25); // D36 is P0.25 (BUTTON4)
+        _DEFPIN_ARM(37, 1,  0); // D37 is P1.00 (TRACEDATA0 / SWO)
+        _DEFPIN_ARM(38, 1,  9); // D38 is P1.09 (TRACEDATA3) -- must be decimal 9: "09" is an invalid octal literal
+        //_DEFPIN_ARM(??, 0,  2); // D?? is P0.02 (AREF, aka AIN0)
+        //_DEFPIN_ARM(??, 0,  3); // D?? is P0.03 (A0,   aka AIN1)
+        //_DEFPIN_ARM(??, 0,  4); // D?? is P0.04 (A1,   aka AIN2, aka UART CTS option)
+        //_DEFPIN_ARM(??, 0, 28); // D?? is P0.28 (A2,   aka AIN4)
+        //_DEFPIN_ARM(??, 0, 29); // D?? is P0.29 (A3,   aka AIN5)
+        //_DEFPIN_ARM(??, 0, 30); // D?? is P0.30 (A4,   aka AIN6)
+        //_DEFPIN_ARM(??, 0, 31); // D?? is P0.31 (A5,   aka AIN7)
+
+    #else
+        /* 48 pins, defined using natural mapping in Adafruit's variant.cpp (!) */
+        _DEFPIN_ARM_IDENTITY_P0( 0); // P0.00 (XL1 .. ensure SB4 bridged, SB2 cut)
+        _DEFPIN_ARM_IDENTITY_P0( 1); // P0.01 (XL2 .. ensure SB3 bridged, SB1 cut)
+        _DEFPIN_ARM_IDENTITY_P0( 2); // P0.02 (AIN0)
+        _DEFPIN_ARM_IDENTITY_P0( 3); // P0.03 (AIN1)
+        _DEFPIN_ARM_IDENTITY_P0( 4); // P0.04 (AIN2 / UART CTS option)
+        _DEFPIN_ARM_IDENTITY_P0( 5); // P0.05 (AIN3 / UART RTS)
+        _DEFPIN_ARM_IDENTITY_P0( 6); // P0.06 (UART TxD)
+        _DEFPIN_ARM_IDENTITY_P0( 7); // P0.07 (TRACECLK / UART CTS default)
+        _DEFPIN_ARM_IDENTITY_P0( 8); // P0.08 (UART RxD)
+        _DEFPIN_ARM_IDENTITY_P0( 9); // P0.09 (NFC1)
+        _DEFPIN_ARM_IDENTITY_P0(10); // P0.10 (NFC2)
+        _DEFPIN_ARM_IDENTITY_P0(11); // P0.11 (TRACEDATA2 / BUTTON1 default)
+        _DEFPIN_ARM_IDENTITY_P0(12); // P0.12 (TRACEDATA1 / BUTTON2 default)
+        _DEFPIN_ARM_IDENTITY_P0(13); // P0.13 (LED1)
+        _DEFPIN_ARM_IDENTITY_P0(14); // P0.14 (LED2)
+        _DEFPIN_ARM_IDENTITY_P0(15); // P0.15 (LED3)
+        _DEFPIN_ARM_IDENTITY_P0(16); // P0.16 (LED4)
+        //_DEFPIN_ARM_IDENTITY_P0(17); // P0.17 (QSPI !CS )
+        //_DEFPIN_ARM_IDENTITY_P0(18); // P0.18 (RESET)
+        //_DEFPIN_ARM_IDENTITY_P0(19); // P0.19 (QSPI CLK )
+        //_DEFPIN_ARM_IDENTITY_P0(20); // P0.20 (QSPI DIO0)
+        //_DEFPIN_ARM_IDENTITY_P0(21); // P0.21 (QSPI DIO1)
+        //_DEFPIN_ARM_IDENTITY_P0(22); // P0.22 (QSPI DIO2)
+        //_DEFPIN_ARM_IDENTITY_P0(23); // P0.23 (QSPI DIO3)
+        _DEFPIN_ARM_IDENTITY_P0(24); // P0.24 (BUTTON3)
+        _DEFPIN_ARM_IDENTITY_P0(25); // P0.25 (BUTTON4)
+        _DEFPIN_ARM_IDENTITY_P0(26); // P0.26
+        _DEFPIN_ARM_IDENTITY_P0(27); // P0.27
+        _DEFPIN_ARM_IDENTITY_P0(28); // P0.28 (AIN4)
+        _DEFPIN_ARM_IDENTITY_P0(29); // P0.29 (AIN5)
+        _DEFPIN_ARM_IDENTITY_P0(30); // P0.30 (AIN6)
+        _DEFPIN_ARM_IDENTITY_P0(31); // P0.31 (AIN7)
+        _DEFPIN_ARM_IDENTITY_P1(32); // P1.00 (SWO / TRACEDATA0) -- pins 32..47 live on port 1, so use the P1 macro
+        _DEFPIN_ARM_IDENTITY_P1(33); // P1.01 
+        _DEFPIN_ARM_IDENTITY_P1(34); // P1.02
+        _DEFPIN_ARM_IDENTITY_P1(35); // P1.03
+        _DEFPIN_ARM_IDENTITY_P1(36); // P1.04
+        _DEFPIN_ARM_IDENTITY_P1(37); // P1.05
+        _DEFPIN_ARM_IDENTITY_P1(38); // P1.06
+        _DEFPIN_ARM_IDENTITY_P1(39); // P1.07 (BUTTON1 option)
+        _DEFPIN_ARM_IDENTITY_P1(40); // P1.08 (BUTTON2 option)
+        _DEFPIN_ARM_IDENTITY_P1(41); // P1.09 (TRACEDATA3)
+        _DEFPIN_ARM_IDENTITY_P1(42); // P1.10
+        _DEFPIN_ARM_IDENTITY_P1(43); // P1.11
+        _DEFPIN_ARM_IDENTITY_P1(44); // P1.12
+        _DEFPIN_ARM_IDENTITY_P1(45); // P1.13
+        _DEFPIN_ARM_IDENTITY_P1(46); // P1.14
+        _DEFPIN_ARM_IDENTITY_P1(47); // P1.15
+    #endif
+#endif // defined (ARDUINO_NRF52840_PCA10056)
+
+// Electronut labs bluey
+// See https://github.com/sandeepmistry/arduino-nRF5/blob/master/variants/bluey/variant.cpp
+#if defined(ARDUINO_ELECTRONUT_BLUEY)
+    #if defined(__FASTPIN_ARM_NRF52_VARIANT_FOUND)
+        #error "Cannot define more than one board at a time"
+    #else
+        #define __FASTPIN_ARM_NRF52_VARIANT_FOUND
+    #endif
+    #warning "Electronut labs bluey is an untested board -- test and let use know your results via https://github.com/FastLED/FastLED/issues"
+
+    _DEFPIN_ARM( 0, 0, 26); // D0  is P0.26
+    _DEFPIN_ARM( 1, 0, 27); // D1  is P0.27
+    _DEFPIN_ARM( 2, 0, 22); // D2  is P0.22 (SPI SS  )
+    _DEFPIN_ARM( 3, 0, 23); // D3  is P0.23 (SPI MOSI)
+    _DEFPIN_ARM( 4, 0, 24); // D4  is P0.24 (SPI MISO, also A3)
+    _DEFPIN_ARM( 5, 0, 25); // D5  is P0.25 (SPI SCK )
+    _DEFPIN_ARM( 6, 0, 16); // D6  is P0.16 (Button)
+    _DEFPIN_ARM( 7, 0, 19); // D7  is P0.19 (R)
+    _DEFPIN_ARM( 8, 0, 18); // D8  is P0.18 (G)
+    _DEFPIN_ARM( 9, 0, 17); // D9  is P0.17 (B)
+    _DEFPIN_ARM(10, 0, 11); // D10 is P0.11 (SCL)
+    _DEFPIN_ARM(11, 0, 12); // D11 is P0.12 (DRDYn)
+    _DEFPIN_ARM(12, 0, 13); // D12 is P0.13 (SDA)
+    _DEFPIN_ARM(13, 0, 14); // D13 is P0.14 (INT)
+    _DEFPIN_ARM(14, 0, 15); // D14 is P0.15 (INT1)
+    _DEFPIN_ARM(15, 0, 20); // D15 is P0.20 (INT2)
+    _DEFPIN_ARM(16, 0,  2); // D16 is P0.02 (A0)
+    _DEFPIN_ARM(17, 0,  3); // D17 is P0.03 (A1)
+    _DEFPIN_ARM(18, 0,  4); // D18 is P0.04 (A2)
+    _DEFPIN_ARM(19, 0, 24); // D19 is P0.24 (A3, also D4/SPI MISO) -- is this right?
+    _DEFPIN_ARM(20, 0, 29); // D20 is P0.29 (A4)
+    _DEFPIN_ARM(21, 0, 30); // D21 is P0.30 (A5)
+    _DEFPIN_ARM(22, 0, 31); // D22 is P0.31 (A6)
+    _DEFPIN_ARM(23, 0,  8); // D23 is P0.08 (RX)
+    _DEFPIN_ARM(24, 0,  6); // D24 is P0.06 (TX)
+    _DEFPIN_ARM(25, 0,  5); // D25 is P0.05 (RTS)
+    _DEFPIN_ARM(26, 0,  7); // D26 is P0.07 (CTS)
+#endif // defined(ARDUINO_ELECTRONUT_BLUEY)
+
+// Electronut labs hackaBLE
+// See https://github.com/sandeepmistry/arduino-nRF5/blob/master/variants/hackaBLE/variant.cpp
+#if defined(ARDUINO_ELECTRONUT_HACKABLE)
+    #if defined(__FASTPIN_ARM_NRF52_VARIANT_FOUND)
+        #error "Cannot define more than one board at a time"
+    #else
+        #define __FASTPIN_ARM_NRF52_VARIANT_FOUND
+    #endif
+    #warning "Electronut labs hackaBLE is an untested board -- test and let use know your results via https://github.com/FastLED/FastLED/issues"
+    _DEFPIN_ARM( 0, 0, 14); // D0  is P0.14 (RX)
+    _DEFPIN_ARM( 1, 0, 13); // D1  is P0.13 (TX)
+    _DEFPIN_ARM( 2, 0, 12); // D2  is P0.12
+    _DEFPIN_ARM( 3, 0, 11); // D3  is P0.11 (SPI MOSI)
+    _DEFPIN_ARM( 4, 0,  8); // D4  is P0.08 (SPI MISO)
+    _DEFPIN_ARM( 5, 0,  7); // D5  is P0.07 (SPI SCK )
+    _DEFPIN_ARM( 6, 0,  6); // D6  is P0.06
+    _DEFPIN_ARM( 7, 0, 27); // D7  is P0.27
+    _DEFPIN_ARM( 8, 0, 26); // D8  is P0.26
+    _DEFPIN_ARM( 9, 0, 25); // D9  is P0.25
+    _DEFPIN_ARM(10, 0,  5); // D10 is P0.05 (A3)
+    _DEFPIN_ARM(11, 0,  4); // D11 is P0.04 (A2)
+    _DEFPIN_ARM(12, 0,  3); // D12 is P0.03 (A1)
+    _DEFPIN_ARM(13, 0,  2); // D13 is P0.02 (A0 / AREF)
+    _DEFPIN_ARM(14, 0, 23); // D14 is P0.23
+    _DEFPIN_ARM(15, 0, 22); // D15 is P0.22
+    _DEFPIN_ARM(16, 0, 18); // D16 is P0.18
+    _DEFPIN_ARM(17, 0, 16); // D17 is P0.16
+    _DEFPIN_ARM(18, 0, 15); // D18 is P0.15
+    _DEFPIN_ARM(19, 0, 24); // D19 is P0.24
+    _DEFPIN_ARM(20, 0, 28); // D20 is P0.28 (A4)
+    _DEFPIN_ARM(21, 0, 29); // D21 is P0.29 (A5)
+    _DEFPIN_ARM(22, 0, 30); // D22 is P0.30 (A6)
+    _DEFPIN_ARM(23, 0, 31); // D23 is P0.31 (A7)
+    _DEFPIN_ARM(24, 0, 19); // D24 is P0.19 (RED LED)
+    _DEFPIN_ARM(25, 0, 20); // D25 is P0.20 (GREEN LED)
+    _DEFPIN_ARM(26, 0, 17); // D26 is P0.17 (BLUE LED)
+#endif // defined(ARDUINO_ELECTRONUT_HACKABLE)
+
+// Electronut labs hackaBLE_v2
+// See https://github.com/sandeepmistry/arduino-nRF5/blob/master/variants/hackaBLE_v2/variant.cpp
+// (32 pins, natural mapping)
+#if defined(ARDUINO_ELECTRONUT_hackaBLE_v2)
+    #if defined(__FASTPIN_ARM_NRF52_VARIANT_FOUND)
+        #error "Cannot define more than one board at a time"
+    #else
+        #define __FASTPIN_ARM_NRF52_VARIANT_FOUND
+    #endif
+    #warning "Electronut labs hackaBLE_v2 is an untested board -- test and let use know your results via https://github.com/FastLED/FastLED/issues"
+    _DEFPIN_ARM_IDENTITY_P0( 0); // P0.00
+    _DEFPIN_ARM_IDENTITY_P0( 1); // P0.01
+    _DEFPIN_ARM_IDENTITY_P0( 2); // P0.02 (A0 / SDA / AREF)
+    _DEFPIN_ARM_IDENTITY_P0( 3); // P0.03 (A1 / SCL )
+    _DEFPIN_ARM_IDENTITY_P0( 4); // P0.04 (A2)
+    _DEFPIN_ARM_IDENTITY_P0( 5); // P0.05 (A3)
+    _DEFPIN_ARM_IDENTITY_P0( 6); // P0.06
+    _DEFPIN_ARM_IDENTITY_P0( 7); // P0.07 (RX)
+    _DEFPIN_ARM_IDENTITY_P0( 8); // P0.08 (TX)
+    _DEFPIN_ARM_IDENTITY_P0( 9); // P0.09
+    _DEFPIN_ARM_IDENTITY_P0(10); // P0.10
+    _DEFPIN_ARM_IDENTITY_P0(11); // P0.11 (SPI MISO)
+    _DEFPIN_ARM_IDENTITY_P0(12); // P0.12 (SPI MOSI)
+    _DEFPIN_ARM_IDENTITY_P0(13); // P0.13 (SPI SCK )
+    _DEFPIN_ARM_IDENTITY_P0(14); // P0.14 (SPI SS  )
+    _DEFPIN_ARM_IDENTITY_P0(15); // P0.15
+    _DEFPIN_ARM_IDENTITY_P0(16); // P0.16
+    _DEFPIN_ARM_IDENTITY_P0(17); // P0.17 (BLUE LED)
+    _DEFPIN_ARM_IDENTITY_P0(18); // P0.18
+    _DEFPIN_ARM_IDENTITY_P0(19); // P0.19 (RED LED)
+    _DEFPIN_ARM_IDENTITY_P0(20); // P0.20 (GREEN LED)
+    // _DEFPIN_ARM_IDENTITY_P0(21); // P0.21 (RESET)
+    _DEFPIN_ARM_IDENTITY_P0(22); // P0.22
+    _DEFPIN_ARM_IDENTITY_P0(23); // P0.23
+    _DEFPIN_ARM_IDENTITY_P0(24); // P0.24
+    _DEFPIN_ARM_IDENTITY_P0(25); // P0.25
+    _DEFPIN_ARM_IDENTITY_P0(26); // P0.26
+    _DEFPIN_ARM_IDENTITY_P0(27); // P0.27
+    _DEFPIN_ARM_IDENTITY_P0(28); // P0.28 (A4)
+    _DEFPIN_ARM_IDENTITY_P0(29); // P0.29 (A5)
+    _DEFPIN_ARM_IDENTITY_P0(30); // P0.30 (A6)
+    _DEFPIN_ARM_IDENTITY_P0(31); // P0.31 (A7)
+#endif // defined(ARDUINO_ELECTRONUT_hackaBLE_v2)
+
+// RedBear Blend 2
+// See https://github.com/sandeepmistry/arduino-nRF5/blob/master/variants/RedBear_Blend2/variant.cpp
+#if defined(ARDUINO_RB_BLEND_2)
+    #if defined(__FASTPIN_ARM_NRF52_VARIANT_FOUND)
+        #error "Cannot define more than one board at a time"
+    #else
+        #define __FASTPIN_ARM_NRF52_VARIANT_FOUND
+    #endif
+    #warning "RedBear Blend 2 is an untested board -- test and let use know your results via https://github.com/FastLED/FastLED/issues"
+    _DEFPIN_ARM( 0, 0, 11); // D0  is P0.11
+    _DEFPIN_ARM( 1, 0, 12); // D1  is P0.12
+    _DEFPIN_ARM( 2, 0, 13); // D2  is P0.13
+    _DEFPIN_ARM( 3, 0, 14); // D3  is P0.14
+    _DEFPIN_ARM( 4, 0, 15); // D4  is P0.15
+    _DEFPIN_ARM( 5, 0, 16); // D5  is P0.16
+    _DEFPIN_ARM( 6, 0, 17); // D6  is P0.17
+    _DEFPIN_ARM( 7, 0, 18); // D7  is P0.18
+    _DEFPIN_ARM( 8, 0, 19); // D8  is P0.19
+    _DEFPIN_ARM( 9, 0, 20); // D9  is P0.20
+    _DEFPIN_ARM(10, 0, 22); // D10 is P0.22 (SPI SS  )
+    _DEFPIN_ARM(11, 0, 23); // D11 is P0.23 (SPI MOSI)
+    _DEFPIN_ARM(12, 0, 24); // D12 is P0.24 (SPI MISO)
+    _DEFPIN_ARM(13, 0, 25); // D13 is P0.25 (SPI SCK / LED)
+    _DEFPIN_ARM(14, 0,  3); // D14 is P0.03 (A0)
+    _DEFPIN_ARM(15, 0,  4); // D15 is P0.04 (A1)
+    _DEFPIN_ARM(16, 0, 28); // D16 is P0.28 (A2)
+    _DEFPIN_ARM(17, 0, 29); // D17 is P0.29 (A3)
+    _DEFPIN_ARM(18, 0, 30); // D18 is P0.30 (A4)
+    _DEFPIN_ARM(19, 0, 31); // D19 is P0.31 (A5)
+    _DEFPIN_ARM(20, 0, 26); // D20 is P0.26 (SDA)
+    _DEFPIN_ARM(21, 0, 27); // D21 is P0.27 (SCL)
+    _DEFPIN_ARM(22, 0,  8); // D22 is P0.08 (RX)
+    _DEFPIN_ARM(23, 0,  6); // D23 is P0.06 (TX)
+    _DEFPIN_ARM(24, 0,  2); // D24 is P0.02 (AREF)
+#endif // defined(ARDUINO_RB_BLEND_2)
+
+// RedBear BLE Nano 2
+// See https://github.com/sandeepmistry/arduino-nRF5/blob/master/variants/RedBear_BLENano2/variant.cpp
+#if defined(ARDUINO_RB_BLE_NANO_2)
+    #if defined(__FASTPIN_ARM_NRF52_VARIANT_FOUND)
+        #error "Cannot define more than one board at a time"
+    #else
+        #define __FASTPIN_ARM_NRF52_VARIANT_FOUND
+    #endif
+    #warning "RedBear BLE Nano 2 is an untested board -- test and let use know your results via https://github.com/FastLED/FastLED/issues"
+    _DEFPIN_ARM( 0, 0, 30); // D0  is P0.30 (A0 / RX)
+    _DEFPIN_ARM( 1, 0, 29); // D1  is P0.29 (A1 / TX)
+    _DEFPIN_ARM( 2, 0, 28); // D2  is P0.28 (A2 / SDA)
+    _DEFPIN_ARM( 3, 0,  2); // D3  is P0.02 (A3 / SCL)
+    _DEFPIN_ARM( 4, 0,  5); // D4  is P0.05 (A4)
+    _DEFPIN_ARM( 5, 0,  4); // D5  is P0.04 (A5)
+    _DEFPIN_ARM( 6, 0,  3); // D6  is P0.03 (SPI SS  )
+    _DEFPIN_ARM( 7, 0,  6); // D7  is P0.06 (SPI MOSI)
+    _DEFPIN_ARM( 8, 0,  7); // D8  is P0.07 (SPI MISO)
+    _DEFPIN_ARM( 9, 0,  8); // D9  is P0.08 (SPI SCK )
+    // _DEFPIN_ARM(10, 0, 21); // D10 is P0.21 (RESET)
+    _DEFPIN_ARM(13, 0, 11); // D13 is P0.11 (LED) -- TODO confirm pin number against variant.cpp (was labeled D11)
+#endif // defined(ARDUINO_RB_BLE_NANO_2)
+
+// Nordic Semiconductor nRF52 DK
+// See https://github.com/sandeepmistry/arduino-nRF5/blob/master/variants/nRF52DK/variant.cpp
+#if defined(ARDUINO_NRF52_DK)
+    #if defined(__FASTPIN_ARM_NRF52_VARIANT_FOUND)
+        #error "Cannot define more than one board at a time"
+    #else
+        #define __FASTPIN_ARM_NRF52_VARIANT_FOUND
+    #endif
+    #warning "Nordic Semiconductor nRF52 DK is an untested board -- test and let use know your results via https://github.com/FastLED/FastLED/issues"
+    _DEFPIN_ARM( 0, 0, 11); // D0  is P0.11
+    _DEFPIN_ARM( 1, 0, 12); // D1  is P0.12
+    _DEFPIN_ARM( 2, 0, 13); // D2  is P0.13 (BUTTON1)
+    _DEFPIN_ARM( 3, 0, 14); // D3  is P0.14 (BUTTON2)
+    _DEFPIN_ARM( 4, 0, 15); // D4  is P0.15 (BUTTON3)
+    _DEFPIN_ARM( 5, 0, 16); // D5  is P0.16 (BUTTON4)
+    _DEFPIN_ARM( 6, 0, 17); // D6  is P0.17 (LED1)
+    _DEFPIN_ARM( 7, 0, 18); // D7  is P0.18 (LED2)
+    _DEFPIN_ARM( 8, 0, 19); // D8  is P0.19 (LED3)
+    _DEFPIN_ARM( 9, 0, 20); // D9  is P0.20 (LED4)
+    _DEFPIN_ARM(10, 0, 22); // D10 is P0.22 (SPI SS  )
+    _DEFPIN_ARM(11, 0, 23); // D11 is P0.23 (SPI MOSI)
+    _DEFPIN_ARM(12, 0, 24); // D12 is P0.24 (SPI MISO)
+    _DEFPIN_ARM(13, 0, 25); // D13 is P0.25 (SPI SCK / LED)
+    _DEFPIN_ARM(14, 0,  3); // D14 is P0.03 (A0)
+    _DEFPIN_ARM(15, 0,  4); // D15 is P0.04 (A1)
+    _DEFPIN_ARM(16, 0, 28); // D16 is P0.28 (A2)
+    _DEFPIN_ARM(17, 0, 29); // D17 is P0.29 (A3)
+    _DEFPIN_ARM(18, 0, 30); // D18 is P0.30 (A4)
+    _DEFPIN_ARM(19, 0, 31); // D19 is P0.31 (A5)
+    _DEFPIN_ARM(20, 0,  5); // D20 is P0.05 (A6)
+    _DEFPIN_ARM(21, 0,  2); // D21 is P0.02 (A7 / AREF)
+    _DEFPIN_ARM(22, 0, 26); // D22 is P0.26 (SDA)
+    _DEFPIN_ARM(23, 0, 27); // D23 is P0.27 (SCL)
+    _DEFPIN_ARM(24, 0,  8); // D24 is P0.08 (RX)
+    _DEFPIN_ARM(25, 0,  6); // D25 is P0.06 (TX)
+#endif // defined(ARDUINO_NRF52_DK)
+
+// Taida Century nRF52 mini board
+// https://github.com/sandeepmistry/arduino-nRF5/blob/master/variants/Taida_Century_nRF52_minidev/variant.cpp
+#if defined(ARDUINO_STCT_NRF52_minidev)
+    #if defined(__FASTPIN_ARM_NRF52_VARIANT_FOUND)
+        #error "Cannot define more than one board at a time"
+    #else
+        #define __FASTPIN_ARM_NRF52_VARIANT_FOUND
+    #endif
+    #warning "Taida Century nRF52 mini board is an untested board -- test and let use know your results via https://github.com/FastLED/FastLED/issues"
+    //_DEFPIN_ARM( 0, 0, 25); // D0  is P0.25 (near radio!)
+    //_DEFPIN_ARM( 1, 0, 26); // D1  is P0.26 (near radio!)
+    //_DEFPIN_ARM( 2, 0, 27); // D2  is P0.27 (near radio!)
+    //_DEFPIN_ARM( 3, 0, 28); // D3  is P0.28 (near radio!)
+    //_DEFPIN_ARM( 4, 0, 29); // D4  is P0.29 (Not connected, near radio!)
+    //_DEFPIN_ARM( 5, 0, 30); // D5  is P0.30 (LED1, near radio!)
+    //_DEFPIN_ARM( 6, 0, 31); // D6  is P0.31 (LED2, near radio!)
+    _DEFPIN_ARM( 7, 0,  2); // D7  is P0.02 (SDA)
+    _DEFPIN_ARM( 8, 0,  3); // D8  is P0.03 (SCL)
+    _DEFPIN_ARM( 9, 0,  4); // D9  is P0.04 (BUTTON1 / NFC1)
+    _DEFPIN_ARM(10, 0,  5); // D10 is P0.05
+    //_DEFPIN_ARM(11, 0,  0); // D11 is P0.00 (Not connected)
+    //_DEFPIN_ARM(12, 0,  1); // D12 is P0.01 (Not connected)
+    _DEFPIN_ARM(13, 0,  6); // D13 is P0.06
+    _DEFPIN_ARM(14, 0,  7); // D14 is P0.07
+    _DEFPIN_ARM(15, 0,  8); // D15 is P0.08
+    //_DEFPIN_ARM(16, 0,  9); // D16 is P0.09 (Not connected)
+    //_DEFPIN_ARM(17, 0, 10); // D17 is P0.10 (NFC2, Not connected)
+    _DEFPIN_ARM(18, 0, 11); // D18 is P0.11 (RXD)
+    _DEFPIN_ARM(19, 0, 12); // D19 is P0.12 (TXD)
+    _DEFPIN_ARM(20, 0, 13); // D20 is P0.13 (SPI SS  )
+    _DEFPIN_ARM(21, 0, 14); // D21 is P0.14 (SPI MISO)
+    _DEFPIN_ARM(22, 0, 15); // D22 is P0.15 (SPI MOSI)
+    _DEFPIN_ARM(23, 0, 16); // D23 is P0.16 (SPI SCK )
+    _DEFPIN_ARM(24, 0, 17); // D24 is P0.17 (A0)
+    _DEFPIN_ARM(25, 0, 18); // D25 is P0.18 (A1)
+    _DEFPIN_ARM(26, 0, 19); // D26 is P0.19 (A2)
+    _DEFPIN_ARM(27, 0, 20); // D27 is P0.20 (A3)
+    //_DEFPIN_ARM(28, 0, 22); // D28 is P0.22 (A4, near radio!)
+    //_DEFPIN_ARM(29, 0, 23); // D29 is P0.23 (A5, near radio!)
+    _DEFPIN_ARM(30, 0, 24); // D30 is P0.24
+    // _DEFPIN_ARM(31, 0, 21); // D31 is P0.21 (RESET)
+#endif // defined(ARDUINO_STCT_NRF52_minidev)
+
+// Generic nRF52832
+// See https://github.com/sandeepmistry/arduino-nRF5/blob/master/boards.txt
+#if defined(ARDUINO_GENERIC) && (\
+    defined(NRF52832_XXAA) || defined(NRF52832_XXAB)\
+    )
+    #if defined(__FASTPIN_ARM_NRF52_VARIANT_FOUND)
+        #error "Cannot define more than one board at a time"
+    #else
+        #define __FASTPIN_ARM_NRF52_VARIANT_FOUND
+    #endif
+    #warning "Using `generic` NRF52832 board is an untested configuration -- test and let use know your results via https://github.com/FastLED/FastLED/issues"
+
+    _DEFPIN_ARM_IDENTITY_P0( 0); // P0.00 (    UART RX)
+    _DEFPIN_ARM_IDENTITY_P0( 1); // P0.01 (A0, UART TX)
+    _DEFPIN_ARM_IDENTITY_P0( 2); // P0.02 (A1)
+    _DEFPIN_ARM_IDENTITY_P0( 3); // P0.03 (A2)
+    _DEFPIN_ARM_IDENTITY_P0( 4); // P0.04 (A3)
+    _DEFPIN_ARM_IDENTITY_P0( 5); // P0.05 (A4)
+    _DEFPIN_ARM_IDENTITY_P0( 6); // P0.06 (A5)
+    _DEFPIN_ARM_IDENTITY_P0( 7); // P0.07
+    _DEFPIN_ARM_IDENTITY_P0( 8); // P0.08
+    _DEFPIN_ARM_IDENTITY_P0( 9); // P0.09
+    _DEFPIN_ARM_IDENTITY_P0(10); // P0.10
+    _DEFPIN_ARM_IDENTITY_P0(11); // P0.11
+    _DEFPIN_ARM_IDENTITY_P0(12); // P0.12
+    _DEFPIN_ARM_IDENTITY_P0(13); // P0.13 (LED)
+    _DEFPIN_ARM_IDENTITY_P0(14); // P0.14
+    _DEFPIN_ARM_IDENTITY_P0(15); // P0.15
+    _DEFPIN_ARM_IDENTITY_P0(16); // P0.16
+    _DEFPIN_ARM_IDENTITY_P0(17); // P0.17
+    _DEFPIN_ARM_IDENTITY_P0(18); // P0.18
+    _DEFPIN_ARM_IDENTITY_P0(19); // P0.19
+    _DEFPIN_ARM_IDENTITY_P0(20); // P0.20 (I2C SDA)
+    _DEFPIN_ARM_IDENTITY_P0(21); // P0.21 (I2C SCL)
+    _DEFPIN_ARM_IDENTITY_P0(22); // P0.22 (SPI MISO)
+    _DEFPIN_ARM_IDENTITY_P0(23); // P0.23 (SPI MOSI)
+    _DEFPIN_ARM_IDENTITY_P0(24); // P0.24 (SPI SCK )
+    _DEFPIN_ARM_IDENTITY_P0(25); // P0.25 (SPI SS  )
+    _DEFPIN_ARM_IDENTITY_P0(26); // P0.26
+    _DEFPIN_ARM_IDENTITY_P0(27); // P0.27
+    _DEFPIN_ARM_IDENTITY_P0(28); // P0.28
+    _DEFPIN_ARM_IDENTITY_P0(29); // P0.29
+    _DEFPIN_ARM_IDENTITY_P0(30); // P0.30
+    _DEFPIN_ARM_IDENTITY_P0(31); // P0.31
+#endif // defined(ARDUINO_GENERIC)
+
+
+#endif // __FASTPIN_ARM_NRF52_VARIANTS_H
\ No newline at end of file
diff --git a/platforms/arm/nrf52/fastspi_arm_nrf52.h b/platforms/arm/nrf52/fastspi_arm_nrf52.h
new file mode 100644
index 0000000000..8492282bae
--- /dev/null
+++ b/platforms/arm/nrf52/fastspi_arm_nrf52.h
@@ -0,0 +1,341 @@
+#ifndef __FASTSPI_ARM_NRF52_H
+#define __FASTSPI_ARM_NRF52_H
+
+
+#ifndef FASTLED_FORCE_SOFTWARE_SPI
+
+    #include <nrf_spim.h>
+
+    #define FASTLED_ALL_PINS_HARDWARE_SPI
+
+
+    // NRF52810 has SPIM0: Frequencies from 125kbps to 8Mbps
+    // NRF52832 adds SPIM1, SPIM2 (same frequencies)
+    // NRF52840 adds SPIM3, which supports the same frequencies and can additionally run at up to 32Mbps(!)
+    #if !defined(FASTLED_NRF52_SPIM)
+        #define FASTLED_NRF52_SPIM   NRF_SPIM0
+    #endif
+
+    /* This class is slightly simpler than fastpin, as it can rely on fastpin
+     * to handle the mapping to the underlying PN.XX board-level pins...
+     */
+
+    /// SPI_CLOCK_DIVIDER is number of CPU clock cycles per SPI transmission bit?
+    template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint8_t _SPI_CLOCK_DIVIDER>
+    class NRF52SPIOutput {
+
+    private:
+        // static variables -- always using same SPIM instance
+        static bool s_InUse;
+        static bool s_NeedToWait; // a data transfer was started, and completion event was not cleared.
+
+        /*
+        // TODO -- Workaround nRF52840 errata #198, which relates to
+        //         contention between SPIM3 and CPU over AHB.
+        //         The workaround is to ensure the SPIM TX buffer
+        //         is on a different / dedicated RAM block.
+        //         This also avoids AHB contention generally, so
+        //         should be applied to all supported boards.
+        //         
+        //         But... how to allocate m_Buffer[] to be at a
+        //         specific memory range?  Also, might need to
+        //         avoid use of single-transaction writeBytes()
+        //         as cannot control where that memory lies....
+        */
+        static uint8_t  s_BufferIndex;
+        static uint8_t  s_Buffer[2][2]; // 2x two-byte buffers, allows one buffer currently being sent, and a second one being prepped to send.
+
+        // This allows saving the configuration of the SPIM instance
+        // upon select(), and restoring the configuration upon release().
+        struct spim_config {
+            uint32_t inten;
+            uint32_t shorts;
+            uint32_t sck_pin;
+            uint32_t mosi_pin;
+            uint32_t miso_pin;
+            uint32_t frequency;
+            // data pointers, RX/TX counts not saved as would only hide bugs
+            uint32_t config; // mode & bit order
+            uint32_t orc;
+
+#if false // additional configuration to save/restore for SPIM3
+            uint32_t csn_pin;
+            uint32_t csn_polarity; // CSNPOL
+            uint32_t csn_duration; // IFTIMING.CSNDUR
+            uint32_t rx_delay;     // IFTIMING.RXDELAY
+            uint32_t dcx_pin;      // PSELDCX
+            uint32_t dcx_config;   // DCXCNT
+#endif
+
+        } m_SpiSavedConfig;
+        void saveSpimConfig() {
+            m_SpiSavedConfig.inten          = FASTLED_NRF52_SPIM->INTENSET;
+            m_SpiSavedConfig.shorts         = FASTLED_NRF52_SPIM->SHORTS;
+            m_SpiSavedConfig.sck_pin        = FASTLED_NRF52_SPIM->PSEL.SCK;
+            m_SpiSavedConfig.mosi_pin       = FASTLED_NRF52_SPIM->PSEL.MOSI;
+            m_SpiSavedConfig.miso_pin       = FASTLED_NRF52_SPIM->PSEL.MISO;
+            m_SpiSavedConfig.frequency      = FASTLED_NRF52_SPIM->FREQUENCY;
+            m_SpiSavedConfig.config         = FASTLED_NRF52_SPIM->CONFIG;
+            m_SpiSavedConfig.orc            = FASTLED_NRF52_SPIM->ORC;
+
+#if false // additional configuration to save/restore for SPIM3
+            m_SpiSavedConfig.csn_pin        = FASTLED_NRF52_SPIM->PSEL.CSN;
+            m_SpiSavedConfig.csn_polarity   = FASTLED_NRF52_SPIM->CSNPOL;
+            m_SpiSavedConfig.csn_duration   = FASTLED_NRF52_SPIM->IFTIMING.CSNDUR;
+            m_SpiSavedConfig.dcx_pin        = FASTLED_NRF52_SPIM->PSELDCX;
+            m_SpiSavedConfig.dcx_config     = FASTLED_NRF52_SPIM->DCXCNT;
+#endif
+        }
+        void restoreSpimConfig() {
+            // 0. ASSERT() the SPIM instance is not enabled
+
+            FASTLED_NRF52_SPIM->INTENCLR        = 0xFFFFFFFF;
+            FASTLED_NRF52_SPIM->INTENSET        = m_SpiSavedConfig.inten;
+            FASTLED_NRF52_SPIM->SHORTS          = m_SpiSavedConfig.shorts;
+            FASTLED_NRF52_SPIM->PSEL.SCK        = m_SpiSavedConfig.sck_pin;
+            FASTLED_NRF52_SPIM->PSEL.MOSI       = m_SpiSavedConfig.mosi_pin;
+            FASTLED_NRF52_SPIM->PSEL.MISO       = m_SpiSavedConfig.miso_pin;
+            FASTLED_NRF52_SPIM->FREQUENCY       = m_SpiSavedConfig.frequency;
+            FASTLED_NRF52_SPIM->CONFIG          = m_SpiSavedConfig.config;
+            FASTLED_NRF52_SPIM->ORC             = m_SpiSavedConfig.orc;
+
+#if false // additional configuration to save/restore for SPIM3
+            FASTLED_NRF52_SPIM->PSEL.CSN        = m_SpiSavedConfig.csn_pin;
+            FASTLED_NRF52_SPIM->CSNPOL          = m_SpiSavedConfig.csn_polarity;
+            FASTLED_NRF52_SPIM->IFTIMING.CSNDUR = m_SpiSavedConfig.csn_duration;
+            FASTLED_NRF52_SPIM->PSELDCX         = m_SpiSavedConfig.dcx_pin;
+            FASTLED_NRF52_SPIM->DCXCNT          = m_SpiSavedConfig.dcx_config;
+#endif
+        }
+
+    public:
+        NRF52SPIOutput() {}
+
+        // Low frequency GPIO is for signals with a frequency up to 10 kHz.  Lowest speed SPIM is 125kbps.
+        static_assert(!FastPin<_DATA_PIN>::LowSpeedOnlyRecommended(),  "Invalid (low-speed only) pin specified");
+        static_assert(!FastPin<_CLOCK_PIN>::LowSpeedOnlyRecommended(), "Invalid (low-speed only) pin specified");
+
+        /// initialize the SPI subsystem
+        void init() {
+            // 0. ASSERT() the SPIM instance is not enabled / in use
+            //ASSERT(m_SPIM->ENABLE != (SPIM_ENABLE_ENABLE_Enabled << SPIM_ENABLE_ENABLE_Pos));
+
+            // 1. set pins to output/H0H1 drive/etc.
+            FastPin<_DATA_PIN>::setOutput();
+            FastPin<_CLOCK_PIN>::setOutput();
+
+            // 2. Configure SPIMx: mode 0, MSB first, fixed 4MHz, clock + data pins only (third pin argument left not connected)
+            nrf_spim_configure(
+                FASTLED_NRF52_SPIM,
+                NRF_SPIM_MODE_0,
+                NRF_SPIM_BIT_ORDER_MSB_FIRST
+                );
+            nrf_spim_frequency_set(
+                FASTLED_NRF52_SPIM,
+                NRF_SPIM_FREQ_4M // BUGBUG -- use _SPI_CLOCK_DIVIDER to determine frequency
+                );
+            nrf_spim_pins_set(
+                FASTLED_NRF52_SPIM,
+                FastPin<_CLOCK_PIN>::nrf_pin(),
+                FastPin<_DATA_PIN>::nrf_pin(),
+                NRF_SPIM_PIN_NOT_CONNECTED
+                );
+
+            // 3. Ensure events are cleared (END and STARTED are the only two events this class polls)
+            nrf_spim_event_clear(FASTLED_NRF52_SPIM, NRF_SPIM_EVENT_END);
+            nrf_spim_event_clear(FASTLED_NRF52_SPIM, NRF_SPIM_EVENT_STARTED);
+
+            // 4. Enable the SPIM instance
+            nrf_spim_enable(FASTLED_NRF52_SPIM);
+        }
+
+        /// latch the CS select: save the SPIM instance's current register configuration, then re-run init()
+        void select() {
+            //ASSERT(!s_InUse);
+            saveSpimConfig();
+            s_InUse = true;
+            init();
+        }
+
+        /// release the CS select: call waitFully(), then restore the register configuration saved by select()
+        void release() {
+            //ASSERT(s_InUse);
+            waitFully();
+            s_InUse = false;
+            restoreSpimConfig();
+        }
+
+        /// wait until all queued up data has been written
+        static void waitFully() {
+            if (!s_NeedToWait) return;
+            // else, need to wait for END event
+            while(!FASTLED_NRF52_SPIM->EVENTS_END) {};
+            s_NeedToWait = 0;
+            // only use two events in this code...
+            nrf_spim_event_clear(FASTLED_NRF52_SPIM, NRF_SPIM_EVENT_END);
+            nrf_spim_event_clear(FASTLED_NRF52_SPIM, NRF_SPIM_EVENT_STARTED);
+            return;
+        }
+        // wait only until we can add a new transaction into the registers
+        // (caller must still waitFully() before actually starting this next transaction)
+        static void wait() {
+            if (!s_NeedToWait) return;
+            while (!FASTLED_NRF52_SPIM->EVENTS_STARTED) {};
+            // leave the event set here... caller must waitFully() and start next transaction
+            return;
+        }
+
+        /// write a byte out via SPI (returns once the transfer has been started;
+        /// call waitFully() to block until it has completed)
+        static void writeByte(uint8_t b) {
+            wait();
+            // cannot hand the SPIM a pointer to the stack, so stage the byte in whichever
+            // static buffer was not used by the previous write
+            uint8_t i = (s_BufferIndex ? 1u : 0u);
+            s_BufferIndex = !s_BufferIndex; // 1 <==> 0 swap
+
+            s_Buffer[i][0u] = b;
+            nrf_spim_tx_buffer_set(FASTLED_NRF52_SPIM, &(s_Buffer[i][0u]), 1);
+
+            // the previous transfer must have ended before START is triggered again
+            waitFully();
+            nrf_spim_task_trigger(FASTLED_NRF52_SPIM, NRF_SPIM_TASK_START);
+
+            // record that a transfer is outstanding: without this, wait() and
+            // waitFully() return immediately and the next write (or release())
+            // would run while this transfer is still being clocked out
+            s_NeedToWait = true;
+        }
+
+        /// write a word out via SPI, high byte first (returns once the transfer has been started;
+        /// call waitFully() to block until it has completed)
+        static void writeWord(uint16_t w) {
+            wait();
+            // cannot hand the SPIM a pointer to the stack, so stage the bytes in whichever
+            // static buffer was not used by the previous write
+            uint8_t i = (s_BufferIndex ? 1u : 0u);
+            s_BufferIndex = !s_BufferIndex; // 1 <==> 0 swap
+
+            s_Buffer[i][0u] = (w >> 8u);
+            s_Buffer[i][1u] = (w & 0xFFu);
+            nrf_spim_tx_buffer_set(FASTLED_NRF52_SPIM, &(s_Buffer[i][0u]), 2);
+
+            // the previous transfer must have ended before START is triggered again
+            waitFully();
+            nrf_spim_task_trigger(FASTLED_NRF52_SPIM, NRF_SPIM_TASK_START);
+
+            // record that a transfer is outstanding: without this, wait() and
+            // waitFully() return immediately and the next write (or release())
+            // would run while this transfer is still being clocked out
+            s_NeedToWait = true;
+        }
+
+        /// A raw set of writing byte values, assumes setup/init/waiting done elsewhere (static for use by adjustment classes)
+        static void writeBytesValueRaw(uint8_t value, int len) {
+            while (len--) { writeByte(value); }
+        }
+
+        /// A full cycle of writing a value for len bytes, including select, release, and waiting
+        void writeBytesValue(uint8_t value, int len) {
+            select();
+            writeBytesValueRaw(value, len);
+            waitFully();
+            release();
+        }
+
+        /// A full cycle of writing a raw block of data out, including select, release, and waiting
+        void writeBytes(uint8_t *data, int len) {
+            // This is a special-case, with no adjustment of the bytes... write them directly...
+            select();
+            wait();
+            // NOTE(review): len is handed to the SPIM unchecked -- confirm callers
+            //               stay within the peripheral's maximum transfer count
+            nrf_spim_tx_buffer_set(FASTLED_NRF52_SPIM, data, len);
+            waitFully();
+            nrf_spim_task_trigger(FASTLED_NRF52_SPIM, NRF_SPIM_TASK_START);
+
+            // mark the transfer as outstanding so that the waitFully() below really
+            // blocks until the END event; otherwise release() would restore the saved
+            // SPIM configuration while data[] is still being clocked out
+            s_NeedToWait = true;
+            waitFully();
+            release();
+        }
+
+        /// A full cycle of writing a block of data out byte-by-byte, each byte passed through D::adjust(), including select, release, and waiting
+        template<class D> void writeBytes(uint8_t *data, int len) {
+            uint8_t * end = data + len;
+            select();
+            wait();
+            while(data != end) {
+                writeByte(D::adjust(*data++));
+            }
+            D::postBlock(len);
+            waitFully();
+            release();
+        }
+        /// specialization for DATA_NOP ...
+        //template<DATA_NOP> void writeBytes(uint8_t * data, int len) {
+        //    writeBytes(data, len);
+        //}
+
+        /// write a single bit out, which bit from the passed in byte is determined by template parameter
+        template <uint8_t BIT> inline static void writeBit(uint8_t b) {
+            // SPIM instance must be finished transmitting and then disabled
+            waitFully();
+            nrf_spim_disable(FASTLED_NRF52_SPIM);
+            // set the data pin to appropriate state
+            if (b & (1 << BIT)) {
+                FastPin<_DATA_PIN>::hi();
+            } else {
+                FastPin<_DATA_PIN>::lo();
+            }
+            // delay 1/2 cycle per SPI bit
+            delaycycles<_SPI_CLOCK_DIVIDER/2>();
+            FastPin<_CLOCK_PIN>::toggle();
+            delaycycles<_SPI_CLOCK_DIVIDER/2>();
+            FastPin<_CLOCK_PIN>::toggle();
+            // re-enable the SPIM instance
+            nrf_spim_enable(FASTLED_NRF52_SPIM);
+        }
+
+        /// write out pixel data from the given PixelController object, including select, release, and waiting
+        template <uint8_t FLAGS, class D, EOrder RGB_ORDER> void writePixels(PixelController<RGB_ORDER> pixels) {
+            select();
+            int len = pixels.mLen;
+            // TODO: If user indicates a pre-allocated double-buffer,
+            //       then process all the pixels at once into that buffer,
+            //       then use the non-templated WriteBytes(data, len) function
+            //       to write the entire buffer as a single SPI transaction.
+            while (pixels.has(1)) {
+                if (FLAGS & FLAG_START_BIT) {
+                    writeBit<0>(1);
+                }
+                writeByte(D::adjust(pixels.loadAndScale0()));
+                writeByte(D::adjust(pixels.loadAndScale1()));
+                writeByte(D::adjust(pixels.loadAndScale2()));
+                pixels.advanceData();
+                pixels.stepDithering();
+            }
+            D::postBlock(len);
+            waitFully();
+            release();
+        }
+    };
+
+    // Static member definition and initialization using templates.
+    // see https://stackoverflow.com/questions/3229883/static-member-initialization-in-a-class-template#answer-3229919
+    template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint8_t _SPI_CLOCK_DIVIDER>
+    bool NRF52SPIOutput<_DATA_PIN, _CLOCK_PIN, _SPI_CLOCK_DIVIDER>::s_InUse = false;
+    template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint8_t _SPI_CLOCK_DIVIDER>
+    bool NRF52SPIOutput<_DATA_PIN, _CLOCK_PIN, _SPI_CLOCK_DIVIDER>::s_NeedToWait = false;
+    template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint8_t _SPI_CLOCK_DIVIDER>
+    uint8_t NRF52SPIOutput<_DATA_PIN, _CLOCK_PIN, _SPI_CLOCK_DIVIDER>::s_BufferIndex = 0;
+    template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint8_t _SPI_CLOCK_DIVIDER>
+    uint8_t NRF52SPIOutput<_DATA_PIN, _CLOCK_PIN, _SPI_CLOCK_DIVIDER>::s_Buffer[2][2] = {{0,0},{0,0}};
+
+#endif // #ifndef FASTLED_FORCE_SOFTWARE_SPI
+
+
+
+#endif // #ifndef __FASTSPI_ARM_NRF52_H
diff --git a/platforms/arm/nrf52/led_sysdefs_arm_nrf52.h b/platforms/arm/nrf52/led_sysdefs_arm_nrf52.h
new file mode 100644
index 0000000000..440aed9e4c
--- /dev/null
+++ b/platforms/arm/nrf52/led_sysdefs_arm_nrf52.h
@@ -0,0 +1,58 @@
+#ifndef __LED_SYSDEFS_ARM_NRF52
+#define __LED_SYSDEFS_ARM_NRF52
+
+#define FASTLED_ARM
+
+#ifndef F_CPU
+    #define F_CPU 64000000 // the NRF52 series has a 64MHz CPU
+#endif
+
+// even though CPU is at 64MHz, use the 8MHz-defined timings because...
+// PWM module   runs at 16MHz
+// SPI0..2      runs at  8MHz
+#define CLOCKLESS_FREQUENCY 16000000 // the NRF52 has EasyDMA for PWM module at 16MHz
+
+#ifndef F_TIMER
+    #define F_TIMER 16000000 // the NRF52 timer is 16MHz, even though CPU is 64MHz
+#endif
+
+#if !defined(FASTLED_USE_PROGMEM)
+    #define FASTLED_USE_PROGMEM 0 // nRF52 series have flat memory model
+#endif
+
+#if !defined(FASTLED_ALLOW_INTERRUPTS)
+    #define FASTLED_ALLOW_INTERRUPTS 1
+#endif
+
+// Use PWM instance 0
+// See clockless_arm_nrf52.h and (in root of library) platforms.cpp
+#define FASTLED_NRF52_ENABLE_PWM_INSTANCE0
+
+#if defined(FASTLED_NRF52_NEVER_INLINE) // opt-out: ask the compiler to keep annotated functions out-of-line
+    #define FASTLED_NRF52_INLINE_ATTRIBUTE __attribute__((noinline))
+#else // default: force inlining of annotated functions
+    #define FASTLED_NRF52_INLINE_ATTRIBUTE __attribute__((always_inline)) inline
+#endif
+
+
+
+#include <nrf.h>
+#include <nrf_spim.h>   // for FastSPI
+#include <nrf_pwm.h>    // for Clockless
+#include <nrf_nvic.h>   // for Clockless / anything else using interrupts
+typedef __I  uint32_t RoReg;
+typedef __IO uint32_t RwReg;
+
+#define cli()  __disable_irq()
+#define sei()  __enable_irq()
+
+#define FASTLED_NRF52_DEBUGPRINT(format, ...)
+//#define FASTLED_NRF52_DEBUGPRINT(format, ...)\
+//    do {\
+//        FastLED_NRF52_DebugPrint(format, ##__VA_ARGS__);\
+//    } while(0);
+
+
+
+
+#endif // __LED_SYSDEFS_ARM_NRF52

From 5de3cf77e0b71d27a3dd59d0425da848dcfd3638 Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Fri, 7 Jun 2019 00:04:18 -0400
Subject: [PATCH 058/204] Updated ESP32 support (#789)

* Support for ESP32

Credit to Rina Shkrabova for the first cut.

* Clean up interrupt handling

I think there was actually an error in the interrupt enabling/disabling, but I also cleaned it up so that it is more clear how interrupts are handled.

* Better interrupt handling

* Added RMT version

Not fully portable yet, though. The timing numbers are hard-wired for WS2812, and the RMT channel is also hard-wired.

* Fixed the timing

Timing is now computed from T1, T2, and T3 instead of being hard-wired.

* Better buffer management

The RMT signal is sent in 10-pixel chunks, using double-buffering to hide the latency when possible. Also: assign RMT channels sequentially.

* Total rewrite using Martin's code

* Better comments

* Fixed the timing calculation

We were not doing the conversion from ESP32 cycles to RMT cycles correctly. Now it all works!

* Added Martin's changes

* Removed confusing comments

* Added my name!

* Fixed ESP32 compile problem

On ESP platforms the dev kit provides the function __cxa_pure_virtual, so there is no need to define it.

* honor WAIT_TIME

for chipsets that need it (for example TM1829)

* Better interrupt handling

Suggested by @h3ndrik : allocated the interrupt once at the initialization and then just turn it on and off. This is the strategy that the ESP32 core uses also.

* Major refactoring

Two major changes to the RMT driver. First, I realized that we can have only one interrupt handler attached to the RMT peripheral, so it needs to be able to handle all of the attached strips. To accomplish this, I store each ClocklessController in an array indexed by its RMT channel. The interrupt handler can then take the channel that triggered it and index into the array to get the right controller.

The second major change is that I replaced all of the explicit bit twiddling of the RMT configuration with calls to the proper functions in ESP32 core. That should make the code more stable if the core changes.

* Fixed the interrupt dispatch

Since the interrupt handler is global for all channels, we need to store not just the controller, but also the buffer refill function for each strip.

* Added a demo

This version of DemoReel100 spins off a separate task on core 0 that just performs the FastLED.show() operations. Regular code running on core 1 (the default for Arduino) signals this task to request a show().

* Avoid unnecessary timeouts

Replaced a 500ms delay in the show task with MAX_DELAY. There's really no point in timing out (and crashing the program) just because the application hasn't called show.

* Parallel output

Reworked the code again in order to support parallel output, which is now the default mode. You can also now ask it to use the built-in RMT driver if you have other parts of your code that need the RMT peripheral.

Two #defines control choices -- put either or both of these before including FastLED.h:

#define FASTLED_RMT_CORE_DRIVER

Uses the ESP core RMT driver. To do this, though, it allocates a big buffer to hold all of the pixel bits, so there is a memory and compute cost.

#define FASTLED_RMT_SERIAL_OUTPUT

Force serial output of each strip.

* Documentation

Describing the implementation and the compile-time switches

* Removing files that should not be there

* Fixed synchronization

The previous checkin had bugs in the synchronization that caused problems in parallel mode when strips are different lengths.

* Fixed a stupid bug

Made the code bullet-proof in a few ways, but most importantly fixed a terrible integer underflow bug in the code that fills the RMT buffer.

* Another major overhaul

The big change in this version is the ability to support more than 8 controllers. Instead of assigning RMT channels to controllers in a fixed mapping, channels are assigned on the fly, allowing the driver to reuse channels as they become available.

* Oops

Didn't mean to check these in.

* Fixed built-in driver mode

Fixed the code so that it works with the built-in RMT driver. There's nothing special to do to enable it -- just #define FASTLED_RMT_BUILTIN_DRIVER true

* Cleanup

Fixing some documentation and configuration stuff

* Rewrite of fastpin

I've been needing to rewrite fastpin_esp32.h for the ESP32 ports and masks. This file also makes sure we don't use pins that won't work, even with clockless chips like the WS2812.

* Got rid of tabs

Which were making the code ugly.

* Minor tweaks

Added proper definitions for port() and toggle() to use the GPIO.out register. Changed the pin number test to avoid unnecessary conditions.

* Allow TX and RX pins

* Fixed pin access methods

This should be the right set of definitions -- consistent with the other platforms.

* Experimental

Do not merge this code

* Change pixel buffering

The previous version of this code saved a copy of the PixelController every time show() is called. It appears that this causes massive memory fragmentation, eventually locking up the processor. This new version saves the pixel data in a separate buffer that is allocated only one time.

* Some rearranging of the code

Nothing major here. Added comments and put the functions in a better order. Added some defensive programming.

* New I2S driver for ESP32

* Two updates: (1) avoid copying all the pixel data up front, and (2) use T1, T2, and T3 to encode the pulse patterns

* Trying to get the timing better.

* This version seems pretty solid

* Yves' very cool changes to improve performance and accuracy

* First attempt at merging the two drivers

* Complete I2S implementation, with switch to choose it over the RMT

* Removed the old header

* This was added by accident

* Changed the RMT driver so that it no longer needs to copy all the pixel data up front, which was slowing it down and using a lot of extra memory

* Fixed a typo: make sure to load a different channel each time

* Commented out all the Serial.print output
---
 platforms/esp/32/clockless_i2s_esp32.h        | 767 ++++++++++++++++++
 ...lockless_esp32.h => clockless_rmt_esp32.h} |  98 +--
 platforms/esp/32/fastled_esp32.h              |   8 +-
 3 files changed, 823 insertions(+), 50 deletions(-)
 create mode 100644 platforms/esp/32/clockless_i2s_esp32.h
 rename platforms/esp/32/{clockless_esp32.h => clockless_rmt_esp32.h} (90%)

diff --git a/platforms/esp/32/clockless_i2s_esp32.h b/platforms/esp/32/clockless_i2s_esp32.h
new file mode 100644
index 0000000000..a4d15ba750
--- /dev/null
+++ b/platforms/esp/32/clockless_i2s_esp32.h
@@ -0,0 +1,767 @@
+/*
+ * I2S Driver
+ *
+ * Copyright (c) 2019 Yves Bazin
+ * Copyright (c) 2019 Samuel Z. Guyer
+ * Derived from lots of code examples from other people.
+ *
+ * The I2S implementation can drive up to 24 strips in parallel, but
+ * with the following limitation: all the strips must have the same
+ * timing (i.e., they must all use the same chip).
+ *
+ * To enable the I2S driver, add the following line *before* including
+ * FastLED.h (no other changes are necessary):
+ *
+ * #define FASTLED_ESP32_I2S true
+ *
+ * The overall strategy is to use the parallel mode of the I2S "audio"
+ * peripheral to send up to 24 bits in parallel to 24 different pins.
+ * Unlike the RMT peripheral the I2S system cannot send bits of
+ * different lengths. Instead, we set the I2S data clock fairly high
+ * and then encode a signal as a series of bits. 
+ *
+ * For example, with a clock divider of 10 the data clock will be
+ * 8MHz, so each bit is 125ns. The WS2812 expects a "1" bit to be
+ * encoded as a HIGH signal for around 875ns, followed by LOW for
+ * 375ns. Sending the following pattern results in the right shape
+ * signal:
+ *
+ *    1111111000        WS2812 "1" bit encoded as 10 125ns pulses
+ *
+ * The I2S peripheral expects the bits for all 24 outputs to be packed
+ * into a single 32-bit word. The complete signal is a series of these
+ * 32-bit values -- one for each bit for each strip. The pixel data,
+ * however, is stored "serially" as a series of RGB values separately
+ * for each strip. To prepare the data we need to do three things: (1)
+ * take 1 pixel from each strip, (2) transpose the bits so that they
+ * are in the parallel form, and (3) translate each data bit into the
+ * bit pattern that encodes the signal for that bit. This code is in
+ * the fillBuffer() method:
+ *
+ *   1. Read 1 pixel from each strip into an array; store this data by
+ *      color channel (e.g., all the red bytes, then all the green
+ *      bytes, then all the blue bytes). For three color channels, the
+ *      array is 3 X 24 X 8 bits.
+ *
+ *   2. Transpose the array so that it is 3 X 8 X 24 bits. The hardware
+ *      wants the data in 32-bit chunks, so the actual form is 3 X 8 X
+ *      32, with the low 8 bits unused.
+ *
+ *   3. Take each group of 24 parallel bits and "expand" them into a
+ *      pattern according to the encoding. For example, with a 8MHz
+ *      data clock, each data bit turns into 10 I2S pulses, so 24
+ *      parallel data bits turn into 10 X 24 pulses.
+ *
+ * We send data to the I2S peripheral using the DMA interface. We use
+ * two DMA buffers, so that we can fill one buffer while the other
+ * buffer is being sent. Each DMA buffer holds the fully-expanded
+ * pulse pattern for one pixel on up to 24 strips. The exact amount of
+ * memory required depends on the number of color channels and the
+ * number of pulses used to encode each bit.
+ *
+ * We get an interrupt each time a buffer is sent; we then fill that
+ * buffer while the next one is being sent. The DMA interface allows
+ * us to configure the buffers as a circularly linked list, so that it
+ * can automatically start on the next buffer.
+ */
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#pragma once
+
+#pragma message "NOTE: ESP32 support using I2S parallel driver. All strips must use the same chipset"
+
+FASTLED_NAMESPACE_BEGIN
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+    
+#include "esp_heap_caps.h"
+#include "soc/soc.h"
+#include "soc/gpio_sig_map.h"
+#include "soc/i2s_reg.h"
+#include "soc/i2s_struct.h"
+#include "soc/io_mux_reg.h"
+#include "driver/gpio.h"
+#include "driver/periph_ctrl.h"
+#include "rom/lldesc.h"
+#include "esp_intr.h"
+#include "esp_log.h"
+    
+#ifdef __cplusplus
+}
+#endif
+
+__attribute__ ((always_inline)) inline static uint32_t __clock_cycles() { // Returns the value read from "ccount"
+    uint32_t cyc; // Output operand of the inline assembly below
+    __asm__ __volatile__ ("rsr %0,ccount":"=a" (cyc)); // Executes "rsr" with ccount as source and cyc as destination
+    return cyc;
+}
+
+#define FASTLED_HAS_CLOCKLESS 1
+#define NUM_COLOR_CHANNELS 3 // Color bytes read per pixel (loadAndScale0/1/2 in fillBuffer)
+
+// -- Choose which I2S device to use (0 selects I2S0; any other value selects I2S1)
+#ifndef I2S_DEVICE
+#define I2S_DEVICE 0
+#endif
+
+// -- Max number of controllers we can support
+#ifndef FASTLED_I2S_MAX_CONTROLLERS
+#define FASTLED_I2S_MAX_CONTROLLERS 24
+#endif
+
+// -- I2S clock
+#define I2S_BASE_CLK (80000000L)
+#define I2S_MAX_CLK (20000000L) // Above a certain speed the I2S loses some bits (only referenced from commented-out code in this header)
+#define I2S_MAX_PULSE_PER_BIT 20 // Put it higher to get more accuracy, but it could decrease the refresh rate without real improvement
+// -- Convert ESP32 cycles back into nanoseconds
+#define ESPCLKS_TO_NS(_CLKS) (((long)(_CLKS) * 1000L) / F_CPU_MHZ)
+
+// -- Array of all controllers
+static CLEDController * gControllers[FASTLED_I2S_MAX_CONTROLLERS];
+static int gNumControllers = 0; // Number of entries stored in gControllers (incremented by init())
+static int gNumStarted = 0;     // Number of showPixels() calls seen since the last transmission
+
+// -- Global semaphore for the whole show process
+//    Semaphore is not given until all data has been sent
+static xSemaphoreHandle gTX_sem = NULL;
+
+// -- One-time I2S initialization
+static bool gInitialized = false;
+
+// -- Interrupt handler
+static intr_handle_t gI2S_intr_handle = NULL;
+
+// -- A pointer to the memory-mapped structure: I2S0 or I2S1
+static i2s_dev_t * i2s;
+
+// -- I2S goes to these pins until we remap them using the GPIO matrix
+static int i2s_base_pin_index;
+
+// --- I2S DMA buffers
+struct DMABuffer {
+    lldesc_t descriptor; // Descriptor whose 'buf' field is pointed at 'buffer' by allocateDMABuffer()
+    uint8_t * buffer;    // Data area allocated by allocateDMABuffer()
+};
+
+#define NUM_DMA_BUFFERS 2
+static DMABuffer * dmaBuffers[NUM_DMA_BUFFERS];
+
+// -- Bit patterns
+//    For now, we require all strips to be the same chipset, so these
+//    are global variables. All three are filled in by initBitPatterns().
+//    gOneBit/gZeroBit are read only from commented-out code in fillBuffer().
+static int      gPulsesPerBit = 0;
+static uint32_t gOneBit[40] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
+static uint32_t gZeroBit[40]  = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
+
+// -- Counters to track progress
+static int gCurBuffer = 0;         // Index into dmaBuffers of the buffer fillBuffer() writes next
+static bool gDoneFilling = false;  // Set by fillBuffer() once no controller has pixel data left
+static int ones_for_one;           // Leading HIGH pulses in the encoding of a "1" bit
+static int ones_for_zero;          // Leading HIGH pulses in the encoding of a "0" bit
+
+// -- Temp buffers for pixels and bits being formatted for DMA
+static uint8_t gPixelRow[NUM_COLOR_CHANNELS][32];   // One byte per controller slot for each color channel
+static uint8_t gPixelBits[NUM_COLOR_CHANNELS][8][4]; // Output of transpose32(): 4 bytes for each of the 8 bit positions
+static int CLOCK_DIVIDER_N; // Written to clkm_conf.clkm_div_num by i2sInit()
+static int CLOCK_DIVIDER_A; // Written to clkm_conf.clkm_div_a by i2sInit()
+static int CLOCK_DIVIDER_B; // Written to clkm_conf.clkm_div_b by i2sInit()
+
+template <int DATA_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = RGB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 5>
+class ClocklessController : public CPixelLEDController<RGB_ORDER>
+{
+    // -- Store the GPIO pin
+    gpio_num_t     mPin;
+    
+    // -- This instantiation forces a check on the pin choice
+    FastPin<DATA_PIN> mFastPin;
+    
+    // -- Save the pixel controller
+    PixelController<RGB_ORDER> * mPixels;
+    
+public:
+
+    void init() // Sets up shared I2S state, allocates pixel storage, registers this controller and routes its pin
+    {
+        i2sInit(); // Returns immediately once gInitialized is set
+        
+        // -- Allocate space to save the pixel controller
+        //    during parallel output
+        mPixels = (PixelController<RGB_ORDER> *) malloc(sizeof(PixelController<RGB_ORDER>));
+        // NOTE(review): the malloc result is not checked before showPixels() assigns through it
+        gControllers[gNumControllers] = this;
+        int my_index = gNumControllers;
+        gNumControllers++;
+        // NOTE(review): nothing above checks gNumControllers against FASTLED_I2S_MAX_CONTROLLERS
+        // -- Set up the pin. We have to do two things: configure the
+        //    actual GPIO pin, and route the output from the default
+        //    pin (determined by the I2S device) to the pin we
+        //    want. We compute the default pin using the index of this
+        //    controller in the array. This order is crucial because
+        //    the bits must go into the DMA buffer in the same order.
+        mPin = gpio_num_t(DATA_PIN);
+        
+        PIN_FUNC_SELECT(GPIO_PIN_MUX_REG[DATA_PIN], PIN_FUNC_GPIO);
+        gpio_set_direction(mPin, (gpio_mode_t)GPIO_MODE_DEF_OUTPUT);
+        pinMode(mPin,OUTPUT);
+        gpio_matrix_out(mPin, i2s_base_pin_index + my_index, false, false); // Signal index = device base + controller index
+    }
+    
+    virtual uint16_t getMaxRefreshRate() const { return 400; } // Always reports the constant 400
+    
+protected:
+   
+   static int pgcd(int smallest,int precision,int a,int b,int c) // Approximate common divisor of a, b and c, searched downward from 'smallest'
+    {
+        int pgc_=1; // Returned unchanged when no candidate qualifies (e.g. smallest <= 0)
+        for( int i=smallest;i>0;i--) // Candidates are tried from largest to smallest
+        {
+            // -- Accept i when a, b and c each leave a remainder of at most 'precision'
+            if( a%i<=precision && b%i<=precision && c%i<=precision)
+            {
+                pgc_=i;
+                break; // The first hit is the largest qualifying candidate
+            }
+        }
+        return pgc_;
+    }
+    
+    /** Compute pulses/bit patterns
+     *
+     *  This is Yves Bazin's mad code for computing the pulse pattern
+     *  and clock timing given the target signal given by T1, T2, and
+     *  T3. In general, these parameters are interpreted as follows:
+     *
+     *  a "1" bit is encoded by setting the pin HIGH for T1+T2, then LOW for T3
+     *  a "0" bit is encoded by setting the pin HIGH for T1, then LOW for T2+T3
+     *  Results go to gPulsesPerBit, CLOCK_DIVIDER_N/A/B, ones_for_*, gOneBit, gZeroBit.
+     */
+    static void initBitPatterns()
+    {
+        // Precompute the bit patterns based on the I2S sample rate
+        // Serial.println("Setting up fastled using I2S");
+
+        // -- First, convert back to ns from CPU clocks
+        uint32_t T1ns = ESPCLKS_TO_NS(T1);
+        uint32_t T2ns = ESPCLKS_TO_NS(T2);
+        uint32_t T3ns = ESPCLKS_TO_NS(T3);
+        
+        // Serial.print("T1 = "); Serial.print(T1); Serial.print(" ns "); Serial.println(T1ns);
+        // Serial.print("T2 = "); Serial.print(T2); Serial.print(" ns "); Serial.println(T2ns);
+        // Serial.print("T3 = "); Serial.print(T3); Serial.print(" ns "); Serial.println(T3ns);
+        
+        /*
+         We calculate the best pgcd to the timing
+         ie
+         WS2811 77 77 154 => 1  1 2 => nb pulses= 4
+         WS2812 60 150 90 => 2 5 3 => nb pulses=10
+         */
+        int smallest=0;
+        if (T1>T2)
+            smallest=T2;
+        else
+            smallest=T1;
+        if(smallest>T3)
+            smallest=T3;
+        double freq=(double)1/(double)(T1ns + T2ns + T3ns);
+        // Serial.printf("chipset frequency:%f Khz\n", 1000000L*freq);
+       // Serial.printf("smallest %d\n",smallest);
+        int pgc_=1;
+        int precision=0;
+        pgc_=pgcd(smallest,precision,T1,T2,T3);
+        //Serial.printf("%f\n",I2S_MAX_CLK/(1000000000L*freq));
+        while(pgc_==1 ||  (T1/pgc_ +T2/pgc_ +T3/pgc_)>I2S_MAX_PULSE_PER_BIT) //while(pgc_==1 ||  (T1/pgc_ +T2/pgc_ +T3/pgc_)>I2S_MAX_CLK/(1000000000L*freq))
+        {
+            precision++; // Relax the allowed remainder until the divisor is usable
+            pgc_=pgcd(smallest,precision,T1,T2,T3);
+            //Serial.printf("%d %d\n",pgc_,(a+b+c)/pgc_);
+        }
+        pgc_=pgcd(smallest,precision,T1,T2,T3);
+        // Serial.printf("pgcd %d precision:%d\n",pgc_,precision);
+        // Serial.printf("nb pulse per bit:%d\n",T1/pgc_ +T2/pgc_ +T3/pgc_);
+        gPulsesPerBit=(int)T1/pgc_ +(int)T2/pgc_ +(int)T3/pgc_;
+        /*
+         we calculate the duration of one pulse and the base frequency of the led
+         ie WS2812B F=1/(250+625+375)=800kHz or 1250ns
+         as we need 10 pulses each pulse is 125ns => frequency 800Khz*10=8MHz
+         WS2811 T=320+320+641=1281ns and we need 4 pulses => pulse duration 320.25ns =>frequency 3.1225605Mhz
+         
+         */
+
+        freq=1000000000L*freq*gPulsesPerBit;
+        // Serial.printf("needed frequency (nbpiulse per bit)*(chispset frequency):%f Mhz\n",freq/1000000);
+        
+        /*
+         we do calculate the needed N a and b
+         as f=basefreq/(N+b/a);
+         as a is max 63 the precision for the decimal is 1/63
+         
+         */
+        
+         CLOCK_DIVIDER_N=(int)((double)I2S_BASE_CLK/freq);
+        double v=I2S_BASE_CLK/freq-CLOCK_DIVIDER_N;
+        // -- v is the fractional part of the divider; search for b/a closest to it
+        double prec=(double)1/63;
+        int a=1;
+        int b=0;
+        CLOCK_DIVIDER_A=1;
+        CLOCK_DIVIDER_B=0;
+        for(a=1;a<64;a++)
+        {
+            for(b=0;b<a;b++)
+            {
+                //printf("%d %d %f %f %f\n",b,a,v,(double)v*(double)a,fabs(v-(double)b/a));
+                if(fabs(v-(double)b/a) <= prec/2) // fabs, not fabsf: keep the comparison in double precision
+                    break;
+            }
+            if(fabs(v-(double)b/a) ==0)
+            {
+                CLOCK_DIVIDER_A=a;
+                CLOCK_DIVIDER_B=b;
+                break;
+            }
+            if(fabs(v-(double)b/a) < prec/2)
+            {
+                if (fabs(v-(double)b/a) <fabs(v-(double)CLOCK_DIVIDER_B/CLOCK_DIVIDER_A))
+                {
+                    CLOCK_DIVIDER_A=a;
+                    CLOCK_DIVIDER_B=b;
+                }
+                
+            }
+        }
+        //to take care of an issue with double 0.9999999999
+        if(CLOCK_DIVIDER_A==CLOCK_DIVIDER_B)
+        {
+            CLOCK_DIVIDER_A=1;
+            CLOCK_DIVIDER_B=0;
+            CLOCK_DIVIDER_N++;
+        }
+        
+        //printf("%d %d %f %f %d\n",CLOCK_DIVIDER_B,CLOCK_DIVIDER_A,(double)CLOCK_DIVIDER_B/CLOCK_DIVIDER_A,v,CLOCK_DIVIDER_N);
+        //Serial.printf("freq %f %f\n",freq,I2S_BASE_CLK/(CLOCK_DIVIDER_N+(double)CLOCK_DIVIDER_B/CLOCK_DIVIDER_A));
+        freq=1/(CLOCK_DIVIDER_N+(double)CLOCK_DIVIDER_B/CLOCK_DIVIDER_A);
+        freq=freq*I2S_BASE_CLK;
+        // Serial.printf("calculated for i2s frequency:%f Mhz N:%d B:%d A:%d\n",freq/1000000,CLOCK_DIVIDER_N,CLOCK_DIVIDER_B,CLOCK_DIVIDER_A);
+        double pulseduration=1000000000/freq; // Only consumed by the commented-out debug prints below
+        // Serial.printf("Pulse duration: %f ns\n",pulseduration);
+        // gPulsesPerBit = (T1ns + T2ns + T3ns)/FASTLED_I2S_NS_PER_PULSE;
+        
+        //Serial.print("Pulses per bit: "); Serial.println(gPulsesPerBit);
+        
+        //int ones_for_one  = ((T1ns + T2ns - 1)/FASTLED_I2S_NS_PER_PULSE) + 1;
+        ones_for_one  = T1/pgc_ +T2/pgc_;
+        //Serial.print("One bit:  target ");
+        //Serial.print(T1ns+T2ns); Serial.print("ns --- ");
+        //Serial.print(ones_for_one); Serial.print(" 1 bits");
+        //Serial.print(" = "); Serial.print(ones_for_one * FASTLED_I2S_NS_PER_PULSE); Serial.println("ns");
+        // Serial.printf("one bit : target %d  ns --- %d  pulses 1 bit = %f ns\n",T1ns+T2ns,ones_for_one ,ones_for_one*pulseduration);
+        
+        // -- Pattern for a "1": ones_for_one HIGH words, then LOW up to gPulsesPerBit
+        int i = 0;
+        while ( i < ones_for_one ) {
+            gOneBit[i] = 0xFFFFFF00;
+            i++;
+        }
+        while ( i < gPulsesPerBit ) {
+            gOneBit[i] = 0x00000000;
+            i++;
+        }
+        
+        //int ones_for_zero = ((T1ns - 1)/FASTLED_I2S_NS_PER_PULSE) + 1;
+        ones_for_zero =T1/pgc_  ;
+       // Serial.print("Zero bit:  target ");
+       // Serial.print(T1ns); Serial.print("ns --- ");
+        //Serial.print(ones_for_zero); Serial.print(" 1 bits");
+        //Serial.print(" = "); Serial.print(ones_for_zero * FASTLED_I2S_NS_PER_PULSE); Serial.println("ns");
+        // Serial.printf("Zero bit : target %d ns --- %d pulses  1 bit =   %f ns\n",T1ns,ones_for_zero ,ones_for_zero*pulseduration);
+        i = 0;
+        while ( i < ones_for_zero ) {
+            gZeroBit[i] = 0xFFFFFF00;
+            i++;
+        }
+        while ( i < gPulsesPerBit ) {
+            gZeroBit[i] = 0x00000000;
+            i++;
+        }
+        
+        memset(gPixelRow, 0, NUM_COLOR_CHANNELS * 32);
+        memset(gPixelBits, 0, NUM_COLOR_CHANNELS * 32);
+    }
+    
+    static DMABuffer * allocateDMABuffer(int bytes) // Returns a new DMABuffer whose data area is 'bytes' zeroed bytes
+    {
+        DMABuffer * b = (DMABuffer *)heap_caps_malloc(sizeof(DMABuffer), MALLOC_CAP_DMA);
+        // NOTE(review): neither allocation result is checked before it is dereferenced
+        b->buffer = (uint8_t *)heap_caps_malloc(bytes, MALLOC_CAP_DMA);
+        memset(b->buffer, 0, bytes);
+        // -- Describe the data area in the descriptor
+        b->descriptor.length = bytes;
+        b->descriptor.size = bytes;
+        b->descriptor.owner = 1;
+        b->descriptor.sosf = 1;
+        b->descriptor.buf = b->buffer;
+        b->descriptor.offset = 0;
+        b->descriptor.empty = 0;
+        b->descriptor.eof = 1;
+        b->descriptor.qe.stqe_next = 0; // No successor here; i2sInit() links the two buffers afterwards
+        
+        return b;
+    }
+    
+    static void i2sInit() // One-time setup of the shared I2S device, DMA buffers, interrupt and semaphore
+    {
+        // -- Only need to do this once
+        if (gInitialized) return;
+        
+        // -- Construct the bit patterns for ones and zeros
+        initBitPatterns(); // Also sets gPulsesPerBit and CLOCK_DIVIDER_N/A/B, which are used below
+        
+        // -- Choose whether to use I2S device 0 or device 1
+        //    Set up the various device-specific parameters
+        int interruptSource;
+        if (I2S_DEVICE == 0) {
+            i2s = &I2S0;
+            periph_module_enable(PERIPH_I2S0_MODULE);
+            interruptSource = ETS_I2S0_INTR_SOURCE;
+            i2s_base_pin_index = I2S0O_DATA_OUT0_IDX;
+        } else {
+            i2s = &I2S1;
+            periph_module_enable(PERIPH_I2S1_MODULE);
+            interruptSource = ETS_I2S1_INTR_SOURCE;
+            i2s_base_pin_index = I2S1O_DATA_OUT0_IDX;
+        }
+        
+        // -- Reset everything
+        i2sReset();
+        i2sReset_DMA();
+        i2sReset_FIFO();
+        
+        // -- Main configuration
+        i2s->conf.tx_msb_right = 1;
+        i2s->conf.tx_mono = 0;
+        i2s->conf.tx_short_sync = 0;
+        i2s->conf.tx_msb_shift = 0;
+        i2s->conf.tx_right_first = 1; // 0;//1;
+        i2s->conf.tx_slave_mod = 0;
+        
+        // -- Set parallel mode
+        i2s->conf2.val = 0;
+        i2s->conf2.lcd_en = 1;
+        i2s->conf2.lcd_tx_wrx2_en = 0; // 0 for 16 or 32 parallel output
+        i2s->conf2.lcd_tx_sdx2_en = 0; // HN
+        
+        // -- Set up the clock rate and sampling
+        i2s->sample_rate_conf.val = 0;
+        i2s->sample_rate_conf.tx_bits_mod = 32; // Number of parallel bits/pins
+        i2s->sample_rate_conf.tx_bck_div_num = 1;
+        i2s->clkm_conf.val = 0;
+        i2s->clkm_conf.clka_en = 0;
+        
+        // -- Data clock is computed as Base/(div_num + (div_b/div_a))
+        //    Example: Base is 80Mhz, so 80/(10 + 0/1) = 8Mhz and one cycle is 125ns
+        //    The actual divider values come from initBitPatterns()
+        i2s->clkm_conf.clkm_div_a = CLOCK_DIVIDER_A;
+        i2s->clkm_conf.clkm_div_b = CLOCK_DIVIDER_B;
+        i2s->clkm_conf.clkm_div_num = CLOCK_DIVIDER_N;
+        
+        i2s->fifo_conf.val = 0;
+        i2s->fifo_conf.tx_fifo_mod_force_en = 1;
+        i2s->fifo_conf.tx_fifo_mod = 3;  // 32-bit single channel data
+        i2s->fifo_conf.tx_data_num = 32; // fifo length
+        i2s->fifo_conf.dscr_en = 1;      // fifo will use dma
+        
+        i2s->conf1.val = 0;
+        i2s->conf1.tx_stop_en = 0;
+        i2s->conf1.tx_pcm_bypass = 1;
+        
+        i2s->conf_chan.val = 0;
+        i2s->conf_chan.tx_chan_mod = 1; // Mono mode, with tx_msb_right = 1, everything goes to right-channel
+        
+        i2s->timing.val = 0;
+        
+        // -- Allocate two DMA buffers: 8 bits x channels x pulses words of 4 bytes each
+        dmaBuffers[0] = allocateDMABuffer(32 * NUM_COLOR_CHANNELS * gPulsesPerBit);
+        dmaBuffers[1] = allocateDMABuffer(32 * NUM_COLOR_CHANNELS * gPulsesPerBit);
+        
+        // -- Arrange them as a circularly linked list
+        dmaBuffers[0]->descriptor.qe.stqe_next = &(dmaBuffers[1]->descriptor);
+        dmaBuffers[1]->descriptor.qe.stqe_next = &(dmaBuffers[0]->descriptor);
+       
+        // -- Allocate i2s interrupt
+        SET_PERI_REG_BITS(I2S_INT_ENA_REG(I2S_DEVICE), I2S_OUT_EOF_INT_ENA_V, 1, I2S_OUT_EOF_INT_ENA_S);
+        esp_err_t e = esp_intr_alloc(interruptSource, 0, // ESP_INTR_FLAG_INTRDISABLED | ESP_INTR_FLAG_LEVEL3,
+                                     &interruptHandler, 0, &gI2S_intr_handle);
+        // NOTE(review): the result stored in 'e' is never inspected
+        // -- Create a semaphore to block execution until all the controllers are done
+        if (gTX_sem == NULL) {
+            gTX_sem = xSemaphoreCreateBinary();
+            xSemaphoreGive(gTX_sem); // Start in the "available" state so the first showPixels() can take it
+        }
+        
+        // Serial.println("Init I2S");
+        gInitialized = true;
+    }
+    
+    /** Clear DMA buffer
+     *  buf is treated as 8*NUM_COLOR_CHANNELS groups of gPulsesPerBit words.
+     *  Yves' clever trick: initialize the bits that we know must be 0
+     *  or 1 regardless of what bit they encode.
+     */
+    static void empty( uint32_t *buf)
+    {
+        for(int i=0;i<8*NUM_COLOR_CHANNELS;i++)
+        {
+            int offset=gPulsesPerBit*i; // First word of the group for data bit i
+            for(int j=0;j<ones_for_zero;j++) // Pulses that are HIGH in both encodings
+                buf[offset+j]=0xffffffff;
+            // -- Words in [ones_for_zero, ones_for_one) are left untouched; fillBuffer() writes them
+            for(int j=ones_for_one;j<gPulsesPerBit;j++) // Pulses that are LOW in both encodings
+                buf[offset+j]=0;
+        }
+    }
+    
+    // -- Show pixels
+    //    This is the main entry point for the controller.
+    virtual void showPixels(PixelController<RGB_ORDER> & pixels)
+    {
+        if (gNumStarted == 0) {
+            // -- First controller: make sure everything is set up
+            xSemaphoreTake(gTX_sem, portMAX_DELAY);
+        }
+        
+        // -- Initialize the local state, save a copy of the pixel
+        //    controller. We need to make a copy because pixels is a local
+        //    variable in the calling function, and this data structure
+        //    needs to outlive this call to showPixels.
+        (*mPixels) = pixels;
+        
+        // -- Keep track of the number of strips we've seen
+        gNumStarted++;
+
+        // Serial.print("Show pixels ");
+        // Serial.println(gNumStarted);
+        
+        // -- The last call to showPixels is the one responsible for doing
+        //    all of the actual work
+        if (gNumStarted == gNumControllers) {
+            empty((uint32_t*)dmaBuffers[0]->buffer);
+            empty((uint32_t*)dmaBuffers[1]->buffer);
+            gCurBuffer = 0;
+            gDoneFilling = false;
+            
+            // -- Prefill both buffers
+            fillBuffer();
+            fillBuffer();
+            
+            i2sStart();
+            
+            // -- Wait here while the rest of the data is sent. The interrupt handler
+            //    will keep refilling the DMA buffers until it is all sent; then it
+            //    gives the semaphore back.
+            xSemaphoreTake(gTX_sem, portMAX_DELAY);
+            xSemaphoreGive(gTX_sem); // Release it again so the next frame's first call can take it
+            
+            i2sStop();
+            
+            // -- Reset the counters
+            gNumStarted = 0;
+        }
+    }
+    
+    // -- Custom interrupt handler (registered through esp_intr_alloc() in i2sInit(); arg is unused)
+    static IRAM_ATTR void interruptHandler(void *arg)
+    {
+        if (i2s->int_st.out_eof) {
+            i2s->int_clr.val = i2s->int_raw.val; // Clear every interrupt bit that is currently raised
+            
+            if ( ! gDoneFilling) {
+                fillBuffer(); // NOTE(review): fillBuffer() is not marked IRAM_ATTR, unlike this handler - confirm
+            } else {
+                portBASE_TYPE HPTaskAwoken = 0;
+                xSemaphoreGiveFromISR(gTX_sem, &HPTaskAwoken); // Lets the second xSemaphoreTake() in showPixels() proceed
+                if(HPTaskAwoken == pdTRUE) portYIELD_FROM_ISR();
+            }
+        }
+    }
+    
+    /** Fill DMA buffer
+     *
+     *  This is where the real work happens: take a row of pixels (one
+     *  from each strip), transpose and encode the bits, and store
+     *  them in the DMA buffer for the I2S peripheral to read.
+     */
+    static void fillBuffer()
+    {
+        // -- Alternate between buffers
+        volatile uint32_t * buf = (uint32_t *) dmaBuffers[gCurBuffer]->buffer;
+        gCurBuffer = (gCurBuffer + 1) % NUM_DMA_BUFFERS;
+        
+        // -- Get the requested pixel from each controller. Store the
+        //    data for each color channel in a separate array.
+        uint32_t has_data_mask = 0;
+        for (int i = 0; i < gNumControllers; i++) {
+            // -- Store the pixels in reverse controller order starting at index 23
+            //    This causes the bits to come out in the right position after we
+            //    transpose them.
+            int bit_index = 23-i;
+            ClocklessController * pController = static_cast<ClocklessController*>(gControllers[i]); // NOTE(review): assumes every entry has this instantiation's layout - confirm
+            if (pController->mPixels->has(1)) {
+                gPixelRow[0][bit_index] = pController->mPixels->loadAndScale0();
+                gPixelRow[1][bit_index] = pController->mPixels->loadAndScale1();
+                gPixelRow[2][bit_index] = pController->mPixels->loadAndScale2();
+                pController->mPixels->advanceData();
+                pController->mPixels->stepDithering();
+                
+                // -- Record that this controller still has data to send
+                has_data_mask |= (1 << (i+8)); // Row slot 23-i ends up at bit i+8 of the packed word below
+            }
+        }
+        
+        // -- None of the strips has data? We are done.
+        if (has_data_mask == 0) {
+            gDoneFilling = true;
+            return; // The buffer contents are left as they were
+        }
+        
+        // -- Transpose and encode the pixel data for the DMA buffer
+        int buf_index = 0; // Only referenced by the commented-out general loop below
+        for (int channel = 0; channel < NUM_COLOR_CHANNELS; channel++) {
+            
+            // -- Transpose each array: all the bit 7's, then all the bit 6's, ...
+            transpose32(gPixelRow[channel], gPixelBits[channel][0] );
+            
+            //Serial.print("Channel: "); Serial.print(channel); Serial.print(" ");
+            for (int bitnum = 0; bitnum < 8; bitnum++) {
+                uint8_t * row = (uint8_t *) (gPixelBits[channel][bitnum]);
+                uint32_t bit = (row[0] << 24) | (row[1] << 16) | (row[2] << 8) | row[3]; // row[0] becomes the most significant byte
+                
+               /* SZG: More general, but too slow:
+                    for (int pulse_num = 0; pulse_num < gPulsesPerBit; pulse_num++) {
+                        buf[buf_index++] = has_data_mask & ( (bit & gOneBit[pulse_num]) | (~bit & gZeroBit[pulse_num]) );
+                     }
+               */
+
+                // -- Only fill in the pulses that are different between the "0" and "1" encodings
+                for(int pulse_num = ones_for_zero; pulse_num < ones_for_one; pulse_num++) {
+                    buf[bitnum*gPulsesPerBit+channel*8*gPulsesPerBit+pulse_num] = has_data_mask & bit;
+                }
+            }
+        }
+    }
+    
+    static void transpose32(uint8_t * pixels, uint8_t * bits) // Transposes pixels[0..23] in three 8-byte groups into 'bits'
+    {
+        transpose8rS32(& pixels[0],  1, 4, & bits[0]); // Group k reads pixels[8k..8k+7] and writes bits[k], bits[k+4], ..., bits[k+28]
+        transpose8rS32(& pixels[8],  1, 4, & bits[1]);
+        transpose8rS32(& pixels[16], 1, 4, & bits[2]);
+        //transpose8rS32(& pixels[24], 1, 4, & bits[3]);  Can only use 24 bits
+    }
+    
+    /** Transpose 8x8 bit matrix: reads A[0], A[m], ..., A[7*m]; writes B[0], B[n], ..., B[7*n]
+     *  From Hacker's Delight
+     */
+    static void transpose8rS32(uint8_t * A, int m, int n, uint8_t * B)
+    {
+        uint32_t x, y, t;
+        
+        // Load the array and pack it into x and y.
+        // x holds the first four input bytes, y the last four (first byte of each in the top 8 bits).
+        x = (A[0]<<24)   | (A[m]<<16)   | (A[2*m]<<8) | A[3*m];
+        y = (A[4*m]<<24) | (A[5*m]<<16) | (A[6*m]<<8) | A[7*m];
+        // -- Exchange the bits selected by 0x00AA00AA with those 7 positions higher
+        t = (x ^ (x >> 7)) & 0x00AA00AA;  x = x ^ t ^ (t << 7);
+        t = (y ^ (y >> 7)) & 0x00AA00AA;  y = y ^ t ^ (t << 7);
+        // -- Exchange the bits selected by 0x0000CCCC with those 14 positions higher
+        t = (x ^ (x >>14)) & 0x0000CCCC;  x = x ^ t ^ (t <<14);
+        t = (y ^ (y >>14)) & 0x0000CCCC;  y = y ^ t ^ (t <<14);
+        // -- Exchange 4-bit groups between x and y
+        t = (x & 0xF0F0F0F0) | ((y >> 4) & 0x0F0F0F0F);
+        y = ((x << 4) & 0xF0F0F0F0) | (y & 0x0F0F0F0F);
+        x = t;
+        // -- Store the eight result bytes with stride n, most significant byte of x first
+        B[0]=x>>24;    B[n]=x>>16;    B[2*n]=x>>8;  B[3*n]=x;
+        B[4*n]=y>>24;  B[5*n]=y>>16;  B[6*n]=y>>8;  B[7*n]=y;
+    }
+    
+    /** Start I2S transmission: reset, point the out-link at dmaBuffers[0], enable the out_eof interrupt, set tx_start
+     */
+    static void i2sStart()
+    {
+        // esp_intr_disable(gI2S_intr_handle);
+        // Serial.println("I2S start");
+        i2sReset();
+        //Serial.println(dmaBuffers[0]->sampleCount());
+        i2s->lc_conf.val=I2S_OUT_DATA_BURST_EN | I2S_OUTDSCR_BURST_EN | I2S_OUT_DATA_BURST_EN; // NOTE(review): I2S_OUT_DATA_BURST_EN is listed twice - confirm no other flag was intended
+        i2s->out_link.addr = (uint32_t) & (dmaBuffers[0]->descriptor);
+        i2s->out_link.start = 1;
+        ////vTaskDelay(5);
+        i2s->int_clr.val = i2s->int_raw.val; // Clear every interrupt bit that is currently raised
+        // //vTaskDelay(5);
+        i2s->int_ena.out_dscr_err = 1; // NOTE(review): overwritten by the int_ena.val = 0 assignment below
+        //enable interrupt
+        ////vTaskDelay(5);
+        esp_intr_enable(gI2S_intr_handle);
+        // //vTaskDelay(5);
+        i2s->int_ena.val = 0;
+        i2s->int_ena.out_eof = 1; // out_eof is the condition interruptHandler() checks
+        
+        //start transmission
+        i2s->conf.tx_start = 1;
+    }
+    
+    static void i2sReset() // Sets and then clears the reset flags in lc_conf and conf
+    {
+        // Serial.println("I2S reset");
+        const unsigned long lc_conf_reset_flags = I2S_IN_RST_M | I2S_OUT_RST_M | I2S_AHBM_RST_M | I2S_AHBM_FIFO_RST_M;
+        i2s->lc_conf.val |= lc_conf_reset_flags;  // Raise the lc_conf reset bits...
+        i2s->lc_conf.val &= ~lc_conf_reset_flags; // ...and drop them again
+        
+        const uint32_t conf_reset_flags = I2S_RX_RESET_M | I2S_RX_FIFO_RESET_M | I2S_TX_RESET_M | I2S_TX_FIFO_RESET_M;
+        i2s->conf.val |= conf_reset_flags;  // Same set-then-clear sequence for the conf register
+        i2s->conf.val &= ~conf_reset_flags;
+    }
+    
+    static void i2sReset_DMA() // Toggles lc_conf.in_rst and lc_conf.out_rst (1 then 0)
+    {
+        i2s->lc_conf.in_rst=1; i2s->lc_conf.in_rst=0;
+        i2s->lc_conf.out_rst=1; i2s->lc_conf.out_rst=0;
+    }
+    
+    static void i2sReset_FIFO() // Toggles conf.rx_fifo_reset and conf.tx_fifo_reset (1 then 0)
+    {
+        i2s->conf.rx_fifo_reset=1; i2s->conf.rx_fifo_reset=0;
+        i2s->conf.tx_fifo_reset=1; i2s->conf.tx_fifo_reset=0;
+    }
+    
+    static void i2sStop() // Counterpart of i2sStart(): disables the interrupt, resets, clears the start bits
+    {
+        // Serial.println("I2S stop");
+        esp_intr_disable(gI2S_intr_handle);
+        i2sReset();
+        i2s->conf.rx_start = 0;
+        i2s->conf.tx_start = 0;
+    }
+};
+
+FASTLED_NAMESPACE_END
diff --git a/platforms/esp/32/clockless_esp32.h b/platforms/esp/32/clockless_rmt_esp32.h
similarity index 90%
rename from platforms/esp/32/clockless_esp32.h
rename to platforms/esp/32/clockless_rmt_esp32.h
index 58b3c3bcba..ac91ef116d 100644
--- a/platforms/esp/32/clockless_esp32.h
+++ b/platforms/esp/32/clockless_rmt_esp32.h
@@ -49,7 +49,7 @@
  * co-exist. To switch to this mode, add the following directive
  * before you include FastLED.h:
  *
- *      #define FASTLED_RMT_BUILTIN_DRIVER 1
+ *      #define FASTLED_RMT_BUILTIN_DRIVER
  *
  * There may be a performance penalty for using this mode. We need to
  * compute the RMT signal for the entire LED strip ahead of time,
@@ -112,6 +112,7 @@ __attribute__ ((always_inline)) inline static uint32_t __clock_cycles() {
 }
 
 #define FASTLED_HAS_CLOCKLESS 1
+#define NUM_COLOR_CHANNELS 3
 
 // -- Configuration constants
 #define DIVIDER             2 /* 4, 8 still seem to work, but timings become marginal */
@@ -185,10 +186,9 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
     rmt_item32_t   mZero;
     rmt_item32_t   mOne;
 
-    // -- State information for keeping track of where we are in the pixel data
-    uint8_t *      mPixelData = NULL;
-    int            mSize = 0;
-    int            mCurByte;
+    // -- Save the pixel controller
+    PixelController<RGB_ORDER> * mPixels;
+    int            mCurColor;
     uint16_t       mCurPulse;
 
     // -- Buffer to hold all of the pulses. For the version that uses
@@ -200,6 +200,10 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
 
     void init()
     {
+        // -- Allocate space to save the pixel controller
+        //    during parallel output
+        mPixels = (PixelController<RGB_ORDER> *) malloc(sizeof(PixelController<RGB_ORDER>));
+        
         // -- Precompute rmt items corresponding to a zero bit and a one bit
         //    according to the timing values given in the template instantiation
         // T1H
@@ -288,17 +292,15 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
             xSemaphoreTake(gTX_sem, portMAX_DELAY);
         }
 
-        // -- Initialize the local state, save a pointer to the pixel
-        //    data. We need to make a copy because pixels is a local
-        //    variable in the calling function, and this data structure
-        //    needs to outlive this call to showPixels.
-
-        //if (mPixels != NULL) delete mPixels;
-        //mPixels = new PixelController<RGB_ORDER>(pixels);
         if (FASTLED_RMT_BUILTIN_DRIVER)
             convertAllPixelData(pixels);
-        else
-            copyPixelData(pixels);
+        else {
+            // -- Initialize the local state, save a pointer to the pixel
+            //    data. We need to make a copy because pixels is a local
+            //    variable in the calling function, and this data structure
+            //    needs to outlive this call to showPixels.
+            (*mPixels) = pixels;
+        }        
 
         // -- Keep track of the number of strips we've seen
         gNumStarted++;
@@ -328,33 +330,6 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         }
     }
 
-    // -- Copy pixel data
-    //    Make a safe copy of the pixel data, so that the FastLED show
-    //    function can continue to the next controller while the RMT
-    //    device starts sending this data asynchronously.
-    virtual void copyPixelData(PixelController<RGB_ORDER> & pixels)
-    {
-        // -- Make sure we have a buffer of the right size
-        //    (3 bytes per pixel)
-        int size_needed = pixels.size() * 3;
-        if (size_needed > mSize) {
-            if (mPixelData != NULL) free(mPixelData);
-            mSize = size_needed;
-            mPixelData = (uint8_t *) malloc( mSize);
-        }
-
-        // -- Cycle through the R,G, and B values in the right order,
-        //    storing the resulting raw pixel data in the buffer.
-        int cur = 0;
-        while (pixels.has(1)) {
-            mPixelData[cur++] = pixels.loadAndScale0();
-            mPixelData[cur++] = pixels.loadAndScale1();
-            mPixelData[cur++] = pixels.loadAndScale2();
-            pixels.advanceData();
-            pixels.stepDithering();
-        }
-    }
-
     // -- Convert all pixels to RMT pulses
     //    This function is only used when the user chooses to use the
     //    built-in RMT driver, which needs all of the RMT pulses
@@ -440,7 +415,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
             // -- Initialize the counters that keep track of where we are in
             //    the pixel data.
             mCurPulse = 0;
-            mCurByte = 0;
+            mCurColor = 0;
 
             // -- Fill both halves of the buffer
             fillHalfRMTBuffer();
@@ -460,10 +435,8 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
     //    handler (below), or as a callback from the built-in
     //    interrupt handler. It is static because we don't know which
     //    controller is done until we look it up.
-    static void IRAM_ATTR doneOnChannel(rmt_channel_t channel, void * arg)
+    static void doneOnChannel(rmt_channel_t channel, void * arg)
     {
-        if (channel >= FASTLED_RMT_MAX_CHANNELS) return;
-
         ClocklessController * controller = static_cast<ClocklessController*>(gOnChannel[channel]);
         portBASE_TYPE HPTaskAwoken = 0;
 
@@ -489,7 +462,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
     //    This interrupt handler handles two cases: a controller is
     //    done writing its data, or a controller needs to fill the
     //    next half of the RMT buffer with data.
-    static IRAM_ATTR void interruptHandler(void *arg)
+    static void IRAM_ATTR interruptHandler(void *arg)
     {
         // -- The basic structure of this code is borrowed from the
         //    interrupt handler in esp-idf/components/driver/rmt.c
@@ -521,6 +494,33 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         }
     }
 
+    uint8_t IRAM_ATTR getNextByte()
+    {
+        uint8_t byte;
+
+        // -- Cycle through the color channels
+        switch (mCurColor) {
+        case 0: 
+            byte = mPixels->loadAndScale0();
+            break;
+        case 1: 
+            byte = mPixels->loadAndScale1();
+            break;
+        case 2: 
+            byte = mPixels->loadAndScale2();
+            mPixels->advanceData();
+            mPixels->stepDithering();
+            break;
+        default:
+            // -- This is bad!
+            byte = 0;
+        }
+
+        mCurColor = (mCurColor + 1) % NUM_COLOR_CHANNELS;
+
+        return byte;
+    }
+
     // -- Fill the RMT buffer
     //    This function fills the next 32 slots in the RMT write
     //    buffer with pixel data. It also handles the case where the
@@ -535,9 +535,9 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         //    into RMT pulses that encode the zeros and ones.
         int pulses = 0;
         uint32_t byteval;
-        while (pulses < 32 && mCurByte < mSize) {
+        while (pulses < 32 && mPixels->has(1)) {
             // -- Get one byte
-            byteval = mPixelData[mCurByte++];
+            byteval = getNextByte();
             byteval <<= 24;
             // Shift bits out, MSB first, setting RMTMEM.chan[n].data32[x] to the 
             // rmt_item32_t value corresponding to the buffered bit value
@@ -552,7 +552,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
 
         // -- When we reach the end of the pixel data, fill the rest of the
         //    RMT buffer with 0's, which signals to the device that we're done.
-        if (mCurByte == mSize) {
+        if ( ! mPixels->has(1) ) {
             while (pulses < 32) {
                 RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = 0;
                 mCurPulse++;
diff --git a/platforms/esp/32/fastled_esp32.h b/platforms/esp/32/fastled_esp32.h
index fabbfeda3d..edf27e7d99 100644
--- a/platforms/esp/32/fastled_esp32.h
+++ b/platforms/esp/32/fastled_esp32.h
@@ -1,5 +1,11 @@
 #pragma once
 
 #include "fastpin_esp32.h"
-#include "clockless_esp32.h"
+
+#ifdef FASTLED_ESP32_I2S
+#include "clockless_i2s_esp32.h"
+#else
+#include "clockless_rmt_esp32.h"
+#endif
+
 // #include "clockless_block_esp32.h"

From d4b6be31c96d9c178499ea4f067591a13f2bb719 Mon Sep 17 00:00:00 2001
From: Daniel Garcia <dgarcia@dgarcia.net>
Date: Thu, 6 Jun 2019 21:08:20 -0700
Subject: [PATCH 059/204] warning cleanup

---
 platforms/esp/32/clockless_rmt_esp32.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/platforms/esp/32/clockless_rmt_esp32.h b/platforms/esp/32/clockless_rmt_esp32.h
index ac91ef116d..c9fee8a539 100644
--- a/platforms/esp/32/clockless_rmt_esp32.h
+++ b/platforms/esp/32/clockless_rmt_esp32.h
@@ -347,7 +347,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         // -- Cycle through the R,G, and B values in the right order,
         //    storing the pulses in the big buffer
         mCurPulse = 0;
-        int cur = 0;
+
         uint32_t byteval;
         while (pixels.has(1)) {
             byteval = pixels.loadAndScale0();

From 0cea6c8863f8ef9978923377d476d3510c5830b7 Mon Sep 17 00:00:00 2001
From: Daniel Garcia <dgarcia@dgarcia.net>
Date: Thu, 6 Jun 2019 21:11:15 -0700
Subject: [PATCH 060/204] Rev'ing a 3.2.7 release

---
 FastLED.h          |  6 +++---
 library.json       |  6 +++---
 library.properties |  2 +-
 release_notes.md   | 38 ++++++++++++++++++++++++--------------
 4 files changed, 31 insertions(+), 21 deletions(-)

diff --git a/FastLED.h b/FastLED.h
index 5bb07526d1..d7934adeb5 100644
--- a/FastLED.h
+++ b/FastLED.h
@@ -8,12 +8,12 @@
 #define FASTLED_HAS_PRAGMA_MESSAGE
 #endif
 
-#define FASTLED_VERSION 3002006
+#define FASTLED_VERSION 3002007
 #ifndef FASTLED_INTERNAL
 #  ifdef FASTLED_HAS_PRAGMA_MESSAGE
-#    pragma message "FastLED version 3.002.006"
+#    pragma message "FastLED version 3.002.007"
 #  else
-#    warning FastLED version 3.002.006  (Not really a warning, just telling you here.)
+#    warning FastLED version 3.002.007  (Not really a warning, just telling you here.)
 #  endif
 #endif
 
diff --git a/library.json b/library.json
index 0e8eb2eaaf..e7080e0dad 100644
--- a/library.json
+++ b/library.json
@@ -18,11 +18,11 @@
         "type": "git",
         "url": "https://github.com/FastLED/FastLED.git"
     },
-    "version": "3.2.6",
+    "version": "3.2.7",
     "license": "MIT",
     "homepage": "http://fastled.io",
     "frameworks": "arduino",
-    "platforms": "atmelavr, atmelsam, freescalekinetis, nordicnrf51, nxplpc, ststm32, teensy, espressif8266, espressif32",   
+    "platforms": "atmelavr, atmelsam, freescalekinetis, nordicnrf51, nxplpc, ststm32, teensy, espressif8266, espressif32, nordicnrf52",
     "export": {
         "exclude": [
             "docs",
@@ -36,5 +36,5 @@
             "+<*.h>"
         ],
         "libArchive": false
-    } 
+    }
 }
diff --git a/library.properties b/library.properties
index 619b50953e..b827afe3b2 100644
--- a/library.properties
+++ b/library.properties
@@ -1,5 +1,5 @@
 name=FastLED
-version=3.2.6
+version=3.2.7
 author=Daniel Garcia
 maintainer=Daniel Garcia <dgarcia@fastled.io>
 sentence=Multi-platform library for controlling dozens of different types of LEDs along with optimized math, effect, and noise functions.
diff --git a/release_notes.md b/release_notes.md
index 7590bbd62b..8e52407931 100644
--- a/release_notes.md
+++ b/release_notes.md
@@ -1,3 +1,15 @@
+FastLED 3.2.7
+=============
+* Update ItsyBitsy support
+* Remove conflicting types courtesy of an esp8266 framework update
+* Fixes to clockless M0 code to allow for more interrupt enabled environments
+* ATTiny25 compilation fix
+* Some STM32 fixes (the platform still seems unhappy, though)
+* NRF52 support
+* Updated ESP32 support - supporting up to 24-way parallel output
+
+
+
 FastLED 3.2.6
 =============
 
@@ -27,7 +39,7 @@ FastLED 3.2.1
 * ATmega644P support
 * Adafruit Hallowwing (Thanks to Lady Ada)
 * Improved STM 32 support
-* Some user contributed cleanups 
+* Some user contributed cleanups
 * ESP32 APA102 output fix
 
 FastLED3.2
@@ -36,7 +48,7 @@ FastLED3.2
 * various minor contributed fixes
 
 FastLED 3.1.8
-============= 
+=============
 * Added support for Adafruit Circuit Playground Express (Thanks to Lady Ada)
 * Improved support for Adafruit Gemma and Trinket m0 (Thanks to Lady Ada)
 * Added support for PJRC's WS2812Serial (Thanks to Paul Stoffregen)
@@ -69,26 +81,26 @@ FastLED3.1.3
 ===============
 
 * Add SK6822 timings
-* Add ESP8266 support - note, only tested w/the arduino esp8266 build environment 
+* Add ESP8266 support - note, only tested w/the arduino esp8266 build environment
 * Improvements to hsv2rgb, palette, and noise performance
 * Improvements to rgb2hsv accuracy
-* Fixed noise discontinuity 
+* Fixed noise discontinuity
 * Add wino board support
 * Fix scale8 (so now, scale8(255,255) == 255, not 254!)
-* Add ESP8266 parallel output support 
+* Add ESP8266 parallel output support
 
 
 FastLED3.1.1
 ============
 * Enabled RFDuino/nrf51822 hardware SPI support
-* Fix edge case bug w/HSV palette blending 
+* Fix edge case bug w/HSV palette blending
 * Fix power management issue w/parallel output
 * Use static_asserts for some more useful compile time errors around bad pins
 * Roll power management into FastLED.show/delay directly
 * Support for adafruit pixies on arduino type platforms that have SoftwareSerial
   * TODO: support hardware serial on platforms that have it available
 * Add UCS2903 timings
-* Preliminary CPixelView/CRGBSet code - more flexible treatment of groups of arrays 
+* Preliminary CPixelView/CRGBSet code - more flexible treatment of groups of arrays
   * https://github.com/FastLED/FastLED/wiki/RGBSet-Reference
 
 
@@ -100,7 +112,7 @@ FastLED3.1.0
   * RFDuino/nrf51822
   * Spark Core
 * Major internal code reoganization
-* Started doxygen based documentation 
+* Started doxygen based documentation
 * Lots of bug/performance fixes
 * Parallel output on various arm platforms
 * lots of new stuff
@@ -149,7 +161,7 @@ FastLED3.0
 * Fixed DMXSERIAL/DMXSIMPLE support
 * Timing adjustments for existing SPI chipsets
 * Cleaned up the code layout to make platform support easier
-* Many bug fixes 
+* Many bug fixes
 * A number of performance/memory improvements
 * Remove Squant (takes up space!)
 
@@ -159,7 +171,7 @@ FastLED2
 ## Full release of the library
 
 ## Release Candidate 6
-* Rename library, offically, to FastLED, move to github 
+* Rename library, offically, to FastLED, move to github
 * Update keywords with all the new stuffs
 
 ## Release Candidate 5
@@ -175,7 +187,7 @@ FastLED2
 * Added ability to set pixel color directly from HSV
 * Added ability to retrieve current random16 seed
 
-## Release Candidate 2 
+## Release Candidate 2
 * mostly bug fixes
 * Fix SPI macro definitions for latest teensy3 software update
 * Teensy 2 compilation fix
@@ -188,7 +200,7 @@ FastLED2
 * high speed memory management operations
 * library for interpolation/easing functions
 * various api changes, addition of clear and showColor functions
-* scale value applied to all show methods 
+* scale value applied to all show methods
 * bug fixes for SM16716
 * performance improvements, lpd8806 exceeds 22Mbit now
 * hardware def fixes
@@ -217,5 +229,3 @@ en if you're using the hardware SPI pins
 
 ## Preview 1
 * Initial release
-
-

From cf8ce86553f6a1c4d17db54659ebb607b68191ae Mon Sep 17 00:00:00 2001
From: Daniel Garcia <dgarcia@dgarcia.net>
Date: Fri, 7 Jun 2019 10:12:36 -0700
Subject: [PATCH 061/204] Fix #811 and #812 - took a bad PR that broke the
 build for other platforms

---
 chipsets.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/chipsets.h b/chipsets.h
index 5651d2803c..9941ec8edf 100644
--- a/chipsets.h
+++ b/chipsets.h
@@ -521,10 +521,10 @@ template <uint8_t DATA_PIN, EOrder RGB_ORDER = RGB>
 class PL9823Controller : public ClocklessController<DATA_PIN, 3 * FMUL, 8 * FMUL, 3 * FMUL, RGB_ORDER> {};
 
 #else
-    
+
 // Similar to NS() macro, this calculates the number of cycles for
 // the clockless chipset (which may differ from CPU cycles)
-#define C_NS(_NS) (((_NS * ((CLOCKLESS_FREQUENCY / 1000000L) + 999) / 1000)
+#define C_NS(_NS) (_NS * ((CLOCKLESS_FREQUENCY / 1000000L) + 999) / 1000)
 
 // GE8822 - 350ns 660ns 350ns
 template <uint8_t DATA_PIN, EOrder RGB_ORDER = RGB>

From 1cc984c13ef0751062915cd43cbf470947375290 Mon Sep 17 00:00:00 2001
From: Daniel Garcia <dgarcia@dgarcia.net>
Date: Fri, 7 Jun 2019 10:13:33 -0700
Subject: [PATCH 062/204] Rev'ing 3.2.8 to fix a build issue that came in with
 the 3.2.7 PRs

---
 FastLED.h          | 6 +++---
 library.json       | 2 +-
 library.properties | 2 +-
 release_notes.md   | 2 +-
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/FastLED.h b/FastLED.h
index d7934adeb5..4b7cd7473a 100644
--- a/FastLED.h
+++ b/FastLED.h
@@ -8,12 +8,12 @@
 #define FASTLED_HAS_PRAGMA_MESSAGE
 #endif
 
-#define FASTLED_VERSION 3002007
+#define FASTLED_VERSION 3002008
 #ifndef FASTLED_INTERNAL
 #  ifdef FASTLED_HAS_PRAGMA_MESSAGE
-#    pragma message "FastLED version 3.002.007"
+#    pragma message "FastLED version 3.002.008"
 #  else
-#    warning FastLED version 3.002.007  (Not really a warning, just telling you here.)
+#    warning FastLED version 3.002.008  (Not really a warning, just telling you here.)
 #  endif
 #endif
 
diff --git a/library.json b/library.json
index e7080e0dad..6d0a498d31 100644
--- a/library.json
+++ b/library.json
@@ -18,7 +18,7 @@
         "type": "git",
         "url": "https://github.com/FastLED/FastLED.git"
     },
-    "version": "3.2.7",
+    "version": "3.2.8",
     "license": "MIT",
     "homepage": "http://fastled.io",
     "frameworks": "arduino",
diff --git a/library.properties b/library.properties
index b827afe3b2..cfd96b9faa 100644
--- a/library.properties
+++ b/library.properties
@@ -1,5 +1,5 @@
 name=FastLED
-version=3.2.7
+version=3.2.8
 author=Daniel Garcia
 maintainer=Daniel Garcia <dgarcia@fastled.io>
 sentence=Multi-platform library for controlling dozens of different types of LEDs along with optimized math, effect, and noise functions.
diff --git a/release_notes.md b/release_notes.md
index 8e52407931..d1c4096796 100644
--- a/release_notes.md
+++ b/release_notes.md
@@ -1,4 +1,4 @@
-FastLED 3.2.7
+FastLED 3.2.8
 =============
 * Update ItsyBitsy support
 * Remove conflicting types courtesy of an esp8266 framework update

From ca9e40b4e9e640ae3f50d1b89b3f3eca36e2496d Mon Sep 17 00:00:00 2001
From: Daniel Garcia <dgarcia@dgarcia.net>
Date: Sun, 9 Jun 2019 15:32:00 -0700
Subject: [PATCH 063/204] Fix #813 -- further fix for the C_NS macro added by
 the nrf52 changes

---
 chipsets.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/chipsets.h b/chipsets.h
index 9941ec8edf..d452abfe87 100644
--- a/chipsets.h
+++ b/chipsets.h
@@ -524,7 +524,7 @@ class PL9823Controller : public ClocklessController<DATA_PIN, 3 * FMUL, 8 * FMUL
 
 // Similar to NS() macro, this calculates the number of cycles for
 // the clockless chipset (which may differ from CPU cycles)
-#define C_NS(_NS) (_NS * ((CLOCKLESS_FREQUENCY / 1000000L) + 999) / 1000)
+#define C_NS(_NS) (((_NS * ((CLOCKLESS_FREQUENCY / 1000000L)) + 999)) / 1000)
 
 // GE8822 - 350ns 660ns 350ns
 template <uint8_t DATA_PIN, EOrder RGB_ORDER = RGB>

From fd859aea4459df0b6bf09d720b5b5a1bc8641fc6 Mon Sep 17 00:00:00 2001
From: Daniel Garcia <dgarcia@dgarcia.net>
Date: Sun, 9 Jun 2019 15:47:22 -0700
Subject: [PATCH 064/204] Rev 3.2.9 to fix the timing issue introduced by the
 nrf52 changes

---
 FastLED.h          | 6 +++---
 library.json       | 2 +-
 library.properties | 2 +-
 release_notes.md   | 2 +-
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/FastLED.h b/FastLED.h
index 4b7cd7473a..8fe2b55cbf 100644
--- a/FastLED.h
+++ b/FastLED.h
@@ -8,12 +8,12 @@
 #define FASTLED_HAS_PRAGMA_MESSAGE
 #endif
 
-#define FASTLED_VERSION 3002008
+#define FASTLED_VERSION 3002009
 #ifndef FASTLED_INTERNAL
 #  ifdef FASTLED_HAS_PRAGMA_MESSAGE
-#    pragma message "FastLED version 3.002.008"
+#    pragma message "FastLED version 3.002.009"
 #  else
-#    warning FastLED version 3.002.008  (Not really a warning, just telling you here.)
+#    warning FastLED version 3.002.009  (Not really a warning, just telling you here.)
 #  endif
 #endif
 
diff --git a/library.json b/library.json
index 6d0a498d31..ca8f09e8a2 100644
--- a/library.json
+++ b/library.json
@@ -18,7 +18,7 @@
         "type": "git",
         "url": "https://github.com/FastLED/FastLED.git"
     },
-    "version": "3.2.8",
+    "version": "3.2.9",
     "license": "MIT",
     "homepage": "http://fastled.io",
     "frameworks": "arduino",
diff --git a/library.properties b/library.properties
index cfd96b9faa..25460b1ee6 100644
--- a/library.properties
+++ b/library.properties
@@ -1,5 +1,5 @@
 name=FastLED
-version=3.2.8
+version=3.2.9
 author=Daniel Garcia
 maintainer=Daniel Garcia <dgarcia@fastled.io>
 sentence=Multi-platform library for controlling dozens of different types of LEDs along with optimized math, effect, and noise functions.
diff --git a/release_notes.md b/release_notes.md
index d1c4096796..ae9075c050 100644
--- a/release_notes.md
+++ b/release_notes.md
@@ -1,4 +1,4 @@
-FastLED 3.2.8
+FastLED 3.2.9
 =============
 * Update ItsyBitsy support
 * Remove conflicting types courtesy of an esp8266 framework update

From c563be6a37d9d561eb53f49a233fafd5c8c8b44a Mon Sep 17 00:00:00 2001
From: Daniel Garcia <dgarcia@dgarcia.net>
Date: Sun, 16 Jun 2019 21:31:30 -0700
Subject: [PATCH 065/204] Try a fix for #804 by making sure gradient color
 palettes are properly aligned

---
 fastled_progmem.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fastled_progmem.h b/fastled_progmem.h
index 5527c62356..dfcb9effb0 100644
--- a/fastled_progmem.h
+++ b/fastled_progmem.h
@@ -69,7 +69,7 @@ FASTLED_NAMESPACE_BEGIN
 // force 4-byte alignment as needed.  The FastLED gradient
 // palette code uses 'read dword', and now uses this macro
 // to make sure that gradient palettes are 4-byte aligned.
-#if defined(FASTLED_ARM) || defined(ESP32)
+#if defined(FASTLED_ARM) || defined(ESP32) || defined(ESP8266)
 #define FL_ALIGN_PROGMEM  __attribute__ ((aligned (4)))
 #else
 #define FL_ALIGN_PROGMEM

From 4f88655f4e0616af907bc985701dd1d35a826dd7 Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Thu, 27 Jun 2019 17:59:34 -0400
Subject: [PATCH 066/204] fillHalfRMTBuffer needs to be virtual in order to
 preserve the color channel order from the template parameters

---
 platforms/esp/32/clockless_rmt_esp32.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/platforms/esp/32/clockless_rmt_esp32.h b/platforms/esp/32/clockless_rmt_esp32.h
index ac91ef116d..d16725b316 100644
--- a/platforms/esp/32/clockless_rmt_esp32.h
+++ b/platforms/esp/32/clockless_rmt_esp32.h
@@ -526,7 +526,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
     //    buffer with pixel data. It also handles the case where the
     //    pixel data is exhausted, so we need to fill the RMT buffer
     //    with zeros to signal that it's done.
-    void IRAM_ATTR fillHalfRMTBuffer()
+    virtual void IRAM_ATTR fillHalfRMTBuffer()
     {
         uint32_t one_val = mOne.val;
         uint32_t zero_val = mZero.val;

From df0fef3b7cd63cce3fc8e0ce8ed5f8d1c8c87ef4 Mon Sep 17 00:00:00 2001
From: Erin St Blaine <erin@firepixie.com>
Date: Tue, 2 Jul 2019 19:05:28 -0700
Subject: [PATCH 067/204] updated to fastpin_arm_d51.h (#834)

Added pin defs for the Adafruit Metro M4 Airlift Lite
---
 platforms/arm/d51/fastpin_arm_d51.h | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/platforms/arm/d51/fastpin_arm_d51.h b/platforms/arm/d51/fastpin_arm_d51.h
index 6d14c633ab..7580e18ec7 100644
--- a/platforms/arm/d51/fastpin_arm_d51.h
+++ b/platforms/arm/d51/fastpin_arm_d51.h
@@ -84,6 +84,29 @@ _DEFPIN_ARM(23, 1, 23); _DEFPIN_ARM(24, 0,  1); _DEFPIN_ARM(25, 0,  0);
 
 #define HAS_HARDWARE_PIN_SUPPORT 1
 
+// Actual pin definitions
+#if defined(ADAFRUIT_METRO_M4_AIRLIFT_LITE)
+
+#define MAX_PIN 20
+// D0-D13, including D6+D8 (DotStar CLK + DATA)
+_DEFPIN_ARM( 0, 0, 23); _DEFPIN_ARM( 1, 0, 22); _DEFPIN_ARM( 2, 1,  17); _DEFPIN_ARM( 3, 1, 16);
+_DEFPIN_ARM( 4, 1, 13); _DEFPIN_ARM( 5, 1, 14); _DEFPIN_ARM( 6, 1,  15); _DEFPIN_ARM( 7, 1, 12);
+_DEFPIN_ARM( 8, 0,  21); _DEFPIN_ARM( 9, 0, 20); _DEFPIN_ARM(10, 0, 18); _DEFPIN_ARM(11, 0, 19);
+_DEFPIN_ARM(12, 0, 17); _DEFPIN_ARM(13, 0, 16);
+// A0-A5
+_DEFPIN_ARM(14, 0,  2); _DEFPIN_ARM(15, 0,  5); _DEFPIN_ARM(16, 0,  6); _DEFPIN_ARM(17, 1,  0);
+_DEFPIN_ARM(18, 1,  8); _DEFPIN_ARM(19, 1,  9); 
+// SDA/SCL
+_DEFPIN_ARM(22, 1, 2); _DEFPIN_ARM(23, 1, 3);
+
+// 23..25  MISO/SCK/MOSI
+_DEFPIN_ARM(24, 0, 14); _DEFPIN_ARM(25, 0,  13); _DEFPIN_ARM(26, 0,  12);
+
+#define SPI_DATA 26
+#define SPI_CLOCK 25
+
+#define HAS_HARDWARE_PIN_SUPPORT 1
+
 #elif defined(ADAFRUIT_FEATHER_M4_EXPRESS)
 
 #define MAX_PIN 19

From bbcbb4017ced2f63e521bc1cf7e7b9669da1b1cb Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Tue, 2 Jul 2019 22:06:18 -0400
Subject: [PATCH 068/204] Bug fix to preserve color order information (#831)

---
 platforms/esp/32/clockless_rmt_esp32.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/platforms/esp/32/clockless_rmt_esp32.h b/platforms/esp/32/clockless_rmt_esp32.h
index c9fee8a539..accd60081c 100644
--- a/platforms/esp/32/clockless_rmt_esp32.h
+++ b/platforms/esp/32/clockless_rmt_esp32.h
@@ -526,7 +526,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
     //    buffer with pixel data. It also handles the case where the
     //    pixel data is exhausted, so we need to fill the RMT buffer
     //    with zeros to signal that it's done.
-    void IRAM_ATTR fillHalfRMTBuffer()
+    virtual void IRAM_ATTR fillHalfRMTBuffer()
     {
         uint32_t one_val = mOne.val;
         uint32_t zero_val = mZero.val;

From 66fd04793d27b3368bfe94e4a41bd83de0ba9369 Mon Sep 17 00:00:00 2001
From: jeremy-mcgill <52296248+jeremy-mcgill@users.noreply.github.com>
Date: Tue, 2 Jul 2019 21:06:45 -0500
Subject: [PATCH 069/204] Add pin mappings for Arduino Nano 33 IOT (#830)

---
 platforms/arm/d21/fastpin_arm_d21.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/platforms/arm/d21/fastpin_arm_d21.h b/platforms/arm/d21/fastpin_arm_d21.h
index 1606d65084..997fb06af4 100644
--- a/platforms/arm/d21/fastpin_arm_d21.h
+++ b/platforms/arm/d21/fastpin_arm_d21.h
@@ -169,6 +169,22 @@ _DEFPIN_ARM( 20, 0,  6); _DEFPIN_ARM( 21, 0,  7);
 
 #define HAS_HARDWARE_PIN_SUPPORT 1
 
+#elif defined(ARDUINO_SAMD_NANO_33_IOT)
+
+#define MAX_PIN 25
+_DEFPIN_ARM(  0, 0, 11); _DEFPIN_ARM(  1, 0, 10); _DEFPIN_ARM(  2, 0, 14); _DEFPIN_ARM(  3, 0,  9);
+_DEFPIN_ARM(  4, 0,  8); _DEFPIN_ARM(  5, 0, 15); _DEFPIN_ARM(  6, 0, 20); _DEFPIN_ARM(  7, 0, 21);
+_DEFPIN_ARM(  8, 0,  6); _DEFPIN_ARM(  9, 0,  7); _DEFPIN_ARM( 10, 0, 18); _DEFPIN_ARM( 11, 0, 16);
+_DEFPIN_ARM( 12, 0, 19); _DEFPIN_ARM( 13, 0, 17); _DEFPIN_ARM( 14, 0,  2); _DEFPIN_ARM( 15, 1,  8);
+_DEFPIN_ARM( 16, 1,  9); _DEFPIN_ARM( 17, 0,  4); _DEFPIN_ARM( 18, 0,  5); _DEFPIN_ARM( 19, 1,  2);
+_DEFPIN_ARM( 20, 0, 22); _DEFPIN_ARM( 21, 0, 23); _DEFPIN_ARM( 22, 0, 12); _DEFPIN_ARM( 23, 1, 10);
+_DEFPIN_ARM( 24, 1, 11);
+
+#define SPI_DATA 23
+#define SPI_CLOCK 24
+
+#define HAS_HARDWARE_PIN_SUPPORT 1
+
 #elif defined(ARDUINO_GEMMA_M0)
 
 #define MAX_PIN 4

From b8e531de0b7e80aca9dbc557a91f14bc90d80890 Mon Sep 17 00:00:00 2001
From: Daniel Garcia <dgarcia@dgarcia.net>
Date: Wed, 3 Jul 2019 09:20:05 -0700
Subject: [PATCH 070/204] Swap #if for #elif - fix compiling w/m4 metro support

---
 platforms/arm/d51/fastpin_arm_d51.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/platforms/arm/d51/fastpin_arm_d51.h b/platforms/arm/d51/fastpin_arm_d51.h
index 7580e18ec7..5e36023d8c 100644
--- a/platforms/arm/d51/fastpin_arm_d51.h
+++ b/platforms/arm/d51/fastpin_arm_d51.h
@@ -85,7 +85,7 @@ _DEFPIN_ARM(23, 1, 23); _DEFPIN_ARM(24, 0,  1); _DEFPIN_ARM(25, 0,  0);
 #define HAS_HARDWARE_PIN_SUPPORT 1
 
 // Actual pin definitions
-#if defined(ADAFRUIT_METRO_M4_AIRLIFT_LITE)
+#elif defined(ADAFRUIT_METRO_M4_AIRLIFT_LITE)
 
 #define MAX_PIN 20
 // D0-D13, including D6+D8 (DotStar CLK + DATA)

From a346de18a09ad4471c6cc8bbefe4eb8bcf863d32 Mon Sep 17 00:00:00 2001
From: Daniel Garcia <dgarcia@dgarcia.net>
Date: Wed, 3 Jul 2019 09:42:35 -0700
Subject: [PATCH 071/204] Rev 3.2.10 to add a couple more platform defs and bug
 fixes

---
 FastLED.h          | 4 ++--
 library.json       | 2 +-
 library.properties | 2 +-
 release_notes.md   | 6 ++++++
 4 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/FastLED.h b/FastLED.h
index 8fe2b55cbf..b4302662de 100644
--- a/FastLED.h
+++ b/FastLED.h
@@ -11,9 +11,9 @@
 #define FASTLED_VERSION 3002009
 #ifndef FASTLED_INTERNAL
 #  ifdef FASTLED_HAS_PRAGMA_MESSAGE
-#    pragma message "FastLED version 3.002.009"
+#    pragma message "FastLED version 3.002.010"
 #  else
-#    warning FastLED version 3.002.009  (Not really a warning, just telling you here.)
+#    warning FastLED version 3.002.010  (Not really a warning, just telling you here.)
 #  endif
 #endif
 
diff --git a/library.json b/library.json
index ca8f09e8a2..bcdd17562b 100644
--- a/library.json
+++ b/library.json
@@ -18,7 +18,7 @@
         "type": "git",
         "url": "https://github.com/FastLED/FastLED.git"
     },
-    "version": "3.2.9",
+    "version": "3.2.10",
     "license": "MIT",
     "homepage": "http://fastled.io",
     "frameworks": "arduino",
diff --git a/library.properties b/library.properties
index 25460b1ee6..93c90e0fe4 100644
--- a/library.properties
+++ b/library.properties
@@ -1,5 +1,5 @@
 name=FastLED
-version=3.2.9
+version=3.2.10
 author=Daniel Garcia
 maintainer=Daniel Garcia <dgarcia@fastled.io>
 sentence=Multi-platform library for controlling dozens of different types of LEDs along with optimized math, effect, and noise functions.
diff --git a/release_notes.md b/release_notes.md
index ae9075c050..81d16f3318 100644
--- a/release_notes.md
+++ b/release_notes.md
@@ -1,3 +1,9 @@
+FastLED 3.2.10
+==============
+* Adafruit Metro M4 Airlift support
+* Arduino Nano 33 IOT preliminary definitions
+* Bug fixes
+
 FastLED 3.2.9
 =============
 * Update ItsyBitsy support

From 3190e9673f94f3bb0d654fbd67d4a298d5f44896 Mon Sep 17 00:00:00 2001
From: Henry Gabryjelski <henrygab@users.noreply.github.com>
Date: Fri, 19 Jul 2019 14:45:53 -0700
Subject: [PATCH 072/204] Update due to newer gcc being more pedantic about
 constexpr. (#845)

Specifically, integer values cannot be cast to pointers in a constexpr
function, so the constexpr accessors now return a uintptr_t.  Callers
may then cast it to a pointer at the non-constexpr location of use.

See https://stackoverflow.com/questions/10369606/constexpr-pointer-value
---
 platforms/arm/nrf52/fastpin_arm_nrf52.h | 28 ++++++++++++-------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/platforms/arm/nrf52/fastpin_arm_nrf52.h b/platforms/arm/nrf52/fastpin_arm_nrf52.h
index a8684665c6..60fb359416 100644
--- a/platforms/arm/nrf52/fastpin_arm_nrf52.h
+++ b/platforms/arm/nrf52/fastpin_arm_nrf52.h
@@ -73,13 +73,13 @@
 
 // manually define two structures, to avoid fighting with preprocessor macros
 struct __generated_struct_NRF_P0 {
-    FASTLED_NRF52_INLINE_ATTRIBUTE constexpr static NRF_GPIO_Type * r() {
-        return NRF_P0;
+    FASTLED_NRF52_INLINE_ATTRIBUTE constexpr static uintptr_t r() {
+        return NRF_P0_BASE;
     }
 };
 struct __generated_struct_NRF_P1 {
-    FASTLED_NRF52_INLINE_ATTRIBUTE constexpr static NRF_GPIO_Type * r() {
-        return NRF_P1;
+    FASTLED_NRF52_INLINE_ATTRIBUTE constexpr static uintptr_t r() {
+        return NRF_P1_BASE;
     }
 };
 
@@ -112,19 +112,19 @@ template <uint32_t _MASK, typename _PORT, uint8_t _PORT_NUMBER, uint8_t _PIN_NUM
         NRF_GPIO_PIN_NOSENSE            // pin sense level disabled
         );
   }
-  FASTLED_NRF52_INLINE_ATTRIBUTE static void       hi()        { _PORT::r()->OUTSET = _MASK;            } // sets _MASK in the SET   OUTPUT register (output set high)
-  FASTLED_NRF52_INLINE_ATTRIBUTE static void       lo()        { _PORT::r()->OUTCLR = _MASK;            } // sets _MASK in the CLEAR OUTPUT register (output set low)
-  FASTLED_NRF52_INLINE_ATTRIBUTE static void       toggle()    { _PORT::r()->OUT ^= _MASK;              } // toggles _MASK bits in the OUTPUT GPIO port directly
+  FASTLED_NRF52_INLINE_ATTRIBUTE static void       hi()        { (reinterpret_cast<NRF_GPIO_Type*>(_PORT::r()))->OUTSET = _MASK;            } // sets _MASK in the SET   OUTPUT register (output set high)
+  FASTLED_NRF52_INLINE_ATTRIBUTE static void       lo()        { (reinterpret_cast<NRF_GPIO_Type*>(_PORT::r()))->OUTCLR = _MASK;            } // sets _MASK in the CLEAR OUTPUT register (output set low)
+  FASTLED_NRF52_INLINE_ATTRIBUTE static void       toggle()    { (reinterpret_cast<NRF_GPIO_Type*>(_PORT::r()))->OUT ^= _MASK;              } // toggles _MASK bits in the OUTPUT GPIO port directly
   FASTLED_NRF52_INLINE_ATTRIBUTE static void       strobe()    { toggle();     toggle();                } // BUGBUG -- Is this used by FastLED?  Without knowing (for example) SPI Speed?
-  FASTLED_NRF52_INLINE_ATTRIBUTE static port_t     hival()     { return _PORT::r()->OUT | _MASK;        } // sets all _MASK bit(s) in the OUTPUT GPIO port to 1
-  FASTLED_NRF52_INLINE_ATTRIBUTE static port_t     loval()     { return _PORT::r()->OUT & ~_MASK;       } // sets all _MASK bit(s) in the OUTPUT GPIO port to 0
-  FASTLED_NRF52_INLINE_ATTRIBUTE static port_ptr_t port()      { return &(_PORT::r()->OUT);             } // gets raw pointer to OUTPUT          GPIO port
-  FASTLED_NRF52_INLINE_ATTRIBUTE static port_ptr_t cport()     { return &(_PORT::r()->OUTCLR);          } // gets raw pointer to SET   DIRECTION GPIO port
-  FASTLED_NRF52_INLINE_ATTRIBUTE static port_ptr_t sport()     { return &(_PORT::r()->OUTSET);          } // gets raw pointer to CLEAR DIRECTION GPIO port
+  FASTLED_NRF52_INLINE_ATTRIBUTE static port_t     hival()     { return (reinterpret_cast<NRF_GPIO_Type*>(_PORT::r()))->OUT | _MASK;        } // sets all _MASK bit(s) in the OUTPUT GPIO port to 1
+  FASTLED_NRF52_INLINE_ATTRIBUTE static port_t     loval()     { return (reinterpret_cast<NRF_GPIO_Type*>(_PORT::r()))->OUT & ~_MASK;       } // sets all _MASK bit(s) in the OUTPUT GPIO port to 0
+  FASTLED_NRF52_INLINE_ATTRIBUTE static port_ptr_t port()      { return &((reinterpret_cast<NRF_GPIO_Type*>(_PORT::r()))->OUT);             } // gets raw pointer to OUTPUT          GPIO port
+  FASTLED_NRF52_INLINE_ATTRIBUTE static port_ptr_t cport()     { return &((reinterpret_cast<NRF_GPIO_Type*>(_PORT::r()))->OUTCLR);          } // gets raw pointer to SET   DIRECTION GPIO port
+  FASTLED_NRF52_INLINE_ATTRIBUTE static port_ptr_t sport()     { return &((reinterpret_cast<NRF_GPIO_Type*>(_PORT::r()))->OUTSET);          } // gets raw pointer to CLEAR DIRECTION GPIO port
   FASTLED_NRF52_INLINE_ATTRIBUTE static port_t     mask()      { return _MASK;                          } // gets the value of _MASK
   FASTLED_NRF52_INLINE_ATTRIBUTE static void hi (register port_ptr_t port) { hi();                      } // sets _MASK in the SET   OUTPUT register (output set high)
   FASTLED_NRF52_INLINE_ATTRIBUTE static void lo (register port_ptr_t port) { lo();                      } // sets _MASK in the CLEAR OUTPUT register (output set low)
-  FASTLED_NRF52_INLINE_ATTRIBUTE static void set(register port_t     val ) { _PORT::r()->OUT = val;     } // sets entire port's value (optimization used by FastLED)
+  FASTLED_NRF52_INLINE_ATTRIBUTE static void set(register port_t     val ) { (reinterpret_cast<NRF_GPIO_Type*>(_PORT::r()))->OUT = val;     } // sets entire port's value (optimization used by FastLED)
   FASTLED_NRF52_INLINE_ATTRIBUTE static void fastset(register port_ptr_t port, register port_t val) { *port = val; }
   constexpr                      static uint32_t   nrf_pin2() { return NRF_GPIO_PIN_MAP(_PORT_NUMBER, _PIN_NUMBER); }
   constexpr                      static bool       LowSpeedOnlyRecommended() {
@@ -269,7 +269,7 @@ template <uint32_t _MASK, typename _PORT, uint8_t _PORT_NUMBER, uint8_t _PIN_NUM
             );
     #endif
     #if !defined(_FASTLED_NRF52_LOW_SPEED_ONLY_BOARD_DETECT)
-        #warning "Unknown board / package, ... caller must pins support high-speed"
+        #warning "Unknown board / package, ... caller must determine pins that support high-speed"
         return false; // choosing default to be FALSE, to allow users to ATTEMPT to use high-speed on pins where support is not known
     #endif
   }

From 13a9e0cc0999025b7ffdf92932bc6efb2199f8ea Mon Sep 17 00:00:00 2001
From: Henry Gabryjelski <henrygab@users.noreply.github.com>
Date: Wed, 31 Jul 2019 17:43:20 -0700
Subject: [PATCH 073/204] Update keywords.txt with additional chipsets (#854)

* Alphabetize chipset section -- no content changes this commit.

* Remove duplicate definition of LPD6803

* Add PL9823 to chipset keyword literals

* Add a dozen more missing chipsets as LITERAL1:
* APA106
* DOTSTAR
* GW6205_400
* LPD1886_8BIT
* PIXIE
* SK6812
* SK6822
* SK9822
* TM1812
* UCS1904
* UCS2903
* WS2852
---
 keywords.txt | 61 +++++++++++++++++++++++++++++++---------------------
 1 file changed, 36 insertions(+), 25 deletions(-)

diff --git a/keywords.txt b/keywords.txt
index c30552d8d6..75df500ca0 100644
--- a/keywords.txt
+++ b/keywords.txt
@@ -294,41 +294,52 @@ CRGB::YellowGreen	KEYWORD2
 #######################################
 
 # Chipsets
-LPD6803	LITERAL1
-LPD8806	LITERAL1
-WS2801	LITERAL1
-WS2803	LITERAL1
-P9813	LITERAL1
-SM16716	LITERAL1
 APA102	LITERAL1
+APA104	LITERAL1
+APA106	LITERAL1
 DMXSERIAL	LITERAL1
 DMXSIMPLE	LITERAL1
-TM1829	LITERAL1
-TM1809	LITERAL1
-TM1804	LITERAL1
-TM1803	LITERAL1
-APA104	LITERAL1
-WS2811	LITERAL1
-WS2812	LITERAL1
-WS2812B	LITERAL1
-WS2811_400	LITERAL1
-WS2813	LITERAL1
-NEOPIXEL	LITERAL1
-UCS1903	LITERAL1
-UCS1903B	LITERAL1
+DOTSTAR	LITERAL1
+GE8822	LITERAL1
+GS1903	LITERAL1
 GW6205	LITERAL1
 GW6205B	LITERAL1
+GW6205_400	LITERAL1
 LPD1886	LITERAL1
+LPD1886_8BIT	LITERAL1
+LPD6803	LITERAL1
+LPD8806	LITERAL1
+NEOPIXEL	LITERAL1
 OCTOWS2811	LITERAL1
 OCTOWS2811_400	LITERAL1
 OCTOWS2813	LITERAL1
-WS2812SERIAL	LITERAL1
-SMART_MATRIX	LITERAL1
-GE8822	LITERAL1
+P9813	LITERAL1
+PIXIE	LITERAL1
+PL9823	LITERAL1
+SK6812	LITERAL1
+SK6822	LITERAL1
+SK9822	LITERAL1
 SM16703 LITERAL1
-GS1903	LITERAL1
-LPD6803	LITERAL1
-
+SM16716	LITERAL1
+SMART_MATRIX	LITERAL1
+TM1803	LITERAL1
+TM1804	LITERAL1
+TM1809	LITERAL1
+TM1812	LITERAL1
+TM1829	LITERAL1
+UCS1903	LITERAL1
+UCS1903B	LITERAL1
+UCS1904	LITERAL1
+UCS2903	LITERAL1
+WS2801	LITERAL1
+WS2803	LITERAL1
+WS2811	LITERAL1
+WS2811_400	LITERAL1
+WS2812	LITERAL1
+WS2812B	LITERAL1
+WS2812SERIAL	LITERAL1
+WS2813	LITERAL1
+WS2852	LITERAL1
 
 # RGB orderings
 RGB	LITERAL1

From e0378f420bba701d6490e958d03571d72759d252 Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Fri, 2 Aug 2019 22:47:13 -0400
Subject: [PATCH 074/204] Two mods: (1) convert CPU cycles directly to RMT
 cycles without going through nanoseconds; (2) improve performance of fill
 buffer by using a pointer into RMT memory rather than a bunch of indexes, and
 by inlining the getNextByte routine.

---
 platforms/esp/32/clockless_rmt_esp32.h | 26 ++++++++++++++++++--------
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/platforms/esp/32/clockless_rmt_esp32.h b/platforms/esp/32/clockless_rmt_esp32.h
index accd60081c..ce496b7b7d 100644
--- a/platforms/esp/32/clockless_rmt_esp32.h
+++ b/platforms/esp/32/clockless_rmt_esp32.h
@@ -131,6 +131,10 @@ __attribute__ ((always_inline)) inline static uint32_t __clock_cycles() {
 // -- Convert ESP32 cycles to RMT cycles
 #define TO_RMT_CYCLES(_CLKS) NS_TO_CYCLES(ESPCLKS_TO_NS(_CLKS))    
 
+#define RMT_CYCLES_PER_ESP_CYCLE (F_CPU / CYCLES_PER_SEC)
+// #define ESP_TO_RMT_CYCLES(n) TO_RMT_CYCLES(n)
+#define ESP_TO_RMT_CYCLES(n) ((n) / (RMT_CYCLES_PER_ESP_CYCLE))
+
 // -- Number of cycles to signal the strip to latch
 #define RMT_RESET_DURATION NS_TO_CYCLES(50000)
 
@@ -190,6 +194,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
     PixelController<RGB_ORDER> * mPixels;
     int            mCurColor;
     uint16_t       mCurPulse;
+    volatile uint32_t * mRMT_mem_ptr;
 
     // -- Buffer to hold all of the pulses. For the version that uses
     //    the RMT driver built into the ESP core.
@@ -208,17 +213,17 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         //    according to the timing values given in the template instantiation
         // T1H
         mOne.level0 = 1;
-        mOne.duration0 = TO_RMT_CYCLES(T1+T2);
+        mOne.duration0 = ESP_TO_RMT_CYCLES(T1+T2); // TO_RMT_CYCLES(T1+T2);
         // T1L
         mOne.level1 = 0;
-        mOne.duration1 = TO_RMT_CYCLES(T3);
+        mOne.duration1 = ESP_TO_RMT_CYCLES(T3); // TO_RMT_CYCLES(T3);
 
         // T0H
         mZero.level0 = 1;
-        mZero.duration0 = TO_RMT_CYCLES(T1);
+        mZero.duration0 = ESP_TO_RMT_CYCLES(T1); // TO_RMT_CYCLES(T1);
         // T0L
         mZero.level1 = 0;
-        mZero.duration1 = TO_RMT_CYCLES(T2 + T3);
+        mZero.duration1 = ESP_TO_RMT_CYCLES(T2+T3); // TO_RMT_CYCLES(T2 + T3);
 
         gControllers[gNumControllers] = this;
         gNumControllers++;
@@ -414,6 +419,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         
             // -- Initialize the counters that keep track of where we are in
             //    the pixel data.
+            mRMT_mem_ptr = & (RMTMEM.chan[mRMT_channel].data32[0].val);
             mCurPulse = 0;
             mCurColor = 0;
 
@@ -494,7 +500,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         }
     }
 
-    uint8_t IRAM_ATTR getNextByte()
+    uint8_t IRAM_ATTR getNextByte() __attribute__ ((always_inline))
     {
         uint8_t byte;
 
@@ -543,7 +549,8 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
             // rmt_item32_t value corresponding to the buffered bit value
             for (register uint32_t j = 0; j < 8; j++) {
                 uint32_t val = (byteval & 0x80000000L) ? one_val : zero_val;
-                RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = val;
+                * mRMT_mem_ptr++ = val;
+                // RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = val;
                 byteval <<= 1;
                 mCurPulse++;
             }
@@ -554,15 +561,18 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         //    RMT buffer with 0's, which signals to the device that we're done.
         if ( ! mPixels->has(1) ) {
             while (pulses < 32) {
-                RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = 0;
+                * mRMT_mem_ptr++ = 0;
+                // RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = 0;
                 mCurPulse++;
                 pulses++;
             }
         }
         
         // -- When we have filled the back half the buffer, reset the position to the first half
-        if (mCurPulse >= MAX_PULSES*2)
+        if (mCurPulse >= MAX_PULSES*2) {
+            mRMT_mem_ptr = & (RMTMEM.chan[mRMT_channel].data32[0].val);
             mCurPulse = 0;
+        }            
     }
 };
 

From 80ca65ae37caa8c577671c1e4de405fdfaa9d034 Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Fri, 2 Aug 2019 22:52:31 -0400
Subject: [PATCH 075/204] Minor cleanup

---
 platforms/esp/32/clockless_rmt_esp32.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/platforms/esp/32/clockless_rmt_esp32.h b/platforms/esp/32/clockless_rmt_esp32.h
index ce496b7b7d..fcd66ddb10 100644
--- a/platforms/esp/32/clockless_rmt_esp32.h
+++ b/platforms/esp/32/clockless_rmt_esp32.h
@@ -131,8 +131,8 @@ __attribute__ ((always_inline)) inline static uint32_t __clock_cycles() {
 // -- Convert ESP32 cycles to RMT cycles
 #define TO_RMT_CYCLES(_CLKS) NS_TO_CYCLES(ESPCLKS_TO_NS(_CLKS))    
 
+// -- NEW: Just convert directly from CPU cycles to RMT cycles
 #define RMT_CYCLES_PER_ESP_CYCLE (F_CPU / CYCLES_PER_SEC)
-// #define ESP_TO_RMT_CYCLES(n) TO_RMT_CYCLES(n)
 #define ESP_TO_RMT_CYCLES(n) ((n) / (RMT_CYCLES_PER_ESP_CYCLE))
 
 // -- Number of cycles to signal the strip to latch
@@ -550,7 +550,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
             for (register uint32_t j = 0; j < 8; j++) {
                 uint32_t val = (byteval & 0x80000000L) ? one_val : zero_val;
                 * mRMT_mem_ptr++ = val;
-                // RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = val;
+                // Replaces: RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = val;
                 byteval <<= 1;
                 mCurPulse++;
             }
@@ -562,7 +562,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         if ( ! mPixels->has(1) ) {
             while (pulses < 32) {
                 * mRMT_mem_ptr++ = 0;
-                // RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = 0;
+                // Replaces: RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = 0;
                 mCurPulse++;
                 pulses++;
             }

From 6126c8217af5d4adc490c48939bbb81613477ad8 Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Tue, 6 Aug 2019 22:15:56 -0400
Subject: [PATCH 076/204] Cleaned up the conversion of CPU cycles to RMT cycles

---
 platforms/esp/32/clockless_rmt_esp32.h | 25 ++++++++-----------------
 1 file changed, 8 insertions(+), 17 deletions(-)

diff --git a/platforms/esp/32/clockless_rmt_esp32.h b/platforms/esp/32/clockless_rmt_esp32.h
index fcd66ddb10..6368bc9328 100644
--- a/platforms/esp/32/clockless_rmt_esp32.h
+++ b/platforms/esp/32/clockless_rmt_esp32.h
@@ -118,25 +118,16 @@ __attribute__ ((always_inline)) inline static uint32_t __clock_cycles() {
 #define DIVIDER             2 /* 4, 8 still seem to work, but timings become marginal */
 #define MAX_PULSES         32 /* A channel has a 64 "pulse" buffer - we use half per pass */
 
-// -- Convert ESP32 cycles back into nanoseconds
-#define ESPCLKS_TO_NS(_CLKS) (((long)(_CLKS) * 1000L) / F_CPU_MHZ)
-
-// -- Convert nanoseconds into RMT cycles
-#define F_CPU_RMT       (  80000000L)
-#define NS_PER_SEC      (1000000000L)
-#define CYCLES_PER_SEC  (F_CPU_RMT/DIVIDER)
-#define NS_PER_CYCLE    ( NS_PER_SEC / CYCLES_PER_SEC )
-#define NS_TO_CYCLES(n) ( (n) / NS_PER_CYCLE )
-
-// -- Convert ESP32 cycles to RMT cycles
-#define TO_RMT_CYCLES(_CLKS) NS_TO_CYCLES(ESPCLKS_TO_NS(_CLKS))    
-
-// -- NEW: Just convert directly from CPU cycles to RMT cycles
-#define RMT_CYCLES_PER_ESP_CYCLE (F_CPU / CYCLES_PER_SEC)
-#define ESP_TO_RMT_CYCLES(n) ((n) / (RMT_CYCLES_PER_ESP_CYCLE))
+// -- Convert ESP32 CPU cycles to RMT device cycles, taking into account the divider
+#define F_CPU_RMT                   (  80000000L)
+#define RMT_CYCLES_PER_SEC          (F_CPU_RMT/DIVIDER)
+#define RMT_CYCLES_PER_ESP_CYCLE    (F_CPU / RMT_CYCLES_PER_SEC)
+#define ESP_TO_RMT_CYCLES(n)        ((n) / (RMT_CYCLES_PER_ESP_CYCLE))
 
 // -- Number of cycles to signal the strip to latch
-#define RMT_RESET_DURATION NS_TO_CYCLES(50000)
+#define NS_PER_CYCLE                ( 1000000000L / RMT_CYCLES_PER_SEC )
+#define NS_TO_CYCLES(n)             ( (n) / NS_PER_CYCLE )
+#define RMT_RESET_DURATION          NS_TO_CYCLES(50000)
 
 // -- Core or custom driver
 #ifndef FASTLED_RMT_BUILTIN_DRIVER

From e68b1856ffe1d2196c75f11eda72059340903744 Mon Sep 17 00:00:00 2001
From: Daniel Garcia <dgarcia@dgarcia.net>
Date: Sun, 11 Aug 2019 14:43:29 -0700
Subject: [PATCH 077/204] Fix for #861 - override size in the octoWS2811
 controller so that power calculations use all the leds.

---
 platforms/arm/k20/octows2811_controller.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/platforms/arm/k20/octows2811_controller.h b/platforms/arm/k20/octows2811_controller.h
index 63f6d8f62d..84c28667d1 100644
--- a/platforms/arm/k20/octows2811_controller.h
+++ b/platforms/arm/k20/octows2811_controller.h
@@ -28,7 +28,7 @@ class COctoWS2811Controller : public CPixelLEDController<RGB_ORDER, 8, 0xFF> {
   }
 public:
   COctoWS2811Controller() { pocto = NULL; }
-
+  virtual int size() { return CLEDController::size() * 8; }
 
   virtual void init() { /* do nothing yet */ }
 

From 09b17833ac60cb61ef63a8bca10a1fc7b75ee4b0 Mon Sep 17 00:00:00 2001
From: Daniel Garcia <dgarcia@dgarcia.net>
Date: Sun, 11 Aug 2019 15:47:31 -0700
Subject: [PATCH 078/204] Pre-teensy4 work - with a 600MHz clock, a 1MHz clock
 was giving us a clock divider that overflowed a uint8_t - whoops...

---
 chipsets.h                              | 16 ++++++-------
 fastspi.h                               | 30 ++++++++++++-------------
 fastspi_bitbang.h                       |  2 +-
 fastspi_nop.h                           |  2 +-
 fastspi_ref.h                           |  2 +-
 platforms/arm/k20/fastspi_arm_k20.h     |  2 +-
 platforms/arm/k66/fastspi_arm_k66.h     |  2 +-
 platforms/arm/kl26/fastspi_arm_kl26.h   |  2 +-
 platforms/arm/nrf51/fastspi_arm_nrf51.h |  2 +-
 platforms/arm/nrf52/fastspi_arm_nrf52.h | 10 ++++-----
 platforms/arm/sam/fastspi_arm_sam.h     |  2 +-
 platforms/avr/fastspi_avr.h             |  8 +++----
 12 files changed, 40 insertions(+), 40 deletions(-)

diff --git a/chipsets.h b/chipsets.h
index d452abfe87..477842b38e 100644
--- a/chipsets.h
+++ b/chipsets.h
@@ -77,7 +77,7 @@ class PixieController : public CPixelLEDController<RGB_ORDER> {
 /// @tparam CLOCK_PIN the clock pin for these leds
 /// @tparam RGB_ORDER the RGB ordering for these leds
 /// @tparam SPI_SPEED the clock divider used for these leds.  Set using the DATA_RATE_MHZ/DATA_RATE_KHZ macros.  Defaults to DATA_RATE_MHZ(12)
-template <uint8_t DATA_PIN, uint8_t CLOCK_PIN, EOrder RGB_ORDER = RGB,  uint8_t SPI_SPEED = DATA_RATE_MHZ(12) >
+template <uint8_t DATA_PIN, uint8_t CLOCK_PIN, EOrder RGB_ORDER = RGB,  uint32_t SPI_SPEED = DATA_RATE_MHZ(12) >
 class LPD8806Controller : public CPixelLEDController<RGB_ORDER> {
 	typedef SPIOutput<DATA_PIN, CLOCK_PIN, SPI_SPEED> SPI;
 
@@ -118,7 +118,7 @@ class LPD8806Controller : public CPixelLEDController<RGB_ORDER> {
 /// @tparam CLOCK_PIN the clock pin for these leds
 /// @tparam RGB_ORDER the RGB ordering for these leds
 /// @tparam SPI_SPEED the clock divider used for these leds.  Set using the DATA_RATE_MHZ/DATA_RATE_KHZ macros.  Defaults to DATA_RATE_MHZ(1)
-template <uint8_t DATA_PIN, uint8_t CLOCK_PIN, EOrder RGB_ORDER = RGB, uint8_t SPI_SPEED = DATA_RATE_MHZ(1)>
+template <uint8_t DATA_PIN, uint8_t CLOCK_PIN, EOrder RGB_ORDER = RGB, uint32_t SPI_SPEED = DATA_RATE_MHZ(1)>
 class WS2801Controller : public CPixelLEDController<RGB_ORDER> {
 	typedef SPIOutput<DATA_PIN, CLOCK_PIN, SPI_SPEED> SPI;
 	SPI mSPI;
@@ -140,7 +140,7 @@ class WS2801Controller : public CPixelLEDController<RGB_ORDER> {
 	}
 };
 
-template <uint8_t DATA_PIN, uint8_t CLOCK_PIN, EOrder RGB_ORDER = RGB, uint8_t SPI_SPEED = DATA_RATE_MHZ(25)>
+template <uint8_t DATA_PIN, uint8_t CLOCK_PIN, EOrder RGB_ORDER = RGB, uint32_t SPI_SPEED = DATA_RATE_MHZ(25)>
 class WS2803Controller : public WS2801Controller<DATA_PIN, CLOCK_PIN, RGB_ORDER, SPI_SPEED> {};
 
 /// LPD6803 controller class (LPD1101).
@@ -151,7 +151,7 @@ class WS2803Controller : public WS2801Controller<DATA_PIN, CLOCK_PIN, RGB_ORDER,
 /// @tparam CLOCK_PIN the clock pin for these leds
 /// @tparam RGB_ORDER the RGB ordering for these leds
 /// @tparam SPI_SPEED the clock divider used for these leds.  Set using the DATA_RATE_MHZ/DATA_RATE_KHZ macros.  Defaults to DATA_RATE_MHZ(12)
-template <uint8_t DATA_PIN, uint8_t CLOCK_PIN, EOrder RGB_ORDER = RGB, uint8_t SPI_SPEED = DATA_RATE_MHZ(12)>
+template <uint8_t DATA_PIN, uint8_t CLOCK_PIN, EOrder RGB_ORDER = RGB, uint32_t SPI_SPEED = DATA_RATE_MHZ(12)>
 class LPD6803Controller : public CPixelLEDController<RGB_ORDER> {
 	typedef SPIOutput<DATA_PIN, CLOCK_PIN, SPI_SPEED> SPI;
 	SPI mSPI;
@@ -201,7 +201,7 @@ class LPD6803Controller : public CPixelLEDController<RGB_ORDER> {
 /// @tparam CLOCK_PIN the clock pin for these leds
 /// @tparam RGB_ORDER the RGB ordering for these leds
 /// @tparam SPI_SPEED the clock divider used for these leds.  Set using the DATA_RATE_MHZ/DATA_RATE_KHZ macros.  Defaults to DATA_RATE_MHZ(12)
-template <uint8_t DATA_PIN, uint8_t CLOCK_PIN, EOrder RGB_ORDER = RGB, uint8_t SPI_SPEED = DATA_RATE_MHZ(12)>
+template <uint8_t DATA_PIN, uint8_t CLOCK_PIN, EOrder RGB_ORDER = RGB, uint32_t SPI_SPEED = DATA_RATE_MHZ(12)>
 class APA102Controller : public CPixelLEDController<RGB_ORDER> {
 	typedef SPIOutput<DATA_PIN, CLOCK_PIN, SPI_SPEED> SPI;
 	SPI mSPI;
@@ -266,7 +266,7 @@ class APA102Controller : public CPixelLEDController<RGB_ORDER> {
 /// @tparam CLOCK_PIN the clock pin for these leds
 /// @tparam RGB_ORDER the RGB ordering for these leds
 /// @tparam SPI_SPEED the clock divider used for these leds.  Set using the DATA_RATE_MHZ/DATA_RATE_KHZ macros.  Defaults to DATA_RATE_MHZ(24)
-template <uint8_t DATA_PIN, uint8_t CLOCK_PIN, EOrder RGB_ORDER = RGB, uint8_t SPI_SPEED = DATA_RATE_MHZ(24)>
+template <uint8_t DATA_PIN, uint8_t CLOCK_PIN, EOrder RGB_ORDER = RGB, uint32_t SPI_SPEED = DATA_RATE_MHZ(24)>
 class SK9822Controller : public CPixelLEDController<RGB_ORDER> {
 	typedef SPIOutput<DATA_PIN, CLOCK_PIN, SPI_SPEED> SPI;
 	SPI mSPI;
@@ -340,7 +340,7 @@ class SK9822Controller : public CPixelLEDController<RGB_ORDER> {
 /// @tparam CLOCK_PIN the clock pin for these leds
 /// @tparam RGB_ORDER the RGB ordering for these leds
 /// @tparam SPI_SPEED the clock divider used for these leds.  Set using the DATA_RATE_MHZ/DATA_RATE_KHZ macros.  Defaults to DATA_RATE_MHZ(10)
-template <uint8_t DATA_PIN, uint8_t CLOCK_PIN, EOrder RGB_ORDER = RGB, uint8_t SPI_SPEED = DATA_RATE_MHZ(10)>
+template <uint8_t DATA_PIN, uint8_t CLOCK_PIN, EOrder RGB_ORDER = RGB, uint32_t SPI_SPEED = DATA_RATE_MHZ(10)>
 class P9813Controller : public CPixelLEDController<RGB_ORDER> {
 	typedef SPIOutput<DATA_PIN, CLOCK_PIN, SPI_SPEED> SPI;
 	SPI mSPI;
@@ -390,7 +390,7 @@ class P9813Controller : public CPixelLEDController<RGB_ORDER> {
 /// @tparam CLOCK_PIN the clock pin for these leds
 /// @tparam RGB_ORDER the RGB ordering for these leds
 /// @tparam SPI_SPEED the clock divider used for these leds.  Set using the DATA_RATE_MHZ/DATA_RATE_KHZ macros.  Defaults to DATA_RATE_MHZ(16)
-template <uint8_t DATA_PIN, uint8_t CLOCK_PIN, EOrder RGB_ORDER = RGB, uint8_t SPI_SPEED = DATA_RATE_MHZ(16)>
+template <uint8_t DATA_PIN, uint8_t CLOCK_PIN, EOrder RGB_ORDER = RGB, uint32_t SPI_SPEED = DATA_RATE_MHZ(16)>
 class SM16716Controller : public CPixelLEDController<RGB_ORDER> {
 	typedef SPIOutput<DATA_PIN, CLOCK_PIN, SPI_SPEED> SPI;
 	SPI mSPI;
diff --git a/fastspi.h b/fastspi.h
index fc0843be7a..bf6709a9ff 100644
--- a/fastspi.h
+++ b/fastspi.h
@@ -26,22 +26,22 @@ FASTLED_NAMESPACE_BEGIN
 //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
 #if !defined(FASTLED_ALL_PINS_HARDWARE_SPI)
-template<uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint8_t _SPI_CLOCK_DIVIDER>
+template<uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint32_t _SPI_CLOCK_DIVIDER>
 class SPIOutput : public AVRSoftwareSPIOutput<_DATA_PIN, _CLOCK_PIN, _SPI_CLOCK_DIVIDER> {};
 #endif
 
-template<uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint8_t _SPI_CLOCK_DIVIDER>
+template<uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint32_t _SPI_CLOCK_DIVIDER>
 class SoftwareSPIOutput : public AVRSoftwareSPIOutput<_DATA_PIN, _CLOCK_PIN, _SPI_CLOCK_DIVIDER> {};
 
 #ifndef FASTLED_FORCE_SOFTWARE_SPI
 
 #if defined(NRF51) && defined(FASTLED_ALL_PINS_HARDWARE_SPI)
-template<uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint8_t _SPI_CLOCK_DIVIDER>
+template<uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint32_t _SPI_CLOCK_DIVIDER>
 class SPIOutput : public NRF51SPIOutput<_DATA_PIN, _CLOCK_PIN, _SPI_CLOCK_DIVIDER> {};
 #endif
 
 #if defined(NRF52_SERIES) && defined(FASTLED_ALL_PINS_HARDWARE_SPI)
-template<uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint8_t _SPI_CLOCK_DIVIDER>
+template<uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint32_t _SPI_CLOCK_DIVIDER>
 class SPIOutput : public NRF52SPIOutput<_DATA_PIN, _CLOCK_PIN, _SPI_CLOCK_DIVIDER> {};
 #endif
 
@@ -49,26 +49,26 @@ class SPIOutput : public NRF52SPIOutput<_DATA_PIN, _CLOCK_PIN, _SPI_CLOCK_DIVIDE
 
 #if defined(FASTLED_TEENSY3) && defined(ARM_HARDWARE_SPI)
 
-template<uint8_t SPI_SPEED>
+template<uint32_t SPI_SPEED>
 class SPIOutput<SPI_DATA, SPI_CLOCK, SPI_SPEED> : public ARMHardwareSPIOutput<SPI_DATA, SPI_CLOCK, SPI_SPEED, 0x4002C000> {};
 
 #if defined(SPI2_DATA)
 
-template<uint8_t SPI_SPEED>
+template<uint32_t SPI_SPEED>
 class SPIOutput<SPI2_DATA, SPI2_CLOCK, SPI_SPEED> : public ARMHardwareSPIOutput<SPI2_DATA, SPI2_CLOCK, SPI_SPEED, 0x4002C000> {};
 
-template<uint8_t SPI_SPEED>
+template<uint32_t SPI_SPEED>
 class SPIOutput<SPI_DATA, SPI2_CLOCK, SPI_SPEED> : public ARMHardwareSPIOutput<SPI_DATA, SPI2_CLOCK, SPI_SPEED, 0x4002C000> {};
 
-template<uint8_t SPI_SPEED>
+template<uint32_t SPI_SPEED>
 class SPIOutput<SPI2_DATA, SPI_CLOCK, SPI_SPEED> : public ARMHardwareSPIOutput<SPI2_DATA, SPI_CLOCK, SPI_SPEED, 0x4002C000> {};
 #endif
 
 #elif defined(FASTLED_TEENSYLC) && defined(ARM_HARDWARE_SPI)
 
-#define DECLARE_SPI0(__DATA,__CLOCK) template<uint8_t SPI_SPEED>\
+#define DECLARE_SPI0(__DATA,__CLOCK) template<uint32_t SPI_SPEED>\
  class SPIOutput<__DATA, __CLOCK, SPI_SPEED> : public ARMHardwareSPIOutput<__DATA, __CLOCK, SPI_SPEED, 0x40076000> {};
- #define DECLARE_SPI1(__DATA,__CLOCK) template<uint8_t SPI_SPEED>\
+ #define DECLARE_SPI1(__DATA,__CLOCK) template<uint32_t SPI_SPEED>\
   class SPIOutput<__DATA, __CLOCK, SPI_SPEED> : public ARMHardwareSPIOutput<__DATA, __CLOCK, SPI_SPEED, 0x40077000> {};
 
 DECLARE_SPI0(7,13);
@@ -85,24 +85,24 @@ DECLARE_SPI1(21,20);
 
 #elif defined(__SAM3X8E__)
 
-template<uint8_t SPI_SPEED>
+template<uint32_t SPI_SPEED>
 class SPIOutput<SPI_DATA, SPI_CLOCK, SPI_SPEED> : public SAMHardwareSPIOutput<SPI_DATA, SPI_CLOCK, SPI_SPEED> {};
 
 #elif defined(AVR_HARDWARE_SPI)
 
-template<uint8_t SPI_SPEED>
+template<uint32_t SPI_SPEED>
 class SPIOutput<SPI_DATA, SPI_CLOCK, SPI_SPEED> : public AVRHardwareSPIOutput<SPI_DATA, SPI_CLOCK, SPI_SPEED> {};
 
 #if defined(SPI_UART0_DATA)
 
-template<uint8_t SPI_SPEED>
+template<uint32_t SPI_SPEED>
 class SPIOutput<SPI_UART0_DATA, SPI_UART0_CLOCK, SPI_SPEED> : public AVRUSART0SPIOutput<SPI_UART0_DATA, SPI_UART0_CLOCK, SPI_SPEED> {};
 
 #endif
 
 #if defined(SPI_UART1_DATA)
 
-template<uint8_t SPI_SPEED>
+template<uint32_t SPI_SPEED>
 class SPIOutput<SPI_UART1_DATA, SPI_UART1_CLOCK, SPI_SPEED> : public AVRUSART1SPIOutput<SPI_UART1_DATA, SPI_UART1_CLOCK, SPI_SPEED> {};
 
 #endif
@@ -120,7 +120,7 @@ class SPIOutput<SPI_UART1_DATA, SPI_UART1_CLOCK, SPI_SPEED> : public AVRUSART1SP
 #endif
 
 // #if defined(USART_DATA) && defined(USART_CLOCK)
-// template<uint8_t SPI_SPEED>
+// template<uint32_t SPI_SPEED>
 // class AVRSPIOutput<USART_DATA, USART_CLOCK, SPI_SPEED> : public AVRUSARTSPIOutput<USART_DATA, USART_CLOCK, SPI_SPEED> {};
 // #endif
 
diff --git a/fastspi_bitbang.h b/fastspi_bitbang.h
index d48e32bcb3..292c8ecd2b 100644
--- a/fastspi_bitbang.h
+++ b/fastspi_bitbang.h
@@ -15,7 +15,7 @@ FASTLED_NAMESPACE_BEGIN
 //
 //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-template <uint8_t DATA_PIN, uint8_t CLOCK_PIN, uint8_t SPI_SPEED>
+template <uint8_t DATA_PIN, uint8_t CLOCK_PIN, uint32_t SPI_SPEED>
 class AVRSoftwareSPIOutput {
 	// The data types for pointers to the pin port - typedef'd here from the Pin definition because on avr these
 	// are pointers to 8 bit values, while on arm they are 32 bit
diff --git a/fastspi_nop.h b/fastspi_nop.h
index 5c5da010a8..1dcd2961e0 100644
--- a/fastspi_nop.h
+++ b/fastspi_nop.h
@@ -10,7 +10,7 @@ FASTLED_NAMESPACE_BEGIN
 /// A nop/stub class, mostly to show the SPI methods that are needed/used by the various SPI chipset implementations.  Should
 /// be used as a definition for the set of methods that the spi implementation classes should use (since C++ doesn't support the
 /// idea of interfaces - it's possible this could be done with virtual classes, need to decide if i want that overhead)
-template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint8_t _SPI_CLOCK_DIVIDER>
+template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint32_t _SPI_CLOCK_DIVIDER>
 class NOPSPIOutput {
 	Selectable *m_pSelect;
 
diff --git a/fastspi_ref.h b/fastspi_ref.h
index f68e63ef46..00c41d345d 100644
--- a/fastspi_ref.h
+++ b/fastspi_ref.h
@@ -8,7 +8,7 @@ FASTLED_NAMESPACE_BEGIN
 
 // A skeletal implementation of hardware SPI support.  Fill in the necessary code for init, waiting, and writing.  The rest of
 // the method implementations should provide a starting point, even if not hte most efficient to start with
-template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint8_t _SPI_CLOCK_DIVIDER>
+template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint32_t _SPI_CLOCK_DIVIDER>
 class REFHardwareSPIOutput {
 	Selectable *m_pSelect;
 public:
diff --git a/platforms/arm/k20/fastspi_arm_k20.h b/platforms/arm/k20/fastspi_arm_k20.h
index 70210a396a..0512324368 100644
--- a/platforms/arm/k20/fastspi_arm_k20.h
+++ b/platforms/arm/k20/fastspi_arm_k20.h
@@ -94,7 +94,7 @@ template <int VAL> void getScalars(uint32_t & preScalar, uint32_t & scalar, uint
 
 #define SPIX (*(SPI_t*)pSPIX)
 
-template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint8_t _SPI_CLOCK_DIVIDER, uint32_t pSPIX>
+template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint32_t _SPI_CLOCK_DIVIDER, uint32_t pSPIX>
 class ARMHardwareSPIOutput {
 	Selectable *m_pSelect;
 	SPIState gState;
diff --git a/platforms/arm/k66/fastspi_arm_k66.h b/platforms/arm/k66/fastspi_arm_k66.h
index 7e598cff4b..a40e598522 100644
--- a/platforms/arm/k66/fastspi_arm_k66.h
+++ b/platforms/arm/k66/fastspi_arm_k66.h
@@ -102,7 +102,7 @@ template <int VAL> void getScalars(uint32_t & preScalar, uint32_t & scalar, uint
 
 #define SPIX (*(SPI_t*)pSPIX)
 
-template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint8_t _SPI_CLOCK_DIVIDER, uint32_t pSPIX>
+template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint32_t _SPI_CLOCK_DIVIDER, uint32_t pSPIX>
 class ARMHardwareSPIOutput {
 	Selectable *m_pSelect;
 	SPIState gState;
diff --git a/platforms/arm/kl26/fastspi_arm_kl26.h b/platforms/arm/kl26/fastspi_arm_kl26.h
index 869b60546f..b1e766774d 100644
--- a/platforms/arm/kl26/fastspi_arm_kl26.h
+++ b/platforms/arm/kl26/fastspi_arm_kl26.h
@@ -82,7 +82,7 @@ template <int VAL> void getScalars(uint8_t & sppr, uint8_t & spr) {
 #define SPIX (*(KINETISL_SPI_t*)pSPIX)
 #define ARM_HARDWARE_SPI
 
-template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint8_t _SPI_CLOCK_DIVIDER, uint32_t pSPIX>
+template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint32_t _SPI_CLOCK_DIVIDER, uint32_t pSPIX>
 class ARMHardwareSPIOutput {
   Selectable *m_pSelect;
 
diff --git a/platforms/arm/nrf51/fastspi_arm_nrf51.h b/platforms/arm/nrf51/fastspi_arm_nrf51.h
index 539fd65646..6299e89d96 100644
--- a/platforms/arm/nrf51/fastspi_arm_nrf51.h
+++ b/platforms/arm/nrf51/fastspi_arm_nrf51.h
@@ -9,7 +9,7 @@
 // A nop/stub class, mostly to show the SPI methods that are needed/used by the various SPI chipset implementations.  Should
 // be used as a definition for the set of methods that the spi implementation classes should use (since C++ doesn't support the
 // idea of interfaces - it's possible this could be done with virtual classes, need to decide if i want that overhead)
-template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint8_t _SPI_CLOCK_DIVIDER>
+template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint32_t _SPI_CLOCK_DIVIDER>
 class NRF51SPIOutput {
 
   struct saveData {
diff --git a/platforms/arm/nrf52/fastspi_arm_nrf52.h b/platforms/arm/nrf52/fastspi_arm_nrf52.h
index 8492282bae..9c1a219826 100644
--- a/platforms/arm/nrf52/fastspi_arm_nrf52.h
+++ b/platforms/arm/nrf52/fastspi_arm_nrf52.h
@@ -21,7 +21,7 @@
      */
 
     /// SPI_CLOCK_DIVIDER is number of CPU clock cycles per SPI transmission bit?
-    template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint8_t _SPI_CLOCK_DIVIDER>
+    template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint32_t _SPI_CLOCK_DIVIDER>
     class NRF52SPIOutput {
 
     private:
@@ -325,13 +325,13 @@
 
     // Static member definition and initialization using templates.
     // see https://stackoverflow.com/questions/3229883/static-member-initialization-in-a-class-template#answer-3229919
-    template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint8_t _SPI_CLOCK_DIVIDER>
+    template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint32_t _SPI_CLOCK_DIVIDER>
     bool NRF52SPIOutput<_DATA_PIN, _CLOCK_PIN, _SPI_CLOCK_DIVIDER>::s_InUse = false;
-    template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint8_t _SPI_CLOCK_DIVIDER>
+    template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint32_t _SPI_CLOCK_DIVIDER>
     bool NRF52SPIOutput<_DATA_PIN, _CLOCK_PIN, _SPI_CLOCK_DIVIDER>::s_NeedToWait = false;
-    template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint8_t _SPI_CLOCK_DIVIDER>
+    template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint32_t _SPI_CLOCK_DIVIDER>
     uint8_t NRF52SPIOutput<_DATA_PIN, _CLOCK_PIN, _SPI_CLOCK_DIVIDER>::s_BufferIndex = 0;
-    template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint8_t _SPI_CLOCK_DIVIDER>
+    template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint32_t _SPI_CLOCK_DIVIDER>
     uint8_t NRF52SPIOutput<_DATA_PIN, _CLOCK_PIN, _SPI_CLOCK_DIVIDER>::s_Buffer[2][2] = {{0,0},{0,0}};
 
 #endif // #ifndef FASTLED_FORCE_SOFTWARE_SPI
diff --git a/platforms/arm/sam/fastspi_arm_sam.h b/platforms/arm/sam/fastspi_arm_sam.h
index eb9abe4cb7..a9446439b8 100644
--- a/platforms/arm/sam/fastspi_arm_sam.h
+++ b/platforms/arm/sam/fastspi_arm_sam.h
@@ -6,7 +6,7 @@ FASTLED_NAMESPACE_BEGIN
 #if defined(__SAM3X8E__)
 #define m_SPI ((Spi*)SPI0)
 
-template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint8_t _SPI_CLOCK_DIVIDER>
+template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint32_t _SPI_CLOCK_DIVIDER>
 class SAMHardwareSPIOutput {
 	Selectable *m_pSelect;
 
diff --git a/platforms/avr/fastspi_avr.h b/platforms/avr/fastspi_avr.h
index fc14d59638..d2edc9660c 100644
--- a/platforms/avr/fastspi_avr.h
+++ b/platforms/avr/fastspi_avr.h
@@ -20,7 +20,7 @@ FASTLED_NAMESPACE_BEGIN
 #define UCPHA1 1
 #endif
 
-template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint8_t _SPI_CLOCK_DIVIDER>
+template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint32_t _SPI_CLOCK_DIVIDER>
 class AVRUSART1SPIOutput {
 	Selectable *m_pSelect;
 
@@ -167,7 +167,7 @@ class AVRUSART1SPIOutput {
 #endif
 
 #if defined(UBRR0)
-template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint8_t _SPI_CLOCK_DIVIDER>
+template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint32_t _SPI_CLOCK_DIVIDER>
 class AVRUSART0SPIOutput {
 	Selectable *m_pSelect;
 
@@ -329,7 +329,7 @@ class AVRUSART0SPIOutput {
 //
 //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint8_t _SPI_CLOCK_DIVIDER>
+template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint32_t _SPI_CLOCK_DIVIDER>
 class AVRHardwareSPIOutput {
 	Selectable *m_pSelect;
 	bool mWait;
@@ -506,7 +506,7 @@ class AVRHardwareSPIOutput {
 //
 //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint8_t _SPI_CLOCK_DIVIDER>
+template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint32_t _SPI_CLOCK_DIVIDER>
 class AVRHardwareSPIOutput {
 	Selectable *m_pSelect;
 	bool mWait;

From 031d022d4ce8285f83c4e836defb105a8bf2f131 Mon Sep 17 00:00:00 2001
From: Daniel Garcia <dgarcia@dgarcia.net>
Date: Sun, 11 Aug 2019 20:49:58 -0700
Subject: [PATCH 079/204] Some tweaks to chipset definitions to help out the
 Teensy 4 implementation

---
 chipsets.h | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/chipsets.h b/chipsets.h
index 477842b38e..8e9051d5cf 100644
--- a/chipsets.h
+++ b/chipsets.h
@@ -398,10 +398,15 @@ class SM16716Controller : public CPixelLEDController<RGB_ORDER> {
 	void writeHeader() {
 		// Write out 50 zeros to the spi line (6 blocks of 8 followed by two single bit writes)
 		mSPI.select();
-		mSPI.writeBytesValueRaw(0, 6);
-		mSPI.waitFully();
 		mSPI.template writeBit<0>(0);
+		mSPI.writeByte(0);
+		mSPI.writeByte(0);
+		mSPI.writeByte(0);
 		mSPI.template writeBit<0>(0);
+		mSPI.writeByte(0);
+		mSPI.writeByte(0);
+		mSPI.writeByte(0);
+		mSPI.waitFully();
 		mSPI.release();
 	}
 
@@ -524,7 +529,13 @@ class PL9823Controller : public ClocklessController<DATA_PIN, 3 * FMUL, 8 * FMUL
 
 // Similar to NS() macro, this calculates the number of cycles for
 // the clockless chipset (which may differ from CPU cycles)
+
+#ifdef FASTLED_TEENSY4
+// just use raw nanosecond values for the teensy4
+#define C_NS(_NS) _NS
+#else
 #define C_NS(_NS) (((_NS * ((CLOCKLESS_FREQUENCY / 1000000L)) + 999)) / 1000)
+#endif
 
 // GE8822 - 350ns 660ns 350ns
 template <uint8_t DATA_PIN, EOrder RGB_ORDER = RGB>

From 0b337fb865a26dfa350b5220b0c54a3819edd82a Mon Sep 17 00:00:00 2001
From: Daniel Garcia <dgarcia@dgarcia.net>
Date: Sun, 11 Aug 2019 20:50:38 -0700
Subject: [PATCH 080/204] Updating the pintest program w/Teensy 4 defs

---
 examples/Pintest/Pintest.ino | 49 +++++++++++++++++++++++++++---------
 1 file changed, 37 insertions(+), 12 deletions(-)

diff --git a/examples/Pintest/Pintest.ino b/examples/Pintest/Pintest.ino
index a63f0d46ff..f0a0dadc43 100644
--- a/examples/Pintest/Pintest.ino
+++ b/examples/Pintest/Pintest.ino
@@ -1,7 +1,10 @@
 
-#include <FastSPI_LED.h>
+#include <FastLED.h>
+
+char fullstrBuffer[64];
 
 const char *getPort(void *portPtr) {
+// AVR port checks
 #ifdef PORTA
 	if(portPtr == (void*)&PORTA) { return "PORTA"; }
 #endif
@@ -38,6 +41,8 @@ const char *getPort(void *portPtr) {
 #ifdef PORTL
 	if(portPtr == (void*)&PORTL) { return "PORTL"; }
 #endif
+
+// Teensy 3.x port checks
 #ifdef GPIO_A_PDOR
 	if(portPtr == (void*)&GPIO_A_PDOR) { return "GPIO_A_PDOR"; }
 #endif
@@ -65,7 +70,24 @@ const char *getPort(void *portPtr) {
 #ifdef REG_PIO_D_ODSR
 	if(portPtr == (void*)&REG_PIO_D_ODSR) { return "REG_PIO_D_ODSR"; }
 #endif
-	return "unknown";
+
+// Teensy 4 port checks
+#ifdef GPIO1_DR
+	if(portPtr == (void*)&GPIO1_DR) { return "GPIO1_DR"; }
+#endif
+#ifdef GPIO2_DR
+	if(portPtr == (void*)&GPIO2_DR) { return "GPIO2_DR"; }
+#endif
+#ifdef GPIO3_DR
+	if(portPtr == (void*)&GPIO3_DR) { return "GPIO3_DR"; }
+#endif
+#ifdef GPIO4_DR
+	if(portPtr == (void*)&GPIO4_DR) { return "GPIO4_DR"; }
+#endif
+	String unknown_str = "Unknown: " + String((size_t)portPtr, HEX); // no known port matched: report the raw address instead
+	strncpy(fullstrBuffer, unknown_str.c_str(), sizeof(fullstrBuffer)-1); // bound by the buffer, not the source; strncpy zero-pads the remainder
+	fullstrBuffer[sizeof(fullstrBuffer)-1] = '\0';
+	return fullstrBuffer;
 }
 
 template<uint8_t PIN> void CheckPin()
@@ -74,32 +96,35 @@ template<uint8_t PIN> void CheckPin()
 
 	RwReg *systemThinksPortIs = portOutputRegister(digitalPinToPort(PIN));
 	RwReg systemThinksMaskIs = digitalPinToBitMask(PIN);
-	
+
 	Serial.print("Pin "); Serial.print(PIN); Serial.print(": Port ");
-	
-	if(systemThinksPortIs == FastPin<PIN>::port()) { 
+
+	if(systemThinksPortIs == FastPin<PIN>::port()) {
 		Serial.print("valid & mask ");
-	} else { 
-		Serial.print("invalid, is "); Serial.print(getPort((void*)FastPin<PIN>::port())); Serial.print(" should be "); 
+	} else {
+		Serial.print("invalid, is "); Serial.print(getPort((void*)FastPin<PIN>::port())); Serial.print(" should be ");
 		Serial.print(getPort((void*)systemThinksPortIs));
 		Serial.print(" & mask ");
 	}
 
 	if(systemThinksMaskIs == FastPin<PIN>::mask()) {
 		Serial.println("valid.");
-	} else { 
+	} else {
 		Serial.print("invalid, is "); Serial.print(FastPin<PIN>::mask()); Serial.print(" should be "); Serial.println(systemThinksMaskIs);
 	}
-}	
+}
 
 template<> void CheckPin<-1> () {}
 
-void setup() { 
+void setup() {
+	delay(5000);
     Serial.begin(38400);
     Serial.println("resetting!");
 }
 
-void loop() { 
+void loop() {
 	CheckPin<MAX_PIN>();
-	delay(10000);
+	delay(100000);
+
+	Serial.print("GPIO_1_DR is: "); Serial.print(getPort((void*)&(GPIO1_DR)));
 }

From 3f2f9121d591751ecb20b8725dbdcf79d2bd833b Mon Sep 17 00:00:00 2001
From: Daniel Garcia <dgarcia@dgarcia.net>
Date: Sun, 11 Aug 2019 20:53:44 -0700
Subject: [PATCH 081/204] Preliminary Teensy 4 support, including hardware SPI
 and clockless chipsets - no support for parallel output or DMA'd output yet -
 also not fully tested for all chipsets on all pins, but smoke tested with
 some chipsets and pin combinations and logic analyzer in the meantime

---
 fastspi.h                                     |  15 ++
 fastspi_bitbang.h                             |  16 +-
 led_sysdefs.h                                 |   3 +
 platforms.h                                   |   3 +
 .../arm/mxrt1062/clockless_arm_mxrt1062.h     | 115 ++++++++++++++
 platforms/arm/mxrt1062/fastled_arm_mxrt1062.h |   7 +
 platforms/arm/mxrt1062/fastpin_arm_mxrt1062.h |  90 +++++++++++
 platforms/arm/mxrt1062/fastspi_arm_mxrt1062.h | 140 ++++++++++++++++++
 .../arm/mxrt1062/led_sysdefs_arm_mxrt1062.h   |  43 ++++++
 release_notes.md                              |   4 +
 10 files changed, 431 insertions(+), 5 deletions(-)
 create mode 100644 platforms/arm/mxrt1062/clockless_arm_mxrt1062.h
 create mode 100644 platforms/arm/mxrt1062/fastled_arm_mxrt1062.h
 create mode 100644 platforms/arm/mxrt1062/fastpin_arm_mxrt1062.h
 create mode 100644 platforms/arm/mxrt1062/fastspi_arm_mxrt1062.h
 create mode 100644 platforms/arm/mxrt1062/led_sysdefs_arm_mxrt1062.h

diff --git a/fastspi.h b/fastspi.h
index bf6709a9ff..38e8eabf07 100644
--- a/fastspi.h
+++ b/fastspi.h
@@ -13,6 +13,10 @@ FASTLED_NAMESPACE_BEGIN
 #if defined(FASTLED_TEENSY3) && (F_CPU > 48000000)
 #define DATA_RATE_MHZ(X) (((48000000L / 1000000L) / X))
 #define DATA_RATE_KHZ(X) (((48000000L / 1000L) / X))
+#elif defined(FASTLED_TEENSY4) // && (ARM_HARDWARE_SPI)
+// just use clocks
+#define DATA_RATE_MHZ(X) (1000000 * (X))
+#define DATA_RATE_KHZ(X) (1000 * (X))
 #else
 #define DATA_RATE_MHZ(X) ((F_CPU / 1000000L) / X)
 #define DATA_RATE_KHZ(X) ((F_CPU / 1000L) / X)
@@ -64,6 +68,17 @@ template<uint32_t SPI_SPEED>
 class SPIOutput<SPI2_DATA, SPI_CLOCK, SPI_SPEED> : public ARMHardwareSPIOutput<SPI2_DATA, SPI_CLOCK, SPI_SPEED, 0x4002C000> {};
 #endif
 
+#elif defined(FASTLED_TEENSY4) && defined(ARM_HARDWARE_SPI)
+
+template<uint32_t SPI_SPEED>
+class SPIOutput<SPI_DATA, SPI_CLOCK, SPI_SPEED> : public Teesy4HardwareSPIOutput<SPI_DATA, SPI_CLOCK, SPI_SPEED, SPI, 0> {};
+
+template<uint32_t SPI_SPEED>
+class SPIOutput<SPI1_DATA, SPI1_CLOCK, SPI_SPEED> : public Teesy4HardwareSPIOutput<SPI1_DATA, SPI1_CLOCK, SPI_SPEED, SPI1, 1> {}; // hardware SPI1: must be keyed on the SPI1 data/clock pin pair
+
+template<uint32_t SPI_SPEED>
+class SPIOutput<SPI2_DATA, SPI2_CLOCK, SPI_SPEED> : public Teesy4HardwareSPIOutput<SPI2_DATA, SPI2_CLOCK, SPI_SPEED, SPI2, 2> {};
+
 #elif defined(FASTLED_TEENSYLC) && defined(ARM_HARDWARE_SPI)
 
 #define DECLARE_SPI0(__DATA,__CLOCK) template<uint32_t SPI_SPEED>\
diff --git a/fastspi_bitbang.h b/fastspi_bitbang.h
index 292c8ecd2b..70795e8b3c 100644
--- a/fastspi_bitbang.h
+++ b/fastspi_bitbang.h
@@ -113,10 +113,16 @@ class AVRSoftwareSPIOutput {
 public:
 
 	// We want to make sure that the clock pulse is held high for a nininum of 35ns.
+#if defined(FASTLED_TEENSY4)
+	#define DELAY_NS (1000000000UL / (SPI_SPEED)) // bit period in ns; SPI_SPEED is a rate in Hz here, so divide directly to stay valid (non-zero divisor) below 1MHz
+	#define CLOCK_HI_DELAY do { delayNanoseconds((DELAY_NS/4)); } while(0);
+	#define CLOCK_LO_DELAY do { delayNanoseconds((DELAY_NS/4)); } while(0);
+#else
 	#define MIN_DELAY (NS(35) - 3)
 
-  #define CLOCK_HI_DELAY delaycycles<MIN_DELAY>(); delaycycles<(((SPI_SPEED-6) / 2) - MIN_DELAY)>();
-	#define CLOCK_LO_DELAY delaycycles<(((SPI_SPEED-6) / 4))>();
+	#define CLOCK_HI_DELAY do { delaycycles<MIN_DELAY>(); delaycycles<(((SPI_SPEED-6) / 2) - MIN_DELAY)>(); } while(0);
+	#define CLOCK_LO_DELAY do { delaycycles<(((SPI_SPEED-6) / 4))>(); } while(0);
+#endif
 
 	// write the BIT'th bit out via spi, setting the data pin then strobing the clcok
 	template <uint8_t BIT> __attribute__((always_inline, hot)) inline static void writeBit(uint8_t b) {
@@ -126,8 +132,8 @@ class AVRSoftwareSPIOutput {
 #ifdef ESP32
 			// try to ensure we never have adjacent write opcodes to the same register
 			FastPin<CLOCK_PIN>::lo();
-			FastPin<CLOCK_PIN>::hi(); CLOCK_HI_DELAY; 
-			FastPin<CLOCK_PIN>::toggle(); CLOCK_LO_DELAY; 
+			FastPin<CLOCK_PIN>::hi(); CLOCK_HI_DELAY;
+			FastPin<CLOCK_PIN>::toggle(); CLOCK_LO_DELAY;
 #else
 			FastPin<CLOCK_PIN>::hi(); CLOCK_HI_DELAY;
 			FastPin<CLOCK_PIN>::lo(); CLOCK_LO_DELAY;
@@ -137,7 +143,7 @@ class AVRSoftwareSPIOutput {
 			FastPin<CLOCK_PIN>::hi(); CLOCK_HI_DELAY;
 #ifdef ESP32
 			// try to ensure we never have adjacent write opcodes to the same register
-			FastPin<CLOCK_PIN>::toggle(); CLOCK_HI_DELAY; 
+			FastPin<CLOCK_PIN>::toggle(); CLOCK_HI_DELAY;
 #else
 			FastPin<CLOCK_PIN>::lo(); CLOCK_LO_DELAY;
 #endif
diff --git a/led_sysdefs.h b/led_sysdefs.h
index 7abcd15e49..27da24a043 100644
--- a/led_sysdefs.h
+++ b/led_sysdefs.h
@@ -18,6 +18,9 @@
 #elif defined(__MKL26Z64__)
 // Include kl26/T-LC headers
 #include "platforms/arm/kl26/led_sysdefs_arm_kl26.h"
+#elif defined(__IMXRT1062__)
+// teensy4
+#include "platforms/arm/mxrt1062/led_sysdefs_arm_mxrt1062.h"
 #elif defined(__SAM3X8E__)
 // Include sam/due headers
 #include "platforms/arm/sam/led_sysdefs_arm_sam.h"
diff --git a/platforms.h b/platforms.h
index 82d7d99385..f66599fd3f 100644
--- a/platforms.h
+++ b/platforms.h
@@ -18,6 +18,9 @@
 #elif defined(__MKL26Z64__)
 // Include kl26/T-LC headers
 #include "platforms/arm/kl26/fastled_arm_kl26.h"
+#elif defined(__IMXRT1062__)
+// teensy4
+#include "platforms/arm/mxrt1062/fastled_arm_mxrt1062.h"
 #elif defined(__SAM3X8E__)
 // Include sam/due headers
 #include "platforms/arm/sam/fastled_arm_sam.h"
diff --git a/platforms/arm/mxrt1062/clockless_arm_mxrt1062.h b/platforms/arm/mxrt1062/clockless_arm_mxrt1062.h
new file mode 100644
index 0000000000..ce0d972e3d
--- /dev/null
+++ b/platforms/arm/mxrt1062/clockless_arm_mxrt1062.h
@@ -0,0 +1,115 @@
+#ifndef __INC_CLOCKLESS_ARM_MXRT1062_H
+#define __INC_CLOCKLESS_ARM_MXRT1062_H
+
+FASTLED_NAMESPACE_BEGIN
+
+// Definition for a single channel clockless controller for the teensy4
+// See clockless.h for detailed info on how the template parameters are used.
+#if defined(FASTLED_TEENSY4)
+
+#define FASTLED_HAS_CLOCKLESS 1
+
+#define _FASTLED_NS_TO_DWT(_NS) (((F_CPU_ACTUAL>>16)*(_NS)) / (1000000000UL>>16))
+
+template <int DATA_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = RGB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 50>
+class ClocklessController : public CPixelLEDController<RGB_ORDER> {
+	typedef typename FastPin<DATA_PIN>::port_ptr_t data_ptr_t;
+	typedef typename FastPin<DATA_PIN>::port_t data_t;
+
+	data_t mPinMask;
+	data_ptr_t mPort;
+	CMinWait<WAIT_TIME> mWait;
+public:
+	virtual void init() {
+		FastPin<DATA_PIN>::setOutput();
+		mPinMask = FastPin<DATA_PIN>::mask();
+		mPort = FastPin<DATA_PIN>::port();
+    FastPin<DATA_PIN>::lo();
+	}
+
+protected:
+
+	virtual void showPixels(PixelController<RGB_ORDER> & pixels) {
+    mWait.wait();
+		if(!showRGBInternal(pixels)) {
+      sei(); delayMicroseconds(WAIT_TIME); cli();
+      showRGBInternal(pixels);
+    }
+    mWait.mark();
+  }
+
+	template<int BITS> __attribute__ ((always_inline)) inline static void writeBits(register uint32_t & next_mark, register uint32_t off1, register uint32_t off2, register uint32_t off3, register uint32_t & b)  {
+		for(register uint32_t i = BITS-1; i > 0; i--) { // first BITS-1 bits; the final bit is written after the loop
+			while((int32_t)(ARM_DWT_CYCCNT - next_mark) < 0); // signed difference keeps the wait correct when the cycle counter wraps
+			next_mark = ARM_DWT_CYCCNT + off1;
+			FastPin<DATA_PIN>::hi();
+			if(b&0x80) {
+				while((next_mark - ARM_DWT_CYCCNT) > off2);
+				FastPin<DATA_PIN>::lo();
+			} else {
+				while((next_mark - ARM_DWT_CYCCNT) > off3);
+				FastPin<DATA_PIN>::lo();
+			}
+			b <<= 1;
+		}
+
+		while((int32_t)(ARM_DWT_CYCCNT - next_mark) < 0); // signed difference keeps the wait correct when the cycle counter wraps
+		next_mark = ARM_DWT_CYCCNT + off1;
+		FastPin<DATA_PIN>::hi();
+
+		if(b&0x80) {
+			while((next_mark - ARM_DWT_CYCCNT) > off2);
+			FastPin<DATA_PIN>::lo();
+		} else {
+			while((next_mark - ARM_DWT_CYCCNT) > off3);
+			FastPin<DATA_PIN>::lo();
+		}
+	}
+
+	uint32_t showRGBInternal(PixelController<RGB_ORDER> pixels) {
+		// Setup the pixel controller and load/scale the first byte
+		pixels.preStepFirstByteDithering();
+		register uint32_t b = pixels.loadAndScale0();
+
+		cli();
+    uint32_t off1 = _FASTLED_NS_TO_DWT(T1+T2+T3);
+    uint32_t off2 = _FASTLED_NS_TO_DWT(T3);
+    uint32_t off3 = _FASTLED_NS_TO_DWT(T2+T3);
+    uint32_t wait_off = _FASTLED_NS_TO_DWT((WAIT_TIME-INTERRUPT_THRESHOLD)*1000); // WAIT_TIME is in microseconds (see delayMicroseconds use in showPixels); convert to ns first
+
+    uint32_t next_mark = ARM_DWT_CYCCNT + off1;
+
+		while(pixels.has(1)) {
+			pixels.stepDithering();
+			#if (FASTLED_ALLOW_INTERRUPTS == 1)
+			cli();
+			// if interrupts took longer than 45µs, punt on the current frame
+			if((int32_t)(ARM_DWT_CYCCNT - next_mark) > 0) { // signed difference: still correct when the cycle counter wraps
+				if((ARM_DWT_CYCCNT-next_mark) > wait_off) { sei(); return 0; }
+			}
+			#endif
+			// Write first byte, read next byte
+			writeBits<8+XTRA0>(next_mark, off1, off2, off3, b);
+			b = pixels.loadAndScale1();
+
+			// Write second byte, read 3rd byte
+			writeBits<8+XTRA0>(next_mark, off1, off2, off3, b);
+			b = pixels.loadAndScale2();
+
+			// Write third byte, read 1st byte of next pixel
+			writeBits<8+XTRA0>(next_mark, off1, off2, off3, b);
+			b = pixels.advanceAndLoadAndScale0();
+			#if (FASTLED_ALLOW_INTERRUPTS == 1)
+			sei();
+			#endif
+		};
+
+		sei();
+		return ARM_DWT_CYCCNT;
+	}
+};
+#endif
+
+FASTLED_NAMESPACE_END
+
+#endif
diff --git a/platforms/arm/mxrt1062/fastled_arm_mxrt1062.h b/platforms/arm/mxrt1062/fastled_arm_mxrt1062.h
new file mode 100644
index 0000000000..313ab0d386
--- /dev/null
+++ b/platforms/arm/mxrt1062/fastled_arm_mxrt1062.h
@@ -0,0 +1,7 @@
+#ifndef __INC_FASTLED_ARM_MXRT1062_H
+#define __INC_FASTLED_ARM_MXRT1062_H
+
+#include "fastpin_arm_mxrt1062.h"
+#include "fastspi_arm_mxrt1062.h"
+#include "clockless_arm_mxrt1062.h"
+#endif
diff --git a/platforms/arm/mxrt1062/fastpin_arm_mxrt1062.h b/platforms/arm/mxrt1062/fastpin_arm_mxrt1062.h
new file mode 100644
index 0000000000..bfb1cb47c8
--- /dev/null
+++ b/platforms/arm/mxrt1062/fastpin_arm_mxrt1062.h
@@ -0,0 +1,90 @@
+#ifndef __FASTPIN_ARM_MXRT1062_H
+#define __FASTPIN_ARM_MXRT1062_H
+
+FASTLED_NAMESPACE_BEGIN
+
+#if defined(FASTLED_FORCE_SOFTWARE_PINS)
+#warning "Software pin support forced, pin access will be slightly slower."
+#define NO_HARDWARE_PIN_SUPPORT
+#undef HAS_HARDWARE_PIN_SUPPORT
+
+#else
+
+/// Template definition for teensy 4.0 style ARM pins, providing direct access to the various GPIO registers.  Note that this
+/// uses the full port GPIO registers.  It calls through to pinMode for setting input/output on pins
+/// The registers are data output, set output, clear output, toggle output, input, and direction
+template<uint8_t PIN, uint32_t _MASK, typename _GPIO_DR, typename _GPIO_DR_SET, typename _GPIO_DR_CLEAR, typename _GPIO_DR_TOGGLE> class _ARMPIN {
+public:
+	typedef volatile uint32_t * port_ptr_t;
+	typedef uint32_t port_t;
+
+	inline static void setOutput() { pinMode(PIN, OUTPUT); } // TODO: perform MUX config { _PDDR::r() |= _MASK; }
+	inline static void setInput() { pinMode(PIN, INPUT); } // TODO: perform MUX config { _PDDR::r() &= ~_MASK; }
+
+	inline static void hi() __attribute__ ((always_inline)) { _GPIO_DR_SET::r() = _MASK; }
+	inline static void lo() __attribute__ ((always_inline)) { _GPIO_DR_CLEAR::r() = _MASK; }
+	inline static void set(register port_t val) __attribute__ ((always_inline)) { _GPIO_DR::r() = val; }
+
+	inline static void strobe() __attribute__ ((always_inline)) { toggle(); toggle(); }
+
+	inline static void toggle() __attribute__ ((always_inline)) { _GPIO_DR_TOGGLE::r() = _MASK; }
+
+	inline static void hi(register port_ptr_t port) __attribute__ ((always_inline)) { hi(); }
+	inline static void lo(register port_ptr_t port) __attribute__ ((always_inline)) { lo(); }
+	inline static void fastset(register port_ptr_t port, register port_t val) __attribute__ ((always_inline)) { *port = val; }
+
+	inline static port_t hival() __attribute__ ((always_inline)) { return _GPIO_DR::r() | _MASK; }
+	inline static port_t loval() __attribute__ ((always_inline)) { return _GPIO_DR::r() & ~_MASK; }
+	inline static port_ptr_t port() __attribute__ ((always_inline)) { return &_GPIO_DR::r(); }
+	inline static port_ptr_t sport() __attribute__ ((always_inline)) { return &_GPIO_DR_SET::r(); }
+	inline static port_ptr_t cport() __attribute__ ((always_inline)) { return &_GPIO_DR_CLEAR::r(); }
+	inline static port_t mask() __attribute__ ((always_inline)) { return _MASK; }
+};
+
+
+#define _R(T) struct __gen_struct_ ## T
+#define _RD32(T) struct __gen_struct_ ## T { static __attribute__((always_inline)) inline reg32_t r() { return T; } };
+#define _IO32(L) _RD32(GPIO ## L ## _DR); _RD32(GPIO ## L ## _DR_SET); _RD32(GPIO ## L ## _DR_CLEAR); _RD32(GPIO ## L ## _DR_TOGGLE);
+
+// From the teensy core - it looks like there's the "default set" of port registers at GPIO1-5 - but then there
+// are a mirrored set for GPIO1-4 at GPIO6-9, which in the teensy core is referred to as "fast" - while the pin definitions
+// at https://forum.pjrc.com/threads/54711-Teensy-4-0-First-Beta-Test?p=193716&viewfull=1#post193716
+// refer to GPIO1-4, we're going to use GPIO6-9 in the definitions below because the fast registers are what
+// the teensy core is using internally
+#define _DEFPIN_T4(PIN, L, BIT) template<> class FastPin<PIN> : public _ARMPIN<PIN, 1 << BIT, _R(GPIO ## L ## _DR), _R(GPIO ## L ## _DR_SET), _R(GPIO ## L ## _DR_CLEAR), _R(GPIO ## L ## _DR_TOGGLE)> {};
+
+#if defined(FASTLED_TEENSY4) && defined(CORE_TEENSY)
+_IO32(1); _IO32(2); _IO32(3); _IO32(4); _IO32(5);
+_IO32(6); _IO32(7); _IO32(8); _IO32(9);
+
+#define MAX_PIN 39
+_DEFPIN_T4( 0,6, 3); _DEFPIN_T4( 1,6, 2); _DEFPIN_T4( 2,9, 4); _DEFPIN_T4( 3,9, 5);
+_DEFPIN_T4( 4,9, 6); _DEFPIN_T4( 5,9, 8); _DEFPIN_T4( 6,7,10); _DEFPIN_T4( 7,7,17);
+_DEFPIN_T4( 8,7,16); _DEFPIN_T4( 9,7,11); _DEFPIN_T4(10,7, 0); _DEFPIN_T4(11,7, 2);
+_DEFPIN_T4(12,7, 1); _DEFPIN_T4(13,7, 3); _DEFPIN_T4(14,6,18); _DEFPIN_T4(15,6,19);
+_DEFPIN_T4(16,6,23); _DEFPIN_T4(17,6,22); _DEFPIN_T4(18,6,17); _DEFPIN_T4(19,6,16);
+_DEFPIN_T4(20,6,26); _DEFPIN_T4(21,6,27); _DEFPIN_T4(22,6,24); _DEFPIN_T4(23,6,25);
+_DEFPIN_T4(24,6,12); _DEFPIN_T4(25,6,13); _DEFPIN_T4(26,6,30); _DEFPIN_T4(27,6,31);
+_DEFPIN_T4(28,8,18); _DEFPIN_T4(29,9,31); _DEFPIN_T4(30,8,23); _DEFPIN_T4(31,8,22);
+_DEFPIN_T4(32,7,12); _DEFPIN_T4(33,9, 7); _DEFPIN_T4(34,8,15); _DEFPIN_T4(35,8,14);
+_DEFPIN_T4(36,8,13); _DEFPIN_T4(37,8,12); _DEFPIN_T4(38,8,17); _DEFPIN_T4(39,8,16);
+
+#define HAS_HARDWARE_PIN_SUPPORT
+
+#define ARM_HARDWARE_SPI
+#define SPI_DATA 11
+#define SPI_CLOCK 13
+
+#define SPI1_DATA 26
+#define SPI1_CLOCK 27
+
+#define SPI2_DATA 35
+#define SPI2_CLOCK 37
+
+#endif // defined FASTLED_TEENSY4
+
+#endif // FASTLED_FORCE_SOFTWARE_PINS
+
+FASTLED_NAMESPACE_END
+
+#endif
diff --git a/platforms/arm/mxrt1062/fastspi_arm_mxrt1062.h b/platforms/arm/mxrt1062/fastspi_arm_mxrt1062.h
new file mode 100644
index 0000000000..fa6b81ff4a
--- /dev/null
+++ b/platforms/arm/mxrt1062/fastspi_arm_mxrt1062.h
@@ -0,0 +1,140 @@
+#ifndef __INC_FASTSPI_ARM_MXRT1062_H
+#define __INC_FASTSPI_ARM_MXRT1062_H
+
+FASTLED_NAMESPACE_BEGIN
+
+#if defined (FASTLED_TEENSY4) && defined(ARM_HARDWARE_SPI)
+#include <SPI.h>
+
+template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint32_t _SPI_CLOCK_RATE, SPIClass & _SPIObject, int _SPI_INDEX>
+class Teesy4HardwareSPIOutput {
+	Selectable *m_pSelect;
+  uint32_t  m_bitCount;
+  uint32_t m_bitData;
+  inline IMXRT_LPSPI_t & port() __attribute__((always_inline)) { // maps the _SPI_INDEX template parameter to its LPSPI register block
+    switch(_SPI_INDEX) {
+      case 1:
+        return IMXRT_LPSPI3_S;
+      case 2:
+        return IMXRT_LPSPI1_S;
+      default: // index 0; also guarantees every path returns a reference
+        return IMXRT_LPSPI4_S;
+    }
+  }
+
+public:
+	Teesy4HardwareSPIOutput() { m_pSelect = NULL; m_bitCount = 0;}
+	Teesy4HardwareSPIOutput(Selectable *pSelect) { m_pSelect = pSelect; m_bitCount = 0;}
+
+	// set the object representing the selectable -- ignore for now
+	void setSelect(Selectable *pSelect) { /* TODO */ }
+
+	// initialize the SPI subsystem
+	void init() { _SPIObject.begin(); }
+
+	// latch the CS select
+	void inline select() __attribute__((always_inline)) {
+    // begin the SPI transaction
+    _SPIObject.beginTransaction(SPISettings(_SPI_CLOCK_RATE, MSBFIRST, SPI_MODE0));
+    if(m_pSelect != NULL) { m_pSelect->select(); }
+  }
+
+	// release the CS select
+	void inline release() __attribute__((always_inline)) {
+    if(m_pSelect != NULL) { m_pSelect->release(); }
+    _SPIObject.endTransaction();
+  }
+
+	// wait until all queued up data has been written
+	static void waitFully() { /* TODO */ }
+
+	// write a byte out via SPI (returns immediately on writing register) -
+	void inline writeByte(uint8_t b) __attribute__((always_inline)) {
+    if(m_bitCount == 0) {
+      _SPIObject.transfer(b);
+    } else {
+      // There's been a bit of data written, add that to the output as well
+      uint32_t outData = (m_bitData << 8) | b;
+      uint32_t tcr = port().TCR;
+      port().TCR = (tcr & 0xfffff000) | LPSPI_TCR_FRAMESZ((8+m_bitCount) - 1);  // turn on 9 bit mode
+      port().TDR = outData;		// output 9 bit data.
+      while ((port().RSR & LPSPI_RSR_RXEMPTY)) ;	// wait while the RSR fifo is empty...
+			port().TCR = (tcr & 0xfffff000) | LPSPI_TCR_FRAMESZ((8) - 1);  // turn back on 8 bit mode
+      port().RDR;
+      m_bitCount = 0;
+    }
+  }
+
+	// write a word out via SPI (returns immediately on writing register)
+	void inline writeWord(uint16_t w) __attribute__((always_inline)) {
+    writeByte(((w>>8) & 0xFF));
+    _SPIObject.transfer(w & 0xFF);
+  }
+
+	// A raw set of writing byte values, assumes setup/init/waiting done elsewhere
+	static void writeBytesValueRaw(uint8_t value, int len) {
+		while(len--) { _SPIObject.transfer(value); }
+	}
+
+	// A full cycle of writing a value for len bytes, including select, release, and waiting
+	void writeBytesValue(uint8_t value, int len) {
+		select(); writeBytesValueRaw(value, len); release();
+	}
+
+	// A full cycle of writing a value for len bytes, including select, release, and waiting
+	template <class D> void writeBytes(register uint8_t *data, int len) {
+		uint8_t *end = data + len;
+		select();
+		// could be optimized to write 16bit words out instead of 8bit bytes
+		while(data != end) {
+			writeByte(D::adjust(*data++));
+		}
+		D::postBlock(len);
+		waitFully();
+		release();
+	}
+
+	// A full cycle of writing a value for len bytes, including select, release, and waiting
+	void writeBytes(register uint8_t *data, int len) { writeBytes<DATA_NOP>(data, len); }
+
+	// write a single bit out, which bit from the passed in byte is determined by template parameter
+	template <uint8_t BIT> inline void writeBit(uint8_t b) {
+    m_bitData = (m_bitData<<1) | ((b&(1<<BIT)) != 0);
+    // If this is the 8th bit we've collected, just write it out raw
+    register uint32_t bc = m_bitCount;
+    bc = (bc + 1) & 0x07;
+    if (!bc) {
+      m_bitCount = 0;
+      _SPIObject.transfer(m_bitData);
+    }
+    m_bitCount = bc;
+  }
+
+	// write a block of uint8_ts out in groups of three.  len is the total number of uint8_ts to write out.  The template
+	// parameters indicate how many uint8_ts to skip at the beginning and/or end of each grouping
+	template <uint8_t FLAGS, class D, EOrder RGB_ORDER> void writePixels(PixelController<RGB_ORDER> pixels) {
+		select();
+    int len = pixels.mLen;
+
+		while(pixels.has(1)) {
+			if(FLAGS & FLAG_START_BIT) {
+				writeBit<0>(1);
+			}
+			writeByte(D::adjust(pixels.loadAndScale0()));
+			writeByte(D::adjust(pixels.loadAndScale1()));
+			writeByte(D::adjust(pixels.loadAndScale2()));
+
+			pixels.advanceData();
+			pixels.stepDithering();
+		}
+		D::postBlock(len);
+		release();
+	}
+
+};
+
+
+#endif
+
+FASTLED_NAMESPACE_END
+#endif
diff --git a/platforms/arm/mxrt1062/led_sysdefs_arm_mxrt1062.h b/platforms/arm/mxrt1062/led_sysdefs_arm_mxrt1062.h
new file mode 100644
index 0000000000..ac4908254c
--- /dev/null
+++ b/platforms/arm/mxrt1062/led_sysdefs_arm_mxrt1062.h
@@ -0,0 +1,43 @@
+#ifndef __INC_LED_SYSDEFS_ARM_MXRT1062_H
+#define __INC_LED_SYSDEFS_ARM_MXRT1062_H
+
+#define FASTLED_TEENSY4
+#define FASTLED_ARM
+
+#ifndef INTERRUPT_THRESHOLD
+#define INTERRUPT_THRESHOLD 1
+#endif
+
+// Default to allowing interrupts
+#ifndef FASTLED_ALLOW_INTERRUPTS
+#define FASTLED_ALLOW_INTERRUPTS 1
+#endif
+
+#if FASTLED_ALLOW_INTERRUPTS == 1
+#define FASTLED_ACCURATE_CLOCK
+#endif
+
+#if (F_CPU == 96000000)
+#define CLK_DBL 1
+#endif
+
+// Get some system include files
+#include <avr/io.h>
+#include <avr/interrupt.h> // for cli/sei definitions
+
+// Define the register types
+#if defined(ARDUINO) // && ARDUINO < 150
+typedef volatile       uint32_t RoReg; /**< Read only 32-bit register (note: the typedef itself is volatile but not const-qualified) */
+typedef volatile       uint32_t RwReg; /**< Read-Write 32-bit register (volatile uint32_t) */
+#endif
+
+// extern volatile uint32_t systick_millis_count;
+// #  define MS_COUNTER systick_millis_count
+
+// Teensy4 provides progmem
+#ifndef FASTLED_USE_PROGMEM
+#define FASTLED_USE_PROGMEM 1
+#endif
+
+
+#endif
diff --git a/release_notes.md b/release_notes.md
index 81d16f3318..925f054a29 100644
--- a/release_notes.md
+++ b/release_notes.md
@@ -1,3 +1,7 @@
+FastLED 3.2.11
+==============
+* Preliminary Teensy 4 support
+
 FastLED 3.2.10
 ==============
 * Adafruit Metro M4 Airlift support

From d55fea29265f5915e194ff65e6ece1dde5ebbff9 Mon Sep 17 00:00:00 2001
From: Daniel Garcia <dgarcia@dgarcia.net>
Date: Mon, 12 Aug 2019 00:05:32 -0700
Subject: [PATCH 082/204] Checking in initial block clockless output - it
 compiles, but no testing yet, so it shouldn't be hooked up anywhere yet.

---
 .../mxrt1062/block_clockless_arm_mxrt1062.h   | 214 ++++++++++++++++++
 .../arm/mxrt1062/clockless_arm_mxrt1062.h     |  45 ++--
 platforms/arm/mxrt1062/fastled_arm_mxrt1062.h |   2 +
 platforms/arm/mxrt1062/fastpin_arm_mxrt1062.h |   5 +-
 4 files changed, 248 insertions(+), 18 deletions(-)
 create mode 100644 platforms/arm/mxrt1062/block_clockless_arm_mxrt1062.h

diff --git a/platforms/arm/mxrt1062/block_clockless_arm_mxrt1062.h b/platforms/arm/mxrt1062/block_clockless_arm_mxrt1062.h
new file mode 100644
index 0000000000..de09087c2e
--- /dev/null
+++ b/platforms/arm/mxrt1062/block_clockless_arm_mxrt1062.h
@@ -0,0 +1,214 @@
+#ifndef __INC_BLOCK_CLOCKLESS_ARM_MXRT1062_H
+#define __INC_BLOCK_CLOCKLESS_ARM_MXRT1062_H
+
+FASTLED_NAMESPACE_BEGIN
+
+// Definition for a multi-lane (parallel output) block clockless controller for the teensy4
+// See clockless.h for detailed info on how the template parameters are used.
+#if defined(FASTLED_TEENSY4)
+
+#define __FL_T4_MASK ((1<<(LANES))-1)
+template <uint8_t LANES, int FIRST_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = GRB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 50>
+class InlineBlockClocklessController : public CPixelLEDController<RGB_ORDER, LANES, __FL_T4_MASK> {
+
+  uint8_t m_bitOffsets[16];
+  uint8_t m_nActualLanes;
+  uint8_t m_nLowBit;
+  uint8_t m_nHighBit;
+  uint32_t m_nWriteMask;
+  uint8_t m_nOutBlocks;
+  uint32_t m_offsets[3];
+  CMinWait<WAIT_TIME> mWait;
+public:
+
+  virtual int size() { return CLEDController::size() * m_nActualLanes; }
+
+// For each pin, if we've hit our lane count, break, otherwise set the pin to output,
+// store the bit offset in our offset array, add this pin to the write mask, and if this
+// pin ends a block sequence, then break out of the switch as well
+#define _BLOCK_PIN(P) case P: {                           \
+  if(m_nActualLanes == LANES) break;                      \
+  FastPin<P>::setOutput();                                \
+  m_bitOffsets[m_nActualLanes++] = FastPin<P>::pinbit();  \
+  m_nWriteMask |= FastPin<P>::mask();                     \
+  if( P == 27 || P == 7 || P == 30) break;                \
+}
+
+  virtual void init() {
+    // pre-initialize
+    memset(m_bitOffsets,0,16);
+    m_nActualLanes = 0;
+    m_nLowBit = 33;
+    m_nHighBit = 0;
+    m_nWriteMask = 0;
+
+    // setup the bits and data tracking for parallel output
+    switch(FIRST_PIN) {
+      // GPIO6 block output
+      _BLOCK_PIN( 1);
+			_BLOCK_PIN( 0);
+			_BLOCK_PIN(24);
+			_BLOCK_PIN(25);
+			_BLOCK_PIN(19);
+			_BLOCK_PIN(18);
+			_BLOCK_PIN(14);
+			_BLOCK_PIN(15);
+			_BLOCK_PIN(17);
+			_BLOCK_PIN(16);
+			_BLOCK_PIN(22);
+			_BLOCK_PIN(23);
+			_BLOCK_PIN(20);
+			_BLOCK_PIN(21);
+			_BLOCK_PIN(26);
+			_BLOCK_PIN(27);
+      // GPIO7 block output
+			_BLOCK_PIN(10);
+			_BLOCK_PIN(12);
+			_BLOCK_PIN(11);
+			_BLOCK_PIN(13);
+			_BLOCK_PIN( 6);
+			_BLOCK_PIN( 9);
+			_BLOCK_PIN(32);
+			_BLOCK_PIN( 8);
+			_BLOCK_PIN( 7);
+      // GPIO 37 block output
+			_BLOCK_PIN(37);
+			_BLOCK_PIN(36);
+			_BLOCK_PIN(35);
+			_BLOCK_PIN(34);
+			_BLOCK_PIN(39);
+			_BLOCK_PIN(38);
+			_BLOCK_PIN(28);
+			_BLOCK_PIN(31);
+			_BLOCK_PIN(30);
+    }
+
+    for(int i = 0; i < m_nActualLanes; i++) {
+      if(m_bitOffsets[i] < m_nLowBit) { m_nLowBit = m_bitOffsets[i]; }
+      if(m_bitOffsets[i] > m_nHighBit) { m_nHighBit = m_bitOffsets[i]; }
+    }
+
+    m_nOutBlocks = (m_nHighBit - m_nLowBit + 8)/8;
+
+    for(int i = 0; i < m_nActualLanes; i++) {
+      m_bitOffsets[i] -= m_nLowBit;
+    }
+  }
+
+
+  virtual void showPixels(PixelController<RGB_ORDER, LANES, __FL_T4_MASK> & pixels) {
+		mWait.wait();
+    #if FASTLED_ALLOW_INTERRUPTS == 0
+		uint32_t clocks = showRGBInternal(pixels);
+		// Adjust the timer
+		long microsTaken = CLKS_TO_MICROS(clocks);
+		MS_COUNTER += (1 + (microsTaken / 1000));
+		#else
+      showRGBInternal(pixels);
+    #endif
+
+		mWait.mark();
+	}
+
+  typedef union {
+    uint8_t bytes[32];
+    uint8_t bg[4][8];
+    uint16_t shorts[16];
+    uint32_t raw[8];
+  } _outlines;
+
+
+  template<int BITS,int PX> __attribute__ ((always_inline)) inline void writeBits(register uint32_t & next_mark, register _outlines & b, PixelController<RGB_ORDER, LANES, __FL_T4_MASK> &pixels) {
+    _outlines b2;
+    switch(m_nOutBlocks) {
+      case 3: transpose8x1_noinline(b.bg[3], b2.bg[3]);
+      case 2: transpose8x1_noinline(b.bg[2], b2.bg[2]);
+      case 1: transpose8x1_noinline(b.bg[1], b2.bg[1]);
+      case 0: transpose8x1_noinline(b.bg[0], b2.bg[0]);
+    }
+
+    register uint8_t d = pixels.template getd<PX>(pixels);
+    register uint8_t scale = pixels.template getscale<PX>(pixels);
+
+    int x = 0;
+    for(uint32_t i = 8; i > 0;) {
+      i--;
+      while(ARM_DWT_CYCCNT < next_mark);
+      next_mark = ARM_DWT_CYCCNT + m_offsets[0];
+      *FastPin<FIRST_PIN>::sport() = m_nWriteMask;
+
+      uint32_t out = (b2.bg[3][i] << 24) | (b2.bg[2][i] << 16) | (b2.bg[1][i] << 8) | b2.bg[0][i];
+
+      out <<= m_nLowBit;
+
+      while((next_mark - ARM_DWT_CYCCNT) > m_offsets[1]);
+      *FastPin<FIRST_PIN>::cport() = ((~out) & m_nWriteMask);
+
+      while((next_mark - ARM_DWT_CYCCNT) > m_offsets[2]);
+      *FastPin<FIRST_PIN>::cport() = m_nWriteMask;
+
+      // Read and store up to two bytes
+      if (x < m_nActualLanes) {
+        b.bytes[m_bitOffsets[x]] = pixels.template loadAndScale<PX>(pixels,x,d,scale);
+        x++;
+        if (x < m_nActualLanes) {
+          b.bytes[m_bitOffsets[x]] = pixels.template loadAndScale<PX>(pixels,x,d,scale);
+          x++;
+        }
+      }
+    }
+  }
+
+  uint32_t showRGBInternal(PixelController<RGB_ORDER,LANES, __FL_T4_MASK> &allpixels) {
+    allpixels.preStepFirstByteDithering();
+    _outlines b0;
+    uint32_t start = ARM_DWT_CYCCNT;
+
+    for(int i = 0; i < m_nActualLanes; i++) {
+      b0.bytes[m_bitOffsets[i]] = allpixels.loadAndScale0(i);
+    }
+
+    cli();
+    m_offsets[0] = _FASTLED_NS_TO_DWT(T1+T2+T3);
+    m_offsets[1] = _FASTLED_NS_TO_DWT(T3);
+    m_offsets[2] = _FASTLED_NS_TO_DWT(T2+T3);
+    uint32_t wait_off = _FASTLED_NS_TO_DWT((WAIT_TIME-INTERRUPT_THRESHOLD));
+
+    uint32_t next_mark = ARM_DWT_CYCCNT + m_offsets[0];
+
+    while(allpixels.has(1)) {
+      allpixels.stepDithering();
+      #if (FASTLED_ALLOW_INTERRUPTS == 1)
+			cli();
+			// if interrupts took longer than 45µs, punt on the current frame
+			if(ARM_DWT_CYCCNT > next_mark) {
+				if((ARM_DWT_CYCCNT-next_mark) > wait_off) { sei(); return ARM_DWT_CYCCNT - start; }
+			}
+			#endif
+
+			// Write first byte, read next byte
+			writeBits<8+XTRA0,1>(next_mark, b0, allpixels);
+
+			// Write second byte, read 3rd byte
+			writeBits<8+XTRA0,2>(next_mark, b0, allpixels);
+			allpixels.advanceData();
+
+			// Write third byte
+			writeBits<8+XTRA0,0>(next_mark, b0, allpixels);
+
+			#if (FASTLED_ALLOW_INTERRUPTS == 1)
+			sei();
+			#endif
+    }
+
+    sei();
+
+    return ARM_DWT_CYCCNT - start;
+  }
+};
+
+#endif //defined(FASTLED_TEENSY4)
+
+FASTLED_NAMESPACE_END
+
+#endif
diff --git a/platforms/arm/mxrt1062/clockless_arm_mxrt1062.h b/platforms/arm/mxrt1062/clockless_arm_mxrt1062.h
index ce0d972e3d..d9175f85db 100644
--- a/platforms/arm/mxrt1062/clockless_arm_mxrt1062.h
+++ b/platforms/arm/mxrt1062/clockless_arm_mxrt1062.h
@@ -19,7 +19,18 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 	data_t mPinMask;
 	data_ptr_t mPort;
 	CMinWait<WAIT_TIME> mWait;
+	uint32_t off[3];
+
 public:
+	static constexpr int __DATA_PIN() { return DATA_PIN; }
+	static constexpr int __T1() { return T1; }
+	static constexpr int __T2() { return T2; }
+	static constexpr int __T3() { return T3; }
+	static constexpr EOrder __RGB_ORDER() { return RGB_ORDER; }
+	static constexpr int __XTRA0() { return XTRA0; }
+	static constexpr bool __FLIP() { return FLIP; }
+	static constexpr int __WAIT_TIME() { return WAIT_TIME; }
+
 	virtual void init() {
 		FastPin<DATA_PIN>::setOutput();
 		mPinMask = FastPin<DATA_PIN>::mask();
@@ -38,46 +49,48 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
     mWait.mark();
   }
 
-	template<int BITS> __attribute__ ((always_inline)) inline static void writeBits(register uint32_t & next_mark, register uint32_t off1, register uint32_t off2, register uint32_t off3, register uint32_t & b)  {
+	template<int BITS> __attribute__ ((always_inline)) inline void writeBits(register uint32_t & next_mark, register uint32_t & b)  {
 		for(register uint32_t i = BITS-1; i > 0; i--) {
 			while(ARM_DWT_CYCCNT < next_mark);
-			next_mark = ARM_DWT_CYCCNT + off1;
+			next_mark = ARM_DWT_CYCCNT + off[0];
 			FastPin<DATA_PIN>::hi();
 			if(b&0x80) {
-				while((next_mark - ARM_DWT_CYCCNT) > off2);
+				while((next_mark - ARM_DWT_CYCCNT) > off[1]);
 				FastPin<DATA_PIN>::lo();
 			} else {
-				while((next_mark - ARM_DWT_CYCCNT) > off3);
+				while((next_mark - ARM_DWT_CYCCNT) > off[2]);
 				FastPin<DATA_PIN>::lo();
 			}
 			b <<= 1;
 		}
 
 		while(ARM_DWT_CYCCNT < next_mark);
-		next_mark = ARM_DWT_CYCCNT + off1;
+		next_mark = ARM_DWT_CYCCNT + off[1];
 		FastPin<DATA_PIN>::hi();
 
 		if(b&0x80) {
-			while((next_mark - ARM_DWT_CYCCNT) > off2);
+			while((next_mark - ARM_DWT_CYCCNT) > off[2]);
 			FastPin<DATA_PIN>::lo();
 		} else {
-			while((next_mark - ARM_DWT_CYCCNT) > off3);
+			while((next_mark - ARM_DWT_CYCCNT) > off[2]);
 			FastPin<DATA_PIN>::lo();
 		}
 	}
 
 	uint32_t showRGBInternal(PixelController<RGB_ORDER> pixels) {
+		uint32_t start = ARM_DWT_CYCCNT;
+
 		// Setup the pixel controller and load/scale the first byte
 		pixels.preStepFirstByteDithering();
 		register uint32_t b = pixels.loadAndScale0();
 
 		cli();
-    uint32_t off1 = _FASTLED_NS_TO_DWT(T1+T2+T3);
-    uint32_t off2 = _FASTLED_NS_TO_DWT(T3);
-    uint32_t off3 = _FASTLED_NS_TO_DWT(T2+T3);
+    off[0] = _FASTLED_NS_TO_DWT(T1+T2+T3);
+    off[1] = _FASTLED_NS_TO_DWT(T3);
+    off[2] = _FASTLED_NS_TO_DWT(T2+T3);
     uint32_t wait_off = _FASTLED_NS_TO_DWT((WAIT_TIME-INTERRUPT_THRESHOLD));
 
-    uint32_t next_mark = ARM_DWT_CYCCNT + off1;
+    uint32_t next_mark = ARM_DWT_CYCCNT + off[0];
 
 		while(pixels.has(1)) {
 			pixels.stepDithering();
@@ -85,19 +98,19 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 			cli();
 			// if interrupts took longer than 45µs, punt on the current frame
 			if(ARM_DWT_CYCCNT > next_mark) {
-				if((ARM_DWT_CYCCNT-next_mark) > wait_off) { sei(); return 0; }
+				if((ARM_DWT_CYCCNT-next_mark) > wait_off) { sei(); return ARM_DWT_CYCCNT - start; }
 			}
 			#endif
 			// Write first byte, read next byte
-			writeBits<8+XTRA0>(next_mark, off1, off2, off3, b);
+			writeBits<8+XTRA0>(next_mark, b);
 			b = pixels.loadAndScale1();
 
 			// Write second byte, read 3rd byte
-			writeBits<8+XTRA0>(next_mark, off1, off2, off3, b);
+			writeBits<8+XTRA0>(next_mark, b);
 			b = pixels.loadAndScale2();
 
 			// Write third byte, read 1st byte of next pixel
-			writeBits<8+XTRA0>(next_mark, off1, off2, off3, b);
+			writeBits<8+XTRA0>(next_mark, b);
 			b = pixels.advanceAndLoadAndScale0();
 			#if (FASTLED_ALLOW_INTERRUPTS == 1)
 			sei();
@@ -105,7 +118,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 		};
 
 		sei();
-		return ARM_DWT_CYCCNT;
+		return ARM_DWT_CYCCNT - start;
 	}
 };
 #endif
diff --git a/platforms/arm/mxrt1062/fastled_arm_mxrt1062.h b/platforms/arm/mxrt1062/fastled_arm_mxrt1062.h
index 313ab0d386..0814c7fad4 100644
--- a/platforms/arm/mxrt1062/fastled_arm_mxrt1062.h
+++ b/platforms/arm/mxrt1062/fastled_arm_mxrt1062.h
@@ -4,4 +4,6 @@
 #include "fastpin_arm_mxrt1062.h"
 #include "fastspi_arm_mxrt1062.h"
 #include "clockless_arm_mxrt1062.h"
+#include "block_clockless_arm_mxrt1062.h"
+
 #endif
diff --git a/platforms/arm/mxrt1062/fastpin_arm_mxrt1062.h b/platforms/arm/mxrt1062/fastpin_arm_mxrt1062.h
index bfb1cb47c8..e1b15674b7 100644
--- a/platforms/arm/mxrt1062/fastpin_arm_mxrt1062.h
+++ b/platforms/arm/mxrt1062/fastpin_arm_mxrt1062.h
@@ -13,7 +13,7 @@ FASTLED_NAMESPACE_BEGIN
 /// Template definition for teensy 4.0 style ARM pins, providing direct access to the various GPIO registers.  Note that this
 /// uses the full port GPIO registers.  It calls through to pinMode for setting input/output on pins
 /// The registers are data output, set output, clear output, toggle output, input, and direction
-template<uint8_t PIN, uint32_t _MASK, typename _GPIO_DR, typename _GPIO_DR_SET, typename _GPIO_DR_CLEAR, typename _GPIO_DR_TOGGLE> class _ARMPIN {
+template<uint8_t PIN, uint32_t _BIT, uint32_t _MASK, typename _GPIO_DR, typename _GPIO_DR_SET, typename _GPIO_DR_CLEAR, typename _GPIO_DR_TOGGLE> class _ARMPIN {
 public:
 	typedef volatile uint32_t * port_ptr_t;
 	typedef uint32_t port_t;
@@ -39,6 +39,7 @@ template<uint8_t PIN, uint32_t _MASK, typename _GPIO_DR, typename _GPIO_DR_SET,
 	inline static port_ptr_t sport() __attribute__ ((always_inline)) { return &_GPIO_DR_SET::r(); }
 	inline static port_ptr_t cport() __attribute__ ((always_inline)) { return &_GPIO_DR_CLEAR::r(); }
 	inline static port_t mask() __attribute__ ((always_inline)) { return _MASK; }
+  inline static uint32_t pinbit() __attribute__ ((always_inline)) { return _BIT; }
 };
 
 
@@ -51,7 +52,7 @@ template<uint8_t PIN, uint32_t _MASK, typename _GPIO_DR, typename _GPIO_DR_SET,
 // at https://forum.pjrc.com/threads/54711-Teensy-4-0-First-Beta-Test?p=193716&viewfull=1#post193716
 // refer to GPIO1-4, we're going to use GPIO6-9 in the definitions below because the fast registers are what
 // the teensy core is using internally
-#define _DEFPIN_T4(PIN, L, BIT) template<> class FastPin<PIN> : public _ARMPIN<PIN, 1 << BIT, _R(GPIO ## L ## _DR), _R(GPIO ## L ## _DR_SET), _R(GPIO ## L ## _DR_CLEAR), _R(GPIO ## L ## _DR_TOGGLE)> {};
+#define _DEFPIN_T4(PIN, L, BIT) template<> class FastPin<PIN> : public _ARMPIN<PIN, BIT, 1 << BIT, _R(GPIO ## L ## _DR), _R(GPIO ## L ## _DR_SET), _R(GPIO ## L ## _DR_CLEAR), _R(GPIO ## L ## _DR_TOGGLE)> {};
 
 #if defined(FASTLED_TEENSY4) && defined(CORE_TEENSY)
 _IO32(1); _IO32(2); _IO32(3); _IO32(4); _IO32(5);

From 5e4ca223a5ad16b2e8c647abd3c09cd7d1deaaea Mon Sep 17 00:00:00 2001
From: Daniel Garcia <dgarcia@dgarcia.net>
Date: Mon, 12 Aug 2019 10:36:36 -0700
Subject: [PATCH 083/204] Tweak and fix parallel output - still need to hook
 it up to the default addLeds setup

---
 .../mxrt1062/block_clockless_arm_mxrt1062.h   | 21 +++++++------------
 .../arm/mxrt1062/clockless_arm_mxrt1062.h     |  4 ++--
 2 files changed, 9 insertions(+), 16 deletions(-)

diff --git a/platforms/arm/mxrt1062/block_clockless_arm_mxrt1062.h b/platforms/arm/mxrt1062/block_clockless_arm_mxrt1062.h
index de09087c2e..3ff8dd2d77 100644
--- a/platforms/arm/mxrt1062/block_clockless_arm_mxrt1062.h
+++ b/platforms/arm/mxrt1062/block_clockless_arm_mxrt1062.h
@@ -88,11 +88,8 @@ class InlineBlockClocklessController : public CPixelLEDController<RGB_ORDER, LAN
       if(m_bitOffsets[i] > m_nHighBit) { m_nHighBit = m_bitOffsets[i]; }
     }
 
-    m_nOutBlocks = (m_nHighBit - m_nLowBit + 8)/8;
+    m_nOutBlocks = (m_nHighBit + 8)/8;
 
-    for(int i = 0; i < m_nActualLanes; i++) {
-      m_bitOffsets[i] -= m_nLowBit;
-    }
   }
 
 
@@ -120,12 +117,10 @@ class InlineBlockClocklessController : public CPixelLEDController<RGB_ORDER, LAN
 
   template<int BITS,int PX> __attribute__ ((always_inline)) inline void writeBits(register uint32_t & next_mark, register _outlines & b, PixelController<RGB_ORDER, LANES, __FL_T4_MASK> &pixels) {
     _outlines b2;
-    switch(m_nOutBlocks) {
-      case 3: transpose8x1_noinline(b.bg[3], b2.bg[3]);
-      case 2: transpose8x1_noinline(b.bg[2], b2.bg[2]);
-      case 1: transpose8x1_noinline(b.bg[1], b2.bg[1]);
-      case 0: transpose8x1_noinline(b.bg[0], b2.bg[0]);
-    }
+    transpose8x1(b.bg[3], b2.bg[3]);
+    transpose8x1(b.bg[2], b2.bg[2]);
+    transpose8x1(b.bg[1], b2.bg[1]);
+    transpose8x1(b.bg[0], b2.bg[0]);
 
     register uint8_t d = pixels.template getd<PX>(pixels);
     register uint8_t scale = pixels.template getscale<PX>(pixels);
@@ -139,8 +134,6 @@ class InlineBlockClocklessController : public CPixelLEDController<RGB_ORDER, LAN
 
       uint32_t out = (b2.bg[3][i] << 24) | (b2.bg[2][i] << 16) | (b2.bg[1][i] << 8) | b2.bg[0][i];
 
-      out <<= m_nLowBit;
-
       while((next_mark - ARM_DWT_CYCCNT) > m_offsets[1]);
       *FastPin<FIRST_PIN>::cport() = ((~out) & m_nWriteMask);
 
@@ -170,8 +163,8 @@ class InlineBlockClocklessController : public CPixelLEDController<RGB_ORDER, LAN
 
     cli();
     m_offsets[0] = _FASTLED_NS_TO_DWT(T1+T2+T3);
-    m_offsets[1] = _FASTLED_NS_TO_DWT(T3);
-    m_offsets[2] = _FASTLED_NS_TO_DWT(T2+T3);
+    m_offsets[1] = _FASTLED_NS_TO_DWT(T2+T3);
+    m_offsets[2] = _FASTLED_NS_TO_DWT(T3);
     uint32_t wait_off = _FASTLED_NS_TO_DWT((WAIT_TIME-INTERRUPT_THRESHOLD));
 
     uint32_t next_mark = ARM_DWT_CYCCNT + m_offsets[0];
diff --git a/platforms/arm/mxrt1062/clockless_arm_mxrt1062.h b/platforms/arm/mxrt1062/clockless_arm_mxrt1062.h
index d9175f85db..468c15ddfb 100644
--- a/platforms/arm/mxrt1062/clockless_arm_mxrt1062.h
+++ b/platforms/arm/mxrt1062/clockless_arm_mxrt1062.h
@@ -86,8 +86,8 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 
 		cli();
     off[0] = _FASTLED_NS_TO_DWT(T1+T2+T3);
-    off[1] = _FASTLED_NS_TO_DWT(T3);
-    off[2] = _FASTLED_NS_TO_DWT(T2+T3);
+    off[1] = _FASTLED_NS_TO_DWT(T2+T3);
+		off[2] = _FASTLED_NS_TO_DWT(T3);
     uint32_t wait_off = _FASTLED_NS_TO_DWT((WAIT_TIME-INTERRUPT_THRESHOLD));
 
     uint32_t next_mark = ARM_DWT_CYCCNT + off[0];

From 526f7f5eaca5310225dd10b5a9b2fd3b40ed5f02 Mon Sep 17 00:00:00 2001
From: Daniel Garcia <dgarcia@dgarcia.net>
Date: Mon, 12 Aug 2019 11:01:23 -0700
Subject: [PATCH 084/204] more kicking

---
 platforms/arm/mxrt1062/block_clockless_arm_mxrt1062.h | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/platforms/arm/mxrt1062/block_clockless_arm_mxrt1062.h b/platforms/arm/mxrt1062/block_clockless_arm_mxrt1062.h
index 3ff8dd2d77..6655ca793c 100644
--- a/platforms/arm/mxrt1062/block_clockless_arm_mxrt1062.h
+++ b/platforms/arm/mxrt1062/block_clockless_arm_mxrt1062.h
@@ -129,16 +129,18 @@ class InlineBlockClocklessController : public CPixelLEDController<RGB_ORDER, LAN
     for(uint32_t i = 8; i > 0;) {
       i--;
       while(ARM_DWT_CYCCNT < next_mark);
-      next_mark = ARM_DWT_CYCCNT + m_offsets[0];
       *FastPin<FIRST_PIN>::sport() = m_nWriteMask;
+      next_mark = ARM_DWT_CYCCNT + m_offsets[0];
 
       uint32_t out = (b2.bg[3][i] << 24) | (b2.bg[2][i] << 16) | (b2.bg[1][i] << 8) | b2.bg[0][i];
 
+      out = ((~out) & m_nWriteMask);
       while((next_mark - ARM_DWT_CYCCNT) > m_offsets[1]);
-      *FastPin<FIRST_PIN>::cport() = ((~out) & m_nWriteMask);
+      *FastPin<FIRST_PIN>::cport() = out;
 
+      out = m_nWriteMask;
       while((next_mark - ARM_DWT_CYCCNT) > m_offsets[2]);
-      *FastPin<FIRST_PIN>::cport() = m_nWriteMask;
+      *FastPin<FIRST_PIN>::cport() = out;
 
       // Read and store up to two bytes
       if (x < m_nActualLanes) {

From 1a40bd20209d12d89ed436459665494484f20b71 Mon Sep 17 00:00:00 2001
From: Daniel Garcia <dgarcia@dgarcia.net>
Date: Mon, 12 Aug 2019 18:39:58 -0700
Subject: [PATCH 085/204] Wiring parallel clockless support for the Teensy 4
 into FastLED.addLeds - updating parallel output example - time to merge back
 to master!

---
 FastLED.h                                         |  8 ++++++++
 .../ParallelOutputDemo/ParallelOutputDemo.ino     | 15 ++++++++++++---
 .../arm/mxrt1062/block_clockless_arm_mxrt1062.h   |  6 +++++-
 3 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/FastLED.h b/FastLED.h
index b4302662de..3903eaa687 100644
--- a/FastLED.h
+++ b/FastLED.h
@@ -315,6 +315,14 @@ class CFastLED {
 		return addLeds(&c, data, nLedsOrOffset, nLedsIfOffset);
 	}
 
+#if defined(__FASTLED_HAS_FIBCC) && (__FASTLED_HAS_FIBCC == 1)
+  template<uint8_t NUM_LANES, template<uint8_t DATA_PIN, EOrder RGB_ORDER> class CHIPSET, uint8_t DATA_PIN, EOrder RGB_ORDER=RGB>
+  static CLEDController &addLeds(struct CRGB *data, int nLeds) {
+    static __FIBCC<CHIPSET, DATA_PIN, NUM_LANES, RGB_ORDER> c;
+    return addLeds(&c, data, nLeds);
+  }
+#endif
+
 	#ifdef FASTSPI_USE_DMX_SIMPLE
 	template<EClocklessChipsets CHIPSET, uint8_t DATA_PIN, EOrder RGB_ORDER=RGB>
 	static CLEDController &addLeds(struct CRGB *data, int nLedsOrOffset, int nLedsIfOffset = 0)
diff --git a/examples/Multiple/ParallelOutputDemo/ParallelOutputDemo.ino b/examples/Multiple/ParallelOutputDemo/ParallelOutputDemo.ino
index fc7c9e7732..8c447b54d6 100644
--- a/examples/Multiple/ParallelOutputDemo/ParallelOutputDemo.ino
+++ b/examples/Multiple/ParallelOutputDemo/ParallelOutputDemo.ino
@@ -1,6 +1,6 @@
 #include <FastLED.h>
 
-#define NUM_LEDS_PER_STRIP 64
+#define NUM_LEDS_PER_STRIP 16
 // Note: this can be 12 if you're using a teensy 3 and don't mind soldering the pads on the back
 #define NUM_STRIPS 16
 
@@ -17,15 +17,24 @@ CRGB leds[NUM_STRIPS * NUM_LEDS_PER_STRIP];
 // WS2811_PORTD: 25,26,27,28,14,15,29,11
 //
 
+
+// IBCC<WS2811, 1, 16> outputs;
+
 void setup() {
+  delay(5000);
+  Serial.begin(57600);
+  Serial.println("Starting...");
   // LEDS.addLeds<WS2811_PORTA,NUM_STRIPS>(leds, NUM_LEDS_PER_STRIP);
   // LEDS.addLeds<WS2811_PORTB,NUM_STRIPS>(leds, NUM_LEDS_PER_STRIP);
   // LEDS.addLeds<WS2811_PORTD,NUM_STRIPS>(leds, NUM_LEDS_PER_STRIP).setCorrection(TypicalLEDStrip);
   LEDS.addLeds<WS2811_PORTDC,NUM_STRIPS>(leds, NUM_LEDS_PER_STRIP);
-  LEDS.setBrightness(32);
+
+  // Teensy 4 parallel output example
+  // LEDS.addLeds<NUM_STRIPS, WS2811, 1>(leds,NUM_LEDS_PER_STRIP);
 }
 
 void loop() {
+  Serial.println("Loop....");
   static uint8_t hue = 0;
   for(int i = 0; i < NUM_STRIPS; i++) {
     for(int j = 0; j < NUM_LEDS_PER_STRIP; j++) {
@@ -43,5 +52,5 @@ void loop() {
   hue++;
 
   LEDS.show();
-  LEDS.delay(10);
+  // LEDS.delay(100);
 }
diff --git a/platforms/arm/mxrt1062/block_clockless_arm_mxrt1062.h b/platforms/arm/mxrt1062/block_clockless_arm_mxrt1062.h
index 6655ca793c..5ee22458be 100644
--- a/platforms/arm/mxrt1062/block_clockless_arm_mxrt1062.h
+++ b/platforms/arm/mxrt1062/block_clockless_arm_mxrt1062.h
@@ -9,7 +9,7 @@ FASTLED_NAMESPACE_BEGIN
 
 #define __FL_T4_MASK ((1<<(LANES))-1)
 template <uint8_t LANES, int FIRST_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = GRB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 50>
-class InlineBlockClocklessController : public CPixelLEDController<RGB_ORDER, LANES, __FL_T4_MASK> {
+class FlexibleInlineBlockClocklessController : public CPixelLEDController<RGB_ORDER, LANES, __FL_T4_MASK> {
 
   uint8_t m_bitOffsets[16];
   uint8_t m_nActualLanes;
@@ -202,6 +202,10 @@ class InlineBlockClocklessController : public CPixelLEDController<RGB_ORDER, LAN
   }
 };
 
+template<template<uint8_t DATA_PIN, EOrder RGB_ORDER> class CHIPSET, uint8_t DATA_PIN, int NUM_LANES, EOrder RGB_ORDER=GRB>
+class __FIBCC : public FlexibleInlineBlockClocklessController<NUM_LANES,DATA_PIN,CHIPSET<DATA_PIN,RGB_ORDER>::__T1(),CHIPSET<DATA_PIN,RGB_ORDER>::__T2(),CHIPSET<DATA_PIN,RGB_ORDER>::__T3(),RGB_ORDER,CHIPSET<DATA_PIN,RGB_ORDER>::__XTRA0(),CHIPSET<DATA_PIN,RGB_ORDER>::__FLIP(),CHIPSET<DATA_PIN,RGB_ORDER>::__WAIT_TIME()> {};
+
+#define __FASTLED_HAS_FIBCC 1
 #endif //defined(FASTLED_TEENSY4)
 
 FASTLED_NAMESPACE_END

From 032ae7c606b918d7e135bffe62f49fbb8b4828ea Mon Sep 17 00:00:00 2001
From: Daniel Garcia <danielgarcia@gmail.com>
Date: Mon, 12 Aug 2019 18:42:12 -0700
Subject: [PATCH 086/204] Teensy 4 support! (#864)

* Pre-teensy4 work - with a 600Mhz clock, a 1Mhz clock was giving us a clock divider that overflowed a uint8_t - whoops...

* Some tweaks to chipset definitions to help out the Teensy 4 implementation

* Updating the pintest program w/Teensy 4 defs

* Preliminary Teensy 4 support, including hardware SPI and clockless chipsets - no support for parallel output or DMA'd output yet - also not fully tested for all chipsets on all pins, but smoke tested with some chipsets and pin combinations and logic analyzer in the meantime

* Checking in initial block clockless output - it compiles, but no testing yet, so it shouldn't be hooked up anywhere yet.

* Tweak and fix parallel output - still need to hook it up to the default addLeds setup

* more kicking
---
 chipsets.h                                    |  31 ++-
 examples/Pintest/Pintest.ino                  |  49 +++-
 fastspi.h                                     |  45 ++--
 fastspi_bitbang.h                             |  18 +-
 fastspi_nop.h                                 |   2 +-
 fastspi_ref.h                                 |   2 +-
 led_sysdefs.h                                 |   3 +
 platforms.h                                   |   3 +
 platforms/arm/k20/fastspi_arm_k20.h           |   2 +-
 platforms/arm/k66/fastspi_arm_k66.h           |   2 +-
 platforms/arm/kl26/fastspi_arm_kl26.h         |   2 +-
 .../mxrt1062/block_clockless_arm_mxrt1062.h   | 209 ++++++++++++++++++
 .../arm/mxrt1062/clockless_arm_mxrt1062.h     | 128 +++++++++++
 platforms/arm/mxrt1062/fastled_arm_mxrt1062.h |   9 +
 platforms/arm/mxrt1062/fastpin_arm_mxrt1062.h |  91 ++++++++
 platforms/arm/mxrt1062/fastspi_arm_mxrt1062.h | 140 ++++++++++++
 .../arm/mxrt1062/led_sysdefs_arm_mxrt1062.h   |  43 ++++
 platforms/arm/nrf51/fastspi_arm_nrf51.h       |   2 +-
 platforms/arm/nrf52/fastspi_arm_nrf52.h       |  10 +-
 platforms/arm/sam/fastspi_arm_sam.h           |   2 +-
 platforms/avr/fastspi_avr.h                   |   8 +-
 release_notes.md                              |   4 +
 22 files changed, 746 insertions(+), 59 deletions(-)
 create mode 100644 platforms/arm/mxrt1062/block_clockless_arm_mxrt1062.h
 create mode 100644 platforms/arm/mxrt1062/clockless_arm_mxrt1062.h
 create mode 100644 platforms/arm/mxrt1062/fastled_arm_mxrt1062.h
 create mode 100644 platforms/arm/mxrt1062/fastpin_arm_mxrt1062.h
 create mode 100644 platforms/arm/mxrt1062/fastspi_arm_mxrt1062.h
 create mode 100644 platforms/arm/mxrt1062/led_sysdefs_arm_mxrt1062.h

diff --git a/chipsets.h b/chipsets.h
index d452abfe87..8e9051d5cf 100644
--- a/chipsets.h
+++ b/chipsets.h
@@ -77,7 +77,7 @@ class PixieController : public CPixelLEDController<RGB_ORDER> {
 /// @tparam CLOCK_PIN the clock pin for these leds
 /// @tparam RGB_ORDER the RGB ordering for these leds
 /// @tparam SPI_SPEED the clock divider used for these leds.  Set using the DATA_RATE_MHZ/DATA_RATE_KHZ macros.  Defaults to DATA_RATE_MHZ(12)
-template <uint8_t DATA_PIN, uint8_t CLOCK_PIN, EOrder RGB_ORDER = RGB,  uint8_t SPI_SPEED = DATA_RATE_MHZ(12) >
+template <uint8_t DATA_PIN, uint8_t CLOCK_PIN, EOrder RGB_ORDER = RGB,  uint32_t SPI_SPEED = DATA_RATE_MHZ(12) >
 class LPD8806Controller : public CPixelLEDController<RGB_ORDER> {
 	typedef SPIOutput<DATA_PIN, CLOCK_PIN, SPI_SPEED> SPI;
 
@@ -118,7 +118,7 @@ class LPD8806Controller : public CPixelLEDController<RGB_ORDER> {
 /// @tparam CLOCK_PIN the clock pin for these leds
 /// @tparam RGB_ORDER the RGB ordering for these leds
 /// @tparam SPI_SPEED the clock divider used for these leds.  Set using the DATA_RATE_MHZ/DATA_RATE_KHZ macros.  Defaults to DATA_RATE_MHZ(1)
-template <uint8_t DATA_PIN, uint8_t CLOCK_PIN, EOrder RGB_ORDER = RGB, uint8_t SPI_SPEED = DATA_RATE_MHZ(1)>
+template <uint8_t DATA_PIN, uint8_t CLOCK_PIN, EOrder RGB_ORDER = RGB, uint32_t SPI_SPEED = DATA_RATE_MHZ(1)>
 class WS2801Controller : public CPixelLEDController<RGB_ORDER> {
 	typedef SPIOutput<DATA_PIN, CLOCK_PIN, SPI_SPEED> SPI;
 	SPI mSPI;
@@ -140,7 +140,7 @@ class WS2801Controller : public CPixelLEDController<RGB_ORDER> {
 	}
 };
 
-template <uint8_t DATA_PIN, uint8_t CLOCK_PIN, EOrder RGB_ORDER = RGB, uint8_t SPI_SPEED = DATA_RATE_MHZ(25)>
+template <uint8_t DATA_PIN, uint8_t CLOCK_PIN, EOrder RGB_ORDER = RGB, uint32_t SPI_SPEED = DATA_RATE_MHZ(25)>
 class WS2803Controller : public WS2801Controller<DATA_PIN, CLOCK_PIN, RGB_ORDER, SPI_SPEED> {};
 
 /// LPD6803 controller class (LPD1101).
@@ -151,7 +151,7 @@ class WS2803Controller : public WS2801Controller<DATA_PIN, CLOCK_PIN, RGB_ORDER,
 /// @tparam CLOCK_PIN the clock pin for these leds
 /// @tparam RGB_ORDER the RGB ordering for these leds
 /// @tparam SPI_SPEED the clock divider used for these leds.  Set using the DATA_RATE_MHZ/DATA_RATE_KHZ macros.  Defaults to DATA_RATE_MHZ(12)
-template <uint8_t DATA_PIN, uint8_t CLOCK_PIN, EOrder RGB_ORDER = RGB, uint8_t SPI_SPEED = DATA_RATE_MHZ(12)>
+template <uint8_t DATA_PIN, uint8_t CLOCK_PIN, EOrder RGB_ORDER = RGB, uint32_t SPI_SPEED = DATA_RATE_MHZ(12)>
 class LPD6803Controller : public CPixelLEDController<RGB_ORDER> {
 	typedef SPIOutput<DATA_PIN, CLOCK_PIN, SPI_SPEED> SPI;
 	SPI mSPI;
@@ -201,7 +201,7 @@ class LPD6803Controller : public CPixelLEDController<RGB_ORDER> {
 /// @tparam CLOCK_PIN the clock pin for these leds
 /// @tparam RGB_ORDER the RGB ordering for these leds
 /// @tparam SPI_SPEED the clock divider used for these leds.  Set using the DATA_RATE_MHZ/DATA_RATE_KHZ macros.  Defaults to DATA_RATE_MHZ(12)
-template <uint8_t DATA_PIN, uint8_t CLOCK_PIN, EOrder RGB_ORDER = RGB, uint8_t SPI_SPEED = DATA_RATE_MHZ(12)>
+template <uint8_t DATA_PIN, uint8_t CLOCK_PIN, EOrder RGB_ORDER = RGB, uint32_t SPI_SPEED = DATA_RATE_MHZ(12)>
 class APA102Controller : public CPixelLEDController<RGB_ORDER> {
 	typedef SPIOutput<DATA_PIN, CLOCK_PIN, SPI_SPEED> SPI;
 	SPI mSPI;
@@ -266,7 +266,7 @@ class APA102Controller : public CPixelLEDController<RGB_ORDER> {
 /// @tparam CLOCK_PIN the clock pin for these leds
 /// @tparam RGB_ORDER the RGB ordering for these leds
 /// @tparam SPI_SPEED the clock divider used for these leds.  Set using the DATA_RATE_MHZ/DATA_RATE_KHZ macros.  Defaults to DATA_RATE_MHZ(24)
-template <uint8_t DATA_PIN, uint8_t CLOCK_PIN, EOrder RGB_ORDER = RGB, uint8_t SPI_SPEED = DATA_RATE_MHZ(24)>
+template <uint8_t DATA_PIN, uint8_t CLOCK_PIN, EOrder RGB_ORDER = RGB, uint32_t SPI_SPEED = DATA_RATE_MHZ(24)>
 class SK9822Controller : public CPixelLEDController<RGB_ORDER> {
 	typedef SPIOutput<DATA_PIN, CLOCK_PIN, SPI_SPEED> SPI;
 	SPI mSPI;
@@ -340,7 +340,7 @@ class SK9822Controller : public CPixelLEDController<RGB_ORDER> {
 /// @tparam CLOCK_PIN the clock pin for these leds
 /// @tparam RGB_ORDER the RGB ordering for these leds
 /// @tparam SPI_SPEED the clock divider used for these leds.  Set using the DATA_RATE_MHZ/DATA_RATE_KHZ macros.  Defaults to DATA_RATE_MHZ(10)
-template <uint8_t DATA_PIN, uint8_t CLOCK_PIN, EOrder RGB_ORDER = RGB, uint8_t SPI_SPEED = DATA_RATE_MHZ(10)>
+template <uint8_t DATA_PIN, uint8_t CLOCK_PIN, EOrder RGB_ORDER = RGB, uint32_t SPI_SPEED = DATA_RATE_MHZ(10)>
 class P9813Controller : public CPixelLEDController<RGB_ORDER> {
 	typedef SPIOutput<DATA_PIN, CLOCK_PIN, SPI_SPEED> SPI;
 	SPI mSPI;
@@ -390,7 +390,7 @@ class P9813Controller : public CPixelLEDController<RGB_ORDER> {
 /// @tparam CLOCK_PIN the clock pin for these leds
 /// @tparam RGB_ORDER the RGB ordering for these leds
 /// @tparam SPI_SPEED the clock divider used for these leds.  Set using the DATA_RATE_MHZ/DATA_RATE_KHZ macros.  Defaults to DATA_RATE_MHZ(16)
-template <uint8_t DATA_PIN, uint8_t CLOCK_PIN, EOrder RGB_ORDER = RGB, uint8_t SPI_SPEED = DATA_RATE_MHZ(16)>
+template <uint8_t DATA_PIN, uint8_t CLOCK_PIN, EOrder RGB_ORDER = RGB, uint32_t SPI_SPEED = DATA_RATE_MHZ(16)>
 class SM16716Controller : public CPixelLEDController<RGB_ORDER> {
 	typedef SPIOutput<DATA_PIN, CLOCK_PIN, SPI_SPEED> SPI;
 	SPI mSPI;
@@ -398,10 +398,15 @@ class SM16716Controller : public CPixelLEDController<RGB_ORDER> {
 	void writeHeader() {
 		// Write out 50 zeros to the spi line (6 blocks of 8 followed by two single bit writes)
 		mSPI.select();
-		mSPI.writeBytesValueRaw(0, 6);
-		mSPI.waitFully();
 		mSPI.template writeBit<0>(0);
+		mSPI.writeByte(0);
+		mSPI.writeByte(0);
+		mSPI.writeByte(0);
 		mSPI.template writeBit<0>(0);
+		mSPI.writeByte(0);
+		mSPI.writeByte(0);
+		mSPI.writeByte(0);
+		mSPI.waitFully();
 		mSPI.release();
 	}
 
@@ -524,7 +529,13 @@ class PL9823Controller : public ClocklessController<DATA_PIN, 3 * FMUL, 8 * FMUL
 
 // Similar to NS() macro, this calculates the number of cycles for
 // the clockless chipset (which may differ from CPU cycles)
+
+#ifdef FASTLED_TEENSY4
+// just use raw nanosecond values for the teensy4
+#define C_NS(_NS) _NS
+#else
 #define C_NS(_NS) (((_NS * ((CLOCKLESS_FREQUENCY / 1000000L)) + 999)) / 1000)
+#endif
 
 // GE8822 - 350ns 660ns 350ns
 template <uint8_t DATA_PIN, EOrder RGB_ORDER = RGB>
diff --git a/examples/Pintest/Pintest.ino b/examples/Pintest/Pintest.ino
index a63f0d46ff..f0a0dadc43 100644
--- a/examples/Pintest/Pintest.ino
+++ b/examples/Pintest/Pintest.ino
@@ -1,7 +1,10 @@
 
-#include <FastSPI_LED.h>
+#include <FastLED.h>
+
+char fullstrBuffer[64];
 
 const char *getPort(void *portPtr) {
+// AVR port checks
 #ifdef PORTA
 	if(portPtr == (void*)&PORTA) { return "PORTA"; }
 #endif
@@ -38,6 +41,8 @@ const char *getPort(void *portPtr) {
 #ifdef PORTL
 	if(portPtr == (void*)&PORTL) { return "PORTL"; }
 #endif
+
+// Teensy 3.x port checks
 #ifdef GPIO_A_PDOR
 	if(portPtr == (void*)&GPIO_A_PDOR) { return "GPIO_A_PDOR"; }
 #endif
@@ -65,7 +70,24 @@ const char *getPort(void *portPtr) {
 #ifdef REG_PIO_D_ODSR
 	if(portPtr == (void*)&REG_PIO_D_ODSR) { return "REG_PIO_D_ODSR"; }
 #endif
-	return "unknown";
+
+// Teensy 4 port checks
+#ifdef GPIO1_DR
+	if(portPtr == (void*)&GPIO1_DR) { return "GPIO1_DR"; }
+#endif
+#ifdef GPIO2_DR
+	if(portPtr == (void*)&GPIO2_DR) { return "GPIO2_DR"; }
+#endif
+#ifdef GPIO3_DR
+	if(portPtr == (void*)&GPIO3_DR) { return "GPIO3_DR"; }
+#endif
+#ifdef GPIO4_DR
+	if(portPtr == (void*)&GPIO4_DR) { return "GPIO4_DR"; }
+#endif
+	String unknown_str = "Unknown: " + String((size_t)portPtr, HEX); // no known register matched: report the raw address instead
+	strncpy(fullstrBuffer, unknown_str.c_str(), sizeof(fullstrBuffer)-1); // bound by the buffer, not the source; strncpy zero-fills the remainder so an earlier, longer result cannot leak through
+	fullstrBuffer[sizeof(fullstrBuffer)-1] = '\0';
+	return fullstrBuffer; // static buffer: the returned text is only valid until the next call
 }
 
 template<uint8_t PIN> void CheckPin()
@@ -74,32 +96,35 @@ template<uint8_t PIN> void CheckPin()
 
 	RwReg *systemThinksPortIs = portOutputRegister(digitalPinToPort(PIN));
 	RwReg systemThinksMaskIs = digitalPinToBitMask(PIN);
-	
+
 	Serial.print("Pin "); Serial.print(PIN); Serial.print(": Port ");
-	
-	if(systemThinksPortIs == FastPin<PIN>::port()) { 
+
+	if(systemThinksPortIs == FastPin<PIN>::port()) {
 		Serial.print("valid & mask ");
-	} else { 
-		Serial.print("invalid, is "); Serial.print(getPort((void*)FastPin<PIN>::port())); Serial.print(" should be "); 
+	} else {
+		Serial.print("invalid, is "); Serial.print(getPort((void*)FastPin<PIN>::port())); Serial.print(" should be ");
 		Serial.print(getPort((void*)systemThinksPortIs));
 		Serial.print(" & mask ");
 	}
 
 	if(systemThinksMaskIs == FastPin<PIN>::mask()) {
 		Serial.println("valid.");
-	} else { 
+	} else {
 		Serial.print("invalid, is "); Serial.print(FastPin<PIN>::mask()); Serial.print(" should be "); Serial.println(systemThinksMaskIs);
 	}
-}	
+}
 
 template<> void CheckPin<-1> () {}
 
-void setup() { 
+void setup() {
+	delay(5000);
     Serial.begin(38400);
     Serial.println("resetting!");
 }
 
-void loop() { 
+void loop() {
 	CheckPin<MAX_PIN>();
-	delay(10000);
+	delay(100000);
+
+	Serial.print("GPIO_1_DR is: "); Serial.print(getPort((void*)&(GPIO1_DR)));
 }
diff --git a/fastspi.h b/fastspi.h
index fc0843be7a..38e8eabf07 100644
--- a/fastspi.h
+++ b/fastspi.h
@@ -13,6 +13,10 @@ FASTLED_NAMESPACE_BEGIN
 #if defined(FASTLED_TEENSY3) && (F_CPU > 48000000)
 #define DATA_RATE_MHZ(X) (((48000000L / 1000000L) / X))
 #define DATA_RATE_KHZ(X) (((48000000L / 1000L) / X))
+#elif defined(FASTLED_TEENSY4) // && (ARM_HARDWARE_SPI)
+// just use clocks
+#define DATA_RATE_MHZ(X) (1000000 * (X))
+#define DATA_RATE_KHZ(X) (1000 * (X))
 #else
 #define DATA_RATE_MHZ(X) ((F_CPU / 1000000L) / X)
 #define DATA_RATE_KHZ(X) ((F_CPU / 1000L) / X)
@@ -26,22 +30,22 @@ FASTLED_NAMESPACE_BEGIN
 //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
 #if !defined(FASTLED_ALL_PINS_HARDWARE_SPI)
-template<uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint8_t _SPI_CLOCK_DIVIDER>
+template<uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint32_t _SPI_CLOCK_DIVIDER>
 class SPIOutput : public AVRSoftwareSPIOutput<_DATA_PIN, _CLOCK_PIN, _SPI_CLOCK_DIVIDER> {};
 #endif
 
-template<uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint8_t _SPI_CLOCK_DIVIDER>
+template<uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint32_t _SPI_CLOCK_DIVIDER>
 class SoftwareSPIOutput : public AVRSoftwareSPIOutput<_DATA_PIN, _CLOCK_PIN, _SPI_CLOCK_DIVIDER> {};
 
 #ifndef FASTLED_FORCE_SOFTWARE_SPI
 
 #if defined(NRF51) && defined(FASTLED_ALL_PINS_HARDWARE_SPI)
-template<uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint8_t _SPI_CLOCK_DIVIDER>
+template<uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint32_t _SPI_CLOCK_DIVIDER>
 class SPIOutput : public NRF51SPIOutput<_DATA_PIN, _CLOCK_PIN, _SPI_CLOCK_DIVIDER> {};
 #endif
 
 #if defined(NRF52_SERIES) && defined(FASTLED_ALL_PINS_HARDWARE_SPI)
-template<uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint8_t _SPI_CLOCK_DIVIDER>
+template<uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint32_t _SPI_CLOCK_DIVIDER>
 class SPIOutput : public NRF52SPIOutput<_DATA_PIN, _CLOCK_PIN, _SPI_CLOCK_DIVIDER> {};
 #endif
 
@@ -49,26 +53,37 @@ class SPIOutput : public NRF52SPIOutput<_DATA_PIN, _CLOCK_PIN, _SPI_CLOCK_DIVIDE
 
 #if defined(FASTLED_TEENSY3) && defined(ARM_HARDWARE_SPI)
 
-template<uint8_t SPI_SPEED>
+template<uint32_t SPI_SPEED>
 class SPIOutput<SPI_DATA, SPI_CLOCK, SPI_SPEED> : public ARMHardwareSPIOutput<SPI_DATA, SPI_CLOCK, SPI_SPEED, 0x4002C000> {};
 
 #if defined(SPI2_DATA)
 
-template<uint8_t SPI_SPEED>
+template<uint32_t SPI_SPEED>
 class SPIOutput<SPI2_DATA, SPI2_CLOCK, SPI_SPEED> : public ARMHardwareSPIOutput<SPI2_DATA, SPI2_CLOCK, SPI_SPEED, 0x4002C000> {};
 
-template<uint8_t SPI_SPEED>
+template<uint32_t SPI_SPEED>
 class SPIOutput<SPI_DATA, SPI2_CLOCK, SPI_SPEED> : public ARMHardwareSPIOutput<SPI_DATA, SPI2_CLOCK, SPI_SPEED, 0x4002C000> {};
 
-template<uint8_t SPI_SPEED>
+template<uint32_t SPI_SPEED>
 class SPIOutput<SPI2_DATA, SPI_CLOCK, SPI_SPEED> : public ARMHardwareSPIOutput<SPI2_DATA, SPI_CLOCK, SPI_SPEED, 0x4002C000> {};
 #endif
 
+#elif defined(FASTLED_TEENSY4) && defined(ARM_HARDWARE_SPI)
+
+template<uint32_t SPI_SPEED>
+class SPIOutput<SPI_DATA, SPI_CLOCK, SPI_SPEED> : public Teesy4HardwareSPIOutput<SPI_DATA, SPI_CLOCK, SPI_SPEED, SPI, 0> {};
+
+template<uint32_t SPI_SPEED> // hardware SPI1: selected only when both pins are the SPI1 data/clock pair
+class SPIOutput<SPI1_DATA, SPI1_CLOCK, SPI_SPEED> : public Teesy4HardwareSPIOutput<SPI1_DATA, SPI1_CLOCK, SPI_SPEED, SPI1, 1> {};
+
+template<uint32_t SPI_SPEED>
+class SPIOutput<SPI2_DATA, SPI2_CLOCK, SPI_SPEED> : public Teesy4HardwareSPIOutput<SPI2_DATA, SPI2_CLOCK, SPI_SPEED, SPI2, 2> {};
+
 #elif defined(FASTLED_TEENSYLC) && defined(ARM_HARDWARE_SPI)
 
-#define DECLARE_SPI0(__DATA,__CLOCK) template<uint8_t SPI_SPEED>\
+#define DECLARE_SPI0(__DATA,__CLOCK) template<uint32_t SPI_SPEED>\
  class SPIOutput<__DATA, __CLOCK, SPI_SPEED> : public ARMHardwareSPIOutput<__DATA, __CLOCK, SPI_SPEED, 0x40076000> {};
- #define DECLARE_SPI1(__DATA,__CLOCK) template<uint8_t SPI_SPEED>\
+ #define DECLARE_SPI1(__DATA,__CLOCK) template<uint32_t SPI_SPEED>\
   class SPIOutput<__DATA, __CLOCK, SPI_SPEED> : public ARMHardwareSPIOutput<__DATA, __CLOCK, SPI_SPEED, 0x40077000> {};
 
 DECLARE_SPI0(7,13);
@@ -85,24 +100,24 @@ DECLARE_SPI1(21,20);
 
 #elif defined(__SAM3X8E__)
 
-template<uint8_t SPI_SPEED>
+template<uint32_t SPI_SPEED>
 class SPIOutput<SPI_DATA, SPI_CLOCK, SPI_SPEED> : public SAMHardwareSPIOutput<SPI_DATA, SPI_CLOCK, SPI_SPEED> {};
 
 #elif defined(AVR_HARDWARE_SPI)
 
-template<uint8_t SPI_SPEED>
+template<uint32_t SPI_SPEED>
 class SPIOutput<SPI_DATA, SPI_CLOCK, SPI_SPEED> : public AVRHardwareSPIOutput<SPI_DATA, SPI_CLOCK, SPI_SPEED> {};
 
 #if defined(SPI_UART0_DATA)
 
-template<uint8_t SPI_SPEED>
+template<uint32_t SPI_SPEED>
 class SPIOutput<SPI_UART0_DATA, SPI_UART0_CLOCK, SPI_SPEED> : public AVRUSART0SPIOutput<SPI_UART0_DATA, SPI_UART0_CLOCK, SPI_SPEED> {};
 
 #endif
 
 #if defined(SPI_UART1_DATA)
 
-template<uint8_t SPI_SPEED>
+template<uint32_t SPI_SPEED>
 class SPIOutput<SPI_UART1_DATA, SPI_UART1_CLOCK, SPI_SPEED> : public AVRUSART1SPIOutput<SPI_UART1_DATA, SPI_UART1_CLOCK, SPI_SPEED> {};
 
 #endif
@@ -120,7 +135,7 @@ class SPIOutput<SPI_UART1_DATA, SPI_UART1_CLOCK, SPI_SPEED> : public AVRUSART1SP
 #endif
 
 // #if defined(USART_DATA) && defined(USART_CLOCK)
-// template<uint8_t SPI_SPEED>
+// template<uint32_t SPI_SPEED>
 // class AVRSPIOutput<USART_DATA, USART_CLOCK, SPI_SPEED> : public AVRUSARTSPIOutput<USART_DATA, USART_CLOCK, SPI_SPEED> {};
 // #endif
 
diff --git a/fastspi_bitbang.h b/fastspi_bitbang.h
index d48e32bcb3..70795e8b3c 100644
--- a/fastspi_bitbang.h
+++ b/fastspi_bitbang.h
@@ -15,7 +15,7 @@ FASTLED_NAMESPACE_BEGIN
 //
 //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-template <uint8_t DATA_PIN, uint8_t CLOCK_PIN, uint8_t SPI_SPEED>
+template <uint8_t DATA_PIN, uint8_t CLOCK_PIN, uint32_t SPI_SPEED>
 class AVRSoftwareSPIOutput {
 	// The data types for pointers to the pin port - typedef'd here from the Pin definition because on avr these
 	// are pointers to 8 bit values, while on arm they are 32 bit
@@ -113,10 +113,16 @@ class AVRSoftwareSPIOutput {
 public:
 
 	// We want to make sure that the clock pulse is held high for a nininum of 35ns.
+#if defined(FASTLED_TEENSY4)
+	#define DELAY_NS (1000000000UL / (SPI_SPEED)) // bit period in ns; SPI_SPEED is in Hz here, and dividing directly keeps rates below 1MHz from dividing by zero
+	#define CLOCK_HI_DELAY do { delayNanoseconds((DELAY_NS/4)); } while(0);
+	#define CLOCK_LO_DELAY do { delayNanoseconds((DELAY_NS/4)); } while(0);
+#else
 	#define MIN_DELAY (NS(35) - 3)
 
-  #define CLOCK_HI_DELAY delaycycles<MIN_DELAY>(); delaycycles<(((SPI_SPEED-6) / 2) - MIN_DELAY)>();
-	#define CLOCK_LO_DELAY delaycycles<(((SPI_SPEED-6) / 4))>();
+	#define CLOCK_HI_DELAY do { delaycycles<MIN_DELAY>(); delaycycles<(((SPI_SPEED-6) / 2) - MIN_DELAY)>(); } while(0);
+	#define CLOCK_LO_DELAY do { delaycycles<(((SPI_SPEED-6) / 4))>(); } while(0);
+#endif
 
 	// write the BIT'th bit out via spi, setting the data pin then strobing the clcok
 	template <uint8_t BIT> __attribute__((always_inline, hot)) inline static void writeBit(uint8_t b) {
@@ -126,8 +132,8 @@ class AVRSoftwareSPIOutput {
 #ifdef ESP32
 			// try to ensure we never have adjacent write opcodes to the same register
 			FastPin<CLOCK_PIN>::lo();
-			FastPin<CLOCK_PIN>::hi(); CLOCK_HI_DELAY; 
-			FastPin<CLOCK_PIN>::toggle(); CLOCK_LO_DELAY; 
+			FastPin<CLOCK_PIN>::hi(); CLOCK_HI_DELAY;
+			FastPin<CLOCK_PIN>::toggle(); CLOCK_LO_DELAY;
 #else
 			FastPin<CLOCK_PIN>::hi(); CLOCK_HI_DELAY;
 			FastPin<CLOCK_PIN>::lo(); CLOCK_LO_DELAY;
@@ -137,7 +143,7 @@ class AVRSoftwareSPIOutput {
 			FastPin<CLOCK_PIN>::hi(); CLOCK_HI_DELAY;
 #ifdef ESP32
 			// try to ensure we never have adjacent write opcodes to the same register
-			FastPin<CLOCK_PIN>::toggle(); CLOCK_HI_DELAY; 
+			FastPin<CLOCK_PIN>::toggle(); CLOCK_HI_DELAY;
 #else
 			FastPin<CLOCK_PIN>::lo(); CLOCK_LO_DELAY;
 #endif
diff --git a/fastspi_nop.h b/fastspi_nop.h
index 5c5da010a8..1dcd2961e0 100644
--- a/fastspi_nop.h
+++ b/fastspi_nop.h
@@ -10,7 +10,7 @@ FASTLED_NAMESPACE_BEGIN
 /// A nop/stub class, mostly to show the SPI methods that are needed/used by the various SPI chipset implementations.  Should
 /// be used as a definition for the set of methods that the spi implementation classes should use (since C++ doesn't support the
 /// idea of interfaces - it's possible this could be done with virtual classes, need to decide if i want that overhead)
-template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint8_t _SPI_CLOCK_DIVIDER>
+template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint32_t _SPI_CLOCK_DIVIDER>
 class NOPSPIOutput {
 	Selectable *m_pSelect;
 
diff --git a/fastspi_ref.h b/fastspi_ref.h
index f68e63ef46..00c41d345d 100644
--- a/fastspi_ref.h
+++ b/fastspi_ref.h
@@ -8,7 +8,7 @@ FASTLED_NAMESPACE_BEGIN
 
 // A skeletal implementation of hardware SPI support.  Fill in the necessary code for init, waiting, and writing.  The rest of
 // the method implementations should provide a starting point, even if not hte most efficient to start with
-template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint8_t _SPI_CLOCK_DIVIDER>
+template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint32_t _SPI_CLOCK_DIVIDER>
 class REFHardwareSPIOutput {
 	Selectable *m_pSelect;
 public:
diff --git a/led_sysdefs.h b/led_sysdefs.h
index 7abcd15e49..27da24a043 100644
--- a/led_sysdefs.h
+++ b/led_sysdefs.h
@@ -18,6 +18,9 @@
 #elif defined(__MKL26Z64__)
 // Include kl26/T-LC headers
 #include "platforms/arm/kl26/led_sysdefs_arm_kl26.h"
+#elif defined(__IMXRT1062__)
+// teensy4
+#include "platforms/arm/mxrt1062/led_sysdefs_arm_mxrt1062.h"
 #elif defined(__SAM3X8E__)
 // Include sam/due headers
 #include "platforms/arm/sam/led_sysdefs_arm_sam.h"
diff --git a/platforms.h b/platforms.h
index 82d7d99385..f66599fd3f 100644
--- a/platforms.h
+++ b/platforms.h
@@ -18,6 +18,9 @@
 #elif defined(__MKL26Z64__)
 // Include kl26/T-LC headers
 #include "platforms/arm/kl26/fastled_arm_kl26.h"
+#elif defined(__IMXRT1062__)
+// teensy4
+#include "platforms/arm/mxrt1062/fastled_arm_mxrt1062.h"
 #elif defined(__SAM3X8E__)
 // Include sam/due headers
 #include "platforms/arm/sam/fastled_arm_sam.h"
diff --git a/platforms/arm/k20/fastspi_arm_k20.h b/platforms/arm/k20/fastspi_arm_k20.h
index 70210a396a..0512324368 100644
--- a/platforms/arm/k20/fastspi_arm_k20.h
+++ b/platforms/arm/k20/fastspi_arm_k20.h
@@ -94,7 +94,7 @@ template <int VAL> void getScalars(uint32_t & preScalar, uint32_t & scalar, uint
 
 #define SPIX (*(SPI_t*)pSPIX)
 
-template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint8_t _SPI_CLOCK_DIVIDER, uint32_t pSPIX>
+template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint32_t _SPI_CLOCK_DIVIDER, uint32_t pSPIX>
 class ARMHardwareSPIOutput {
 	Selectable *m_pSelect;
 	SPIState gState;
diff --git a/platforms/arm/k66/fastspi_arm_k66.h b/platforms/arm/k66/fastspi_arm_k66.h
index 7e598cff4b..a40e598522 100644
--- a/platforms/arm/k66/fastspi_arm_k66.h
+++ b/platforms/arm/k66/fastspi_arm_k66.h
@@ -102,7 +102,7 @@ template <int VAL> void getScalars(uint32_t & preScalar, uint32_t & scalar, uint
 
 #define SPIX (*(SPI_t*)pSPIX)
 
-template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint8_t _SPI_CLOCK_DIVIDER, uint32_t pSPIX>
+template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint32_t _SPI_CLOCK_DIVIDER, uint32_t pSPIX>
 class ARMHardwareSPIOutput {
 	Selectable *m_pSelect;
 	SPIState gState;
diff --git a/platforms/arm/kl26/fastspi_arm_kl26.h b/platforms/arm/kl26/fastspi_arm_kl26.h
index 869b60546f..b1e766774d 100644
--- a/platforms/arm/kl26/fastspi_arm_kl26.h
+++ b/platforms/arm/kl26/fastspi_arm_kl26.h
@@ -82,7 +82,7 @@ template <int VAL> void getScalars(uint8_t & sppr, uint8_t & spr) {
 #define SPIX (*(KINETISL_SPI_t*)pSPIX)
 #define ARM_HARDWARE_SPI
 
-template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint8_t _SPI_CLOCK_DIVIDER, uint32_t pSPIX>
+template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint32_t _SPI_CLOCK_DIVIDER, uint32_t pSPIX>
 class ARMHardwareSPIOutput {
   Selectable *m_pSelect;
 
diff --git a/platforms/arm/mxrt1062/block_clockless_arm_mxrt1062.h b/platforms/arm/mxrt1062/block_clockless_arm_mxrt1062.h
new file mode 100644
index 0000000000..6655ca793c
--- /dev/null
+++ b/platforms/arm/mxrt1062/block_clockless_arm_mxrt1062.h
@@ -0,0 +1,209 @@
+#ifndef __INC_BLOCK_CLOCKLESS_ARM_MXRT1062_H
+#define __INC_BLOCK_CLOCKLESS_ARM_MXRT1062_H
+
+FASTLED_NAMESPACE_BEGIN
+
+// Definition for a multi-lane (parallel output) clockless controller for the teensy4
+// See clockless.h for detailed info on how the template parameters are used.
+#if defined(FASTLED_TEENSY4)
+
+#define __FL_T4_MASK ((1<<(LANES))-1)
+template <uint8_t LANES, int FIRST_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = GRB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 50>
+class InlineBlockClocklessController : public CPixelLEDController<RGB_ORDER, LANES, __FL_T4_MASK> {
+
+  uint8_t m_bitOffsets[16];
+  uint8_t m_nActualLanes;
+  uint8_t m_nLowBit;
+  uint8_t m_nHighBit;
+  uint32_t m_nWriteMask;
+  uint8_t m_nOutBlocks;
+  uint32_t m_offsets[3];
+  CMinWait<WAIT_TIME> mWait;
+public:
+
+  virtual int size() { return CLEDController::size() * m_nActualLanes; }
+
+// For each pin, if we've hit our lane count, break, otherwise set the pin to output,
+// store the bit offset in our offset array, add this pin to the write mask, and if this
+// pin ends a block sequence, then break out of the switch as well
+#define _BLOCK_PIN(P) case P: {                           \
+  if(m_nActualLanes == LANES) break;                      \
+  FastPin<P>::setOutput();                                \
+  m_bitOffsets[m_nActualLanes++] = FastPin<P>::pinbit();  \
+  m_nWriteMask |= FastPin<P>::mask();                     \
+  if( P == 27 || P == 7 || P == 30) break;                \
+}
+
+  virtual void init() {
+    // pre-initialize
+    memset(m_bitOffsets,0,16);
+    m_nActualLanes = 0;
+    m_nLowBit = 33;
+    m_nHighBit = 0;
+    m_nWriteMask = 0;
+
+    // setup the bits and data tracking for parallel output
+    switch(FIRST_PIN) {
+      // GPIO6 block output
+      _BLOCK_PIN( 1);
+			_BLOCK_PIN( 0);
+			_BLOCK_PIN(24);
+			_BLOCK_PIN(25);
+			_BLOCK_PIN(19);
+			_BLOCK_PIN(18);
+			_BLOCK_PIN(14);
+			_BLOCK_PIN(15);
+			_BLOCK_PIN(17);
+			_BLOCK_PIN(16);
+			_BLOCK_PIN(22);
+			_BLOCK_PIN(23);
+			_BLOCK_PIN(20);
+			_BLOCK_PIN(21);
+			_BLOCK_PIN(26);
+			_BLOCK_PIN(27);
+      // GPIO7 block output
+			_BLOCK_PIN(10);
+			_BLOCK_PIN(12);
+			_BLOCK_PIN(11);
+			_BLOCK_PIN(13);
+			_BLOCK_PIN( 6);
+			_BLOCK_PIN( 9);
+			_BLOCK_PIN(32);
+			_BLOCK_PIN( 8);
+			_BLOCK_PIN( 7);
+      // GPIO8 block output
+			_BLOCK_PIN(37);
+			_BLOCK_PIN(36);
+			_BLOCK_PIN(35);
+			_BLOCK_PIN(34);
+			_BLOCK_PIN(39);
+			_BLOCK_PIN(38);
+			_BLOCK_PIN(28);
+			_BLOCK_PIN(31);
+			_BLOCK_PIN(30);
+    }
+
+    for(int i = 0; i < m_nActualLanes; i++) {
+      if(m_bitOffsets[i] < m_nLowBit) { m_nLowBit = m_bitOffsets[i]; }
+      if(m_bitOffsets[i] > m_nHighBit) { m_nHighBit = m_bitOffsets[i]; }
+    }
+
+    m_nOutBlocks = (m_nHighBit + 8)/8;
+
+  }
+
+
+  virtual void showPixels(PixelController<RGB_ORDER, LANES, __FL_T4_MASK> & pixels) {
+		mWait.wait();
+    #if FASTLED_ALLOW_INTERRUPTS == 0
+		uint32_t clocks = showRGBInternal(pixels);
+		// Adjust the timer
+		long microsTaken = CLKS_TO_MICROS(clocks);
+		MS_COUNTER += (1 + (microsTaken / 1000));
+		#else
+      showRGBInternal(pixels);
+    #endif
+
+		mWait.mark();
+	}
+
+  typedef union {
+    uint8_t bytes[32];
+    uint8_t bg[4][8];
+    uint16_t shorts[16];
+    uint32_t raw[8];
+  } _outlines;
+
+
+  template<int BITS,int PX> __attribute__ ((always_inline)) inline void writeBits(register uint32_t & next_mark, register _outlines & b, PixelController<RGB_ORDER, LANES, __FL_T4_MASK> &pixels) {
+    _outlines b2;
+    transpose8x1(b.bg[3], b2.bg[3]);
+    transpose8x1(b.bg[2], b2.bg[2]);
+    transpose8x1(b.bg[1], b2.bg[1]);
+    transpose8x1(b.bg[0], b2.bg[0]);
+
+    register uint8_t d = pixels.template getd<PX>(pixels);
+    register uint8_t scale = pixels.template getscale<PX>(pixels);
+
+    int x = 0;
+    for(uint32_t i = 8; i > 0;) {
+      i--;
+      while(ARM_DWT_CYCCNT < next_mark);
+      *FastPin<FIRST_PIN>::sport() = m_nWriteMask;
+      next_mark = ARM_DWT_CYCCNT + m_offsets[0];
+
+      uint32_t out = (b2.bg[3][i] << 24) | (b2.bg[2][i] << 16) | (b2.bg[1][i] << 8) | b2.bg[0][i];
+
+      out = ((~out) & m_nWriteMask);
+      while((next_mark - ARM_DWT_CYCCNT) > m_offsets[1]);
+      *FastPin<FIRST_PIN>::cport() = out;
+
+      out = m_nWriteMask;
+      while((next_mark - ARM_DWT_CYCCNT) > m_offsets[2]);
+      *FastPin<FIRST_PIN>::cport() = out;
+
+      // Read and store up to two bytes
+      if (x < m_nActualLanes) {
+        b.bytes[m_bitOffsets[x]] = pixels.template loadAndScale<PX>(pixels,x,d,scale);
+        x++;
+        if (x < m_nActualLanes) {
+          b.bytes[m_bitOffsets[x]] = pixels.template loadAndScale<PX>(pixels,x,d,scale);
+          x++;
+        }
+      }
+    }
+  }
+
+  uint32_t showRGBInternal(PixelController<RGB_ORDER,LANES, __FL_T4_MASK> &allpixels) { // returns the DWT cycles spent in this call
+    allpixels.preStepFirstByteDithering();
+    _outlines b0;
+    uint32_t start = ARM_DWT_CYCCNT;
+    // preload the first byte of every active lane into that lane's slot of the output buffer
+    for(int i = 0; i < m_nActualLanes; i++) {
+      b0.bytes[m_bitOffsets[i]] = allpixels.loadAndScale0(i);
+    }
+    // bit timing as DWT cycle counts: [0] = whole bit period, [1] = T2+T3, [2] = T3
+    cli();
+    m_offsets[0] = _FASTLED_NS_TO_DWT(T1+T2+T3);
+    m_offsets[1] = _FASTLED_NS_TO_DWT(T2+T3);
+    m_offsets[2] = _FASTLED_NS_TO_DWT(T3);
+    uint32_t wait_off = _FASTLED_NS_TO_DWT((WAIT_TIME-INTERRUPT_THRESHOLD)*1000); // threshold is a microsecond quantity (see the 45µs note below); the macro takes nanoseconds
+
+    uint32_t next_mark = ARM_DWT_CYCCNT + m_offsets[0];
+
+    while(allpixels.has(1)) {
+      allpixels.stepDithering();
+      #if (FASTLED_ALLOW_INTERRUPTS == 1)
+			cli();
+			// if interrupts took longer than 45µs, punt on the current frame
+			if(ARM_DWT_CYCCNT > next_mark) {
+				if((ARM_DWT_CYCCNT-next_mark) > wait_off) { sei(); return ARM_DWT_CYCCNT - start; }
+			}
+			#endif
+
+			// Write first byte, read next byte
+			writeBits<8+XTRA0,1>(next_mark, b0, allpixels);
+
+			// Write second byte, read 3rd byte
+			writeBits<8+XTRA0,2>(next_mark, b0, allpixels);
+			allpixels.advanceData();
+
+			// Write third byte
+			writeBits<8+XTRA0,0>(next_mark, b0, allpixels);
+
+			#if (FASTLED_ALLOW_INTERRUPTS == 1)
+			sei();
+			#endif
+    }
+
+    sei();
+
+    return ARM_DWT_CYCCNT - start;
+  }
+};
+
+#endif //defined(FASTLED_TEENSY4)
+
+FASTLED_NAMESPACE_END
+
+#endif
diff --git a/platforms/arm/mxrt1062/clockless_arm_mxrt1062.h b/platforms/arm/mxrt1062/clockless_arm_mxrt1062.h
new file mode 100644
index 0000000000..468c15ddfb
--- /dev/null
+++ b/platforms/arm/mxrt1062/clockless_arm_mxrt1062.h
@@ -0,0 +1,128 @@
+#ifndef __INC_CLOCKLESS_ARM_MXRT1062_H
+#define __INC_CLOCKLESS_ARM_MXRT1062_H
+
+FASTLED_NAMESPACE_BEGIN
+
+// Definition for a single channel clockless controller for the teensy4
+// See clockless.h for detailed info on how the template parameters are used.
+#if defined(FASTLED_TEENSY4)
+
+#define FASTLED_HAS_CLOCKLESS 1
+
+#define _FASTLED_NS_TO_DWT(_NS) (((F_CPU_ACTUAL>>16)*(_NS)) / (1000000000UL>>16))
+
+template <int DATA_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = RGB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 50>
+class ClocklessController : public CPixelLEDController<RGB_ORDER> {
+	typedef typename FastPin<DATA_PIN>::port_ptr_t data_ptr_t;
+	typedef typename FastPin<DATA_PIN>::port_t data_t;
+
+	data_t mPinMask;
+	data_ptr_t mPort;
+	CMinWait<WAIT_TIME> mWait;
+	uint32_t off[3];
+
+public:
+	static constexpr int __DATA_PIN() { return DATA_PIN; }
+	static constexpr int __T1() { return T1; }
+	static constexpr int __T2() { return T2; }
+	static constexpr int __T3() { return T3; }
+	static constexpr EOrder __RGB_ORDER() { return RGB_ORDER; }
+	static constexpr int __XTRA0() { return XTRA0; }
+	static constexpr bool __FLIP() { return FLIP; }
+	static constexpr int __WAIT_TIME() { return WAIT_TIME; }
+
+	virtual void init() {
+		FastPin<DATA_PIN>::setOutput();
+		mPinMask = FastPin<DATA_PIN>::mask();
+		mPort = FastPin<DATA_PIN>::port();
+    FastPin<DATA_PIN>::lo();
+	}
+
+protected:
+
+	virtual void showPixels(PixelController<RGB_ORDER> & pixels) {
+    mWait.wait();
+		if(!showRGBInternal(pixels)) {
+      sei(); delayMicroseconds(WAIT_TIME); cli();
+      showRGBInternal(pixels);
+    }
+    mWait.mark();
+  }
+
+	template<int BITS> __attribute__ ((always_inline)) inline void writeBits(register uint32_t & next_mark, register uint32_t & b)  { // clocks out BITS bits of b, taking bit 7 first
+		for(register uint32_t i = BITS-1; i > 0; i--) {
+			while(ARM_DWT_CYCCNT < next_mark); // wait for the previous bit period to end
+			next_mark = ARM_DWT_CYCCNT + off[0]; // off[0] = T1+T2+T3, one full bit period
+			FastPin<DATA_PIN>::hi();
+			if(b&0x80) {
+				while((next_mark - ARM_DWT_CYCCNT) > off[2]); // one: stay high until only T3 remains (high for T1+T2)
+				FastPin<DATA_PIN>::lo();
+			} else {
+				while((next_mark - ARM_DWT_CYCCNT) > off[1]); // zero: drop once T2+T3 remain (high for T1 only)
+				FastPin<DATA_PIN>::lo();
+			}
+			b <<= 1;
+		}
+		// final bit: same period and high times as the loop above, just without the shift
+		while(ARM_DWT_CYCCNT < next_mark);
+		next_mark = ARM_DWT_CYCCNT + off[0];
+		FastPin<DATA_PIN>::hi();
+
+		if(b&0x80) {
+			while((next_mark - ARM_DWT_CYCCNT) > off[2]);
+			FastPin<DATA_PIN>::lo();
+		} else {
+			while((next_mark - ARM_DWT_CYCCNT) > off[1]);
+			FastPin<DATA_PIN>::lo();
+		}
+	}
+
+	uint32_t showRGBInternal(PixelController<RGB_ORDER> pixels) { // returns DWT cycles spent, or 0 if the frame was abandoned; pixels is taken by value so a retry restarts from the first pixel
+		uint32_t start = ARM_DWT_CYCCNT;
+
+		// Setup the pixel controller and load/scale the first byte
+		pixels.preStepFirstByteDithering();
+		register uint32_t b = pixels.loadAndScale0();
+		// bit timing as DWT cycle counts: [0] = whole bit period, [1] = T2+T3, [2] = T3
+		cli();
+    off[0] = _FASTLED_NS_TO_DWT(T1+T2+T3);
+    off[1] = _FASTLED_NS_TO_DWT(T2+T3);
+		off[2] = _FASTLED_NS_TO_DWT(T3);
+    uint32_t wait_off = _FASTLED_NS_TO_DWT((WAIT_TIME-INTERRUPT_THRESHOLD)*1000); // WAIT_TIME is in microseconds (showPixels passes it to delayMicroseconds); the macro takes nanoseconds
+
+    uint32_t next_mark = ARM_DWT_CYCCNT + off[0];
+
+		while(pixels.has(1)) {
+			pixels.stepDithering();
+			#if (FASTLED_ALLOW_INTERRUPTS == 1)
+			cli();
+			// if interrupts took longer than 45µs, punt on the current frame
+			if(ARM_DWT_CYCCNT > next_mark) {
+				if((ARM_DWT_CYCCNT-next_mark) > wait_off) { sei(); return 0; } // 0 tells showPixels the frame was cut short so it can retry
+			}
+			#endif
+			// Write first byte, read next byte
+			writeBits<8+XTRA0>(next_mark, b);
+			b = pixels.loadAndScale1();
+
+			// Write second byte, read 3rd byte
+			writeBits<8+XTRA0>(next_mark, b);
+			b = pixels.loadAndScale2();
+
+			// Write third byte, read 1st byte of next pixel
+			writeBits<8+XTRA0>(next_mark, b);
+			b = pixels.advanceAndLoadAndScale0();
+			#if (FASTLED_ALLOW_INTERRUPTS == 1)
+			sei();
+			#endif
+		};
+
+		sei();
+		return ARM_DWT_CYCCNT - start;
+	}
+};
+#endif
+
+FASTLED_NAMESPACE_END
+
+#endif
diff --git a/platforms/arm/mxrt1062/fastled_arm_mxrt1062.h b/platforms/arm/mxrt1062/fastled_arm_mxrt1062.h
new file mode 100644
index 0000000000..0814c7fad4
--- /dev/null
+++ b/platforms/arm/mxrt1062/fastled_arm_mxrt1062.h
@@ -0,0 +1,9 @@
+#ifndef __INC_FASTLED_ARM_MXRT1062_H
+#define __INC_FASTLED_ARM_MXRT1062_H
+
+#include "fastpin_arm_mxrt1062.h"
+#include "fastspi_arm_mxrt1062.h"
+#include "clockless_arm_mxrt1062.h"
+#include "block_clockless_arm_mxrt1062.h"
+
+#endif
diff --git a/platforms/arm/mxrt1062/fastpin_arm_mxrt1062.h b/platforms/arm/mxrt1062/fastpin_arm_mxrt1062.h
new file mode 100644
index 0000000000..e1b15674b7
--- /dev/null
+++ b/platforms/arm/mxrt1062/fastpin_arm_mxrt1062.h
@@ -0,0 +1,91 @@
+#ifndef __FASTPIN_ARM_MXRT1062_H
+#define __FASTPIN_ARM_MXRT1062_H
+
+FASTLED_NAMESPACE_BEGIN
+
+#if defined(FASTLED_FORCE_SOFTWARE_PINS)
+#warning "Software pin support forced, pin access will be slightly slower."
+#define NO_HARDWARE_PIN_SUPPORT
+#undef HAS_HARDWARE_PIN_SUPPORT
+
+#else
+
+/// Template definition for teensy 4.0 style ARM pins, providing direct access to the various GPIO registers.  Note that this
+/// uses the full port GPIO registers.  It calls through to pinMode for setting input/output on pins
+/// The registers are data output, set output, clear output, toggle output, input, and direction
+template<uint8_t PIN, uint32_t _BIT, uint32_t _MASK, typename _GPIO_DR, typename _GPIO_DR_SET, typename _GPIO_DR_CLEAR, typename _GPIO_DR_TOGGLE> class _ARMPIN {
+public:
+	typedef volatile uint32_t * port_ptr_t;
+	typedef uint32_t port_t;
+
+	inline static void setOutput() { pinMode(PIN, OUTPUT); } // TODO: perform MUX config { _PDDR::r() |= _MASK; }
+	inline static void setInput() { pinMode(PIN, INPUT); } // TODO: preform MUX config { _PDDR::r() &= ~_MASK; }
+
+	inline static void hi() __attribute__ ((always_inline)) { _GPIO_DR_SET::r() = _MASK; }
+	inline static void lo() __attribute__ ((always_inline)) { _GPIO_DR_CLEAR::r() = _MASK; }
+	inline static void set(register port_t val) __attribute__ ((always_inline)) { _GPIO_DR::r() = val; }
+
+	inline static void strobe() __attribute__ ((always_inline)) { toggle(); toggle(); }
+
+	inline static void toggle() __attribute__ ((always_inline)) { _GPIO_DR_TOGGLE::r() = _MASK; }
+
+	inline static void hi(register port_ptr_t port) __attribute__ ((always_inline)) { hi(); }
+	inline static void lo(register port_ptr_t port) __attribute__ ((always_inline)) { lo(); }
+	inline static void fastset(register port_ptr_t port, register port_t val) __attribute__ ((always_inline)) { *port = val; }
+
+	inline static port_t hival() __attribute__ ((always_inline)) { return _GPIO_DR::r() | _MASK; }
+	inline static port_t loval() __attribute__ ((always_inline)) { return _GPIO_DR::r() & ~_MASK; }
+	inline static port_ptr_t port() __attribute__ ((always_inline)) { return &_GPIO_DR::r(); }
+	inline static port_ptr_t sport() __attribute__ ((always_inline)) { return &_GPIO_DR_SET::r(); }
+	inline static port_ptr_t cport() __attribute__ ((always_inline)) { return &_GPIO_DR_CLEAR::r(); }
+	inline static port_t mask() __attribute__ ((always_inline)) { return _MASK; }
+  inline static uint32_t pinbit() __attribute__ ((always_inline)) { return _BIT; }
+};
+
+
+#define _R(T) struct __gen_struct_ ## T
+#define _RD32(T) struct __gen_struct_ ## T { static __attribute__((always_inline)) inline reg32_t r() { return T; } };
+#define _IO32(L) _RD32(GPIO ## L ## _DR); _RD32(GPIO ## L ## _DR_SET); _RD32(GPIO ## L ## _DR_CLEAR); _RD32(GPIO ## L ## _DR_TOGGLE);
+
+// From the teensy core - it looks like there's the "default set" of port registers at GPIO1-5 - but then there
+// are a mirrored set for GPIO1-4 at GPIO6-9, which in the teensy core is referred to as "fast" - while the pin definitions
+// at https://forum.pjrc.com/threads/54711-Teensy-4-0-First-Beta-Test?p=193716&viewfull=1#post193716
+// refer to GPIO1-4, we're going to use GPIO6-9 in the definitions below because the fast registers are what
+// the teensy core is using internally
+#define _DEFPIN_T4(PIN, L, BIT) template<> class FastPin<PIN> : public _ARMPIN<PIN, BIT, 1 << BIT, _R(GPIO ## L ## _DR), _R(GPIO ## L ## _DR_SET), _R(GPIO ## L ## _DR_CLEAR), _R(GPIO ## L ## _DR_TOGGLE)> {};
+
+#if defined(FASTLED_TEENSY4) && defined(CORE_TEENSY)
+_IO32(1); _IO32(2); _IO32(3); _IO32(4); _IO32(5);
+_IO32(6); _IO32(7); _IO32(8); _IO32(9);
+
+#define MAX_PIN 39
+_DEFPIN_T4( 0,6, 3); _DEFPIN_T4( 1,6, 2); _DEFPIN_T4( 2,9, 4); _DEFPIN_T4( 3,9, 5);
+_DEFPIN_T4( 4,9, 6); _DEFPIN_T4( 5,9, 8); _DEFPIN_T4( 6,7,10); _DEFPIN_T4( 7,7,17);
+_DEFPIN_T4( 8,7,16); _DEFPIN_T4( 9,7,11); _DEFPIN_T4(10,7, 0); _DEFPIN_T4(11,7, 2);
+_DEFPIN_T4(12,7, 1); _DEFPIN_T4(13,7, 3); _DEFPIN_T4(14,6,18); _DEFPIN_T4(15,6,19);
+_DEFPIN_T4(16,6,23); _DEFPIN_T4(17,6,22); _DEFPIN_T4(18,6,17); _DEFPIN_T4(19,6,16);
+_DEFPIN_T4(20,6,26); _DEFPIN_T4(21,6,27); _DEFPIN_T4(22,6,24); _DEFPIN_T4(23,6,25);
+_DEFPIN_T4(24,6,12); _DEFPIN_T4(25,6,13); _DEFPIN_T4(26,6,30); _DEFPIN_T4(27,6,31);
+_DEFPIN_T4(28,8,18); _DEFPIN_T4(29,9,31); _DEFPIN_T4(30,8,23); _DEFPIN_T4(31,8,22);
+_DEFPIN_T4(32,7,12); _DEFPIN_T4(33,9, 7); _DEFPIN_T4(34,8,15); _DEFPIN_T4(35,8,14);
+_DEFPIN_T4(36,8,13); _DEFPIN_T4(37,8,12); _DEFPIN_T4(38,8,17); _DEFPIN_T4(39,8,16);
+
+#define HAS_HARDWARE_PIN_SUPPORT
+
+#define ARM_HARDWARE_SPI
+#define SPI_DATA 11
+#define SPI_CLOCK 13
+
+#define SPI1_DATA 26
+#define SPI1_CLOCK 27
+
+#define SPI2_DATA 35
+#define SPI2_CLOCK 37
+
+#endif // defined FASTLED_TEENSY4
+
+#endif // FASTLED_FORCE_SOFTWARE_PINS
+
+FASTLED_NAMESPACE_END
+
+#endif
diff --git a/platforms/arm/mxrt1062/fastspi_arm_mxrt1062.h b/platforms/arm/mxrt1062/fastspi_arm_mxrt1062.h
new file mode 100644
index 0000000000..fa6b81ff4a
--- /dev/null
+++ b/platforms/arm/mxrt1062/fastspi_arm_mxrt1062.h
@@ -0,0 +1,140 @@
+#ifndef __INC_FASTSPI_ARM_MXRT1062_H
+#define __INC_FASTSPI_ARM_MXRT1062_H
+
+FASTLED_NAMESPACE_BEGIN
+
+#if defined (FASTLED_TEENSY4) && defined(ARM_HARDWARE_SPI)
+#include <SPI.h>
+
+template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint32_t _SPI_CLOCK_RATE, SPIClass & _SPIObject, int _SPI_INDEX>
+class Teesy4HardwareSPIOutput {
+	Selectable *m_pSelect;  // optional chip-select object; NULL when unused
+  uint32_t  m_bitCount;   // number of bits queued by writeBit() and not yet sent (0-7)
+  uint32_t m_bitData;     // the queued bits, most recently queued bit in bit 0
+  // Map _SPI_INDEX to its LPSPI register block (0 -> LPSPI4, 1 -> LPSPI3, 2 -> LPSPI1).
+  // Any other index is rejected at compile time, so every path returns a valid reference.
+  static_assert(_SPI_INDEX >= 0 && _SPI_INDEX <= 2, "Teensy 4 hardware SPI index must be 0, 1 or 2");
+  inline IMXRT_LPSPI_t & port() __attribute__((always_inline)) {
+    switch(_SPI_INDEX) {
+      case 0:  return IMXRT_LPSPI4_S;
+      case 1:  return IMXRT_LPSPI3_S;
+      default: return IMXRT_LPSPI1_S;  // _SPI_INDEX == 2
+    }
+  }
+
+public:
+	Teesy4HardwareSPIOutput() { m_pSelect = NULL; m_bitCount = 0; m_bitData = 0; }
+	Teesy4HardwareSPIOutput(Selectable *pSelect) { m_pSelect = pSelect; m_bitCount = 0; m_bitData = 0; }
+
+	// set the object representing the selectable; select()/release() use it when it is non-NULL
+	void setSelect(Selectable *pSelect) { m_pSelect = pSelect; }
+
+	// initialize the SPI subsystem
+	void init() { _SPIObject.begin(); }
+
+	// begin an SPI transaction at _SPI_CLOCK_RATE, then latch the CS select (if any)
+	void inline select() __attribute__((always_inline)) {
+    // begin the SPI transaction
+    _SPIObject.beginTransaction(SPISettings(_SPI_CLOCK_RATE, MSBFIRST, SPI_MODE0));
+    if(m_pSelect != NULL) { m_pSelect->select(); }
+  }
+
+	// release the CS select (if any), then end the SPI transaction
+	void inline release() __attribute__((always_inline)) {
+    if(m_pSelect != NULL) { m_pSelect->release(); }
+    _SPIObject.endTransaction();
+  }
+
+	// wait until all queued up data has been written -- currently a no-op (TODO)
+	static void waitFully() { /* TODO */ }
+
+	// write a byte out via SPI; bits queued by writeBit() are sent ahead of it in one combined frame
+	void inline writeByte(uint8_t b) __attribute__((always_inline)) {
+    if(m_bitCount == 0) {
+      _SPIObject.transfer(b);
+    } else {
+      // There's been a bit of data written, add that to the output as well
+      uint32_t outData = (m_bitData << 8) | b;
+      uint32_t tcr = port().TCR;
+      port().TCR = (tcr & 0xfffff000) | LPSPI_TCR_FRAMESZ((8+m_bitCount) - 1);  // widen the frame to 8 + queued bits
+      port().TDR = outData;		// output the queued bits followed by the byte
+      while ((port().RSR & LPSPI_RSR_RXEMPTY)) ;	// wait while the receive fifo is empty...
+      port().TCR = (tcr & 0xfffff000) | LPSPI_TCR_FRAMESZ((8) - 1);  // turn back on 8 bit mode
+      port().RDR;  // read the received word; the value is not used
+      m_bitCount = 0;
+    }
+  }
+
+	// write a word out via SPI, high byte first
+	void inline writeWord(uint16_t w) __attribute__((always_inline)) {
+    writeByte(((w>>8) & 0xFF));
+    _SPIObject.transfer(w & 0xFF);
+  }
+
+	// A raw set of writing byte values, assumes setup/init/waiting done elsewhere
+	static void writeBytesValueRaw(uint8_t value, int len) {
+		while(len--) { _SPIObject.transfer(value); }
+	}
+
+	// A full cycle of writing a value for len bytes, including select, release, and waiting
+	void writeBytesValue(uint8_t value, int len) {
+		select(); writeBytesValueRaw(value, len); release();
+	}
+
+	// A full cycle of writing len bytes from data (each passed through D::adjust), including select, release, and waiting
+	template <class D> void writeBytes(uint8_t *data, int len) {
+		uint8_t *end = data + len;
+		select();
+		// could be optimized to write 16bit words out instead of 8bit bytes
+		while(data != end) {
+			writeByte(D::adjust(*data++));
+		}
+		D::postBlock(len);
+		waitFully();
+		release();
+	}
+
+	// Same as writeBytes<D> above, using DATA_NOP as the adjustment class
+	void writeBytes(uint8_t *data, int len) { writeBytes<DATA_NOP>(data, len); }
+
+	// queue a single bit (bit BIT of b); 8 queued bits go out as a byte, fewer are sent by the next writeByte()
+	template <uint8_t BIT> inline void writeBit(uint8_t b) {
+    m_bitData = (m_bitData<<1) | ((b&(1<<BIT)) != 0);
+    // If this is the 8th bit we've collected, just write it out raw
+    uint32_t bc = (m_bitCount + 1) & 0x07;
+    if (!bc) {
+      // send only the low 8 bits, i.e. the 8 most recently queued bits
+      _SPIObject.transfer(static_cast<uint8_t>(m_bitData & 0xFF));
+    }
+    // bc is 0 after a full byte went out, otherwise the number of bits still queued
+    m_bitCount = bc;
+  }
+
+	// write out every pixel in the controller as three scaled bytes, each passed through D::adjust.  When FLAGS has
+	// FLAG_START_BIT set, a single 1 bit is sent ahead of each pixel.  D::postBlock(len) is called once after the data
+	template <uint8_t FLAGS, class D, EOrder RGB_ORDER> void writePixels(PixelController<RGB_ORDER> pixels) {
+		select();
+    int len = pixels.mLen;
+
+		while(pixels.has(1)) {
+			if(FLAGS & FLAG_START_BIT) {
+				writeBit<0>(1);
+			}
+			writeByte(D::adjust(pixels.loadAndScale0()));
+			writeByte(D::adjust(pixels.loadAndScale1()));
+			writeByte(D::adjust(pixels.loadAndScale2()));
+
+			pixels.advanceData();
+			pixels.stepDithering();
+		}
+		D::postBlock(len);
+		release();
+	}
+
+};
+
+
+#endif
+
+FASTLED_NAMESPACE_END
+#endif
diff --git a/platforms/arm/mxrt1062/led_sysdefs_arm_mxrt1062.h b/platforms/arm/mxrt1062/led_sysdefs_arm_mxrt1062.h
new file mode 100644
index 0000000000..ac4908254c
--- /dev/null
+++ b/platforms/arm/mxrt1062/led_sysdefs_arm_mxrt1062.h
@@ -0,0 +1,43 @@
+#ifndef __INC_LED_SYSDEFS_ARM_MXRT1062_H
+#define __INC_LED_SYSDEFS_ARM_MXRT1062_H
+// System definitions for the Teensy 4 (mxrt1062) platform
+#define FASTLED_TEENSY4
+#define FASTLED_ARM
+
+#ifndef INTERRUPT_THRESHOLD
+#define INTERRUPT_THRESHOLD 1
+#endif
+
+// Default to allowing interrupts
+#ifndef FASTLED_ALLOW_INTERRUPTS
+#define FASTLED_ALLOW_INTERRUPTS 1
+#endif
+
+#if FASTLED_ALLOW_INTERRUPTS == 1
+#define FASTLED_ACCURATE_CLOCK
+#endif
+
+#if (F_CPU == 96000000)
+#define CLK_DBL 1
+#endif
+
+// Get some system include files
+#include <avr/io.h>
+#include <avr/interrupt.h> // for cli/sei definitions
+
+// Define the register types
+#if defined(ARDUINO) // && ARDUINO < 150
+typedef volatile       uint32_t RoReg; /**< Read only 32-bit register (NOTE: the typedef itself is not const) */
+typedef volatile       uint32_t RwReg; /**< Read-Write 32-bit register (volatile unsigned int) */
+#endif
+
+// extern volatile uint32_t systick_millis_count;
+// #  define MS_COUNTER systick_millis_count
+
+// Teensy4 provides progmem
+#ifndef FASTLED_USE_PROGMEM
+#define FASTLED_USE_PROGMEM 1
+#endif
+
+
+#endif
diff --git a/platforms/arm/nrf51/fastspi_arm_nrf51.h b/platforms/arm/nrf51/fastspi_arm_nrf51.h
index 539fd65646..6299e89d96 100644
--- a/platforms/arm/nrf51/fastspi_arm_nrf51.h
+++ b/platforms/arm/nrf51/fastspi_arm_nrf51.h
@@ -9,7 +9,7 @@
 // A nop/stub class, mostly to show the SPI methods that are needed/used by the various SPI chipset implementations.  Should
 // be used as a definition for the set of methods that the spi implementation classes should use (since C++ doesn't support the
 // idea of interfaces - it's possible this could be done with virtual classes, need to decide if i want that overhead)
-template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint8_t _SPI_CLOCK_DIVIDER>
+template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint32_t _SPI_CLOCK_DIVIDER>
 class NRF51SPIOutput {
 
   struct saveData {
diff --git a/platforms/arm/nrf52/fastspi_arm_nrf52.h b/platforms/arm/nrf52/fastspi_arm_nrf52.h
index 8492282bae..9c1a219826 100644
--- a/platforms/arm/nrf52/fastspi_arm_nrf52.h
+++ b/platforms/arm/nrf52/fastspi_arm_nrf52.h
@@ -21,7 +21,7 @@
      */
 
     /// SPI_CLOCK_DIVIDER is number of CPU clock cycles per SPI transmission bit?
-    template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint8_t _SPI_CLOCK_DIVIDER>
+    template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint32_t _SPI_CLOCK_DIVIDER>
     class NRF52SPIOutput {
 
     private:
@@ -325,13 +325,13 @@
 
     // Static member definition and initialization using templates.
     // see https://stackoverflow.com/questions/3229883/static-member-initialization-in-a-class-template#answer-3229919
-    template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint8_t _SPI_CLOCK_DIVIDER>
+    template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint32_t _SPI_CLOCK_DIVIDER>
     bool NRF52SPIOutput<_DATA_PIN, _CLOCK_PIN, _SPI_CLOCK_DIVIDER>::s_InUse = false;
-    template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint8_t _SPI_CLOCK_DIVIDER>
+    template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint32_t _SPI_CLOCK_DIVIDER>
     bool NRF52SPIOutput<_DATA_PIN, _CLOCK_PIN, _SPI_CLOCK_DIVIDER>::s_NeedToWait = false;
-    template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint8_t _SPI_CLOCK_DIVIDER>
+    template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint32_t _SPI_CLOCK_DIVIDER>
     uint8_t NRF52SPIOutput<_DATA_PIN, _CLOCK_PIN, _SPI_CLOCK_DIVIDER>::s_BufferIndex = 0;
-    template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint8_t _SPI_CLOCK_DIVIDER>
+    template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint32_t _SPI_CLOCK_DIVIDER>
     uint8_t NRF52SPIOutput<_DATA_PIN, _CLOCK_PIN, _SPI_CLOCK_DIVIDER>::s_Buffer[2][2] = {{0,0},{0,0}};
 
 #endif // #ifndef FASTLED_FORCE_SOFTWARE_SPI
diff --git a/platforms/arm/sam/fastspi_arm_sam.h b/platforms/arm/sam/fastspi_arm_sam.h
index eb9abe4cb7..a9446439b8 100644
--- a/platforms/arm/sam/fastspi_arm_sam.h
+++ b/platforms/arm/sam/fastspi_arm_sam.h
@@ -6,7 +6,7 @@ FASTLED_NAMESPACE_BEGIN
 #if defined(__SAM3X8E__)
 #define m_SPI ((Spi*)SPI0)
 
-template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint8_t _SPI_CLOCK_DIVIDER>
+template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint32_t _SPI_CLOCK_DIVIDER>
 class SAMHardwareSPIOutput {
 	Selectable *m_pSelect;
 
diff --git a/platforms/avr/fastspi_avr.h b/platforms/avr/fastspi_avr.h
index fc14d59638..d2edc9660c 100644
--- a/platforms/avr/fastspi_avr.h
+++ b/platforms/avr/fastspi_avr.h
@@ -20,7 +20,7 @@ FASTLED_NAMESPACE_BEGIN
 #define UCPHA1 1
 #endif
 
-template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint8_t _SPI_CLOCK_DIVIDER>
+template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint32_t _SPI_CLOCK_DIVIDER>
 class AVRUSART1SPIOutput {
 	Selectable *m_pSelect;
 
@@ -167,7 +167,7 @@ class AVRUSART1SPIOutput {
 #endif
 
 #if defined(UBRR0)
-template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint8_t _SPI_CLOCK_DIVIDER>
+template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint32_t _SPI_CLOCK_DIVIDER>
 class AVRUSART0SPIOutput {
 	Selectable *m_pSelect;
 
@@ -329,7 +329,7 @@ class AVRUSART0SPIOutput {
 //
 //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint8_t _SPI_CLOCK_DIVIDER>
+template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint32_t _SPI_CLOCK_DIVIDER>
 class AVRHardwareSPIOutput {
 	Selectable *m_pSelect;
 	bool mWait;
@@ -506,7 +506,7 @@ class AVRHardwareSPIOutput {
 //
 //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint8_t _SPI_CLOCK_DIVIDER>
+template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint32_t _SPI_CLOCK_DIVIDER>
 class AVRHardwareSPIOutput {
 	Selectable *m_pSelect;
 	bool mWait;
diff --git a/release_notes.md b/release_notes.md
index 81d16f3318..925f054a29 100644
--- a/release_notes.md
+++ b/release_notes.md
@@ -1,3 +1,7 @@
+FastLED 3.2.11
+==============
+* Preliminary Teensy 4 support
+
 FastLED 3.2.10
 ==============
 * Adafruit Metro M4 Airlift support

From 443259740cc7e6b120d1e4aae0c4b4b8d1dfef52 Mon Sep 17 00:00:00 2001
From: Henry Gabryjelski <henrygab@users.noreply.github.com>
Date: Mon, 12 Aug 2019 18:46:25 -0700
Subject: [PATCH 087/204] Fixes #856. (#860)

Initialization of pins may cause change in pin output.
Fix may be to initialize the pin state in Init(), and
call mWait.mark() to prevent showPixels() from sending
actual LED data to the strand until any potential
prior data had time to latch.
---
 platforms/arm/nrf52/clockless_arm_nrf52.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/platforms/arm/nrf52/clockless_arm_nrf52.h b/platforms/arm/nrf52/clockless_arm_nrf52.h
index d8a5da9885..94fd3ed803 100644
--- a/platforms/arm/nrf52/clockless_arm_nrf52.h
+++ b/platforms/arm/nrf52/clockless_arm_nrf52.h
@@ -156,6 +156,11 @@ class ClocklessController : public CPixelLEDController<_RGB_ORDER> {
         FASTLED_NRF52_DEBUGPRINT("    T0H == %d", _T0H);
         FASTLED_NRF52_DEBUGPRINT("    T1H == %d", _T1H);
         FASTLED_NRF52_DEBUGPRINT("    TOP == %d\n", _TOP);
+        // to avoid pin initialization from causing first LED to have invalid color,
+        // call mWait.mark() to ensure data latches before color data gets sent.
+        startPwmPlayback_InitializePinState();
+        mWait.mark();
+
     }
     virtual uint16_t getMaxRefreshRate() const { return 800; }
 

From 535188dfab7551d6053724a5dd74e0b2b04a7be6 Mon Sep 17 00:00:00 2001
From: Henry Gabryjelski <henrygab@users.noreply.github.com>
Date: Mon, 12 Aug 2019 18:46:38 -0700
Subject: [PATCH 088/204] Fix nRF52 race condition / hard lock (#857)

* Textbook example of where volatile is useful.

* Use intrinsics that ensure memory barrier for sequence buffer lock.

* Update wait time to ensure prior data had a chance to latch.
---
 platforms/arm/nrf52/clockless_arm_nrf52.h | 25 ++++++++++++++++++-----
 1 file changed, 20 insertions(+), 5 deletions(-)

diff --git a/platforms/arm/nrf52/clockless_arm_nrf52.h b/platforms/arm/nrf52/clockless_arm_nrf52.h
index 94fd3ed803..56a1dbe097 100644
--- a/platforms/arm/nrf52/clockless_arm_nrf52.h
+++ b/platforms/arm/nrf52/clockless_arm_nrf52.h
@@ -46,7 +46,7 @@ class ClocklessController : public CPixelLEDController<_RGB_ORDER> {
     // may as well be static, as can only attach one LED string per _DATA_PIN....
     static uint16_t s_SequenceBuffer[_PWM_BUFFER_COUNT];
     static uint16_t s_SequenceBufferValidElements;
-    static uint32_t s_SequenceBufferInUse;
+    static volatile uint32_t s_SequenceBufferInUse;
     static CMinWait<_WAIT_TIME_MICROSECONDS> mWait;  // ensure data has time to latch
 
     FASTLED_NRF52_INLINE_ATTRIBUTE static void startPwmPlayback_InitializePinState() {
@@ -123,6 +123,18 @@ class ClocklessController : public CPixelLEDController<_RGB_ORDER> {
     FASTLED_NRF52_INLINE_ATTRIBUTE static void startPwmPlayback_StartTask(NRF_PWM_Type * pwm) {
         nrf_pwm_task_trigger(pwm, NRF_PWM_TASK_SEQSTART0);
     }
+    FASTLED_NRF52_INLINE_ATTRIBUTE static void spinAcquireSequenceBuffer() {
+        while (!tryAcquireSequenceBuffer()); // busy-wait: retry until an acquire attempt succeeds
+    }
+    FASTLED_NRF52_INLINE_ATTRIBUTE static bool tryAcquireSequenceBuffer() {
+        return __sync_bool_compare_and_swap(&s_SequenceBufferInUse, 0, 1); // atomically 0 -> 1; true only if this call made the swap
+    }
+    FASTLED_NRF52_INLINE_ATTRIBUTE static void releaseSequenceBuffer() {
+        uint32_t tmp = __sync_val_compare_and_swap(&s_SequenceBufferInUse, 1, 0); // atomically 1 -> 0; tmp is the value seen before the swap
+        if (tmp != 1) {
+            // flag was not 1 (buffer was not marked in use) -- TODO: Error / Assert / log ?
+        }
+    }
 
 public:
     static void isr_handler() {
@@ -134,8 +146,10 @@ class ClocklessController : public CPixelLEDController<_RGB_ORDER> {
         if (nrf_pwm_event_check(pwm,NRF_PWM_EVENT_STOPPED)) {
             nrf_pwm_event_clear(pwm,NRF_PWM_EVENT_STOPPED);
 
+            // update the minimum time to next call
+            mWait.mark();
             // mark the sequence as no longer in use -- pointer, comparator, exchange value
-            __sync_fetch_and_and(&s_SequenceBufferInUse, 0);
+            releaseSequenceBuffer();
             // prevent further interrupts from PWM events
             nrf_pwm_int_set(pwm, 0);
             // disable PWM interrupts - None of the PWM IRQs are shared
@@ -166,9 +180,10 @@ class ClocklessController : public CPixelLEDController<_RGB_ORDER> {
 
     virtual void showPixels(PixelController<_RGB_ORDER> & pixels) {
         // wait for the only sequence buffer to become available
-        while (s_SequenceBufferInUse != 0);
+        spinAcquireSequenceBuffer();
         prepareSequenceBuffers(pixels);
-        mWait.wait(); // ensure min time between updates
+        // ensure any prior data had time to latch
+        mWait.wait();
         startPwmPlayback(s_SequenceBufferValidElements);
         return;
     }
@@ -307,7 +322,7 @@ class ClocklessController : public CPixelLEDController<_RGB_ORDER> {
 template <uint8_t _DATA_PIN, int _T1, int _T2, int _T3, EOrder _RGB_ORDER, int _XTRA0, bool _FLIP, int _WAIT_TIME_MICROSECONDS>
 uint16_t ClocklessController<_DATA_PIN, _T1, _T2, _T3, _RGB_ORDER, _XTRA0, _FLIP, _WAIT_TIME_MICROSECONDS>::s_SequenceBufferValidElements = 0;
 template <uint8_t _DATA_PIN, int _T1, int _T2, int _T3, EOrder _RGB_ORDER, int _XTRA0, bool _FLIP, int _WAIT_TIME_MICROSECONDS>
-uint32_t ClocklessController<_DATA_PIN, _T1, _T2, _T3, _RGB_ORDER, _XTRA0, _FLIP, _WAIT_TIME_MICROSECONDS>::s_SequenceBufferInUse = 0;
+uint32_t volatile ClocklessController<_DATA_PIN, _T1, _T2, _T3, _RGB_ORDER, _XTRA0, _FLIP, _WAIT_TIME_MICROSECONDS>::s_SequenceBufferInUse = 0;
 template <uint8_t _DATA_PIN, int _T1, int _T2, int _T3, EOrder _RGB_ORDER, int _XTRA0, bool _FLIP, int _WAIT_TIME_MICROSECONDS>
 uint16_t ClocklessController<_DATA_PIN, _T1, _T2, _T3, _RGB_ORDER, _XTRA0, _FLIP, _WAIT_TIME_MICROSECONDS>::s_SequenceBuffer[_PWM_BUFFER_COUNT];
 template <uint8_t _DATA_PIN, int _T1, int _T2, int _T3, EOrder _RGB_ORDER, int _XTRA0, bool _FLIP, int _WAIT_TIME_MICROSECONDS>

From dc122772074fc57634a97e27fe0e9027f7a9fb6c Mon Sep 17 00:00:00 2001
From: Daniel Garcia <dgarcia@dgarcia.net>
Date: Mon, 12 Aug 2019 18:48:39 -0700
Subject: [PATCH 089/204] Spinning FastLED 3.3 - adding Teensy 4 support.

---
 FastLED.h          | 6 +++---
 library.json       | 2 +-
 library.properties | 2 +-
 release_notes.md   | 5 ++++-
 4 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/FastLED.h b/FastLED.h
index 3903eaa687..48a6446336 100644
--- a/FastLED.h
+++ b/FastLED.h
@@ -8,12 +8,12 @@
 #define FASTLED_HAS_PRAGMA_MESSAGE
 #endif
 
-#define FASTLED_VERSION 3002009
+#define FASTLED_VERSION 3003000
 #ifndef FASTLED_INTERNAL
 #  ifdef FASTLED_HAS_PRAGMA_MESSAGE
-#    pragma message "FastLED version 3.002.010"
+#    pragma message "FastLED version 3.003.000"
 #  else
-#    warning FastLED version 3.002.010  (Not really a warning, just telling you here.)
+#    warning FastLED version 3.003.000  (Not really a warning, just telling you here.)
 #  endif
 #endif
 
diff --git a/library.json b/library.json
index bcdd17562b..312e29bf26 100644
--- a/library.json
+++ b/library.json
@@ -18,7 +18,7 @@
         "type": "git",
         "url": "https://github.com/FastLED/FastLED.git"
     },
-    "version": "3.2.10",
+    "version": "3.3.0",
     "license": "MIT",
     "homepage": "http://fastled.io",
     "frameworks": "arduino",
diff --git a/library.properties b/library.properties
index 93c90e0fe4..8f2dd95d57 100644
--- a/library.properties
+++ b/library.properties
@@ -1,5 +1,5 @@
 name=FastLED
-version=3.2.10
+version=3.3.0
 author=Daniel Garcia
 maintainer=Daniel Garcia <dgarcia@fastled.io>
 sentence=Multi-platform library for controlling dozens of different types of LEDs along with optimized math, effect, and noise functions.
diff --git a/release_notes.md b/release_notes.md
index 925f054a29..aa858dc014 100644
--- a/release_notes.md
+++ b/release_notes.md
@@ -1,6 +1,9 @@
-FastLED 3.2.11
+FastLED 3.3.0
 ==============
 * Preliminary Teensy 4 support
+* Fix #861 - power computation for OctoWS2811
+* keywords and other minor changes for compilers (#854, #845)
+* Fix some nrf52 issues (#856), #840
 
 FastLED 3.2.10
 ==============

From dae69768c643f69a8856dfc1b769d62ae051b624 Mon Sep 17 00:00:00 2001
From: Daniel Garcia <dgarcia@dgarcia.net>
Date: Mon, 12 Aug 2019 18:54:56 -0700
Subject: [PATCH 090/204] kicking readme file, not worth spinning a release
 for, though

---
 README.md | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 11a4fd7119..0069329eb8 100644
--- a/README.md
+++ b/README.md
@@ -4,7 +4,7 @@
 IMPORTANT NOTE: For AVR based systems, avr-gcc 4.8.x is supported and tested.  This means Arduino 1.6.5 and later.
 
 
-FastLED 3.2
+FastLED 3.3
 ===========
 
 This is a library for easily & efficiently controlling a wide variety of LED chipsets, like the ones
@@ -68,13 +68,14 @@ Right now the library is supported on a variety of arduino compatable platforms.
 * Arduino & compatibles - straight up arduino devices, uno, duo, leonardo, mega, nano, etc...
 * Arduino Yún
 * Adafruit Trinket & Gemma - Trinket Pro may be supported, but haven't tested to confirm yet
-* Teensy 2, Teensy++ 2, Teensy 3.0, Teensy 3.1/3.2, Teensy LC - arduino compataible from pjrc.com with some extra goodies (note the teensy 3, 3.1, and LC are ARM, not AVR!)
+* Teensy 2, Teensy++ 2, Teensy 3.0, Teensy 3.1/3.2, Teensy LC, Teensy 3.5, Teensy 3.6, and Teensy 4.0 - arduino compataible from pjrc.com with some extra goodies (note the teensy 3, 3.1, and LC are ARM, not AVR!)
 * Arduino Due and the digistump DigiX
 * RFDuino
 * SparkCore
 * Arduino Zero
 * ESP8266 using the arduino board definitions from http://arduino.esp8266.com/stable/package_esp8266com_index.json - please be sure to also read https://github.com/FastLED/FastLED/wiki/ESP8266-notes for information specific to the 8266.
 * The wino board - http://wino-board.com
+* ESP32 based boards
 
 What types of platforms are we thinking about supporting in the future?  Here's a short list:  ChipKit32, Maple, Beagleboard
 

From 5efea077bd3fe3e12c0900880317ab6800d743a8 Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Sun, 18 Aug 2019 19:23:04 -0400
Subject: [PATCH 091/204] Improvements to RMT driver (#867)

* Support for ESP32

Credit to Rina Shkrabova for the first cut.

* Clean up interrupt handling

I think there was actually an error in the interrupt enabling/disabling, but I also cleaned it up so that it is more clear how interrupts are handled.

* Better interrupt handling

* Added RMT version

Not fully portable yet, though. The timing numbers are hard-wired for WS2812, and the RMT channel is also hard-wired.

* Fixed the timing

Timing is now computed from T1, T2, and T3 instead of being hard-wired.

* Better buffer management

The RMT signal is sent in 10-pixel chunks, using double-buffering to hide the latency when possible. Also: assign RMT channels sequentially.

* Total rewrite using Martin's code

* Better comments

* Fixed the timing calculation

We were not doing the conversion from ESP32 cycles to RMT cycles correctly. Now it all works!

* Added Martin's changes

* Removed confusing comments

* Added my name!

* Fixed ESP32 compile problem

On ESP platforms the dev kit provides the function __cxa_pure_virtual, so there is no need to define it.

* honor WAIT_TIME

for chipsets that need it (for example TM1829)

* Better interrupt handling

Suggested by @h3ndrik : allocated the interrupt once at the initialization and then just turn it on and off. This is the strategy that the ESP32 core uses also.

* Major refactoring

Two major changes to the RMT driver. First, I realized that we can have only one interrupt handler attached to the RMT peripheral, so it needs to be able to handle all of the attached strips. To accomplish this, I store each ClocklessController in an array indexed by its RMT channel. The interrupt handler can then take the channel that triggered it and index into the array to get the right controller.

The second major change is that I replaced all of the explicit bit twiddling of the RMT configuration with calls to the proper functions in ESP32 core. That should make the code more stable if the core changes.

* Fixed the interrupt dispatch

Since the interrupt handler is global for all channels, we need to store not just the controller, but also the buffer refill function for each strip.

* Added a demo

This version of DemoReel100 spins off a separate task on core 0 that just performs the FastLED.show() operations. Regular code running on core 1 (the default for Arduino) signals this task to request a show().

* Avoid unnecessary timeouts

Replaced a 500ms delay in the show task with MAX_DELAY. There's really no point in timing out (and crashing the program) just because the application hasn't called show.

* Parallel output

Reworked the code again in order to support parallel output, which is now the default mode. You can also now ask it to use the built-in RMT driver if you have other parts of your code that need the RMT peripheral.

Two #defines control choices -- put either or both of these before including FastLED.h:

#define FASTLED_RMT_CORE_DRIVER

Uses the ESP core RMT driver. To do this, though, it allocates a big buffer to hold all of the pixel bits, so there is a memory and compute cost.

#define FASTLED_RMT_SERIAL_OUTPUT

Force serial output of each strip.

* Documentation

Describing the implementation and the compile-time switches

* Removing files that should not be there

* Fixed synchronization

The previous checkin had bugs in the synchronization that caused problems in parallel mode when strips are different lengths.

* Fixed a stupid bug

Made the code bullet-proof in a few ways, but most importantly fixed a terrible integer underflow bug in the code that fills the RMT buffer.

* Another major overhaul

The big change in this version is the ability to support more than 8 controllers. Instead of assigning RMT channels to controllers in a fixed mapping, channels are assigned on the fly, allowing the driver to reuse channels as they become available.

* Oops

Didn't mean to check these in.

* Fixed built-in driver mode

Fixed the code so that it works with the built-in RMT driver. There's nothing special to do to enable it -- just #define FASTLED_RMT_BUILTIN_DRIVER true

* Cleanup

Fixing some documentation and configuration stuff

* Rewrite of fastpin

I've been needing to rewrite fastpin_esp32.h for the ESP32 ports and masks. This file also makes sure we don't use pins that won't work, even with clockless chips like the WS2812.

* Got rid of tabs

Which were making the code ugly.

* Minor tweaks

Added proper definitions for port() and toggle() to use the GPIO.out register. Changed the pin number test to avoid unnecessary conditions.

* Allow TX and RX pins

* Fixed pin access methods

This should be the right set of definitions -- consistent with the other platforms.

* Experimental

Do not merge this code

* Change pixel buffering

The previous version of this code saved a copy of the PixelController every time show() is called. It appears that this causes massive memory fragmentation, eventually locking up the processor. This new version saves the pixel data in a separate buffer that is allocated only one time.

* Some rearranging of the code

Nothing major here. Added comments and put the functions in a better order. Added some defensive programming.

* New I2S driver for ESP32

* Two updates: (1) avoid copying all the pixel data up front, and (2) use T1, T2, and T3 to encode the pulse patterns

* Trying to get the timing better.

* This version seems pretty solid

* Yves' very cool changes to improve performance and accuracy

* First attempt at merging the two drivers

* Complete I2S implementation, with switch to choose it over the RMT

* Removed the old header

* This was added by accident

* Changed the RMT driver so that it no longer needs to copy all the pixel data up front, which was slowing it down and using a lot of extra memory

* Fixed a typo: make sure to load a different channel each time

* Commented out all the Serial.print output

* fillHalfRMTBuffer needs to be virtual in order to preserve the color channel order from the template parameters

* Two mods: (1) convert CPU cycles directly to RMT cycles without going through nanoseconds; (2) improve performance of fill buffer by using a pointer into RMT memory rather than a bunch of indexes, and by inlining the getNextByte routine.

* Minor cleanup

* Cleaned up the conversion of CPU cycles to RMT cycles
---
 platforms/esp/32/clockless_rmt_esp32.h | 43 +++++++++++++-------------
 1 file changed, 22 insertions(+), 21 deletions(-)

diff --git a/platforms/esp/32/clockless_rmt_esp32.h b/platforms/esp/32/clockless_rmt_esp32.h
index accd60081c..6368bc9328 100644
--- a/platforms/esp/32/clockless_rmt_esp32.h
+++ b/platforms/esp/32/clockless_rmt_esp32.h
@@ -118,21 +118,16 @@ __attribute__ ((always_inline)) inline static uint32_t __clock_cycles() {
 #define DIVIDER             2 /* 4, 8 still seem to work, but timings become marginal */
 #define MAX_PULSES         32 /* A channel has a 64 "pulse" buffer - we use half per pass */
 
-// -- Convert ESP32 cycles back into nanoseconds
-#define ESPCLKS_TO_NS(_CLKS) (((long)(_CLKS) * 1000L) / F_CPU_MHZ)
-
-// -- Convert nanoseconds into RMT cycles
-#define F_CPU_RMT       (  80000000L)
-#define NS_PER_SEC      (1000000000L)
-#define CYCLES_PER_SEC  (F_CPU_RMT/DIVIDER)
-#define NS_PER_CYCLE    ( NS_PER_SEC / CYCLES_PER_SEC )
-#define NS_TO_CYCLES(n) ( (n) / NS_PER_CYCLE )
-
-// -- Convert ESP32 cycles to RMT cycles
-#define TO_RMT_CYCLES(_CLKS) NS_TO_CYCLES(ESPCLKS_TO_NS(_CLKS))    
+// -- Convert ESP32 CPU cycles to RMT device cycles, taking into account the divider
+#define F_CPU_RMT                   (  80000000L)
+#define RMT_CYCLES_PER_SEC          (F_CPU_RMT/DIVIDER)
+#define RMT_CYCLES_PER_ESP_CYCLE    (F_CPU / RMT_CYCLES_PER_SEC)
+#define ESP_TO_RMT_CYCLES(n)        ((n) / (RMT_CYCLES_PER_ESP_CYCLE))
 
 // -- Number of cycles to signal the strip to latch
-#define RMT_RESET_DURATION NS_TO_CYCLES(50000)
+#define NS_PER_CYCLE                ( 1000000000L / RMT_CYCLES_PER_SEC )
+#define NS_TO_CYCLES(n)             ( (n) / NS_PER_CYCLE )
+#define RMT_RESET_DURATION          NS_TO_CYCLES(50000)
 
 // -- Core or custom driver
 #ifndef FASTLED_RMT_BUILTIN_DRIVER
@@ -190,6 +185,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
     PixelController<RGB_ORDER> * mPixels;
     int            mCurColor;
     uint16_t       mCurPulse;
+    volatile uint32_t * mRMT_mem_ptr;
 
     // -- Buffer to hold all of the pulses. For the version that uses
     //    the RMT driver built into the ESP core.
@@ -208,17 +204,17 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         //    according to the timing values given in the template instantiation
         // T1H
         mOne.level0 = 1;
-        mOne.duration0 = TO_RMT_CYCLES(T1+T2);
+        mOne.duration0 = ESP_TO_RMT_CYCLES(T1+T2); // TO_RMT_CYCLES(T1+T2);
         // T1L
         mOne.level1 = 0;
-        mOne.duration1 = TO_RMT_CYCLES(T3);
+        mOne.duration1 = ESP_TO_RMT_CYCLES(T3); // TO_RMT_CYCLES(T3);
 
         // T0H
         mZero.level0 = 1;
-        mZero.duration0 = TO_RMT_CYCLES(T1);
+        mZero.duration0 = ESP_TO_RMT_CYCLES(T1); // TO_RMT_CYCLES(T1);
         // T0L
         mZero.level1 = 0;
-        mZero.duration1 = TO_RMT_CYCLES(T2 + T3);
+        mZero.duration1 = ESP_TO_RMT_CYCLES(T2+T3); // TO_RMT_CYCLES(T2 + T3);
 
         gControllers[gNumControllers] = this;
         gNumControllers++;
@@ -414,6 +410,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         
             // -- Initialize the counters that keep track of where we are in
             //    the pixel data.
+            mRMT_mem_ptr = & (RMTMEM.chan[mRMT_channel].data32[0].val);
             mCurPulse = 0;
             mCurColor = 0;
 
@@ -494,7 +491,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         }
     }
 
-    uint8_t IRAM_ATTR getNextByte()
+    uint8_t IRAM_ATTR getNextByte() __attribute__ ((always_inline))
     {
         uint8_t byte;
 
@@ -543,7 +540,8 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
             // rmt_item32_t value corresponding to the buffered bit value
             for (register uint32_t j = 0; j < 8; j++) {
                 uint32_t val = (byteval & 0x80000000L) ? one_val : zero_val;
-                RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = val;
+                * mRMT_mem_ptr++ = val;
+                // Replaces: RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = val;
                 byteval <<= 1;
                 mCurPulse++;
             }
@@ -554,15 +552,18 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         //    RMT buffer with 0's, which signals to the device that we're done.
         if ( ! mPixels->has(1) ) {
             while (pulses < 32) {
-                RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = 0;
+                * mRMT_mem_ptr++ = 0;
+                // Replaces: RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = 0;
                 mCurPulse++;
                 pulses++;
             }
         }
         
         // -- When we have filled the back half the buffer, reset the position to the first half
-        if (mCurPulse >= MAX_PULSES*2)
+        if (mCurPulse >= MAX_PULSES*2) {
+            mRMT_mem_ptr = & (RMTMEM.chan[mRMT_channel].data32[0].val);
             mCurPulse = 0;
+        }            
     }
 };
 

From 3e9628634c448697c9b756fda950be7b03817c65 Mon Sep 17 00:00:00 2001
From: Daniel Garcia <dgarcia@dgarcia.net>
Date: Sun, 18 Aug 2019 16:34:57 -0700
Subject: [PATCH 092/204] Fix #868 for Teensy LC

---
 platforms/arm/kl26/led_sysdefs_arm_kl26.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/platforms/arm/kl26/led_sysdefs_arm_kl26.h b/platforms/arm/kl26/led_sysdefs_arm_kl26.h
index 466d72953c..575e639992 100644
--- a/platforms/arm/kl26/led_sysdefs_arm_kl26.h
+++ b/platforms/arm/kl26/led_sysdefs_arm_kl26.h
@@ -13,7 +13,7 @@
 
 // Default to allowing interrupts
 #ifndef FASTLED_ALLOW_INTERRUPTS
-#define FASTLED_ALLOW_INTERRUPTS 1
+// #define FASTLED_ALLOW_INTERRUPTS 1
 #endif
 
 #if FASTLED_ALLOW_INTERRUPTS == 1

From 24311f4fac7e3dfd28c34e134950d92dbbd103a4 Mon Sep 17 00:00:00 2001
From: Daniel Garcia <dgarcia@dgarcia.net>
Date: Sun, 18 Aug 2019 16:36:26 -0700
Subject: [PATCH 093/204] Spinning 3.3.1 release to get some bug fixes in

---
 FastLED.h          | 6 +++---
 library.json       | 2 +-
 library.properties | 2 +-
 release_notes.md   | 6 ++++++
 4 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/FastLED.h b/FastLED.h
index 48a6446336..cafbdb7815 100644
--- a/FastLED.h
+++ b/FastLED.h
@@ -8,12 +8,12 @@
 #define FASTLED_HAS_PRAGMA_MESSAGE
 #endif
 
-#define FASTLED_VERSION 3003000
+#define FASTLED_VERSION 3003001
 #ifndef FASTLED_INTERNAL
 #  ifdef FASTLED_HAS_PRAGMA_MESSAGE
-#    pragma message "FastLED version 3.003.000"
+#    pragma message "FastLED version 3.003.001"
 #  else
-#    warning FastLED version 3.003.000  (Not really a warning, just telling you here.)
+#    warning FastLED version 3.003.001  (Not really a warning, just telling you here.)
 #  endif
 #endif
 
diff --git a/library.json b/library.json
index 312e29bf26..b95708ab64 100644
--- a/library.json
+++ b/library.json
@@ -18,7 +18,7 @@
         "type": "git",
         "url": "https://github.com/FastLED/FastLED.git"
     },
-    "version": "3.3.0",
+    "version": "3.3.1",
     "license": "MIT",
     "homepage": "http://fastled.io",
     "frameworks": "arduino",
diff --git a/library.properties b/library.properties
index 8f2dd95d57..b6f22eb893 100644
--- a/library.properties
+++ b/library.properties
@@ -1,5 +1,5 @@
 name=FastLED
-version=3.3.0
+version=3.3.1
 author=Daniel Garcia
 maintainer=Daniel Garcia <dgarcia@fastled.io>
 sentence=Multi-platform library for controlling dozens of different types of LEDs along with optimized math, effect, and noise functions.
diff --git a/release_notes.md b/release_notes.md
index aa858dc014..c18c95ca50 100644
--- a/release_notes.md
+++ b/release_notes.md
@@ -1,3 +1,9 @@
+FastLED 3.3.1
+=============
+
+* Fix teensy build issue 
+* Bring in sam's RMT timing fix
+
 FastLED 3.3.0
 ==============
 * Preliminary Teensy 4 support

From 3c5484c336230f8346f2fd6ed8fb5a18ce835edc Mon Sep 17 00:00:00 2001
From: Daniel Garcia <dgarcia@dgarcia.net>
Date: Fri, 23 Aug 2019 16:26:46 -0700
Subject: [PATCH 094/204] Fix #870 -- make sure that the spi clock times, now
 that the template parameter is a 32-bit integer, do not wrap around to super
 huge numbers

---
 fastspi_bitbang.h | 6 +++---
 release_notes.md  | 5 +++++
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/fastspi_bitbang.h b/fastspi_bitbang.h
index 70795e8b3c..019b6dc0ec 100644
--- a/fastspi_bitbang.h
+++ b/fastspi_bitbang.h
@@ -118,10 +118,10 @@ class AVRSoftwareSPIOutput {
 	#define CLOCK_HI_DELAY do { delayNanoseconds((DELAY_NS/4)); } while(0);
 	#define CLOCK_LO_DELAY do { delayNanoseconds((DELAY_NS/4)); } while(0);
 #else
-	#define MIN_DELAY (NS(35) - 3)
+	#define MIN_DELAY ((NS(35)>3) ? (NS(35) - 3) : 1)
 
-	#define CLOCK_HI_DELAY do { delaycycles<MIN_DELAY>(); delaycycles<(((SPI_SPEED-6) / 2) - MIN_DELAY)>(); } while(0);
-	#define CLOCK_LO_DELAY do { delaycycles<(((SPI_SPEED-6) / 4))>(); } while(0);
+	#define CLOCK_HI_DELAY do { delaycycles<MIN_DELAY>(); delaycycles<((SPI_SPEED > 10) ? (((SPI_SPEED-6) / 2) - MIN_DELAY) : (SPI_SPEED))>(); } while(0);
+	#define CLOCK_LO_DELAY do { delaycycles<((SPI_SPEED > 10) ? ((SPI_SPEED-6) / 2) : (SPI_SPEED))>(); } while(0);
 #endif
 
 	// write the BIT'th bit out via spi, setting the data pin then strobing the clcok
diff --git a/release_notes.md b/release_notes.md
index c18c95ca50..158737c5ad 100644
--- a/release_notes.md
+++ b/release_notes.md
@@ -1,3 +1,8 @@
+FastLED 3.3.2pre
+=============
+
+* Fix APA102 compile error #870 
+
 FastLED 3.3.1
 =============
 

From 8ac3dd7f00e933a376530ecf86d360d167e1b82a Mon Sep 17 00:00:00 2001
From: Daniel Garcia <danielgarcia@gmail.com>
Date: Sun, 25 Aug 2019 16:58:19 -0700
Subject: [PATCH 095/204] Defpin cleanup (#866)

* Bring fastpin_avr in line with standard defpin macros (to simplify porting document notes)

* checkpoint - bring all the arm and esp platforms in line w/defpin macro naming/ordering

* checkpoint - update PORTING.md to include information around just adding pin definitions if needed

* Kick all the pin definitions to allow for some runtime querying of ports, and tweak pintest to have it provide pin definitions for platforms that have port definitions but might be missing pin specifics (e.g. not-yet-supported AVR platforms)
---
 PORTING.md                                    |  26 +
 examples/Pintest/Pintest.ino                  |  79 ++-
 fastpin.h                                     |  22 +
 platforms/arm/d21/fastpin_arm_d21.h           | 142 ++---
 platforms/arm/d51/fastpin_arm_d51.h           |  52 +-
 platforms/arm/k20/fastpin_arm_k20.h           |  26 +-
 platforms/arm/k66/fastpin_arm_k66.h           |  40 +-
 platforms/arm/kl26/fastpin_arm_kl26.h         |  22 +-
 platforms/arm/mxrt1062/fastpin_arm_mxrt1062.h |  28 +-
 platforms/arm/nrf51/fastpin_arm_nrf51.h       |  20 +-
 platforms/arm/nrf52/fastpin_arm_nrf52.h       |  22 +-
 .../arm/nrf52/fastpin_arm_nrf52_variants.h    | 538 +++++++++---------
 platforms/arm/sam/fastpin_arm_sam.h           |  65 +--
 platforms/arm/stm32/fastpin_arm_stm32.h       | 144 +++--
 platforms/avr/fastpin_avr.h                   | 268 +++++----
 platforms/esp/32/fastpin_esp32.h              |  67 ++-
 platforms/esp/8266/fastpin_esp8266.h          |  58 +-
 17 files changed, 886 insertions(+), 733 deletions(-)

diff --git a/PORTING.md b/PORTING.md
index 2b4ade2ed7..2f925ab2d8 100644
--- a/PORTING.md
+++ b/PORTING.md
@@ -1,5 +1,31 @@
 =New platform porting guide=
 
+== Fast porting for a new board on existing hardware ==
+
+Sometimes "porting" FastLED simply consists of supplying new pin definitions for the given platform.  For example, platforms/avr/fastpin_avr.h contains various pin definitions for all the AVR variant chipsets/boards that FastLED supports.  Defining a set of pins involves setting up a set of definitions - for example here's one full set from the avr fastpin file:
+
+```
+#elif defined(__AVR_ATmega1284P__) || defined(__AVR_ATmega644P__)
+
+_FL_IO(A); _FL_IO(B); _FL_IO(C); _FL_IO(D);
+
+#define MAX_PIN 31
+_FL_DEFPIN(0, 0, B); _FL_DEFPIN(1, 1, B); _FL_DEFPIN(2, 2, B); _FL_DEFPIN(3, 3, B);
+_FL_DEFPIN(4, 4, B); _FL_DEFPIN(5, 5, B); _FL_DEFPIN(6, 6, B); _FL_DEFPIN(7, 7, B);
+_FL_DEFPIN(8, 0, D); _FL_DEFPIN(9, 1, D); _FL_DEFPIN(10, 2, D); _FL_DEFPIN(11, 3, D);
+_FL_DEFPIN(12, 4, D); _FL_DEFPIN(13, 5, D); _FL_DEFPIN(14, 6, D); _FL_DEFPIN(15, 7, D);
+_FL_DEFPIN(16, 0, C); _FL_DEFPIN(17, 1, C); _FL_DEFPIN(18, 2, C); _FL_DEFPIN(19, 3, C);
+_FL_DEFPIN(20, 4, C); _FL_DEFPIN(21, 5, C); _FL_DEFPIN(22, 6, C); _FL_DEFPIN(23, 7, C);
+_FL_DEFPIN(24, 0, A); _FL_DEFPIN(25, 1, A); _FL_DEFPIN(26, 2, A); _FL_DEFPIN(27, 3, A);
+_FL_DEFPIN(28, 4, A); _FL_DEFPIN(29, 5, A); _FL_DEFPIN(30, 6, A); _FL_DEFPIN(31, 7, A);
+
+#define HAS_HARDWARE_PIN_SUPPORT 1
+```
+
+The ```_FL_IO``` macro is used to define the port registers for the platform while the ```_FL_DEFPIN``` macro is used to define pins.  The parameters to the macro are the pin number, the bit on the port that represents that pin, and the port identifier itself.  On some platforms, like the AVR, ports are identified by letter.  On other platforms, like arm, ports are identified by number.
+
+The ```HAS_HARDWARE_PIN_SUPPORT``` define tells the rest of the FastLED library that there is hardware pin support available.  There may be other platform specific defines for things like hardware SPI ports and such.
+
 == Setting up the basic files/folders ==
 
 * Create platform directory (e.g. platforms/arm/kl26)
diff --git a/examples/Pintest/Pintest.ino b/examples/Pintest/Pintest.ino
index f0a0dadc43..a8141520a7 100644
--- a/examples/Pintest/Pintest.ino
+++ b/examples/Pintest/Pintest.ino
@@ -94,12 +94,12 @@ template<uint8_t PIN> void CheckPin()
 {
 	CheckPin<PIN - 1>();
 
-	RwReg *systemThinksPortIs = portOutputRegister(digitalPinToPort(PIN));
+	void *systemThinksPortIs = (void*)portOutputRegister(digitalPinToPort(PIN));
 	RwReg systemThinksMaskIs = digitalPinToBitMask(PIN);
 
 	Serial.print("Pin "); Serial.print(PIN); Serial.print(": Port ");
 
-	if(systemThinksPortIs == FastPin<PIN>::port()) {
+	if(systemThinksPortIs == (void*)FastPin<PIN>::port()) {
 		Serial.print("valid & mask ");
 	} else {
 		Serial.print("invalid, is "); Serial.print(getPort((void*)FastPin<PIN>::port())); Serial.print(" should be ");
@@ -114,8 +114,68 @@ template<uint8_t PIN> void CheckPin()
 	}
 }
 
-template<> void CheckPin<-1> () {}
+template<> void CheckPin<255> () {}
+
+
+template<uint8_t _PORT> const char *_GetPinPort(void *ptr) {
+	if (__FL_PORT_INFO<_PORT>::hasPort() && (ptr == (void*)__FL_PORT_INFO<_PORT>::portAddr())) {
+		return __FL_PORT_INFO<_PORT>::portName();
+	} else {
+		return _GetPinPort<_PORT - 1>(ptr);
+	}
+}
+template<> const char *_GetPinPort<-1>(void *ptr) {
+	return NULL;
+}
+
+const char *GetPinPort(void *ptr) {
+	return _GetPinPort<'Z'>(ptr);
+}
+
+static uint8_t pcount = 0;
+
+
+template<uint8_t PIN> void PrintPins() {
+	PrintPins<PIN - 1>();
+
+	RwReg *systemThinksPortIs = portOutputRegister(digitalPinToPort(PIN));
+	RwReg systemThinksMaskIs = digitalPinToBitMask(PIN);
+
+	int maskBit = 0;
+	while(systemThinksMaskIs > 1) { systemThinksMaskIs >>= 1; maskBit++; }
 
+	const char *pinport = GetPinPort((void*)systemThinksPortIs);
+	if (pinport) {
+		Serial.print("__FL_DEFPIN("); Serial.print(PIN);
+		Serial.print(","); Serial.print(maskBit);
+		Serial.print(","); Serial.print(pinport);
+		Serial.print("); ");
+		pcount++;
+		if(pcount == 4) { pcount = 0; Serial.println(""); }
+	} else {
+		// Serial.print("Not found for pin "); Serial.println(PIN);
+	}
+}
+
+template<> void PrintPins<0>() {
+	RwReg *systemThinksPortIs = portOutputRegister(digitalPinToPort(0));
+	RwReg systemThinksMaskIs = digitalPinToBitMask(0);
+
+	int maskBit = 0;
+	while(systemThinksMaskIs > 1) { systemThinksMaskIs >>= 1; maskBit++; }
+
+	const char *pinport = GetPinPort((void*)systemThinksPortIs);
+	if (pinport) {
+		Serial.print("__FL_DEFPIN("); Serial.print(0);
+		Serial.print(","); Serial.print(maskBit);
+		Serial.print(","); Serial.print(pinport);
+		Serial.print("); ");
+		pcount++;
+		if(pcount == 4) { pcount = 0; Serial.println(""); }
+	}
+}
+
+int counter = 0;
 void setup() {
 	delay(5000);
     Serial.begin(38400);
@@ -123,8 +183,17 @@ void setup() {
 }
 
 void loop() {
+	Serial.println(counter);
+
+#ifdef MAX_PIN
 	CheckPin<MAX_PIN>();
-	delay(100000);
+#endif
 
-	Serial.print("GPIO_1_DR is: "); Serial.print(getPort((void*)&(GPIO1_DR)));
+	Serial.println("-----");
+#ifdef NUM_DIGITAL_PINS
+	PrintPins<NUM_DIGITAL_PINS>();
+#endif
+	Serial.println("------");
+
+	delay(100000);
 }
diff --git a/fastpin.h b/fastpin.h
index beb3a2d819..ed2b8e7ebf 100644
--- a/fastpin.h
+++ b/fastpin.h
@@ -241,6 +241,28 @@ template<uint8_t PIN> class FastPinBB : public FastPin<PIN> {};
 typedef volatile uint32_t & reg32_t;
 typedef volatile uint32_t * ptr_reg32_t;
 
+// Utility templates for tracking down information about pins and ports
+template<uint8_t port> struct __FL_PORT_INFO {
+	static bool hasPort() { return 0; }
+	static const char *portName() { return "--"; }
+	static const void *portAddr() { return NULL; }
+};
+
+// Give us our instantiations for defined ports - we're going to abuse this later for
+// auto discovery of pin/port mappings for new variants.  Use _FL_DEFINE_PORT for ports that
+// are numeric in nature, e.g. GPIO0, GPIO1.  Use _FL_DEFINE_PORT3 for ports that are letters.
+// The first parameter will be the letter, the second parameter will be an integer/counter of smoe kind
+// (this is because attempts to turn macro parameters into character constants break in some compilers)
+#define _FL_DEFINE_PORT(L, BASE) template<> struct __FL_PORT_INFO<L> { static bool hasPort() { return 1; } \
+										static const char *portName() { return #L; } \
+										typedef BASE __t_baseType;  \
+										static const void *portAddr() { return (void*)&__t_baseType::r(); } };
+
+#define _FL_DEFINE_PORT3(L, LC, BASE) template<> struct __FL_PORT_INFO<LC> { static bool hasPort() { return 1; } \
+										static const char *portName() { return #L; } \
+										typedef BASE __t_baseType;  \
+										static const void *portAddr() { return (void*)&__t_baseType::r(); } };
+
 FASTLED_NAMESPACE_END
 
 #pragma GCC diagnostic pop
diff --git a/platforms/arm/d21/fastpin_arm_d21.h b/platforms/arm/d21/fastpin_arm_d21.h
index 997fb06af4..84b0738c3b 100644
--- a/platforms/arm/d21/fastpin_arm_d21.h
+++ b/platforms/arm/d21/fastpin_arm_d21.h
@@ -57,19 +57,19 @@ template<uint8_t PIN, uint8_t _BIT, uint32_t _MASK, int _GRP> class _ARMPIN {
 #define _R(T) struct __gen_struct_ ## T
 #define _RD32(T) struct __gen_struct_ ## T { static __attribute__((always_inline)) inline volatile PortGroup * r() { return T; } };
 
-#define _IO32(L) _RD32(GPIO ## L)
+#define _FL_IO(L) _RD32(GPIO ## L)
 
-#define _DEFPIN_ARM(PIN, L, BIT) template<> class FastPin<PIN> : public _ARMPIN<PIN, BIT, 1 << BIT, L> {};
+#define _FL_DEFPIN(PIN, BIT, L) template<> class FastPin<PIN> : public _ARMPIN<PIN, BIT, 1 << BIT, L> {};
 
 // Actual pin definitions
 #if defined(ARDUINO_SAMD_CIRCUITPLAYGROUND_EXPRESS)
 
 #define MAX_PIN 17
-_DEFPIN_ARM( 8,1,23);
-_DEFPIN_ARM( 0,1, 9); _DEFPIN_ARM( 1,1, 8); _DEFPIN_ARM( 2,1, 2); _DEFPIN_ARM( 3,1, 3);
-_DEFPIN_ARM( 6,0, 5); _DEFPIN_ARM( 9,0, 6); _DEFPIN_ARM(10,0, 7); _DEFPIN_ARM(12,0, 2);
-_DEFPIN_ARM(A6,1, 9); _DEFPIN_ARM(A7,1, 8); _DEFPIN_ARM(A5,1, 2); _DEFPIN_ARM(A4,1, 3);
-_DEFPIN_ARM(A1,0, 5); _DEFPIN_ARM(A2,0, 6); _DEFPIN_ARM(A3,0, 7); _DEFPIN_ARM(A0,0, 2);
+_FL_DEFPIN( 8,23,1);
+_FL_DEFPIN( 0, 9,1); _FL_DEFPIN( 1, 8,1); _FL_DEFPIN( 2, 2,1); _FL_DEFPIN( 3, 3,1);
+_FL_DEFPIN( 6, 5,0); _FL_DEFPIN( 9, 6,0); _FL_DEFPIN(10, 7,0); _FL_DEFPIN(12, 2,0);
+_FL_DEFPIN(A6, 9,1); _FL_DEFPIN(A7, 8,1); _FL_DEFPIN(A5, 2,1); _FL_DEFPIN(A4, 3,1);
+_FL_DEFPIN(A1, 5,0); _FL_DEFPIN(A2, 6,0); _FL_DEFPIN(A3, 7,0); _FL_DEFPIN(A0, 2,0);
 
 #define HAS_HARDWARE_PIN_SUPPORT 1
 
@@ -78,19 +78,19 @@ _DEFPIN_ARM(A1,0, 5); _DEFPIN_ARM(A2,0, 6); _DEFPIN_ARM(A3,0, 7); _DEFPIN_ARM(A0
 
 #define MAX_PIN 20
 // 0 & 1
-_DEFPIN_ARM( 0, 0, 9);    _DEFPIN_ARM( 1, 0, 10);
+_FL_DEFPIN( 0, 9, 0);    _FL_DEFPIN( 1, 10, 0);
 // 2, 3, 4
-_DEFPIN_ARM( 2, 0, 14);   _DEFPIN_ARM( 3, 0, 11);   _DEFPIN_ARM( 4, 0, 8);
+_FL_DEFPIN( 2, 14, 0);   _FL_DEFPIN( 3, 11, 0);   _FL_DEFPIN( 4, 8, 0);
 // 5, 6, 7
-_DEFPIN_ARM( 5, 0, 15);   _DEFPIN_ARM( 6, 0, 18);   _DEFPIN_ARM( 7, 0, 0);
+_FL_DEFPIN( 5, 15, 0);   _FL_DEFPIN( 6, 18, 0);   _FL_DEFPIN( 7, 0, 0);
 // 8, 9, 10
-_DEFPIN_ARM( 8, 0, 12);   _DEFPIN_ARM( 9, 0, 19);   _DEFPIN_ARM(10, 0, 20);
+_FL_DEFPIN( 8, 12, 0);   _FL_DEFPIN( 9, 19, 0);   _FL_DEFPIN(10, 20, 0);
 // 11, 12, 13
-_DEFPIN_ARM(11, 0, 21);   _DEFPIN_ARM(12, 0, 22);   _DEFPIN_ARM(13, 0, 23);
+_FL_DEFPIN(11, 21, 0);   _FL_DEFPIN(12, 22, 0);   _FL_DEFPIN(13, 23, 0);
 // 14, 15, 16 (A0 - A2)
-_DEFPIN_ARM(14, 0,  2);   _DEFPIN_ARM(15, 1,  8);   _DEFPIN_ARM(16, 1, 9);
+_FL_DEFPIN(14,  2, 0);   _FL_DEFPIN(15,  8, 1);   _FL_DEFPIN(16, 9, 1);
 // 17, 18, 19 (A3 - A5)
-_DEFPIN_ARM(17, 0,  4);   _DEFPIN_ARM(18, 0,  5);   _DEFPIN_ARM(19, 0, 6);
+_FL_DEFPIN(17,  4, 0);   _FL_DEFPIN(18,  5, 0);   _FL_DEFPIN(19, 6, 0);
 
 #define SPI_DATA  PIN_SPI_MOSI
 #define SPI_CLOCK PIN_SPI_SCK
@@ -101,17 +101,17 @@ _DEFPIN_ARM(17, 0,  4);   _DEFPIN_ARM(18, 0,  5);   _DEFPIN_ARM(19, 0, 6);
 #elif defined(ARDUINO_SAMD_ZERO)
 
 #define MAX_PIN 42
-_DEFPIN_ARM( 0,0,10); _DEFPIN_ARM( 1,0,11); _DEFPIN_ARM( 2,0, 8); _DEFPIN_ARM( 3,0, 9);
-_DEFPIN_ARM( 4,0,14); _DEFPIN_ARM( 5,0,15); _DEFPIN_ARM( 6,0,20); _DEFPIN_ARM( 7,0,21);
-_DEFPIN_ARM( 8,0, 6); _DEFPIN_ARM( 9,0, 7); _DEFPIN_ARM(10,0,18); _DEFPIN_ARM(11,0,16);
-_DEFPIN_ARM(12,0,19); _DEFPIN_ARM(13,0,17); _DEFPIN_ARM(14,0, 2); _DEFPIN_ARM(15,1, 8);
-_DEFPIN_ARM(16,1, 9); _DEFPIN_ARM(17,0, 4); _DEFPIN_ARM(18,0, 5); _DEFPIN_ARM(19,1, 2);
-_DEFPIN_ARM(20,0,22); _DEFPIN_ARM(21,0,23); _DEFPIN_ARM(22,0,12); _DEFPIN_ARM(23,1,11);
-_DEFPIN_ARM(24,1,10); _DEFPIN_ARM(25,1, 3); _DEFPIN_ARM(26,0,27); _DEFPIN_ARM(27,0,28);
-_DEFPIN_ARM(28,0,24); _DEFPIN_ARM(29,0,25); _DEFPIN_ARM(30,1,22); _DEFPIN_ARM(31,1,23);
-_DEFPIN_ARM(32,0,22); _DEFPIN_ARM(33,0,23); _DEFPIN_ARM(34,0,19); _DEFPIN_ARM(35,0,16);
-_DEFPIN_ARM(36,0,18); _DEFPIN_ARM(37,0,17); _DEFPIN_ARM(38,0,13); _DEFPIN_ARM(39,0,21);
-_DEFPIN_ARM(40,0, 6); _DEFPIN_ARM(41,0, 7); _DEFPIN_ARM(42,0, 3);
+_FL_DEFPIN( 0,10,0); _FL_DEFPIN( 1,11,0); _FL_DEFPIN( 2, 8,0); _FL_DEFPIN( 3, 9,0);
+_FL_DEFPIN( 4,14,0); _FL_DEFPIN( 5,15,0); _FL_DEFPIN( 6,20,0); _FL_DEFPIN( 7,21,0);
+_FL_DEFPIN( 8, 6,0); _FL_DEFPIN( 9, 7,0); _FL_DEFPIN(10,18,0); _FL_DEFPIN(11,16,0);
+_FL_DEFPIN(12,19,0); _FL_DEFPIN(13,17,0); _FL_DEFPIN(14, 2,0); _FL_DEFPIN(15, 8,1);
+_FL_DEFPIN(16, 9,1); _FL_DEFPIN(17, 4,0); _FL_DEFPIN(18, 5,0); _FL_DEFPIN(19, 2,1);
+_FL_DEFPIN(20,22,0); _FL_DEFPIN(21,23,0); _FL_DEFPIN(22,12,0); _FL_DEFPIN(23,11,1);
+_FL_DEFPIN(24,10,1); _FL_DEFPIN(25, 3,1); _FL_DEFPIN(26,27,0); _FL_DEFPIN(27,28,0);
+_FL_DEFPIN(28,24,0); _FL_DEFPIN(29,25,0); _FL_DEFPIN(30,22,1); _FL_DEFPIN(31,23,1);
+_FL_DEFPIN(32,22,0); _FL_DEFPIN(33,23,0); _FL_DEFPIN(34,19,0); _FL_DEFPIN(35,16,0);
+_FL_DEFPIN(36,18,0); _FL_DEFPIN(37,17,0); _FL_DEFPIN(38,13,0); _FL_DEFPIN(39,21,0);
+_FL_DEFPIN(40, 6,0); _FL_DEFPIN(41, 7,0); _FL_DEFPIN(42, 3,0);
 
 #define SPI_DATA 24
 #define SPI_CLOCK 23
@@ -121,21 +121,21 @@ _DEFPIN_ARM(40,0, 6); _DEFPIN_ARM(41,0, 7); _DEFPIN_ARM(42,0, 3);
 #elif defined(ARDUINO_SODAQ_AUTONOMO)
 
 #define MAX_PIN 56
-_DEFPIN_ARM( 0,0, 9); _DEFPIN_ARM( 1,0,10); _DEFPIN_ARM( 2,0,11); _DEFPIN_ARM( 3,1,10);
-_DEFPIN_ARM( 4,1,11); _DEFPIN_ARM( 5,1,12); _DEFPIN_ARM( 6,1,13); _DEFPIN_ARM( 7,1,14);
-_DEFPIN_ARM( 8,1,15); _DEFPIN_ARM( 9,0,14); _DEFPIN_ARM(10,0,15); _DEFPIN_ARM(11,0,16);
-_DEFPIN_ARM(12,0,17); _DEFPIN_ARM(13,0,18); _DEFPIN_ARM(14,0,19); _DEFPIN_ARM(15,1,16);
-_DEFPIN_ARM(16,0, 8); _DEFPIN_ARM(17,0,28); _DEFPIN_ARM(18,1,17); _DEFPIN_ARM(19,0, 2);
-_DEFPIN_ARM(20,0, 6); _DEFPIN_ARM(21,0, 5); _DEFPIN_ARM(22,0, 4); _DEFPIN_ARM(23,1, 9);
-_DEFPIN_ARM(24,1, 8); _DEFPIN_ARM(25,1, 7); _DEFPIN_ARM(26,1, 6); _DEFPIN_ARM(27,1, 5);
-_DEFPIN_ARM(28,1, 4); _DEFPIN_ARM(29,0, 7); _DEFPIN_ARM(30,1, 3); _DEFPIN_ARM(31,1, 2);
-_DEFPIN_ARM(32,1, 1); _DEFPIN_ARM(33,1, 0); _DEFPIN_ARM(34,0, 3); _DEFPIN_ARM(35,0, 3);
-_DEFPIN_ARM(36,1,30); _DEFPIN_ARM(37,1,31); _DEFPIN_ARM(38,1,22); _DEFPIN_ARM(39,1,23);
-_DEFPIN_ARM(40,0,12); _DEFPIN_ARM(41,0,13); _DEFPIN_ARM(42,0,22); _DEFPIN_ARM(43,0,23);
-_DEFPIN_ARM(44,0,20); _DEFPIN_ARM(45,0,21); _DEFPIN_ARM(46,0,27); _DEFPIN_ARM(47,0,24);
-_DEFPIN_ARM(48,0,25); _DEFPIN_ARM(49,1,13); _DEFPIN_ARM(50,1,14); _DEFPIN_ARM(51,0,17);
-_DEFPIN_ARM(52,0,18); _DEFPIN_ARM(53,1,12); _DEFPIN_ARM(54,1,13); _DEFPIN_ARM(55,1,14);
-_DEFPIN_ARM(56,1,15);
+_FL_DEFPIN( 0, 9,0); _FL_DEFPIN( 1,10,0); _FL_DEFPIN( 2,11,0); _FL_DEFPIN( 3,10,1);
+_FL_DEFPIN( 4,11,1); _FL_DEFPIN( 5,12,1); _FL_DEFPIN( 6,13,1); _FL_DEFPIN( 7,14,1);
+_FL_DEFPIN( 8,15,1); _FL_DEFPIN( 9,14,0); _FL_DEFPIN(10,15,0); _FL_DEFPIN(11,16,0);
+_FL_DEFPIN(12,17,0); _FL_DEFPIN(13,18,0); _FL_DEFPIN(14,19,0); _FL_DEFPIN(15,16,1);
+_FL_DEFPIN(16, 8,0); _FL_DEFPIN(17,28,0); _FL_DEFPIN(18,17,1); _FL_DEFPIN(19, 2,0);
+_FL_DEFPIN(20, 6,0); _FL_DEFPIN(21, 5,0); _FL_DEFPIN(22, 4,0); _FL_DEFPIN(23, 9,1);
+_FL_DEFPIN(24, 8,1); _FL_DEFPIN(25, 7,1); _FL_DEFPIN(26, 6,1); _FL_DEFPIN(27, 5,1);
+_FL_DEFPIN(28, 4,1); _FL_DEFPIN(29, 7,0); _FL_DEFPIN(30, 3,1); _FL_DEFPIN(31, 2,1);
+_FL_DEFPIN(32, 1,1); _FL_DEFPIN(33, 0,1); _FL_DEFPIN(34, 3,0); _FL_DEFPIN(35, 3,0);
+_FL_DEFPIN(36,30,1); _FL_DEFPIN(37,31,1); _FL_DEFPIN(38,22,1); _FL_DEFPIN(39,23,1);
+_FL_DEFPIN(40,12,0); _FL_DEFPIN(41,13,0); _FL_DEFPIN(42,22,0); _FL_DEFPIN(43,23,0);
+_FL_DEFPIN(44,20,0); _FL_DEFPIN(45,21,0); _FL_DEFPIN(46,27,0); _FL_DEFPIN(47,24,0);
+_FL_DEFPIN(48,25,0); _FL_DEFPIN(49,13,1); _FL_DEFPIN(50,14,1); _FL_DEFPIN(51,17,0);
+_FL_DEFPIN(52,18,0); _FL_DEFPIN(53,12,1); _FL_DEFPIN(54,13,1); _FL_DEFPIN(55,14,1);
+_FL_DEFPIN(56,15,1);
 
 #define SPI_DATA 44
 #define SPI_CLOCK 45
@@ -145,24 +145,24 @@ _DEFPIN_ARM(56,1,15);
 #elif defined(ARDUINO_SAMD_WINO)
 
 #define MAX_PIN 22
-_DEFPIN_ARM(  0, 0, 23); _DEFPIN_ARM(  1, 0, 22); _DEFPIN_ARM(  2, 0, 16); _DEFPIN_ARM(  3, 0, 17);
-_DEFPIN_ARM(  4, 0, 18); _DEFPIN_ARM(  5, 0, 19); _DEFPIN_ARM(  6, 0, 24); _DEFPIN_ARM(  7, 0, 25);
-_DEFPIN_ARM(  8, 0, 27); _DEFPIN_ARM(  9, 0, 28); _DEFPIN_ARM( 10, 0, 30); _DEFPIN_ARM( 11, 0, 31);
-_DEFPIN_ARM( 12, 0, 15); _DEFPIN_ARM( 13, 0, 14); _DEFPIN_ARM( 14, 0,  2); _DEFPIN_ARM( 15, 0,  3);
-_DEFPIN_ARM( 16, 0,  4); _DEFPIN_ARM( 17, 0,  5); _DEFPIN_ARM( 18, 0,  6); _DEFPIN_ARM( 19, 0,  7);
-_DEFPIN_ARM( 20, 0,  8); _DEFPIN_ARM( 21, 0,  9); _DEFPIN_ARM( 22, 0, 10); _DEFPIN_ARM( 23, 0, 11);
+_FL_DEFPIN(  0, 23, 0); _FL_DEFPIN(  1, 22, 0); _FL_DEFPIN(  2, 16, 0); _FL_DEFPIN(  3, 17, 0);
+_FL_DEFPIN(  4, 18, 0); _FL_DEFPIN(  5, 19, 0); _FL_DEFPIN(  6, 24, 0); _FL_DEFPIN(  7, 25, 0);
+_FL_DEFPIN(  8, 27, 0); _FL_DEFPIN(  9, 28, 0); _FL_DEFPIN( 10, 30, 0); _FL_DEFPIN( 11, 31, 0);
+_FL_DEFPIN( 12, 15, 0); _FL_DEFPIN( 13, 14, 0); _FL_DEFPIN( 14,  2, 0); _FL_DEFPIN( 15,  3, 0);
+_FL_DEFPIN( 16,  4, 0); _FL_DEFPIN( 17,  5, 0); _FL_DEFPIN( 18,  6, 0); _FL_DEFPIN( 19,  7, 0);
+_FL_DEFPIN( 20,  8, 0); _FL_DEFPIN( 21,  9, 0); _FL_DEFPIN( 22, 10, 0); _FL_DEFPIN( 23, 11, 0);
 
 #define HAS_HARDWARE_PIN_SUPPORT 1
 
 #elif defined(ARDUINO_SAMD_MKR1000)
 
 #define MAX_PIN 22
-_DEFPIN_ARM(  0, 0, 22); _DEFPIN_ARM(  1, 0, 23); _DEFPIN_ARM(  2, 0, 10); _DEFPIN_ARM(  3, 0, 11);
-_DEFPIN_ARM(  4, 1, 10); _DEFPIN_ARM(  5, 1, 11); _DEFPIN_ARM(  6, 0, 20); _DEFPIN_ARM(  7, 0, 21);
-_DEFPIN_ARM(  8, 0, 16); _DEFPIN_ARM(  9, 0, 17); _DEFPIN_ARM( 10, 0, 19); _DEFPIN_ARM( 11, 0,  8);
-_DEFPIN_ARM( 12, 0,  9); _DEFPIN_ARM( 13, 1, 23); _DEFPIN_ARM( 14, 1, 22); _DEFPIN_ARM( 15, 0,  2);
-_DEFPIN_ARM( 16, 1,  2); _DEFPIN_ARM( 17, 1,  3); _DEFPIN_ARM( 18, 0,  4); _DEFPIN_ARM( 19, 0,  5);
-_DEFPIN_ARM( 20, 0,  6); _DEFPIN_ARM( 21, 0,  7);
+_FL_DEFPIN(  0, 22, 0); _FL_DEFPIN(  1, 23, 0); _FL_DEFPIN(  2, 10, 0); _FL_DEFPIN(  3, 11, 0);
+_FL_DEFPIN(  4, 10, 1); _FL_DEFPIN(  5, 11, 1); _FL_DEFPIN(  6, 20, 0); _FL_DEFPIN(  7, 21, 0);
+_FL_DEFPIN(  8, 16, 0); _FL_DEFPIN(  9, 17, 0); _FL_DEFPIN( 10, 19, 0); _FL_DEFPIN( 11,  8, 0);
+_FL_DEFPIN( 12,  9, 0); _FL_DEFPIN( 13, 23, 1); _FL_DEFPIN( 14, 22, 1); _FL_DEFPIN( 15,  2, 0);
+_FL_DEFPIN( 16,  2, 1); _FL_DEFPIN( 17,  3, 1); _FL_DEFPIN( 18,  4, 0); _FL_DEFPIN( 19,  5, 0);
+_FL_DEFPIN( 20,  6, 0); _FL_DEFPIN( 21,  7, 0);
 
 #define SPI_DATA 8
 #define SPI_CLOCK 9
@@ -172,13 +172,13 @@ _DEFPIN_ARM( 20, 0,  6); _DEFPIN_ARM( 21, 0,  7);
 #elif defined(ARDUINO_SAMD_NANO_33_IOT)
 
 #define MAX_PIN 25
-_DEFPIN_ARM(  0, 0, 11); _DEFPIN_ARM(  1, 0, 10); _DEFPIN_ARM(  2, 0, 14); _DEFPIN_ARM(  3, 0,  9);
-_DEFPIN_ARM(  4, 0,  8); _DEFPIN_ARM(  5, 0, 15); _DEFPIN_ARM(  6, 0, 20); _DEFPIN_ARM(  7, 0, 21);
-_DEFPIN_ARM(  8, 0,  6); _DEFPIN_ARM(  9, 0,  7); _DEFPIN_ARM( 10, 0, 18); _DEFPIN_ARM( 11, 0, 16);
-_DEFPIN_ARM( 12, 0, 19); _DEFPIN_ARM( 13, 0, 17); _DEFPIN_ARM( 14, 0,  2); _DEFPIN_ARM( 15, 1,  8);
-_DEFPIN_ARM( 16, 1,  9); _DEFPIN_ARM( 17, 0,  4); _DEFPIN_ARM( 18, 0,  5); _DEFPIN_ARM( 19, 1,  2);
-_DEFPIN_ARM( 20, 0, 22); _DEFPIN_ARM( 21, 0, 23); _DEFPIN_ARM( 22, 0, 12); _DEFPIN_ARM( 23, 1, 10);
-_DEFPIN_ARM( 24, 1, 11);
+_FL_DEFPIN(  0, 11, 0); _FL_DEFPIN(  1, 10, 0); _FL_DEFPIN(  2, 14, 0); _FL_DEFPIN(  3,  9, 0);
+_FL_DEFPIN(  4,  8, 0); _FL_DEFPIN(  5, 15, 0); _FL_DEFPIN(  6, 20, 0); _FL_DEFPIN(  7, 21, 0);
+_FL_DEFPIN(  8,  6, 0); _FL_DEFPIN(  9,  7, 0); _FL_DEFPIN( 10, 18, 0); _FL_DEFPIN( 11, 16, 0);
+_FL_DEFPIN( 12, 19, 0); _FL_DEFPIN( 13, 17, 0); _FL_DEFPIN( 14,  2, 0); _FL_DEFPIN( 15,  8, 1);
+_FL_DEFPIN( 16,  9, 1); _FL_DEFPIN( 17,  4, 0); _FL_DEFPIN( 18,  5, 0); _FL_DEFPIN( 19,  2, 1);
+_FL_DEFPIN( 20, 22, 0); _FL_DEFPIN( 21, 23, 0); _FL_DEFPIN( 22, 12, 0); _FL_DEFPIN( 23, 10, 1);
+_FL_DEFPIN( 24, 11, 1);
 
 #define SPI_DATA 23
 #define SPI_CLOCK 24
@@ -188,16 +188,16 @@ _DEFPIN_ARM( 24, 1, 11);
 #elif defined(ARDUINO_GEMMA_M0)
 
 #define MAX_PIN 4
-_DEFPIN_ARM( 0, 0, 4); _DEFPIN_ARM( 1, 0, 2); _DEFPIN_ARM( 2, 0, 5);
-_DEFPIN_ARM( 3, 0, 0); _DEFPIN_ARM( 4, 0, 1);
+_FL_DEFPIN( 0, 4, 0); _FL_DEFPIN( 1, 2, 0); _FL_DEFPIN( 2, 5, 0);
+_FL_DEFPIN( 3, 0, 0); _FL_DEFPIN( 4, 1, 0);
 
 #define HAS_HARDWARE_PIN_SUPPORT 1
 
 #elif defined(ADAFRUIT_TRINKET_M0)
 
 #define MAX_PIN 7
-_DEFPIN_ARM( 0, 0, 8); _DEFPIN_ARM( 1, 0, 2); _DEFPIN_ARM( 2, 0, 9);
-_DEFPIN_ARM( 3, 0, 7); _DEFPIN_ARM( 4, 0, 6); _DEFPIN_ARM( 7, 0, 0); _DEFPIN_ARM( 8, 0, 1);
+_FL_DEFPIN( 0, 8, 0); _FL_DEFPIN( 1, 2, 0); _FL_DEFPIN( 2, 9, 0);
+_FL_DEFPIN( 3, 7, 0); _FL_DEFPIN( 4, 6, 0); _FL_DEFPIN( 7, 0, 0); _FL_DEFPIN( 8, 1, 0);
 
 #define SPI_DATA  4
 #define SPI_CLOCK 3
@@ -207,14 +207,14 @@ _DEFPIN_ARM( 3, 0, 7); _DEFPIN_ARM( 4, 0, 6); _DEFPIN_ARM( 7, 0, 0); _DEFPIN_ARM
 #elif defined(ADAFRUIT_ITSYBITSY_M0)
 
 #define MAX_PIN 16
-_DEFPIN_ARM( 2, 0, 14); _DEFPIN_ARM( 3, 0, 9); _DEFPIN_ARM( 4, 0, 8);
-_DEFPIN_ARM( 5, 0, 15); _DEFPIN_ARM( 6, 0, 20); _DEFPIN_ARM( 7, 0, 21);
-_DEFPIN_ARM( 8, 0, 6); _DEFPIN_ARM( 9, 0, 7); _DEFPIN_ARM( 10, 0, 18);
-_DEFPIN_ARM( 11, 0, 16); _DEFPIN_ARM( 12, 0, 19); _DEFPIN_ARM( 13, 0, 17);
-_DEFPIN_ARM( 29, 0, 10); // MOSI
-_DEFPIN_ARM( 30, 0, 11); // SCK
-_DEFPIN_ARM( 40, 0, 0); //APA102 Clock
-_DEFPIN_ARM( 41, 0, 1) //APA102 Data
+_FL_DEFPIN( 2, 14, 0); _FL_DEFPIN( 3, 9, 0); _FL_DEFPIN( 4, 8, 0);
+_FL_DEFPIN( 5, 15, 0); _FL_DEFPIN( 6, 20, 0); _FL_DEFPIN( 7, 21, 0);
+_FL_DEFPIN( 8, 6, 0); _FL_DEFPIN( 9, 7, 0); _FL_DEFPIN( 10, 18, 0);
+_FL_DEFPIN( 11, 16, 0); _FL_DEFPIN( 12, 19, 0); _FL_DEFPIN( 13, 17, 0);
+_FL_DEFPIN( 29, 10, 0); // MOSI
+_FL_DEFPIN( 30, 11, 0); // SCK
+_FL_DEFPIN( 40, 0, 0); //APA102 Clock
+_FL_DEFPIN( 41, 1, 0); //APA102 Data (bit 1 on port group 0, same pin the replaced _DEFPIN_ARM( 41, 0, 1) selected)
 
 #define SPI_DATA  29
 #define SPI_CLOCK 30
diff --git a/platforms/arm/d51/fastpin_arm_d51.h b/platforms/arm/d51/fastpin_arm_d51.h
index 5e36023d8c..dd40dbfd02 100644
--- a/platforms/arm/d51/fastpin_arm_d51.h
+++ b/platforms/arm/d51/fastpin_arm_d51.h
@@ -57,27 +57,27 @@ template<uint8_t PIN, uint8_t _BIT, uint32_t _MASK, int _GRP> class _ARMPIN {
 #define _R(T) struct __gen_struct_ ## T
 #define _RD32(T) struct __gen_struct_ ## T { static __attribute__((always_inline)) inline volatile PortGroup * r() { return T; } };
 
-#define _IO32(L) _RD32(GPIO ## L)
+#define _FL_IO(L) _RD32(GPIO ## L)
 
-#define _DEFPIN_ARM(PIN, L, BIT) template<> class FastPin<PIN> : public _ARMPIN<PIN, BIT, 1 << BIT, L> {};
+#define _FL_DEFPIN(PIN, BIT, L) template<> class FastPin<PIN> : public _ARMPIN<PIN, BIT, 1 << BIT, L> {};
 
 // Actual pin definitions
 #if defined(ADAFRUIT_ITSYBITSY_M4_EXPRESS)
 
 #define MAX_PIN 19
 // D0-D13, including D6+D8 (DotStar CLK + DATA)
-_DEFPIN_ARM( 0, 0, 16); _DEFPIN_ARM( 1, 0, 17); _DEFPIN_ARM( 2, 0,  7); _DEFPIN_ARM( 3, 1, 22);
-_DEFPIN_ARM( 4, 0, 14); _DEFPIN_ARM( 5, 0, 15); _DEFPIN_ARM( 6, 1,  2); _DEFPIN_ARM( 7, 0, 18);
-_DEFPIN_ARM( 8, 1,  3); _DEFPIN_ARM( 9, 0, 19); _DEFPIN_ARM(10, 0, 20); _DEFPIN_ARM(11, 0, 21);
-_DEFPIN_ARM(12, 0, 23); _DEFPIN_ARM(13, 0, 22);
+_FL_DEFPIN( 0, 16, 0); _FL_DEFPIN( 1, 17, 0); _FL_DEFPIN( 2,  7, 0); _FL_DEFPIN( 3, 22, 1);
+_FL_DEFPIN( 4, 14, 0); _FL_DEFPIN( 5, 15, 0); _FL_DEFPIN( 6,  2, 1); _FL_DEFPIN( 7, 18, 0);
+_FL_DEFPIN( 8,  3, 1); _FL_DEFPIN( 9, 19, 0); _FL_DEFPIN(10, 20, 0); _FL_DEFPIN(11, 21, 0);
+_FL_DEFPIN(12, 23, 0); _FL_DEFPIN(13, 22, 0);
 // A0-A5
-_DEFPIN_ARM(14, 0,  2); _DEFPIN_ARM(15, 0,  5); _DEFPIN_ARM(16, 1,  8); _DEFPIN_ARM(17, 1,  9);
-_DEFPIN_ARM(18, 0,  4); _DEFPIN_ARM(19, 0,  6); /* A6 is present in variant.h but couldn't find it on the schematic */
+_FL_DEFPIN(14,  2, 0); _FL_DEFPIN(15,  5, 0); _FL_DEFPIN(16,  8, 1); _FL_DEFPIN(17,  9, 1);
+_FL_DEFPIN(18,  4, 0); _FL_DEFPIN(19,  6, 0); /* A6 is present in variant.h but couldn't find it on the schematic */
 // SDA/SCL
-_DEFPIN_ARM(21, 0, 12); _DEFPIN_ARM(22, 0, 13);
+_FL_DEFPIN(21, 12, 0); _FL_DEFPIN(22, 13, 0);
 
 // 23..25  MISO/SCK/MOSI
-_DEFPIN_ARM(23, 1, 23); _DEFPIN_ARM(24, 0,  1); _DEFPIN_ARM(25, 0,  0);
+_FL_DEFPIN(23, 23, 1); _FL_DEFPIN(24,  1, 0); _FL_DEFPIN(25,  0, 0);
 
 #define SPI_DATA 25
 #define SPI_CLOCK 24
@@ -89,18 +89,18 @@ _DEFPIN_ARM(23, 1, 23); _DEFPIN_ARM(24, 0,  1); _DEFPIN_ARM(25, 0,  0);
 
 #define MAX_PIN 20
 // D0-D13, including D6+D8 (DotStar CLK + DATA)
-_DEFPIN_ARM( 0, 0, 23); _DEFPIN_ARM( 1, 0, 22); _DEFPIN_ARM( 2, 1,  17); _DEFPIN_ARM( 3, 1, 16);
-_DEFPIN_ARM( 4, 1, 13); _DEFPIN_ARM( 5, 1, 14); _DEFPIN_ARM( 6, 1,  15); _DEFPIN_ARM( 7, 1, 12);
-_DEFPIN_ARM( 8, 0,  21); _DEFPIN_ARM( 9, 0, 20); _DEFPIN_ARM(10, 0, 18); _DEFPIN_ARM(11, 0, 19);
-_DEFPIN_ARM(12, 0, 17); _DEFPIN_ARM(13, 0, 16);
+_FL_DEFPIN( 0, 23, 0); _FL_DEFPIN( 1, 22, 0); _FL_DEFPIN( 2,  17, 1); _FL_DEFPIN( 3, 16, 1);
+_FL_DEFPIN( 4, 13, 1); _FL_DEFPIN( 5, 14, 1); _FL_DEFPIN( 6,  15, 1); _FL_DEFPIN( 7, 12, 1);
+_FL_DEFPIN( 8,  21, 0); _FL_DEFPIN( 9, 20, 0); _FL_DEFPIN(10, 18, 0); _FL_DEFPIN(11, 19, 0);
+_FL_DEFPIN(12, 17, 0); _FL_DEFPIN(13, 16, 0);
 // A0-A5
-_DEFPIN_ARM(14, 0,  2); _DEFPIN_ARM(15, 0,  5); _DEFPIN_ARM(16, 0,  6); _DEFPIN_ARM(17, 1,  0);
-_DEFPIN_ARM(18, 1,  8); _DEFPIN_ARM(19, 1,  9); 
+_FL_DEFPIN(14,  2, 0); _FL_DEFPIN(15,  5, 0); _FL_DEFPIN(16,  6, 0); _FL_DEFPIN(17,  0, 1);
+_FL_DEFPIN(18,  8, 1); _FL_DEFPIN(19,  9, 1);
 // SDA/SCL
-_DEFPIN_ARM(22, 1, 2); _DEFPIN_ARM(23, 1, 3);
+_FL_DEFPIN(22, 2, 1); _FL_DEFPIN(23, 3, 1);
 
 // 23..25  MISO/SCK/MOSI
-_DEFPIN_ARM(24, 0, 14); _DEFPIN_ARM(25, 0,  13); _DEFPIN_ARM(26, 0,  12);
+_FL_DEFPIN(24, 14, 0); _FL_DEFPIN(25,  13, 0); _FL_DEFPIN(26,  12, 0);
 
 #define SPI_DATA 26
 #define SPI_CLOCK 25
@@ -111,17 +111,17 @@ _DEFPIN_ARM(24, 0, 14); _DEFPIN_ARM(25, 0,  13); _DEFPIN_ARM(26, 0,  12);
 
 #define MAX_PIN 19
 // D0-D13, including D8 (neopixel)  no pins 2 3
-_DEFPIN_ARM( 0, 1, 17); _DEFPIN_ARM( 1, 1, 16);
-_DEFPIN_ARM( 4, 0, 14); _DEFPIN_ARM( 5, 0, 16); _DEFPIN_ARM( 6, 0,  18);
-_DEFPIN_ARM( 8, 1,  3); _DEFPIN_ARM( 9, 0, 19); _DEFPIN_ARM(10, 0, 20); _DEFPIN_ARM(11, 0, 21);
-_DEFPIN_ARM(12, 0, 22); _DEFPIN_ARM(13, 0, 23);
+_FL_DEFPIN( 0, 17, 1); _FL_DEFPIN( 1, 16, 1);
+_FL_DEFPIN( 4, 14, 0); _FL_DEFPIN( 5, 16, 0); _FL_DEFPIN( 6,  18, 0);
+_FL_DEFPIN( 8,  3, 1); _FL_DEFPIN( 9, 19, 0); _FL_DEFPIN(10, 20, 0); _FL_DEFPIN(11, 21, 0);
+_FL_DEFPIN(12, 22, 0); _FL_DEFPIN(13, 23, 0);
 // A0-A5
-_DEFPIN_ARM(14, 0,  2); _DEFPIN_ARM(15, 0,  5); _DEFPIN_ARM(16, 1,  8); _DEFPIN_ARM(17, 1,  9);
-_DEFPIN_ARM(18, 0,  4); _DEFPIN_ARM(19, 0,  6); /* A6 is present in variant.h but couldn't find it on the schematic */
+_FL_DEFPIN(14,  2, 0); _FL_DEFPIN(15,  5, 0); _FL_DEFPIN(16,  8, 1); _FL_DEFPIN(17,  9, 1);
+_FL_DEFPIN(18,  4, 0); _FL_DEFPIN(19,  6, 0); /* A6 is present in variant.h but couldn't find it on the schematic */
 // SDA/SCL
-_DEFPIN_ARM(21, 0, 12); _DEFPIN_ARM(22, 0, 13);
+_FL_DEFPIN(21, 12, 0); _FL_DEFPIN(22, 13, 0);
 // 23..25  MISO/MOSI/SCK
-_DEFPIN_ARM(23, 1, 22); _DEFPIN_ARM(24, 1,  23); _DEFPIN_ARM(25, 0,  17);
+_FL_DEFPIN(23, 22, 1); _FL_DEFPIN(24,  23, 1); _FL_DEFPIN(25,  17, 0);
 
 #define SPI_DATA 24
 #define SPI_CLOCK 25
diff --git a/platforms/arm/k20/fastpin_arm_k20.h b/platforms/arm/k20/fastpin_arm_k20.h
index b26e56078e..736bd46174 100644
--- a/platforms/arm/k20/fastpin_arm_k20.h
+++ b/platforms/arm/k20/fastpin_arm_k20.h
@@ -78,28 +78,28 @@ template<uint8_t PIN, int _BIT, typename _PDOR, typename _PSOR, typename _PCOR,
 #define _R(T) struct __gen_struct_ ## T
 #define _RD32(T) struct __gen_struct_ ## T { static __attribute__((always_inline)) inline reg32_t r() { return T; } \
 	template<int BIT> static __attribute__((always_inline)) inline ptr_reg32_t rx() { return GPIO_BITBAND_PTR(T, BIT); } };
-#define _IO32(L) _RD32(GPIO ## L ## _PDOR); _RD32(GPIO ## L ## _PSOR); _RD32(GPIO ## L ## _PCOR); _RD32(GPIO ## L ## _PTOR); _RD32(GPIO ## L ## _PDIR); _RD32(GPIO ## L ## _PDDR);
+#define _FL_IO(L,C) _RD32(GPIO ## L ## _PDOR); _RD32(GPIO ## L ## _PSOR); _RD32(GPIO ## L ## _PCOR); _RD32(GPIO ## L ## _PTOR); _RD32(GPIO ## L ## _PDIR); _RD32(GPIO ## L ## _PDDR); _FL_DEFINE_PORT3(L,C,_R(GPIO ## L ## _PDOR));
 
-#define _DEFPIN_ARM(PIN, BIT, L) template<> class FastPin<PIN> : public _ARMPIN<PIN, 1 << BIT, _R(GPIO ## L ## _PDOR), _R(GPIO ## L ## _PSOR), _R(GPIO ## L ## _PCOR), \
+#define _FL_DEFPIN(PIN, BIT, L) template<> class FastPin<PIN> : public _ARMPIN<PIN, 1 << BIT, _R(GPIO ## L ## _PDOR), _R(GPIO ## L ## _PSOR), _R(GPIO ## L ## _PCOR), \
 																			_R(GPIO ## L ## _PTOR), _R(GPIO ## L ## _PDIR), _R(GPIO ## L ## _PDDR)> {}; \
 									template<> class FastPinBB<PIN> : public _ARMPIN_BITBAND<PIN, BIT, _R(GPIO ## L ## _PDOR), _R(GPIO ## L ## _PSOR), _R(GPIO ## L ## _PCOR), \
  																			_R(GPIO ## L ## _PTOR), _R(GPIO ## L ## _PDIR), _R(GPIO ## L ## _PDDR)> {};
 
 // Actual pin definitions
-#if defined(FASTLED_TEENSY3) && defined(CORE_TEENSY)
+_FL_IO(A,0); _FL_IO(B,1); _FL_IO(C,2); _FL_IO(D,3); _FL_IO(E,4);
 
-_IO32(A); _IO32(B); _IO32(C); _IO32(D); _IO32(E);
+#if defined(FASTLED_TEENSY3) && defined(CORE_TEENSY)
 
 #define MAX_PIN 33
-_DEFPIN_ARM(0, 16, B); _DEFPIN_ARM(1, 17, B); _DEFPIN_ARM(2, 0, D); _DEFPIN_ARM(3, 12, A);
-_DEFPIN_ARM(4, 13, A); _DEFPIN_ARM(5, 7, D); _DEFPIN_ARM(6, 4, D); _DEFPIN_ARM(7, 2, D);
-_DEFPIN_ARM(8, 3, D); _DEFPIN_ARM(9, 3, C); _DEFPIN_ARM(10, 4, C); _DEFPIN_ARM(11, 6, C);
-_DEFPIN_ARM(12, 7, C); _DEFPIN_ARM(13, 5, C); _DEFPIN_ARM(14, 1, D); _DEFPIN_ARM(15, 0, C);
-_DEFPIN_ARM(16, 0, B); _DEFPIN_ARM(17, 1, B); _DEFPIN_ARM(18, 3, B); _DEFPIN_ARM(19, 2, B);
-_DEFPIN_ARM(20, 5, D); _DEFPIN_ARM(21, 6, D); _DEFPIN_ARM(22, 1, C); _DEFPIN_ARM(23, 2, C);
-_DEFPIN_ARM(24, 5, A); _DEFPIN_ARM(25, 19, B); _DEFPIN_ARM(26, 1, E); _DEFPIN_ARM(27, 9, C);
-_DEFPIN_ARM(28, 8, C); _DEFPIN_ARM(29, 10, C); _DEFPIN_ARM(30, 11, C); _DEFPIN_ARM(31, 0, E);
-_DEFPIN_ARM(32, 18, B); _DEFPIN_ARM(33, 4, A);
+_FL_DEFPIN(0, 16, B); _FL_DEFPIN(1, 17, B); _FL_DEFPIN(2, 0, D); _FL_DEFPIN(3, 12, A);
+_FL_DEFPIN(4, 13, A); _FL_DEFPIN(5, 7, D); _FL_DEFPIN(6, 4, D); _FL_DEFPIN(7, 2, D);
+_FL_DEFPIN(8, 3, D); _FL_DEFPIN(9, 3, C); _FL_DEFPIN(10, 4, C); _FL_DEFPIN(11, 6, C);
+_FL_DEFPIN(12, 7, C); _FL_DEFPIN(13, 5, C); _FL_DEFPIN(14, 1, D); _FL_DEFPIN(15, 0, C);
+_FL_DEFPIN(16, 0, B); _FL_DEFPIN(17, 1, B); _FL_DEFPIN(18, 3, B); _FL_DEFPIN(19, 2, B);
+_FL_DEFPIN(20, 5, D); _FL_DEFPIN(21, 6, D); _FL_DEFPIN(22, 1, C); _FL_DEFPIN(23, 2, C);
+_FL_DEFPIN(24, 5, A); _FL_DEFPIN(25, 19, B); _FL_DEFPIN(26, 1, E); _FL_DEFPIN(27, 9, C);
+_FL_DEFPIN(28, 8, C); _FL_DEFPIN(29, 10, C); _FL_DEFPIN(30, 11, C); _FL_DEFPIN(31, 0, E);
+_FL_DEFPIN(32, 18, B); _FL_DEFPIN(33, 4, A);
 
 #define SPI_DATA 11
 #define SPI_CLOCK 13
diff --git a/platforms/arm/k66/fastpin_arm_k66.h b/platforms/arm/k66/fastpin_arm_k66.h
index e201096ccb..ef48396c45 100644
--- a/platforms/arm/k66/fastpin_arm_k66.h
+++ b/platforms/arm/k66/fastpin_arm_k66.h
@@ -78,35 +78,35 @@ template<uint8_t PIN, int _BIT, typename _PDOR, typename _PSOR, typename _PCOR,
 #define _R(T) struct __gen_struct_ ## T
 #define _RD32(T) struct __gen_struct_ ## T { static __attribute__((always_inline)) inline reg32_t r() { return T; } \
 	template<int BIT> static __attribute__((always_inline)) inline ptr_reg32_t rx() { return GPIO_BITBAND_PTR(T, BIT); } };
-#define _IO32(L) _RD32(GPIO ## L ## _PDOR); _RD32(GPIO ## L ## _PSOR); _RD32(GPIO ## L ## _PCOR); _RD32(GPIO ## L ## _PTOR); _RD32(GPIO ## L ## _PDIR); _RD32(GPIO ## L ## _PDDR);
+#define _FL_IO(L,C) _RD32(GPIO ## L ## _PDOR); _RD32(GPIO ## L ## _PSOR); _RD32(GPIO ## L ## _PCOR); _RD32(GPIO ## L ## _PTOR); _RD32(GPIO ## L ## _PDIR); _RD32(GPIO ## L ## _PDDR); _FL_DEFINE_PORT3(L,C,_R(GPIO ## L ## _PDOR));
 
-#define _DEFPIN_ARM(PIN, BIT, L) template<> class FastPin<PIN> : public _ARMPIN<PIN, 1 << BIT, _R(GPIO ## L ## _PDOR), _R(GPIO ## L ## _PSOR), _R(GPIO ## L ## _PCOR), \
+#define _FL_DEFPIN(PIN, BIT, L) template<> class FastPin<PIN> : public _ARMPIN<PIN, 1 << BIT, _R(GPIO ## L ## _PDOR), _R(GPIO ## L ## _PSOR), _R(GPIO ## L ## _PCOR), \
 																			_R(GPIO ## L ## _PTOR), _R(GPIO ## L ## _PDIR), _R(GPIO ## L ## _PDDR)> {}; \
 									template<> class FastPinBB<PIN> : public _ARMPIN_BITBAND<PIN, BIT, _R(GPIO ## L ## _PDOR), _R(GPIO ## L ## _PSOR), _R(GPIO ## L ## _PCOR), \
  																			_R(GPIO ## L ## _PTOR), _R(GPIO ## L ## _PDIR), _R(GPIO ## L ## _PDDR)> {};
 
+_FL_IO(A,0); _FL_IO(B,1); _FL_IO(C,2); _FL_IO(D,3); _FL_IO(E,4);
+
 // Actual pin definitions
 #if defined(FASTLED_TEENSY3) && defined(CORE_TEENSY)
 
-_IO32(A); _IO32(B); _IO32(C); _IO32(D); _IO32(E);
-
 #define MAX_PIN 63
-_DEFPIN_ARM( 0, 16, B); _DEFPIN_ARM( 1, 17, B); _DEFPIN_ARM( 2,  0, D); _DEFPIN_ARM( 3, 12, A);
-_DEFPIN_ARM( 4, 13, A); _DEFPIN_ARM( 5,  7, D); _DEFPIN_ARM( 6,  4, D); _DEFPIN_ARM( 7,  2, D);
-_DEFPIN_ARM( 8,  3, D); _DEFPIN_ARM( 9,  3, C); _DEFPIN_ARM(10,  4, C); _DEFPIN_ARM(11,  6, C);
-_DEFPIN_ARM(12,  7, C); _DEFPIN_ARM(13,  5, C); _DEFPIN_ARM(14,  1, D); _DEFPIN_ARM(15,  0, C);
-_DEFPIN_ARM(16,  0, B); _DEFPIN_ARM(17,  1, B); _DEFPIN_ARM(18,  3, B); _DEFPIN_ARM(19,  2, B);
-_DEFPIN_ARM(20,  5, D); _DEFPIN_ARM(21,  6, D); _DEFPIN_ARM(22,  1, C); _DEFPIN_ARM(23,  2, C);
-_DEFPIN_ARM(24, 26, E); _DEFPIN_ARM(25,  5, A); _DEFPIN_ARM(26, 14, A); _DEFPIN_ARM(27, 15, A);
-_DEFPIN_ARM(28, 16, A); _DEFPIN_ARM(29, 18, B); _DEFPIN_ARM(30, 19, B); _DEFPIN_ARM(31, 10, B);
-_DEFPIN_ARM(32, 11, B); _DEFPIN_ARM(33, 24, E); _DEFPIN_ARM(34, 25, E); _DEFPIN_ARM(35,  8, C);
-_DEFPIN_ARM(36,  9, C); _DEFPIN_ARM(37, 10, C); _DEFPIN_ARM(38, 11, C); _DEFPIN_ARM(39, 17, A);
-_DEFPIN_ARM(40, 28, A); _DEFPIN_ARM(41, 29, A); _DEFPIN_ARM(42, 26, A); _DEFPIN_ARM(43, 20, B);
-_DEFPIN_ARM(44, 22, B); _DEFPIN_ARM(45, 23, B); _DEFPIN_ARM(46, 21, B); _DEFPIN_ARM(47,  8, D);
-_DEFPIN_ARM(48,  9, D); _DEFPIN_ARM(49,  4, B); _DEFPIN_ARM(50,  5, B); _DEFPIN_ARM(51, 14, D);
-_DEFPIN_ARM(52, 13, D); _DEFPIN_ARM(53, 12, D); _DEFPIN_ARM(54, 15, D); _DEFPIN_ARM(55, 11, D);
-_DEFPIN_ARM(56, 10, E); _DEFPIN_ARM(57, 11, E); _DEFPIN_ARM(58,  0, E); _DEFPIN_ARM(59,  1, E);
-_DEFPIN_ARM(60,  2, E); _DEFPIN_ARM(61,  3, E); _DEFPIN_ARM(62,  4, E); _DEFPIN_ARM(63,  5, E);
+_FL_DEFPIN( 0, 16, B); _FL_DEFPIN( 1, 17, B); _FL_DEFPIN( 2,  0, D); _FL_DEFPIN( 3, 12, A);
+_FL_DEFPIN( 4, 13, A); _FL_DEFPIN( 5,  7, D); _FL_DEFPIN( 6,  4, D); _FL_DEFPIN( 7,  2, D);
+_FL_DEFPIN( 8,  3, D); _FL_DEFPIN( 9,  3, C); _FL_DEFPIN(10,  4, C); _FL_DEFPIN(11,  6, C);
+_FL_DEFPIN(12,  7, C); _FL_DEFPIN(13,  5, C); _FL_DEFPIN(14,  1, D); _FL_DEFPIN(15,  0, C);
+_FL_DEFPIN(16,  0, B); _FL_DEFPIN(17,  1, B); _FL_DEFPIN(18,  3, B); _FL_DEFPIN(19,  2, B);
+_FL_DEFPIN(20,  5, D); _FL_DEFPIN(21,  6, D); _FL_DEFPIN(22,  1, C); _FL_DEFPIN(23,  2, C);
+_FL_DEFPIN(24, 26, E); _FL_DEFPIN(25,  5, A); _FL_DEFPIN(26, 14, A); _FL_DEFPIN(27, 15, A);
+_FL_DEFPIN(28, 16, A); _FL_DEFPIN(29, 18, B); _FL_DEFPIN(30, 19, B); _FL_DEFPIN(31, 10, B);
+_FL_DEFPIN(32, 11, B); _FL_DEFPIN(33, 24, E); _FL_DEFPIN(34, 25, E); _FL_DEFPIN(35,  8, C);
+_FL_DEFPIN(36,  9, C); _FL_DEFPIN(37, 10, C); _FL_DEFPIN(38, 11, C); _FL_DEFPIN(39, 17, A);
+_FL_DEFPIN(40, 28, A); _FL_DEFPIN(41, 29, A); _FL_DEFPIN(42, 26, A); _FL_DEFPIN(43, 20, B);
+_FL_DEFPIN(44, 22, B); _FL_DEFPIN(45, 23, B); _FL_DEFPIN(46, 21, B); _FL_DEFPIN(47,  8, D);
+_FL_DEFPIN(48,  9, D); _FL_DEFPIN(49,  4, B); _FL_DEFPIN(50,  5, B); _FL_DEFPIN(51, 14, D);
+_FL_DEFPIN(52, 13, D); _FL_DEFPIN(53, 12, D); _FL_DEFPIN(54, 15, D); _FL_DEFPIN(55, 11, D);
+_FL_DEFPIN(56, 10, E); _FL_DEFPIN(57, 11, E); _FL_DEFPIN(58,  0, E); _FL_DEFPIN(59,  1, E);
+_FL_DEFPIN(60,  2, E); _FL_DEFPIN(61,  3, E); _FL_DEFPIN(62,  4, E); _FL_DEFPIN(63,  5, E);
 
 
 
diff --git a/platforms/arm/kl26/fastpin_arm_kl26.h b/platforms/arm/kl26/fastpin_arm_kl26.h
index 4c30cd784f..8b3cbdfef8 100644
--- a/platforms/arm/kl26/fastpin_arm_kl26.h
+++ b/platforms/arm/kl26/fastpin_arm_kl26.h
@@ -50,26 +50,26 @@ template<uint8_t PIN, uint32_t _MASK, typename _PDOR, typename _PSOR, typename _
 #define _R(T) struct __gen_struct_ ## T
 #define _RD32(T) struct __gen_struct_ ## T { static __attribute__((always_inline)) inline reg32_t r() { return T; } \
 template<int BIT> static __attribute__((always_inline)) inline ptr_reg32_t rx() { return GPIO_BITBAND_PTR(T, BIT); } };
-#define _IO32(L) _RD32(FGPIO ## L ## _PDOR); _RD32(FGPIO ## L ## _PSOR); _RD32(FGPIO ## L ## _PCOR); _RD32(GPIO ## L ## _PTOR); _RD32(FGPIO ## L ## _PDIR); _RD32(FGPIO ## L ## _PDDR);
+#define _FL_IO(L,C) _RD32(FGPIO ## L ## _PDOR); _RD32(FGPIO ## L ## _PSOR); _RD32(FGPIO ## L ## _PCOR); _RD32(GPIO ## L ## _PTOR); _RD32(FGPIO ## L ## _PDIR); _RD32(FGPIO ## L ## _PDDR); _FL_DEFINE_PORT3(L,C,_R(FGPIO ## L ## _PDOR));
 
-#define _DEFPIN_ARM(PIN, BIT, L) template<> class FastPin<PIN> : public _ARMPIN<PIN, 1 << BIT, _R(FGPIO ## L ## _PDOR), _R(FGPIO ## L ## _PSOR), _R(FGPIO ## L ## _PCOR), \
+#define _FL_DEFPIN(PIN, BIT, L) template<> class FastPin<PIN> : public _ARMPIN<PIN, 1 << BIT, _R(FGPIO ## L ## _PDOR), _R(FGPIO ## L ## _PSOR), _R(FGPIO ## L ## _PCOR), \
 _R(GPIO ## L ## _PTOR), _R(FGPIO ## L ## _PDIR), _R(FGPIO ## L ## _PDDR)> {}; \
 /* template<> class FastPinBB<PIN> : public _ARMPIN_BITBAND<PIN, BIT, _R(GPIO ## L ## _PDOR), _R(GPIO ## L ## _PSOR), _R(GPIO ## L ## _PCOR), \
 _R(GPIO ## L ## _PTOR), _R(GPIO ## L ## _PDIR), _R(GPIO ## L ## _PDDR)> {}; */
 
+_FL_IO(A,0); _FL_IO(B,1); _FL_IO(C,2); _FL_IO(D,3); _FL_IO(E,4);
+
 // Actual pin definitions
 #if defined(FASTLED_TEENSYLC) && defined(CORE_TEENSY)
 
-_IO32(A); _IO32(B); _IO32(C); _IO32(D); _IO32(E);
-
 #define MAX_PIN 26
-_DEFPIN_ARM(0, 16, B); _DEFPIN_ARM(1, 17, B); _DEFPIN_ARM(2, 0, D); _DEFPIN_ARM(3, 1, A);
-_DEFPIN_ARM(4, 2, A); _DEFPIN_ARM(5, 7, D); _DEFPIN_ARM(6, 4, D); _DEFPIN_ARM(7, 2, D);
-_DEFPIN_ARM(8, 3, D); _DEFPIN_ARM(9, 3, C); _DEFPIN_ARM(10, 4, C); _DEFPIN_ARM(11, 6, C);
-_DEFPIN_ARM(12, 7, C); _DEFPIN_ARM(13, 5, C); _DEFPIN_ARM(14, 1, D); _DEFPIN_ARM(15, 0, C);
-_DEFPIN_ARM(16, 0, B); _DEFPIN_ARM(17, 1, B); _DEFPIN_ARM(18, 3, B); _DEFPIN_ARM(19, 2, B);
-_DEFPIN_ARM(20, 5, D); _DEFPIN_ARM(21, 6, D); _DEFPIN_ARM(22, 1, C); _DEFPIN_ARM(23, 2, C);
-_DEFPIN_ARM(24, 20, E); _DEFPIN_ARM(25, 21, E); _DEFPIN_ARM(26, 30, E);
+_FL_DEFPIN(0, 16, B); _FL_DEFPIN(1, 17, B); _FL_DEFPIN(2, 0, D); _FL_DEFPIN(3, 1, A);
+_FL_DEFPIN(4, 2, A); _FL_DEFPIN(5, 7, D); _FL_DEFPIN(6, 4, D); _FL_DEFPIN(7, 2, D);
+_FL_DEFPIN(8, 3, D); _FL_DEFPIN(9, 3, C); _FL_DEFPIN(10, 4, C); _FL_DEFPIN(11, 6, C);
+_FL_DEFPIN(12, 7, C); _FL_DEFPIN(13, 5, C); _FL_DEFPIN(14, 1, D); _FL_DEFPIN(15, 0, C);
+_FL_DEFPIN(16, 0, B); _FL_DEFPIN(17, 1, B); _FL_DEFPIN(18, 3, B); _FL_DEFPIN(19, 2, B);
+_FL_DEFPIN(20, 5, D); _FL_DEFPIN(21, 6, D); _FL_DEFPIN(22, 1, C); _FL_DEFPIN(23, 2, C);
+_FL_DEFPIN(24, 20, E); _FL_DEFPIN(25, 21, E); _FL_DEFPIN(26, 30, E);
 
 #define SPI_DATA 11
 #define SPI_CLOCK 13
diff --git a/platforms/arm/mxrt1062/fastpin_arm_mxrt1062.h b/platforms/arm/mxrt1062/fastpin_arm_mxrt1062.h
index e1b15674b7..38c8841023 100644
--- a/platforms/arm/mxrt1062/fastpin_arm_mxrt1062.h
+++ b/platforms/arm/mxrt1062/fastpin_arm_mxrt1062.h
@@ -45,30 +45,30 @@ template<uint8_t PIN, uint32_t _BIT, uint32_t _MASK, typename _GPIO_DR, typename
 
 #define _R(T) struct __gen_struct_ ## T
 #define _RD32(T) struct __gen_struct_ ## T { static __attribute__((always_inline)) inline reg32_t r() { return T; } };
-#define _IO32(L) _RD32(GPIO ## L ## _DR); _RD32(GPIO ## L ## _DR_SET); _RD32(GPIO ## L ## _DR_CLEAR); _RD32(GPIO ## L ## _DR_TOGGLE);
+#define _FL_IO(L) _RD32(GPIO ## L ## _DR); _RD32(GPIO ## L ## _DR_SET); _RD32(GPIO ## L ## _DR_CLEAR); _RD32(GPIO ## L ## _DR_TOGGLE); _FL_DEFINE_PORT(L, _R(GPIO ## L ## _DR));
 
 // From the teensy core - it looks like there's the "default set" of port registers at GPIO1-5 - but then there
 // are a mirrored set for GPIO1-4 at GPIO6-9, which in the teensy core is referred to as "fast" - while the pin definitiosn
 // at https://forum.pjrc.com/threads/54711-Teensy-4-0-First-Beta-Test?p=193716&viewfull=1#post193716
 // refer to GPIO1-4, we're going to use GPIO6-9 in the definitions below because the fast registers are what
 // the teensy core is using internally
-#define _DEFPIN_T4(PIN, L, BIT) template<> class FastPin<PIN> : public _ARMPIN<PIN, BIT, 1 << BIT, _R(GPIO ## L ## _DR), _R(GPIO ## L ## _DR_SET), _R(GPIO ## L ## _DR_CLEAR), _R(GPIO ## L ## _DR_TOGGLE)> {};
+#define _FL_DEFPIN(PIN, BIT, L) template<> class FastPin<PIN> : public _ARMPIN<PIN, BIT, 1 << BIT, _R(GPIO ## L ## _DR), _R(GPIO ## L ## _DR_SET), _R(GPIO ## L ## _DR_CLEAR), _R(GPIO ## L ## _DR_TOGGLE)> {};
 
 #if defined(FASTLED_TEENSY4) && defined(CORE_TEENSY)
-_IO32(1); _IO32(2); _IO32(3); _IO32(4); _IO32(5);
-_IO32(6); _IO32(7); _IO32(8); _IO32(9);
+_FL_IO(1); _FL_IO(2); _FL_IO(3); _FL_IO(4); _FL_IO(5);
+_FL_IO(6); _FL_IO(7); _FL_IO(8); _FL_IO(9);
 
 #define MAX_PIN 39
-_DEFPIN_T4( 0,6, 3); _DEFPIN_T4( 1,6, 2); _DEFPIN_T4( 2,9, 4); _DEFPIN_T4( 3,9, 5);
-_DEFPIN_T4( 4,9, 6); _DEFPIN_T4( 5,9, 8); _DEFPIN_T4( 6,7,10); _DEFPIN_T4( 7,7,17);
-_DEFPIN_T4( 8,7,16); _DEFPIN_T4( 9,7,11); _DEFPIN_T4(10,7, 0); _DEFPIN_T4(11,7, 2);
-_DEFPIN_T4(12,7, 1); _DEFPIN_T4(13,7, 3); _DEFPIN_T4(14,6,18); _DEFPIN_T4(15,6,19);
-_DEFPIN_T4(16,6,23); _DEFPIN_T4(17,6,22); _DEFPIN_T4(18,6,17); _DEFPIN_T4(19,6,16);
-_DEFPIN_T4(20,6,26); _DEFPIN_T4(21,6,27); _DEFPIN_T4(22,6,24); _DEFPIN_T4(23,6,25);
-_DEFPIN_T4(24,6,12); _DEFPIN_T4(25,6,13); _DEFPIN_T4(26,6,30); _DEFPIN_T4(27,6,31);
-_DEFPIN_T4(28,8,18); _DEFPIN_T4(29,9,31); _DEFPIN_T4(30,8,23); _DEFPIN_T4(31,8,22);
-_DEFPIN_T4(32,7,12); _DEFPIN_T4(33,9, 7); _DEFPIN_T4(34,8,15); _DEFPIN_T4(35,8,14);
-_DEFPIN_T4(36,8,13); _DEFPIN_T4(37,8,12); _DEFPIN_T4(38,8,17); _DEFPIN_T4(39,8,16);
+_FL_DEFPIN( 0, 3,6); _FL_DEFPIN( 1, 2,6); _FL_DEFPIN( 2, 4,9); _FL_DEFPIN( 3, 5,9);
+_FL_DEFPIN( 4, 6,9); _FL_DEFPIN( 5, 8,9); _FL_DEFPIN( 6,10,7); _FL_DEFPIN( 7,17,7);
+_FL_DEFPIN( 8,16,7); _FL_DEFPIN( 9,11,7); _FL_DEFPIN(10, 0,7); _FL_DEFPIN(11, 2,7);
+_FL_DEFPIN(12, 1,7); _FL_DEFPIN(13, 3,7); _FL_DEFPIN(14,18,6); _FL_DEFPIN(15,19,6);
+_FL_DEFPIN(16,23,6); _FL_DEFPIN(17,22,6); _FL_DEFPIN(18,17,6); _FL_DEFPIN(19,16,6);
+_FL_DEFPIN(20,26,6); _FL_DEFPIN(21,27,6); _FL_DEFPIN(22,24,6); _FL_DEFPIN(23,25,6);
+_FL_DEFPIN(24,12,6); _FL_DEFPIN(25,13,6); _FL_DEFPIN(26,30,6); _FL_DEFPIN(27,31,6);
+_FL_DEFPIN(28,18,8); _FL_DEFPIN(29,31,9); _FL_DEFPIN(30,23,8); _FL_DEFPIN(31,22,8);
+_FL_DEFPIN(32,12,7); _FL_DEFPIN(33, 7,9); _FL_DEFPIN(34,15,8); _FL_DEFPIN(35,14,8);
+_FL_DEFPIN(36,13,8); _FL_DEFPIN(37,12,8); _FL_DEFPIN(38,17,8); _FL_DEFPIN(39,16,8);
 
 #define HAS_HARDWARE_PIN_SUPPORT
 
diff --git a/platforms/arm/nrf51/fastpin_arm_nrf51.h b/platforms/arm/nrf51/fastpin_arm_nrf51.h
index 4125f9a38e..3d02edc1ac 100644
--- a/platforms/arm/nrf51/fastpin_arm_nrf51.h
+++ b/platforms/arm/nrf51/fastpin_arm_nrf51.h
@@ -49,7 +49,7 @@ _RD32_NRF(NR_OUTSET);
 _RD32_NRF(NR_OUTCLR);
 _RD32_NRF(NR_OUT);
 
-#define _DEFPIN_ARM(PIN) template<> class FastPin<PIN> : public _ARMPIN<PIN, 1 << PIN, \
+#define _FL_DEFPIN(PIN) template<> class FastPin<PIN> : public _ARMPIN<PIN, 1 << PIN, \
   _R(NR_DIRSET), _R(NR_DIRCLR), _R(NR_OUTSET), _R(NR_OUTCLR), _R(NR_OUT)> {};
 #else
 
@@ -98,19 +98,19 @@ template<uint8_t PIN, uint32_t _MASK> class _ARMPIN {
 };
 
 
-#define _DEFPIN_ARM(PIN) template<> class FastPin<PIN> : public _ARMPIN<PIN, 1 << PIN> {};
+#define _FL_DEFPIN(PIN) template<> class FastPin<PIN> : public _ARMPIN<PIN, 1 << PIN> {};
 #endif
 
 // Actual pin definitions
 #define MAX_PIN 31
-_DEFPIN_ARM(0); _DEFPIN_ARM(1); _DEFPIN_ARM(2); _DEFPIN_ARM(3);
-_DEFPIN_ARM(4); _DEFPIN_ARM(5); _DEFPIN_ARM(6); _DEFPIN_ARM(7);
-_DEFPIN_ARM(8); _DEFPIN_ARM(9); _DEFPIN_ARM(10); _DEFPIN_ARM(11);
-_DEFPIN_ARM(12); _DEFPIN_ARM(13); _DEFPIN_ARM(14); _DEFPIN_ARM(15);
-_DEFPIN_ARM(16); _DEFPIN_ARM(17); _DEFPIN_ARM(18); _DEFPIN_ARM(19);
-_DEFPIN_ARM(20); _DEFPIN_ARM(21); _DEFPIN_ARM(22); _DEFPIN_ARM(23);
-_DEFPIN_ARM(24); _DEFPIN_ARM(25); _DEFPIN_ARM(26); _DEFPIN_ARM(27);
-_DEFPIN_ARM(28); _DEFPIN_ARM(29); _DEFPIN_ARM(30); _DEFPIN_ARM(31);
+_FL_DEFPIN(0); _FL_DEFPIN(1); _FL_DEFPIN(2); _FL_DEFPIN(3);
+_FL_DEFPIN(4); _FL_DEFPIN(5); _FL_DEFPIN(6); _FL_DEFPIN(7);
+_FL_DEFPIN(8); _FL_DEFPIN(9); _FL_DEFPIN(10); _FL_DEFPIN(11);
+_FL_DEFPIN(12); _FL_DEFPIN(13); _FL_DEFPIN(14); _FL_DEFPIN(15);
+_FL_DEFPIN(16); _FL_DEFPIN(17); _FL_DEFPIN(18); _FL_DEFPIN(19);
+_FL_DEFPIN(20); _FL_DEFPIN(21); _FL_DEFPIN(22); _FL_DEFPIN(23);
+_FL_DEFPIN(24); _FL_DEFPIN(25); _FL_DEFPIN(26); _FL_DEFPIN(27);
+_FL_DEFPIN(28); _FL_DEFPIN(29); _FL_DEFPIN(30); _FL_DEFPIN(31);
 
 #define HAS_HARDWARE_PIN_SUPPORT
 
diff --git a/platforms/arm/nrf52/fastpin_arm_nrf52.h b/platforms/arm/nrf52/fastpin_arm_nrf52.h
index 60fb359416..7526000445 100644
--- a/platforms/arm/nrf52/fastpin_arm_nrf52.h
+++ b/platforms/arm/nrf52/fastpin_arm_nrf52.h
@@ -1,13 +1,13 @@
 #ifndef __FASTPIN_ARM_NRF52_H
 #define __FASTPIN_ARM_NRF52_H
 
-    
+
 /*
 //
 // Background:
 // ===========
 // the nRF52 has more than 32 ports, and thus must support
-// two distinct GPIO port registers.  
+// two distinct GPIO port registers.
 //
 // For the nRF52 series, the structure to control the port is
 // `NRF_GPIO_Type`, with separate addresses mapped for set, clear, etc.
@@ -18,18 +18,18 @@
 //     #define NRF_P0        ((NRF_GPIO_Type*)NRF_P0_BASE)
 //     #define NRF_P1        ((NRF_GPIO_Type*)NRF_P1_BASE)
 //
-// Therefore, ideally, the _DEFPIN_ARM() macro would simply
+// Therefore, ideally, the _FL_DEFPIN() macro would simply
 // conditionally pass either NRF_P0 or NRF_P1 to the underlying
 // FastPin<> template class class.
 //
 // The "pin" provided to the FastLED<> template (and which
-// the _DEFPIN_ARM() macro specializes for valid pins) is NOT
+// the _FL_DEFPIN() macro specializes for valid pins) is NOT
 // the microcontroller port.pin, but the Arduino digital pin.
 // Some boards have an identity mapping (e.g., nRF52832 Feather)
-// but most do not.  Therefore, the _DEFPIN_ARM() macro
+// but most do not.  Therefore, the _FL_DEFPIN() macro
 // must translate the Arduino pin to the mcu port.pin.
 //
-// 
+//
 // Difficulties:
 // =============
 // The goal is to avoid any such lookups, using compile-time
@@ -281,16 +281,16 @@ template <uint32_t _MASK, typename _PORT, uint8_t _PORT_NUMBER, uint8_t _PIN_NUM
 //
 // BOARD_PIN can be either the pin portion of a port.pin, or the combined NRF_GPIO_PIN_MAP() number.
 // For example both the following two defines refer to P1.15 (pin 47) as Arduino pin 3:
-//     _DEFPIN_ARM(3, 1, 15);
-//     _DEFPIN_ARM(3, 1, 47);
+//     _FL_DEFPIN(3, 15, 1);
+//     _FL_DEFPIN(3, 47, 1);
 //
 // Similarly, the following defines are all equivalent:
 //     _DEFPIN_ARM_IDENTITY_P1(47);
-//     _DEFPIN_ARM(47, 1, 15);
-//     _DEFPIN_ARM(47, 1, 47);
+//     _FL_DEFPIN(47, 15, 1);
+//     _FL_DEFPIN(47, 47, 1);
 //
 
-#define _DEFPIN_ARM(ARDUINO_PIN, BOARD_PORT, BOARD_PIN)  \
+#define _FL_DEFPIN(ARDUINO_PIN, BOARD_PIN, BOARD_PORT)   \
     template<> class FastPin<ARDUINO_PIN> :              \
     public _ARMPIN<                                      \
         1u << (BOARD_PIN & 31u),                         \
diff --git a/platforms/arm/nrf52/fastpin_arm_nrf52_variants.h b/platforms/arm/nrf52/fastpin_arm_nrf52_variants.h
index b3b9ff9935..9bb07721fd 100644
--- a/platforms/arm/nrf52/fastpin_arm_nrf52_variants.h
+++ b/platforms/arm/nrf52/fastpin_arm_nrf52_variants.h
@@ -59,48 +59,48 @@
     #define MAX_PIN (33u) // 34 if wanting to use NFC1 test point
 
     // Arduino pins 0..7
-    _DEFPIN_ARM( 0, 0, 25); // D0  is P0.25 -- UART TX
-    //_DEFPIN_ARM( 1, 0, 24); // D1  is P0.24 -- UART RX
-    _DEFPIN_ARM( 2, 0, 10); // D2  is P0.10 -- NFC2
-    _DEFPIN_ARM( 3, 1, 47); // D3  is P1.15 -- PIN_LED1 (red)
-    _DEFPIN_ARM( 4, 1, 42); // D4  is P1.10 -- PIN_LED2 (blue)
-    _DEFPIN_ARM( 5, 1, 40); // D5  is P1.08 -- SPI/SS
-    _DEFPIN_ARM( 6, 0,  7); // D6  is P0.07
-    _DEFPIN_ARM( 7, 1, 34); // D7  is P1.02 -- PIN_DFU (Button)
+    _FL_DEFPIN( 0, 25, 0); // D0  is P0.25 -- UART TX
+    //_FL_DEFPIN( 1, 24, 0); // D1  is P0.24 -- UART RX
+    _FL_DEFPIN( 2, 10, 0); // D2  is P0.10 -- NFC2
+    _FL_DEFPIN( 3, 47, 1); // D3  is P1.15 -- PIN_LED1 (red)
+    _FL_DEFPIN( 4, 42, 1); // D4  is P1.10 -- PIN_LED2 (blue)
+    _FL_DEFPIN( 5, 40, 1); // D5  is P1.08 -- SPI/SS
+    _FL_DEFPIN( 6,  7, 0); // D6  is P0.07
+    _FL_DEFPIN( 7, 34, 1); // D7  is P1.02 -- PIN_DFU (Button)
     
     // Arduino pins 8..15
-    _DEFPIN_ARM( 8, 0, 16); // D8  is P0.16 -- PIN_NEOPIXEL
-    _DEFPIN_ARM( 9, 0, 26); // D9  is P0.26
-    _DEFPIN_ARM(10, 0, 27); // D10 is P0.27
-    _DEFPIN_ARM(11, 0,  6); // D11 is P0.06
-    _DEFPIN_ARM(12, 0,  8); // D12 is P0.08
-    _DEFPIN_ARM(13, 1, 41); // D13 is P1.09
-    _DEFPIN_ARM(14, 0,  4); // D14 is P0.04 -- A0
-    _DEFPIN_ARM(15, 0,  5); // D15 is P0.05 -- A1
+    _FL_DEFPIN( 8, 16, 0); // D8  is P0.16 -- PIN_NEOPIXEL
+    _FL_DEFPIN( 9, 26, 0); // D9  is P0.26
+    _FL_DEFPIN(10, 27, 0); // D10 is P0.27
+    _FL_DEFPIN(11,  6, 0); // D11 is P0.06
+    _FL_DEFPIN(12,  8, 0); // D12 is P0.08
+    _FL_DEFPIN(13, 41, 1); // D13 is P1.09
+    _FL_DEFPIN(14,  4, 0); // D14 is P0.04 -- A0
+    _FL_DEFPIN(15,  5, 0); // D15 is P0.05 -- A1
 
     // Arduino pins 16..23
-    _DEFPIN_ARM(16, 0, 30); // D16 is P0.30 -- A2
-    _DEFPIN_ARM(17, 0, 28); // D17 is P0.28 -- A3
-    _DEFPIN_ARM(18, 0,  2); // D18 is P0.02 -- A4
-    _DEFPIN_ARM(19, 0,  3); // D19 is P0.03 -- A5
-    //_DEFPIN_ARM(20, 0, 29); // D20 is P0.29 -- A6 -- Connected to battery!
-    //_DEFPIN_ARM(21, 0, 31); // D21 is P0.31 -- A7 -- AREF
-    _DEFPIN_ARM(22, 0, 12); // D22 is P0.12 -- SDA
-    _DEFPIN_ARM(23, 0, 11); // D23 is P0.11 -- SCL
+    _FL_DEFPIN(16, 30, 0); // D16 is P0.30 -- A2
+    _FL_DEFPIN(17, 28, 0); // D17 is P0.28 -- A3
+    _FL_DEFPIN(18,  2, 0); // D18 is P0.02 -- A4
+    _FL_DEFPIN(19,  3, 0); // D19 is P0.03 -- A5
+    //_FL_DEFPIN(20, 29, 0); // D20 is P0.29 -- A6 -- Connected to battery!
+    //_FL_DEFPIN(21, 31, 0); // D21 is P0.31 -- A7 -- AREF
+    _FL_DEFPIN(22, 12, 0); // D22 is P0.12 -- SDA
+    _FL_DEFPIN(23, 11, 0); // D23 is P0.11 -- SCL
 
     // Arduino pins 24..31
-    _DEFPIN_ARM(24, 0, 15); // D24 is P0.15 -- PIN_SPI_MISO
-    _DEFPIN_ARM(25, 0, 13); // D25 is P0.13 -- PIN_SPI_MOSI
-    _DEFPIN_ARM(26, 0, 14); // D26 is P0.14 -- PIN_SPI_SCK
-    //_DEFPIN_ARM(27, 0, 19); // D27 is P0.19 -- PIN_QSPI_SCK
-    //_DEFPIN_ARM(28, 0, 20); // D28 is P0.20 -- PIN_QSPI_CS
-    //_DEFPIN_ARM(29, 0, 17); // D29 is P0.17 -- PIN_QSPI_DATA0
-    //_DEFPIN_ARM(30, 0, 22); // D30 is P0.22 -- PIN_QSPI_DATA1
-    //_DEFPIN_ARM(31, 0, 23); // D31 is P0.23 -- PIN_QSPI_DATA2
+    _FL_DEFPIN(24, 15, 0); // D24 is P0.15 -- PIN_SPI_MISO
+    _FL_DEFPIN(25, 13, 0); // D25 is P0.13 -- PIN_SPI_MOSI
+    _FL_DEFPIN(26, 14, 0); // D26 is P0.14 -- PIN_SPI_SCK
+    //_FL_DEFPIN(27, 19, 0); // D27 is P0.19 -- PIN_QSPI_SCK
+    //_FL_DEFPIN(28, 20, 0); // D28 is P0.20 -- PIN_QSPI_CS
+    //_FL_DEFPIN(29, 17, 0); // D29 is P0.17 -- PIN_QSPI_DATA0
+    //_FL_DEFPIN(30, 22, 0); // D30 is P0.22 -- PIN_QSPI_DATA1
+    //_FL_DEFPIN(31, 23, 0); // D31 is P0.23 -- PIN_QSPI_DATA2
 
     // Arduino pins 32..34
-    //_DEFPIN_ARM(32, 0, 21); // D32 is P0.21 -- PIN_QSPI_DATA3
-    //_DEFPIN_ARM(33, 0,  9); // D33 is NFC1, only accessible via test point
+    //_FL_DEFPIN(32, 21, 0); // D32 is P0.21 -- PIN_QSPI_DATA3
+    //_FL_DEFPIN(33,  9, 0); // D33 is NFC1, only accessible via test point
 #endif // defined (ARDUINO_NRF52840_FEATHER)
 
 // Adafruit Bluefruit nRF52840 Metro Express
@@ -113,46 +113,46 @@
     #endif
     #warning "Adafruit Bluefruit nRF52840 Metro Express is an untested board -- test and let use know your results via https://github.com/FastLED/FastLED/issues"
 
-    _DEFPIN_ARM( 0, 0, 25); // D0  is P0.25 (UART TX)
-    _DEFPIN_ARM( 1, 0, 24); // D1  is P0.24 (UART RX)
-    _DEFPIN_ARM( 2, 1, 10); // D2  is P1.10 
-    _DEFPIN_ARM( 3, 1,  4); // D3  is P1.04 
-    _DEFPIN_ARM( 4, 1, 11); // D4  is P1.11 
-    _DEFPIN_ARM( 5, 1, 12); // D5  is P1.12 
-    _DEFPIN_ARM( 6, 1, 14); // D6  is P1.14
-    _DEFPIN_ARM( 7, 0, 26); // D7  is P0.26
-    _DEFPIN_ARM( 8, 0, 27); // D8  is P0.27
-    _DEFPIN_ARM( 9, 0, 12); // D9  is P0.12
-    _DEFPIN_ARM(10, 0,  6); // D10 is P0.06 
-    _DEFPIN_ARM(11, 0,  8); // D11 is P0.08 
-    _DEFPIN_ARM(12, 1,  9); // D12 is P1.09 
-    _DEFPIN_ARM(13, 0, 14); // D13 is P0.14 
-    _DEFPIN_ARM(14, 0,  4); // D14 is P0.04 (A0)
-    _DEFPIN_ARM(15, 0,  5); // D15 is P0.05 (A1)
-    _DEFPIN_ARM(16, 0, 28); // D16 is P0.28 (A2)
-    _DEFPIN_ARM(17, 0, 30); // D17 is P0.30 (A3)
-    _DEFPIN_ARM(18, 0,  2); // D18 is P0.02 (A4)
-    _DEFPIN_ARM(19, 0,  3); // D19 is P0.03 (A5)
-    _DEFPIN_ARM(20, 0, 29); // D20 is P0.29 (A6, battery)
-    _DEFPIN_ARM(21, 0, 31); // D21 is P0.31 (A7, ARef)
-    _DEFPIN_ARM(22, 0, 15); // D22 is P0.15 (SDA)
-    _DEFPIN_ARM(23, 0, 16); // D23 is P0.16 (SCL)
-    _DEFPIN_ARM(24, 0, 11); // D24 is P0.11 (SPI MISO)
-    _DEFPIN_ARM(25, 1,  8); // D25 is P1.08 (SPI MOSI)
-    _DEFPIN_ARM(26, 0,  7); // D26 is P0.07 (SPI SCK )
-    //_DEFPIN_ARM(27, 0, 19); // D27 is P0.19 (QSPI CLK   )
-    //_DEFPIN_ARM(28, 0, 20); // D28 is P0.20 (QSPI CS    )
-    //_DEFPIN_ARM(29, 0, 17); // D29 is P0.17 (QSPI Data 0)
-    //_DEFPIN_ARM(30, 0, 23); // D30 is P0.23 (QSPI Data 1)
-    //_DEFPIN_ARM(31, 0, 22); // D31 is P0.22 (QSPI Data 2)
-    //_DEFPIN_ARM(32, 0, 21); // D32 is P0.21 (QSPI Data 3)
-    _DEFPIN_ARM(33, 1, 13); // D33 is P1.13 LED1
-    _DEFPIN_ARM(34, 1, 15); // D34 is P1.15 LED2
-    _DEFPIN_ARM(35, 0, 13); // D35 is P0.13 NeoPixel
-    _DEFPIN_ARM(36, 1,  0); // D36 is P1.02 Switch
-    _DEFPIN_ARM(37, 1,  0); // D37 is P1.00 SWO/DFU
-    _DEFPIN_ARM(38, 0,  9); // D38 is P0.09 NFC1
-    _DEFPIN_ARM(39, 0, 10); // D39 is P0.10 NFC2
+    _FL_DEFPIN( 0, 25, 0); // D0  is P0.25 (UART TX)
+    _FL_DEFPIN( 1, 24, 0); // D1  is P0.24 (UART RX)
+    _FL_DEFPIN( 2, 10, 1); // D2  is P1.10 
+    _FL_DEFPIN( 3,  4, 1); // D3  is P1.04 
+    _FL_DEFPIN( 4, 11, 1); // D4  is P1.11 
+    _FL_DEFPIN( 5, 12, 1); // D5  is P1.12 
+    _FL_DEFPIN( 6, 14, 1); // D6  is P1.14
+    _FL_DEFPIN( 7, 26, 0); // D7  is P0.26
+    _FL_DEFPIN( 8, 27, 0); // D8  is P0.27
+    _FL_DEFPIN( 9, 12, 0); // D9  is P0.12
+    _FL_DEFPIN(10,  6, 0); // D10 is P0.06 
+    _FL_DEFPIN(11,  8, 0); // D11 is P0.08 
+    _FL_DEFPIN(12,  9, 1); // D12 is P1.09 
+    _FL_DEFPIN(13, 14, 0); // D13 is P0.14 
+    _FL_DEFPIN(14,  4, 0); // D14 is P0.04 (A0)
+    _FL_DEFPIN(15,  5, 0); // D15 is P0.05 (A1)
+    _FL_DEFPIN(16, 28, 0); // D16 is P0.28 (A2)
+    _FL_DEFPIN(17, 30, 0); // D17 is P0.30 (A3)
+    _FL_DEFPIN(18,  2, 0); // D18 is P0.02 (A4)
+    _FL_DEFPIN(19,  3, 0); // D19 is P0.03 (A5)
+    _FL_DEFPIN(20, 29, 0); // D20 is P0.29 (A6, battery)
+    _FL_DEFPIN(21, 31, 0); // D21 is P0.31 (A7, ARef)
+    _FL_DEFPIN(22, 15, 0); // D22 is P0.15 (SDA)
+    _FL_DEFPIN(23, 16, 0); // D23 is P0.16 (SCL)
+    _FL_DEFPIN(24, 11, 0); // D24 is P0.11 (SPI MISO)
+    _FL_DEFPIN(25,  8, 1); // D25 is P1.08 (SPI MOSI)
+    _FL_DEFPIN(26,  7, 0); // D26 is P0.07 (SPI SCK )
+    //_FL_DEFPIN(27, 19, 0); // D27 is P0.19 (QSPI CLK   )
+    //_FL_DEFPIN(28, 20, 0); // D28 is P0.20 (QSPI CS    )
+    //_FL_DEFPIN(29, 17, 0); // D29 is P0.17 (QSPI Data 0)
+    //_FL_DEFPIN(30, 23, 0); // D30 is P0.23 (QSPI Data 1)
+    //_FL_DEFPIN(31, 22, 0); // D31 is P0.22 (QSPI Data 2)
+    //_FL_DEFPIN(32, 21, 0); // D32 is P0.21 (QSPI Data 3)
+    _FL_DEFPIN(33, 13, 1); // D33 is P1.13 LED1
+    _FL_DEFPIN(34, 15, 1); // D34 is P1.15 LED2
+    _FL_DEFPIN(35, 13, 0); // D35 is P0.13 NeoPixel
+    _FL_DEFPIN(36,  2, 1); // D36 is P1.02 Switch
+    _FL_DEFPIN(37,  0, 1); // D37 is P1.00 SWO/DFU
+    _FL_DEFPIN(38,  9, 0); // D38 is P0.09 NFC1
+    _FL_DEFPIN(39, 10, 0); // D39 is P0.10 NFC2
 #endif // defined (ARDUINO_NRF52840_METRO)
 
 // Adafruit Bluefruit on nRF52840DK PCA10056
@@ -169,52 +169,52 @@
         /* pca10056_schematic_and_pcb.pdf
            Page 3 shows the Arduino Pin to GPIO Px.xx mapping
         */
-        _DEFPIN_ARM( 0, 1,  1); // D0  is P1.01 
-        _DEFPIN_ARM( 1, 1,  2); // D1  is P1.02 
-        _DEFPIN_ARM( 2, 1,  3); // D2  is P1.03
-        _DEFPIN_ARM( 3, 1,  4); // D3  is P1.04 
-        _DEFPIN_ARM( 4, 1,  5); // D4  is P1.05 
-        _DEFPIN_ARM( 5, 1,  6); // D5  is P1.06 
-        _DEFPIN_ARM( 6, 1,  7); // D6  is P1.07 (BUTTON1 option)
-        _DEFPIN_ARM( 7, 1,  8); // D7  is P1.08 (BUTTON2 option)
-        _DEFPIN_ARM( 8, 1, 10); // D8  is P1.10 
-        _DEFPIN_ARM( 9, 1, 11); // D9  is P1.11 
-        _DEFPIN_ARM(10, 1, 12); // D10 is P1.12 
-        _DEFPIN_ARM(11, 1, 13); // D11 is P1.13 
-        _DEFPIN_ARM(12, 1, 14); // D12 is P1.14
-        _DEFPIN_ARM(13, 1, 15); // D13 is P1.15 
-        _DEFPIN_ARM(14, 0,  0); // D14 is P0.00 (if SB4 bridged)
-        _DEFPIN_ARM(15, 0,  1); // D15 is P0.01 (if SB3 bridged)
-        _DEFPIN_ARM(16, 0,  5); // D16 is P0.05 (aka AIN3, aka UART RTS)
-        _DEFPIN_ARM(17, 0,  6); // D17 is P0.06 (UART TxD)
-        _DEFPIN_ARM(18, 0,  7); // D18 is P0.07 (UART CTS default)
-        _DEFPIN_ARM(19, 0,  8); // D19 is P0.08 (UART RxD)
-        _DEFPIN_ARM(20, 0,  9); // D20 is P0.09 (NFC1)
-        _DEFPIN_ARM(21, 0, 10); // D21 is P0.10 (NFC2)
-        _DEFPIN_ARM(22, 0, 11); // D22 is P0.11 (TRACEDATA2 / BUTTON1 default)
-        _DEFPIN_ARM(23, 0, 12); // D23 is P0.12 (TRACEDATA1 / BUTTON2 default)
-        _DEFPIN_ARM(24, 0, 13); // D24 is P0.13 (LED1)
-        _DEFPIN_ARM(25, 0, 14); // D25 is P0.14 (LED2)
-        _DEFPIN_ARM(26, 0, 15); // D26 is P0.15 (LED3)
-        _DEFPIN_ARM(27, 0, 16); // D27 is P0.16 (LED4)
-        _DEFPIN_ARM(28, 0, 17); // D28 is P0.17 (QSPI !CS , unless SB13 cut)
-        // _DEFPIN_ARM(29, 0, 18); // D29 is P0.18 (RESET)
-        _DEFPIN_ARM(30, 0, 19); // D30 is P0.19 (QSPI CLK , unless SB11 cut)
-        _DEFPIN_ARM(31, 0, 20); // D31 is P0.20 (QSPI DIO0, unless SB12 cut)
-        _DEFPIN_ARM(32, 0, 21); // D32 is P0.21 (QSPI DIO1, unless SB14 cut)
-        _DEFPIN_ARM(33, 0, 22); // D33 is P0.22 (QSPI DIO2, unless SB15 cut)
-        _DEFPIN_ARM(34, 0, 23); // D34 is P0.23 (QSPI DIO3, unless SB10 cut)
-        _DEFPIN_ARM(35, 0, 24); // D35 is P0.24 (BUTTON3)
-        _DEFPIN_ARM(36, 0, 25); // D36 is P0.25 (BUTTON4)
-        _DEFPIN_ARM(37, 1, 00); // D37 is P1.00 (TRACEDATA0 / SWO)
-        _DEFPIN_ARM(38, 1, 09); // D38 is P1.09 (TRACEDATA3)
-        //_DEFPIN_ARM(??, 0,  2); // D?? is P0.02 (AREF, aka AIN0)
-        //_DEFPIN_ARM(??, 0,  3); // D?? is P0.03 (A0,   aka AIN1)
-        //_DEFPIN_ARM(??, 0,  4); // D?? is P0.04 (A1,   aka AIN2, aka UART CTS option)
-        //_DEFPIN_ARM(??, 0, 28); // D?? is P0.28 (A2,   aka AIN4)
-        //_DEFPIN_ARM(??, 0, 29); // D?? is P0.29 (A3,   aka AIN5)
-        //_DEFPIN_ARM(??, 0, 30); // D?? is P0.30 (A4,   aka AIN6)
-        //_DEFPIN_ARM(??, 0, 31); // D?? is P0.31 (A5,   aka AIN7)
+        _FL_DEFPIN( 0,  1, 1); // D0  is P1.01 
+        _FL_DEFPIN( 1,  2, 1); // D1  is P1.02 
+        _FL_DEFPIN( 2,  3, 1); // D2  is P1.03
+        _FL_DEFPIN( 3,  4, 1); // D3  is P1.04 
+        _FL_DEFPIN( 4,  5, 1); // D4  is P1.05 
+        _FL_DEFPIN( 5,  6, 1); // D5  is P1.06 
+        _FL_DEFPIN( 6,  7, 1); // D6  is P1.07 (BUTTON1 option)
+        _FL_DEFPIN( 7,  8, 1); // D7  is P1.08 (BUTTON2 option)
+        _FL_DEFPIN( 8, 10, 1); // D8  is P1.10 
+        _FL_DEFPIN( 9, 11, 1); // D9  is P1.11 
+        _FL_DEFPIN(10, 12, 1); // D10 is P1.12 
+        _FL_DEFPIN(11, 13, 1); // D11 is P1.13 
+        _FL_DEFPIN(12, 14, 1); // D12 is P1.14
+        _FL_DEFPIN(13, 15, 1); // D13 is P1.15 
+        _FL_DEFPIN(14,  0, 0); // D14 is P0.00 (if SB4 bridged)
+        _FL_DEFPIN(15,  1, 0); // D15 is P0.01 (if SB3 bridged)
+        _FL_DEFPIN(16,  5, 0); // D16 is P0.05 (aka AIN3, aka UART RTS)
+        _FL_DEFPIN(17,  6, 0); // D17 is P0.06 (UART TxD)
+        _FL_DEFPIN(18,  7, 0); // D18 is P0.07 (UART CTS default)
+        _FL_DEFPIN(19,  8, 0); // D19 is P0.08 (UART RxD)
+        _FL_DEFPIN(20,  9, 0); // D20 is P0.09 (NFC1)
+        _FL_DEFPIN(21, 10, 0); // D21 is P0.10 (NFC2)
+        _FL_DEFPIN(22, 11, 0); // D22 is P0.11 (TRACEDATA2 / BUTTON1 default)
+        _FL_DEFPIN(23, 12, 0); // D23 is P0.12 (TRACEDATA1 / BUTTON2 default)
+        _FL_DEFPIN(24, 13, 0); // D24 is P0.13 (LED1)
+        _FL_DEFPIN(25, 14, 0); // D25 is P0.14 (LED2)
+        _FL_DEFPIN(26, 15, 0); // D26 is P0.15 (LED3)
+        _FL_DEFPIN(27, 16, 0); // D27 is P0.16 (LED4)
+        _FL_DEFPIN(28, 17, 0); // D28 is P0.17 (QSPI !CS , unless SB13 cut)
+        // _FL_DEFPIN(29, 18, 0); // D29 is P0.18 (RESET)
+        _FL_DEFPIN(30, 19, 0); // D30 is P0.19 (QSPI CLK , unless SB11 cut)
+        _FL_DEFPIN(31, 20, 0); // D31 is P0.20 (QSPI DIO0, unless SB12 cut)
+        _FL_DEFPIN(32, 21, 0); // D32 is P0.21 (QSPI DIO1, unless SB14 cut)
+        _FL_DEFPIN(33, 22, 0); // D33 is P0.22 (QSPI DIO2, unless SB15 cut)
+        _FL_DEFPIN(34, 23, 0); // D34 is P0.23 (QSPI DIO3, unless SB10 cut)
+        _FL_DEFPIN(35, 24, 0); // D35 is P0.24 (BUTTON3)
+        _FL_DEFPIN(36, 25, 0); // D36 is P0.25 (BUTTON4)
+        _FL_DEFPIN(37,  0, 1); // D37 is P1.00 (TRACEDATA0 / SWO)
+        _FL_DEFPIN(38,  9, 1); // D38 is P1.09 (TRACEDATA3)
+        //_FL_DEFPIN(??,  2, 0); // D?? is P0.02 (AREF, aka AIN0)
+        //_FL_DEFPIN(??,  3, 0); // D?? is P0.03 (A0,   aka AIN1)
+        //_FL_DEFPIN(??,  4, 0); // D?? is P0.04 (A1,   aka AIN2, aka UART CTS option)
+        //_FL_DEFPIN(??, 28, 0); // D?? is P0.28 (A2,   aka AIN4)
+        //_FL_DEFPIN(??, 29, 0); // D?? is P0.29 (A3,   aka AIN5)
+        //_FL_DEFPIN(??, 30, 0); // D?? is P0.30 (A4,   aka AIN6)
+        //_FL_DEFPIN(??, 31, 0); // D?? is P0.31 (A5,   aka AIN7)
 
     #else
         /* 48 pins, defined using natural mapping in Adafruit's variant.cpp (!) */
@@ -279,33 +279,33 @@
     #endif
     #warning "Electronut labs bluey is an untested board -- test and let use know your results via https://github.com/FastLED/FastLED/issues"
 
-    _DEFPIN_ARM( 0, 0, 26); // D0  is P0.26
-    _DEFPIN_ARM( 1, 0, 27); // D1  is P0.27
-    _DEFPIN_ARM( 2, 0, 22); // D2  is P0.22 (SPI SS  )
-    _DEFPIN_ARM( 3, 0, 23); // D3  is P0.23 (SPI MOSI)
-    _DEFPIN_ARM( 4, 0, 24); // D4  is P0.24 (SPI MISO, also A3)
-    _DEFPIN_ARM( 5, 0, 25); // D5  is P0.25 (SPI SCK )
-    _DEFPIN_ARM( 6, 0, 16); // D6  is P0.16 (Button)
-    _DEFPIN_ARM( 7, 0, 19); // D7  is P0.19 (R)
-    _DEFPIN_ARM( 8, 0, 18); // D8  is P0.18 (G)
-    _DEFPIN_ARM( 9, 0, 17); // D9  is P0.17 (B)
-    _DEFPIN_ARM(10, 0, 11); // D10 is P0.11 (SCL)
-    _DEFPIN_ARM(11, 0, 12); // D11 is P0.12 (DRDYn)
-    _DEFPIN_ARM(12, 0, 13); // D12 is P0.13 (SDA)
-    _DEFPIN_ARM(13, 0, 14); // D13 is P0.17 (INT)
-    _DEFPIN_ARM(14, 0, 15); // D14 is P0.15 (INT1)
-    _DEFPIN_ARM(15, 0, 20); // D15 is P0.20 (INT2)
-    _DEFPIN_ARM(16, 0,  2); // D16 is P0.02 (A0)
-    _DEFPIN_ARM(17, 0,  3); // D17 is P0.03 (A1)
-    _DEFPIN_ARM(18, 0,  4); // D18 is P0.04 (A2)
-    _DEFPIN_ARM(19, 0, 24); // D19 is P0.24 (A3, also D4/SPI MISO) -- is this right?
-    _DEFPIN_ARM(20, 0, 29); // D20 is P0.29 (A4)
-    _DEFPIN_ARM(21, 0, 30); // D21 is P0.30 (A5)
-    _DEFPIN_ARM(22, 0, 31); // D22 is P0.31 (A6)
-    _DEFPIN_ARM(23, 0,  8); // D23 is P0.08 (RX)
-    _DEFPIN_ARM(24, 0,  6); // D24 is P0.06 (TX)
-    _DEFPIN_ARM(25, 0,  5); // D25 is P0.05 (RTS)
-    _DEFPIN_ARM(26, 0,  7); // D26 is P0.07 (CTS)
+    _FL_DEFPIN( 0, 26, 0); // D0  is P0.26
+    _FL_DEFPIN( 1, 27, 0); // D1  is P0.27
+    _FL_DEFPIN( 2, 22, 0); // D2  is P0.22 (SPI SS  )
+    _FL_DEFPIN( 3, 23, 0); // D3  is P0.23 (SPI MOSI)
+    _FL_DEFPIN( 4, 24, 0); // D4  is P0.24 (SPI MISO, also A3)
+    _FL_DEFPIN( 5, 25, 0); // D5  is P0.25 (SPI SCK )
+    _FL_DEFPIN( 6, 16, 0); // D6  is P0.16 (Button)
+    _FL_DEFPIN( 7, 19, 0); // D7  is P0.19 (R)
+    _FL_DEFPIN( 8, 18, 0); // D8  is P0.18 (G)
+    _FL_DEFPIN( 9, 17, 0); // D9  is P0.17 (B)
+    _FL_DEFPIN(10, 11, 0); // D10 is P0.11 (SCL)
+    _FL_DEFPIN(11, 12, 0); // D11 is P0.12 (DRDYn)
+    _FL_DEFPIN(12, 13, 0); // D12 is P0.13 (SDA)
+    _FL_DEFPIN(13, 14, 0); // D13 is P0.14 (INT)
+    _FL_DEFPIN(14, 15, 0); // D14 is P0.15 (INT1)
+    _FL_DEFPIN(15, 20, 0); // D15 is P0.20 (INT2)
+    _FL_DEFPIN(16,  2, 0); // D16 is P0.02 (A0)
+    _FL_DEFPIN(17,  3, 0); // D17 is P0.03 (A1)
+    _FL_DEFPIN(18,  4, 0); // D18 is P0.04 (A2)
+    _FL_DEFPIN(19, 24, 0); // D19 is P0.24 (A3, also D4/SPI MISO) -- is this right?
+    _FL_DEFPIN(20, 29, 0); // D20 is P0.29 (A4)
+    _FL_DEFPIN(21, 30, 0); // D21 is P0.30 (A5)
+    _FL_DEFPIN(22, 31, 0); // D22 is P0.31 (A6)
+    _FL_DEFPIN(23,  8, 0); // D23 is P0.08 (RX)
+    _FL_DEFPIN(24,  6, 0); // D24 is P0.06 (TX)
+    _FL_DEFPIN(25,  5, 0); // D25 is P0.05 (RTS)
+    _FL_DEFPIN(26,  7, 0); // D26 is P0.07 (CTS)
 #endif // defined(ARDUINO_ELECTRONUT_BLUEY)
 
 // Electronut labs hackaBLE
@@ -317,33 +317,33 @@
         #define __FASTPIN_ARM_NRF52_VARIANT_FOUND
     #endif
     #warning "Electronut labs hackaBLE is an untested board -- test and let use know your results via https://github.com/FastLED/FastLED/issues"
-    _DEFPIN_ARM( 0, 0, 14); // D0  is P0.14 (RX)
-    _DEFPIN_ARM( 1, 0, 13); // D1  is P0.13 (TX)
-    _DEFPIN_ARM( 2, 0, 12); // D2  is P0.12
-    _DEFPIN_ARM( 3, 0, 11); // D3  is P0.11 (SPI MOSI)
-    _DEFPIN_ARM( 4, 0,  8); // D4  is P0.08 (SPI MISO)
-    _DEFPIN_ARM( 5, 0,  7); // D5  is P0.07 (SPI SCK )
-    _DEFPIN_ARM( 6, 0,  6); // D6  is P0.06
-    _DEFPIN_ARM( 7, 0, 27); // D7  is P0.27
-    _DEFPIN_ARM( 8, 0, 26); // D8  is P0.26
-    _DEFPIN_ARM( 9, 0, 25); // D9  is P0.25
-    _DEFPIN_ARM(10, 0,  5); // D10 is P0.05 (A3)
-    _DEFPIN_ARM(11, 0,  4); // D11 is P0.04 (A2)
-    _DEFPIN_ARM(12, 0,  3); // D12 is P0.03 (A1)
-    _DEFPIN_ARM(13, 0,  2); // D13 is P0.02 (A0 / AREF)
-    _DEFPIN_ARM(14, 0, 23); // D14 is P0.23
-    _DEFPIN_ARM(15, 0, 22); // D15 is P0.22
-    _DEFPIN_ARM(16, 0, 18); // D16 is P0.18
-    _DEFPIN_ARM(17, 0, 16); // D17 is P0.16
-    _DEFPIN_ARM(18, 0, 15); // D18 is P0.15
-    _DEFPIN_ARM(19, 0, 24); // D19 is P0.24
-    _DEFPIN_ARM(20, 0, 28); // D20 is P0.28 (A4)
-    _DEFPIN_ARM(21, 0, 29); // D21 is P0.29 (A5)
-    _DEFPIN_ARM(22, 0, 30); // D22 is P0.30 (A6)
-    _DEFPIN_ARM(23, 0, 31); // D23 is P0.31 (A7)
-    _DEFPIN_ARM(24, 0, 19); // D24 is P0.19 (RED LED)
-    _DEFPIN_ARM(25, 0, 20); // D25 is P0.20 (GREEN LED)
-    _DEFPIN_ARM(26, 0, 17); // D26 is P0.17 (BLUE LED)
+    _FL_DEFPIN( 0, 14, 0); // D0  is P0.14 (RX)
+    _FL_DEFPIN( 1, 13, 0); // D1  is P0.13 (TX)
+    _FL_DEFPIN( 2, 12, 0); // D2  is P0.12
+    _FL_DEFPIN( 3, 11, 0); // D3  is P0.11 (SPI MOSI)
+    _FL_DEFPIN( 4,  8, 0); // D4  is P0.08 (SPI MISO)
+    _FL_DEFPIN( 5,  7, 0); // D5  is P0.07 (SPI SCK )
+    _FL_DEFPIN( 6,  6, 0); // D6  is P0.06
+    _FL_DEFPIN( 7, 27, 0); // D7  is P0.27
+    _FL_DEFPIN( 8, 26, 0); // D8  is P0.26
+    _FL_DEFPIN( 9, 25, 0); // D9  is P0.25
+    _FL_DEFPIN(10,  5, 0); // D10 is P0.05 (A3)
+    _FL_DEFPIN(11,  4, 0); // D11 is P0.04 (A2)
+    _FL_DEFPIN(12,  3, 0); // D12 is P0.03 (A1)
+    _FL_DEFPIN(13,  2, 0); // D13 is P0.02 (A0 / AREF)
+    _FL_DEFPIN(14, 23, 0); // D14 is P0.23
+    _FL_DEFPIN(15, 22, 0); // D15 is P0.22
+    _FL_DEFPIN(16, 18, 0); // D16 is P0.18
+    _FL_DEFPIN(17, 16, 0); // D17 is P0.16
+    _FL_DEFPIN(18, 15, 0); // D18 is P0.15
+    _FL_DEFPIN(19, 24, 0); // D19 is P0.24
+    _FL_DEFPIN(20, 28, 0); // D20 is P0.28 (A4)
+    _FL_DEFPIN(21, 29, 0); // D21 is P0.29 (A5)
+    _FL_DEFPIN(22, 30, 0); // D22 is P0.30 (A6)
+    _FL_DEFPIN(23, 31, 0); // D23 is P0.31 (A7)
+    _FL_DEFPIN(24, 19, 0); // D24 is P0.19 (RED LED)
+    _FL_DEFPIN(25, 20, 0); // D25 is P0.20 (GREEN LED)
+    _FL_DEFPIN(26, 17, 0); // D26 is P0.17 (BLUE LED)
 #endif // defined(ARDUINO_ELECTRONUT_HACKABLE)
 
 // Electronut labs hackaBLE_v2
@@ -399,31 +399,31 @@
         #define __FASTPIN_ARM_NRF52_VARIANT_FOUND
     #endif
     #warning "RedBear Blend 2 is an untested board -- test and let use know your results via https://github.com/FastLED/FastLED/issues"
-    _DEFPIN_ARM( 0, 0, 11); // D0  is P0.11
-    _DEFPIN_ARM( 1, 0, 12); // D1  is P0.12
-    _DEFPIN_ARM( 2, 0, 13); // D2  is P0.13
-    _DEFPIN_ARM( 3, 0, 14); // D3  is P0.14
-    _DEFPIN_ARM( 4, 0, 15); // D4  is P0.15
-    _DEFPIN_ARM( 5, 0, 16); // D5  is P0.16
-    _DEFPIN_ARM( 6, 0, 17); // D6  is P0.17
-    _DEFPIN_ARM( 7, 0, 18); // D7  is P0.18
-    _DEFPIN_ARM( 8, 0, 19); // D8  is P0.19
-    _DEFPIN_ARM( 9, 0, 20); // D9  is P0.20
-    _DEFPIN_ARM(10, 0, 22); // D10 is P0.22 (SPI SS  )
-    _DEFPIN_ARM(11, 0, 23); // D11 is P0.23 (SPI MOSI)
-    _DEFPIN_ARM(12, 0, 24); // D12 is P0.24 (SPI MISO)
-    _DEFPIN_ARM(13, 0, 25); // D13 is P0.25 (SPI SCK / LED)
-    _DEFPIN_ARM(14, 0,  3); // D14 is P0.03 (A0)
-    _DEFPIN_ARM(15, 0,  4); // D15 is P0.04 (A1)
-    _DEFPIN_ARM(16, 0, 28); // D16 is P0.28 (A2)
-    _DEFPIN_ARM(17, 0, 29); // D17 is P0.29 (A3)
-    _DEFPIN_ARM(18, 0, 30); // D18 is P0.30 (A4)
-    _DEFPIN_ARM(19, 0, 31); // D19 is P0.31 (A5)
-    _DEFPIN_ARM(20, 0, 26); // D20 is P0.26 (SDA)
-    _DEFPIN_ARM(21, 0, 27); // D21 is P0.27 (SCL)
-    _DEFPIN_ARM(22, 0,  8); // D22 is P0.08 (RX)
-    _DEFPIN_ARM(23, 0,  6); // D23 is P0.06 (TX)
-    _DEFPIN_ARM(24, 0,  2); // D24 is P0.02 (AREF)
+    _FL_DEFPIN( 0, 11, 0); // D0  is P0.11
+    _FL_DEFPIN( 1, 12, 0); // D1  is P0.12
+    _FL_DEFPIN( 2, 13, 0); // D2  is P0.13
+    _FL_DEFPIN( 3, 14, 0); // D3  is P0.14
+    _FL_DEFPIN( 4, 15, 0); // D4  is P0.15
+    _FL_DEFPIN( 5, 16, 0); // D5  is P0.16
+    _FL_DEFPIN( 6, 17, 0); // D6  is P0.17
+    _FL_DEFPIN( 7, 18, 0); // D7  is P0.18
+    _FL_DEFPIN( 8, 19, 0); // D8  is P0.19
+    _FL_DEFPIN( 9, 20, 0); // D9  is P0.20
+    _FL_DEFPIN(10, 22, 0); // D10 is P0.22 (SPI SS  )
+    _FL_DEFPIN(11, 23, 0); // D11 is P0.23 (SPI MOSI)
+    _FL_DEFPIN(12, 24, 0); // D12 is P0.24 (SPI MISO)
+    _FL_DEFPIN(13, 25, 0); // D13 is P0.25 (SPI SCK / LED)
+    _FL_DEFPIN(14,  3, 0); // D14 is P0.03 (A0)
+    _FL_DEFPIN(15,  4, 0); // D15 is P0.04 (A1)
+    _FL_DEFPIN(16, 28, 0); // D16 is P0.28 (A2)
+    _FL_DEFPIN(17, 29, 0); // D17 is P0.29 (A3)
+    _FL_DEFPIN(18, 30, 0); // D18 is P0.30 (A4)
+    _FL_DEFPIN(19, 31, 0); // D19 is P0.31 (A5)
+    _FL_DEFPIN(20, 26, 0); // D20 is P0.26 (SDA)
+    _FL_DEFPIN(21, 27, 0); // D21 is P0.27 (SCL)
+    _FL_DEFPIN(22,  8, 0); // D22 is P0.08 (RX)
+    _FL_DEFPIN(23,  6, 0); // D23 is P0.06 (TX)
+    _FL_DEFPIN(24,  2, 0); // D24 is P0.02 (AREF)
 #endif // defined(ARDUINO_RB_BLEND_2)
 
 // RedBear BLE Nano 2
@@ -435,18 +435,18 @@
         #define __FASTPIN_ARM_NRF52_VARIANT_FOUND
     #endif
     #warning "RedBear BLE Nano 2 is an untested board -- test and let use know your results via https://github.com/FastLED/FastLED/issues"
-    _DEFPIN_ARM( 0, 0, 30); // D0  is P0.30 (A0 / RX)
-    _DEFPIN_ARM( 1, 0, 29); // D1  is P0.29 (A1 / TX)
-    _DEFPIN_ARM( 2, 0, 28); // D2  is P0.28 (A2 / SDA)
-    _DEFPIN_ARM( 3, 0,  2); // D3  is P0.02 (A3 / SCL)
-    _DEFPIN_ARM( 4, 0,  5); // D4  is P0.05 (A4)
-    _DEFPIN_ARM( 5, 0,  4); // D5  is P0.04 (A5)
-    _DEFPIN_ARM( 6, 0,  3); // D6  is P0.03 (SPI SS  )
-    _DEFPIN_ARM( 7, 0,  6); // D7  is P0.06 (SPI MOSI)
-    _DEFPIN_ARM( 8, 0,  7); // D8  is P0.07 (SPI MISO)
-    _DEFPIN_ARM( 9, 0,  8); // D9  is P0.08 (SPI SCK )
-    // _DEFPIN_ARM(10, 0, 21); // D10 is P0.21 (RESET)
-    _DEFPIN_ARM(13, 0, 11); // D11 is P0.11 (LED)
+    _FL_DEFPIN( 0, 30, 0); // D0  is P0.30 (A0 / RX)
+    _FL_DEFPIN( 1, 29, 0); // D1  is P0.29 (A1 / TX)
+    _FL_DEFPIN( 2, 28, 0); // D2  is P0.28 (A2 / SDA)
+    _FL_DEFPIN( 3,  2, 0); // D3  is P0.02 (A3 / SCL)
+    _FL_DEFPIN( 4,  5, 0); // D4  is P0.05 (A4)
+    _FL_DEFPIN( 5,  4, 0); // D5  is P0.04 (A5)
+    _FL_DEFPIN( 6,  3, 0); // D6  is P0.03 (SPI SS  )
+    _FL_DEFPIN( 7,  6, 0); // D7  is P0.06 (SPI MOSI)
+    _FL_DEFPIN( 8,  7, 0); // D8  is P0.07 (SPI MISO)
+    _FL_DEFPIN( 9,  8, 0); // D9  is P0.08 (SPI SCK )
+    // _FL_DEFPIN(10, 21, 0); // D10 is P0.21 (RESET)
+    _FL_DEFPIN(13, 11, 0); // D13 is P0.11 (LED)
 #endif // defined(ARDUINO_RB_BLE_NANO_2)
 
 // Nordic Semiconductor nRF52 DK
@@ -458,32 +458,32 @@
         #define __FASTPIN_ARM_NRF52_VARIANT_FOUND
     #endif
     #warning "Nordic Semiconductor nRF52 DK is an untested board -- test and let use know your results via https://github.com/FastLED/FastLED/issues"
-    _DEFPIN_ARM( 0, 0, 11); // D0  is P0.11
-    _DEFPIN_ARM( 1, 0, 12); // D1  is P0.12
-    _DEFPIN_ARM( 2, 0, 13); // D2  is P0.13 (BUTTON1)
-    _DEFPIN_ARM( 3, 0, 14); // D3  is P0.14 (BUTTON2)
-    _DEFPIN_ARM( 4, 0, 15); // D4  is P0.15 (BUTTON3)
-    _DEFPIN_ARM( 5, 0, 16); // D5  is P0.16 (BUTTON4)
-    _DEFPIN_ARM( 6, 0, 17); // D6  is P0.17 (LED1)
-    _DEFPIN_ARM( 7, 0, 18); // D7  is P0.18 (LED2)
-    _DEFPIN_ARM( 8, 0, 19); // D8  is P0.19 (LED3)
-    _DEFPIN_ARM( 9, 0, 20); // D9  is P0.20 (LED4)
-    _DEFPIN_ARM(10, 0, 22); // D10 is P0.22 (SPI SS  )
-    _DEFPIN_ARM(11, 0, 23); // D11 is P0.23 (SPI MOSI)
-    _DEFPIN_ARM(12, 0, 24); // D12 is P0.24 (SPI MISO)
-    _DEFPIN_ARM(13, 0, 25); // D13 is P0.25 (SPI SCK / LED)
-    _DEFPIN_ARM(14, 0,  3); // D14 is P0.03 (A0)
-    _DEFPIN_ARM(15, 0,  4); // D15 is P0.04 (A1)
-    _DEFPIN_ARM(16, 0, 28); // D16 is P0.28 (A2)
-    _DEFPIN_ARM(17, 0, 29); // D17 is P0.29 (A3)
-    _DEFPIN_ARM(18, 0, 30); // D18 is P0.30 (A4)
-    _DEFPIN_ARM(19, 0, 31); // D19 is P0.31 (A5)
-    _DEFPIN_ARM(20, 0,  5); // D20 is P0.05 (A6)
-    _DEFPIN_ARM(21, 0,  2); // D21 is P0.02 (A7 / AREF)
-    _DEFPIN_ARM(22, 0, 26); // D22 is P0.26 (SDA)
-    _DEFPIN_ARM(23, 0, 27); // D23 is P0.27 (SCL)
-    _DEFPIN_ARM(24, 0,  8); // D24 is P0.08 (RX)
-    _DEFPIN_ARM(25, 0,  6); // D25 is P0.06 (TX)
+    _FL_DEFPIN( 0, 11, 0); // D0  is P0.11
+    _FL_DEFPIN( 1, 12, 0); // D1  is P0.12
+    _FL_DEFPIN( 2, 13, 0); // D2  is P0.13 (BUTTON1)
+    _FL_DEFPIN( 3, 14, 0); // D3  is P0.14 (BUTTON2)
+    _FL_DEFPIN( 4, 15, 0); // D4  is P0.15 (BUTTON3)
+    _FL_DEFPIN( 5, 16, 0); // D5  is P0.16 (BUTTON4)
+    _FL_DEFPIN( 6, 17, 0); // D6  is P0.17 (LED1)
+    _FL_DEFPIN( 7, 18, 0); // D7  is P0.18 (LED2)
+    _FL_DEFPIN( 8, 19, 0); // D8  is P0.19 (LED3)
+    _FL_DEFPIN( 9, 20, 0); // D9  is P0.20 (LED4)
+    _FL_DEFPIN(10, 22, 0); // D10 is P0.22 (SPI SS  )
+    _FL_DEFPIN(11, 23, 0); // D11 is P0.23 (SPI MOSI)
+    _FL_DEFPIN(12, 24, 0); // D12 is P0.24 (SPI MISO)
+    _FL_DEFPIN(13, 25, 0); // D13 is P0.25 (SPI SCK / LED)
+    _FL_DEFPIN(14,  3, 0); // D14 is P0.03 (A0)
+    _FL_DEFPIN(15,  4, 0); // D15 is P0.04 (A1)
+    _FL_DEFPIN(16, 28, 0); // D16 is P0.28 (A2)
+    _FL_DEFPIN(17, 29, 0); // D17 is P0.29 (A3)
+    _FL_DEFPIN(18, 30, 0); // D18 is P0.30 (A4)
+    _FL_DEFPIN(19, 31, 0); // D19 is P0.31 (A5)
+    _FL_DEFPIN(20,  5, 0); // D20 is P0.05 (A6)
+    _FL_DEFPIN(21,  2, 0); // D21 is P0.02 (A7 / AREF)
+    _FL_DEFPIN(22, 26, 0); // D22 is P0.26 (SDA)
+    _FL_DEFPIN(23, 27, 0); // D23 is P0.27 (SCL)
+    _FL_DEFPIN(24,  8, 0); // D24 is P0.08 (RX)
+    _FL_DEFPIN(25,  6, 0); // D25 is P0.06 (TX)
 #endif // defined(ARDUINO_NRF52_DK)
 
 // Taida Century nRF52 mini board
@@ -495,38 +495,38 @@
         #define __FASTPIN_ARM_NRF52_VARIANT_FOUND
     #endif
     #warning "Taida Century nRF52 mini board is an untested board -- test and let use know your results via https://github.com/FastLED/FastLED/issues"
-    //_DEFPIN_ARM( 0, 0, 25); // D0  is P0.xx (near radio!)
-    //_DEFPIN_ARM( 1, 0, 26); // D1  is P0.xx (near radio!)
-    //_DEFPIN_ARM( 2, 0, 27); // D2  is P0.xx (near radio!)
-    //_DEFPIN_ARM( 3, 0, 28); // D3  is P0.xx (near radio!)
-    //_DEFPIN_ARM( 4, 0, 29); // D4  is P0.xx (Not connected, near radio!)
-    //_DEFPIN_ARM( 5, 0, 30); // D5  is P0.xx (LED1, near radio!)
-    //_DEFPIN_ARM( 6, 0, 31); // D6  is P0.xx (LED2, near radio!)
-    _DEFPIN_ARM( 7, 0,  2); // D7  is P0.xx (SDA)
-    _DEFPIN_ARM( 8, 0,  3); // D8  is P0.xx (SCL)
-    _DEFPIN_ARM( 9, 0,  4); // D9  is P0.xx (BUTTON1 / NFC1)
-    _DEFPIN_ARM(10, 0,  5); // D10 is P0.xx
-    //_DEFPIN_ARM(11, 0,  0); // D11 is P0.xx (Not connected)
-    //_DEFPIN_ARM(12, 0,  1); // D12 is P0.xx (Not connected)
-    _DEFPIN_ARM(13, 0,  6); // D13 is P0.xx
-    _DEFPIN_ARM(14, 0,  7); // D14 is P0.xx
-    _DEFPIN_ARM(15, 0,  8); // D15 is P0.xx
-    //_DEFPIN_ARM(16, 0,  9); // D16 is P0.xx (Not connected)
-    //_DEFPIN_ARM(17, 0, 10); // D17 is P0.xx (NFC2, Not connected)
-    _DEFPIN_ARM(18, 0, 11); // D18 is P0.xx (RXD)
-    _DEFPIN_ARM(19, 0, 12); // D19 is P0.xx (TXD)
-    _DEFPIN_ARM(20, 0, 13); // D20 is P0.xx (SPI SS  )
-    _DEFPIN_ARM(21, 0, 14); // D21 is P0.xx (SPI MISO)
-    _DEFPIN_ARM(22, 0, 15); // D22 is P0.xx (SPI MOSI)
-    _DEFPIN_ARM(23, 0, 16); // D23 is P0.xx (SPI SCK )
-    _DEFPIN_ARM(24, 0, 17); // D24 is P0.xx (A0)
-    _DEFPIN_ARM(25, 0, 18); // D25 is P0.xx (A1)
-    _DEFPIN_ARM(26, 0, 19); // D26 is P0.xx (A2)
-    _DEFPIN_ARM(27, 0, 20); // D27 is P0.xx (A3)
-    //_DEFPIN_ARM(28, 0, 22); // D28 is P0.xx (A4, near radio!)
-    //_DEFPIN_ARM(29, 0, 23); // D29 is P0.xx (A5, near radio!)
-    _DEFPIN_ARM(30, 0, 24); // D30 is P0.xx
-    // _DEFPIN_ARM(31, 0, 21); // D31 is P0.21 (RESET)
+    //_FL_DEFPIN( 0, 25, 0); // D0  is P0.xx (near radio!)
+    //_FL_DEFPIN( 1, 26, 0); // D1  is P0.xx (near radio!)
+    //_FL_DEFPIN( 2, 27, 0); // D2  is P0.xx (near radio!)
+    //_FL_DEFPIN( 3, 28, 0); // D3  is P0.xx (near radio!)
+    //_FL_DEFPIN( 4, 29, 0); // D4  is P0.xx (Not connected, near radio!)
+    //_FL_DEFPIN( 5, 30, 0); // D5  is P0.xx (LED1, near radio!)
+    //_FL_DEFPIN( 6, 31, 0); // D6  is P0.xx (LED2, near radio!)
+    _FL_DEFPIN( 7,  2, 0); // D7  is P0.xx (SDA)
+    _FL_DEFPIN( 8,  3, 0); // D8  is P0.xx (SCL)
+    _FL_DEFPIN( 9,  4, 0); // D9  is P0.xx (BUTTON1 / NFC1)
+    _FL_DEFPIN(10,  5, 0); // D10 is P0.xx
+    //_FL_DEFPIN(11,  0, 0); // D11 is P0.xx (Not connected)
+    //_FL_DEFPIN(12,  1, 0); // D12 is P0.xx (Not connected)
+    _FL_DEFPIN(13,  6, 0); // D13 is P0.xx
+    _FL_DEFPIN(14,  7, 0); // D14 is P0.xx
+    _FL_DEFPIN(15,  8, 0); // D15 is P0.xx
+    //_FL_DEFPIN(16,  9, 0); // D16 is P0.xx (Not connected)
+    //_FL_DEFPIN(17, 10, 0); // D17 is P0.xx (NFC2, Not connected)
+    _FL_DEFPIN(18, 11, 0); // D18 is P0.xx (RXD)
+    _FL_DEFPIN(19, 12, 0); // D19 is P0.xx (TXD)
+    _FL_DEFPIN(20, 13, 0); // D20 is P0.xx (SPI SS  )
+    _FL_DEFPIN(21, 14, 0); // D21 is P0.xx (SPI MISO)
+    _FL_DEFPIN(22, 15, 0); // D22 is P0.xx (SPI MOSI)
+    _FL_DEFPIN(23, 16, 0); // D23 is P0.xx (SPI SCK )
+    _FL_DEFPIN(24, 17, 0); // D24 is P0.xx (A0)
+    _FL_DEFPIN(25, 18, 0); // D25 is P0.xx (A1)
+    _FL_DEFPIN(26, 19, 0); // D26 is P0.xx (A2)
+    _FL_DEFPIN(27, 20, 0); // D27 is P0.xx (A3)
+    //_FL_DEFPIN(28, 22, 0); // D28 is P0.xx (A4, near radio!)
+    //_FL_DEFPIN(29, 23, 0); // D29 is P0.xx (A5, near radio!)
+    _FL_DEFPIN(30, 24, 0); // D30 is P0.xx
+    // _FL_DEFPIN(31, 21, 0); // D31 is P0.21 (RESET)
 #endif // defined(ARDUINO_STCT_NRF52_minidev)
 
 // Generic nRF52832
diff --git a/platforms/arm/sam/fastpin_arm_sam.h b/platforms/arm/sam/fastpin_arm_sam.h
index 2bb7804298..339c5e75a4 100644
--- a/platforms/arm/sam/fastpin_arm_sam.h
+++ b/platforms/arm/sam/fastpin_arm_sam.h
@@ -78,49 +78,50 @@ template<uint8_t PIN, uint32_t _BIT, typename _PDOR, typename _PSOR, typename _P
 #define _R(T) struct __gen_struct_ ## T
 #define _RD32(T) struct __gen_struct_ ## T { static __attribute__((always_inline)) inline reg32_t r() { return T; } \
 	template<int BIT> static __attribute__((always_inline)) inline ptr_reg32_t rx() { return GPIO_BITBAND_PTR(T, BIT); } };
-#define DUE_IO32(L) _RD32(REG_PIO ## L ## _ODSR); _RD32(REG_PIO ## L ## _SODR); _RD32(REG_PIO ## L ## _CODR); _RD32(REG_PIO ## L ## _OER);
+#define _FL_IO(L,C) _RD32(REG_PIO ## L ## _ODSR); _RD32(REG_PIO ## L ## _SODR); _RD32(REG_PIO ## L ## _CODR); _RD32(REG_PIO ## L ## _OER); _FL_DEFINE_PORT3(L, C, _R(REG_PIO ## L ## _ODSR));
 
-#define _DEFPIN_DUE(PIN, BIT, L) template<> class FastPin<PIN> : public _DUEPIN<PIN, 1 << BIT, _R(REG_PIO ## L ## _ODSR), _R(REG_PIO ## L ## _SODR), _R(REG_PIO ## L ## _CODR), \
+#define _FL_DEFPIN(PIN, BIT, L) template<> class FastPin<PIN> : public _DUEPIN<PIN, 1 << BIT, _R(REG_PIO ## L ## _ODSR), _R(REG_PIO ## L ## _SODR), _R(REG_PIO ## L ## _CODR), \
   																			_R(GPIO ## L ## _OER)> {}; \
   								   template<> class FastPinBB<PIN> : public _DUEPIN_BITBAND<PIN, BIT, _R(REG_PIO ## L ## _ODSR), _R(REG_PIO ## L ## _SODR), _R(REG_PIO ## L ## _CODR), \
   																			_R(GPIO ## L ## _OER)> {};
 
+_FL_IO(A,0);
+_FL_IO(B,1);
+_FL_IO(C,2);
+_FL_IO(D,3);
+
 #if defined(__SAM3X8E__)
 
-DUE_IO32(A);
-DUE_IO32(B);
-DUE_IO32(C);
-DUE_IO32(D);
 
 #define MAX_PIN 78
-_DEFPIN_DUE(0, 8, A); _DEFPIN_DUE(1, 9, A); _DEFPIN_DUE(2, 25, B); _DEFPIN_DUE(3, 28, C);
-_DEFPIN_DUE(4, 26, C); _DEFPIN_DUE(5, 25, C); _DEFPIN_DUE(6, 24, C); _DEFPIN_DUE(7, 23, C);
-_DEFPIN_DUE(8, 22, C); _DEFPIN_DUE(9, 21, C); _DEFPIN_DUE(10, 29, C); _DEFPIN_DUE(11, 7, D);
-_DEFPIN_DUE(12, 8, D); _DEFPIN_DUE(13, 27, B); _DEFPIN_DUE(14, 4, D); _DEFPIN_DUE(15, 5, D);
-_DEFPIN_DUE(16, 13, A); _DEFPIN_DUE(17, 12, A); _DEFPIN_DUE(18, 11, A); _DEFPIN_DUE(19, 10, A);
-_DEFPIN_DUE(20, 12, B); _DEFPIN_DUE(21, 13, B); _DEFPIN_DUE(22, 26, B); _DEFPIN_DUE(23, 14, A);
-_DEFPIN_DUE(24, 15, A); _DEFPIN_DUE(25, 0, D); _DEFPIN_DUE(26, 1, D); _DEFPIN_DUE(27, 2, D);
-_DEFPIN_DUE(28, 3, D); _DEFPIN_DUE(29, 6, D); _DEFPIN_DUE(30, 9, D); _DEFPIN_DUE(31, 7, A);
-_DEFPIN_DUE(32, 10, D); _DEFPIN_DUE(33, 1, C); _DEFPIN_DUE(34, 2, C); _DEFPIN_DUE(35, 3, C);
-_DEFPIN_DUE(36, 4, C); _DEFPIN_DUE(37, 5, C); _DEFPIN_DUE(38, 6, C); _DEFPIN_DUE(39, 7, C);
-_DEFPIN_DUE(40, 8, C); _DEFPIN_DUE(41, 9, C); _DEFPIN_DUE(42, 19, A); _DEFPIN_DUE(43, 20, A);
-_DEFPIN_DUE(44, 19, C); _DEFPIN_DUE(45, 18, C); _DEFPIN_DUE(46, 17, C); _DEFPIN_DUE(47, 16, C);
-_DEFPIN_DUE(48, 15, C); _DEFPIN_DUE(49, 14, C); _DEFPIN_DUE(50, 13, C); _DEFPIN_DUE(51, 12, C);
-_DEFPIN_DUE(52, 21, B); _DEFPIN_DUE(53, 14, B); _DEFPIN_DUE(54, 16, A); _DEFPIN_DUE(55, 24, A);
-_DEFPIN_DUE(56, 23, A); _DEFPIN_DUE(57, 22, A); _DEFPIN_DUE(58, 6, A); _DEFPIN_DUE(59, 4, A);
-_DEFPIN_DUE(60, 3, A); _DEFPIN_DUE(61, 2, A); _DEFPIN_DUE(62, 17, B); _DEFPIN_DUE(63, 18, B);
-_DEFPIN_DUE(64, 19, B); _DEFPIN_DUE(65, 20, B); _DEFPIN_DUE(66, 15, B); _DEFPIN_DUE(67, 16, B);
-_DEFPIN_DUE(68, 1, A); _DEFPIN_DUE(69, 0, A); _DEFPIN_DUE(70, 17, A); _DEFPIN_DUE(71, 18, A);
-_DEFPIN_DUE(72, 30, C); _DEFPIN_DUE(73, 21, A); _DEFPIN_DUE(74, 25, A); _DEFPIN_DUE(75, 26, A);
-_DEFPIN_DUE(76, 27, A); _DEFPIN_DUE(77, 28, A); _DEFPIN_DUE(78, 23, B);
+_FL_DEFPIN(0, 8, A); _FL_DEFPIN(1, 9, A); _FL_DEFPIN(2, 25, B); _FL_DEFPIN(3, 28, C);
+_FL_DEFPIN(4, 26, C); _FL_DEFPIN(5, 25, C); _FL_DEFPIN(6, 24, C); _FL_DEFPIN(7, 23, C);
+_FL_DEFPIN(8, 22, C); _FL_DEFPIN(9, 21, C); _FL_DEFPIN(10, 29, C); _FL_DEFPIN(11, 7, D);
+_FL_DEFPIN(12, 8, D); _FL_DEFPIN(13, 27, B); _FL_DEFPIN(14, 4, D); _FL_DEFPIN(15, 5, D);
+_FL_DEFPIN(16, 13, A); _FL_DEFPIN(17, 12, A); _FL_DEFPIN(18, 11, A); _FL_DEFPIN(19, 10, A);
+_FL_DEFPIN(20, 12, B); _FL_DEFPIN(21, 13, B); _FL_DEFPIN(22, 26, B); _FL_DEFPIN(23, 14, A);
+_FL_DEFPIN(24, 15, A); _FL_DEFPIN(25, 0, D); _FL_DEFPIN(26, 1, D); _FL_DEFPIN(27, 2, D);
+_FL_DEFPIN(28, 3, D); _FL_DEFPIN(29, 6, D); _FL_DEFPIN(30, 9, D); _FL_DEFPIN(31, 7, A);
+_FL_DEFPIN(32, 10, D); _FL_DEFPIN(33, 1, C); _FL_DEFPIN(34, 2, C); _FL_DEFPIN(35, 3, C);
+_FL_DEFPIN(36, 4, C); _FL_DEFPIN(37, 5, C); _FL_DEFPIN(38, 6, C); _FL_DEFPIN(39, 7, C);
+_FL_DEFPIN(40, 8, C); _FL_DEFPIN(41, 9, C); _FL_DEFPIN(42, 19, A); _FL_DEFPIN(43, 20, A);
+_FL_DEFPIN(44, 19, C); _FL_DEFPIN(45, 18, C); _FL_DEFPIN(46, 17, C); _FL_DEFPIN(47, 16, C);
+_FL_DEFPIN(48, 15, C); _FL_DEFPIN(49, 14, C); _FL_DEFPIN(50, 13, C); _FL_DEFPIN(51, 12, C);
+_FL_DEFPIN(52, 21, B); _FL_DEFPIN(53, 14, B); _FL_DEFPIN(54, 16, A); _FL_DEFPIN(55, 24, A);
+_FL_DEFPIN(56, 23, A); _FL_DEFPIN(57, 22, A); _FL_DEFPIN(58, 6, A); _FL_DEFPIN(59, 4, A);
+_FL_DEFPIN(60, 3, A); _FL_DEFPIN(61, 2, A); _FL_DEFPIN(62, 17, B); _FL_DEFPIN(63, 18, B);
+_FL_DEFPIN(64, 19, B); _FL_DEFPIN(65, 20, B); _FL_DEFPIN(66, 15, B); _FL_DEFPIN(67, 16, B);
+_FL_DEFPIN(68, 1, A); _FL_DEFPIN(69, 0, A); _FL_DEFPIN(70, 17, A); _FL_DEFPIN(71, 18, A);
+_FL_DEFPIN(72, 30, C); _FL_DEFPIN(73, 21, A); _FL_DEFPIN(74, 25, A); _FL_DEFPIN(75, 26, A);
+_FL_DEFPIN(76, 27, A); _FL_DEFPIN(77, 28, A); _FL_DEFPIN(78, 23, B);
 
 // digix pins
-_DEFPIN_DUE(90, 0, B); _DEFPIN_DUE(91, 1, B); _DEFPIN_DUE(92, 2, B); _DEFPIN_DUE(93, 3, B);
-_DEFPIN_DUE(94, 4, B); _DEFPIN_DUE(95, 5, B); _DEFPIN_DUE(96, 6, B); _DEFPIN_DUE(97, 7, B);
-_DEFPIN_DUE(98, 8, B); _DEFPIN_DUE(99, 9, B); _DEFPIN_DUE(100, 5, A); _DEFPIN_DUE(101, 22, B);
-_DEFPIN_DUE(102, 23, B); _DEFPIN_DUE(103, 24, B); _DEFPIN_DUE(104, 27, C); _DEFPIN_DUE(105, 20, C);
-_DEFPIN_DUE(106, 11, C); _DEFPIN_DUE(107, 10, C); _DEFPIN_DUE(108, 21, A); _DEFPIN_DUE(109, 30, C);
-_DEFPIN_DUE(110, 29, B); _DEFPIN_DUE(111, 30, B); _DEFPIN_DUE(112, 31, B); _DEFPIN_DUE(113, 28, B);
+_FL_DEFPIN(90, 0, B); _FL_DEFPIN(91, 1, B); _FL_DEFPIN(92, 2, B); _FL_DEFPIN(93, 3, B);
+_FL_DEFPIN(94, 4, B); _FL_DEFPIN(95, 5, B); _FL_DEFPIN(96, 6, B); _FL_DEFPIN(97, 7, B);
+_FL_DEFPIN(98, 8, B); _FL_DEFPIN(99, 9, B); _FL_DEFPIN(100, 5, A); _FL_DEFPIN(101, 22, B);
+_FL_DEFPIN(102, 23, B); _FL_DEFPIN(103, 24, B); _FL_DEFPIN(104, 27, C); _FL_DEFPIN(105, 20, C);
+_FL_DEFPIN(106, 11, C); _FL_DEFPIN(107, 10, C); _FL_DEFPIN(108, 21, A); _FL_DEFPIN(109, 30, C);
+_FL_DEFPIN(110, 29, B); _FL_DEFPIN(111, 30, B); _FL_DEFPIN(112, 31, B); _FL_DEFPIN(113, 28, B);
 
 #define SPI_DATA 75
 #define SPI_CLOCK 76
diff --git a/platforms/arm/stm32/fastpin_arm_stm32.h b/platforms/arm/stm32/fastpin_arm_stm32.h
index 57309cd9c3..c80390f3c4 100644
--- a/platforms/arm/stm32/fastpin_arm_stm32.h
+++ b/platforms/arm/stm32/fastpin_arm_stm32.h
@@ -56,45 +56,67 @@ template<uint8_t PIN, uint8_t _BIT, uint32_t _MASK, typename _GPIO> class _ARMPI
 };
 
 #if defined(STM32F10X_MD)
- #define _RD32(T) struct __gen_struct_ ## T { static __attribute__((always_inline)) inline volatile GPIO_TypeDef * r() { return T; } };
- #define _IO32(L) _RD32(GPIO ## L)
+  #define _R(T) struct __gen_struct_ ## T
+  #define _RD32(T) struct __gen_struct_ ## T { static __attribute__((always_inline)) inline volatile GPIO_TypeDef * r() { return T; } };
+  #define _FL_IO(L,C) _RD32(GPIO ## L);  _FL_DEFINE_PORT3(L, C, _R(GPIO ## L));
 #elif defined(__STM32F1__)
- #define _RD32(T) struct __gen_struct_ ## T { static __attribute__((always_inline)) inline gpio_reg_map* r() { return T->regs; } };
- #define _IO32(L) _RD32(GPIO ## L)
+  #define _R(T) struct __gen_struct_ ## T
+  #define _RD32(T) struct __gen_struct_ ## T { static __attribute__((always_inline)) inline gpio_reg_map* r() { return T->regs; } };
+  #define _FL_IO(L,C) _RD32(GPIO ## L); _FL_DEFINE_PORT3(L, C, _R(GPIO ## L));
 #else
  #error "Platform not supported"
 #endif
 
-#define _R(T) struct __gen_struct_ ## T
-#define _DEFPIN_ARM(PIN, BIT, L) template<> class FastPin<PIN> : public _ARMPIN<PIN, BIT, 1 << BIT, _R(GPIO ## L)> {};
+#define _FL_DEFPIN(PIN, BIT, L) template<> class FastPin<PIN> : public _ARMPIN<PIN, BIT, 1 << BIT, _R(GPIO ## L)> {};
+
+#ifdef GPIOA
+_FL_IO(A,0);
+#endif
+#ifdef GPIOB
+_FL_IO(B,1);
+#endif
+#ifdef GPIOC
+_FL_IO(C,2);
+#endif
+#ifdef GPIOD
+_FL_IO(D,3);
+#endif
+#ifdef GPIOE
+_FL_IO(E,4);
+#endif
+#ifdef GPIOF
+_FL_IO(F,5);
+#endif
+#ifdef GPIOG
+_FL_IO(G,6);
+#endif
 
 // Actual pin definitions
 #if defined(SPARK) // Sparkfun STM32F103 based board
 
-_IO32(A); _IO32(B); _IO32(C); _IO32(D); _IO32(E); _IO32(F); _IO32(G);
 
 
 #define MAX_PIN 19
-_DEFPIN_ARM(0, 7, B);
-_DEFPIN_ARM(1, 6, B);
-_DEFPIN_ARM(2, 5, B);
-_DEFPIN_ARM(3, 4, B);
-_DEFPIN_ARM(4, 3, B);
-_DEFPIN_ARM(5, 15, A);
-_DEFPIN_ARM(6, 14, A);
-_DEFPIN_ARM(7, 13, A);
-_DEFPIN_ARM(8, 8, A);
-_DEFPIN_ARM(9, 9, A);
-_DEFPIN_ARM(10, 0, A);
-_DEFPIN_ARM(11, 1, A);
-_DEFPIN_ARM(12, 4, A);
-_DEFPIN_ARM(13, 5, A);
-_DEFPIN_ARM(14, 6, A);
-_DEFPIN_ARM(15, 7, A);
-_DEFPIN_ARM(16, 0, B);
-_DEFPIN_ARM(17, 1, B);
-_DEFPIN_ARM(18, 3, A);
-_DEFPIN_ARM(19, 2, A);
+_FL_DEFPIN(0, 7, B);
+_FL_DEFPIN(1, 6, B);
+_FL_DEFPIN(2, 5, B);
+_FL_DEFPIN(3, 4, B);
+_FL_DEFPIN(4, 3, B);
+_FL_DEFPIN(5, 15, A);
+_FL_DEFPIN(6, 14, A);
+_FL_DEFPIN(7, 13, A);
+_FL_DEFPIN(8, 8, A);
+_FL_DEFPIN(9, 9, A);
+_FL_DEFPIN(10, 0, A);
+_FL_DEFPIN(11, 1, A);
+_FL_DEFPIN(12, 4, A);
+_FL_DEFPIN(13, 5, A);
+_FL_DEFPIN(14, 6, A);
+_FL_DEFPIN(15, 7, A);
+_FL_DEFPIN(16, 0, B);
+_FL_DEFPIN(17, 1, B);
+_FL_DEFPIN(18, 3, A);
+_FL_DEFPIN(19, 2, A);
 
 
 #define SPI_DATA 15
@@ -106,43 +128,41 @@ _DEFPIN_ARM(19, 2, A);
 
 #if defined(__STM32F1__) // Generic STM32F103 aka "Blue Pill"
 
-_IO32(A); _IO32(B); _IO32(C);
-
 #define MAX_PIN 46
 
-_DEFPIN_ARM(10, 0, A);	// PA0 - PA7
-_DEFPIN_ARM(11, 1, A);
-_DEFPIN_ARM(12, 2, A);
-_DEFPIN_ARM(13, 3, A);
-_DEFPIN_ARM(14, 4, A);
-_DEFPIN_ARM(15, 5, A);
-_DEFPIN_ARM(16, 6, A);
-_DEFPIN_ARM(17, 7, A);
-_DEFPIN_ARM(29, 8, A);	// PA8 - PA15
-_DEFPIN_ARM(30, 9, A);
-_DEFPIN_ARM(31, 10, A);
-_DEFPIN_ARM(32, 11, A);
-_DEFPIN_ARM(33, 12, A);
-_DEFPIN_ARM(34, 13, A);
-_DEFPIN_ARM(37, 14, A);
-_DEFPIN_ARM(38, 15, A);
-
-_DEFPIN_ARM(18, 0, B);	// PB0 - PB11
-_DEFPIN_ARM(19, 1, B);
-_DEFPIN_ARM(20, 2, B);
-_DEFPIN_ARM(39, 3, B);
-_DEFPIN_ARM(40, 4, B);
-_DEFPIN_ARM(41, 5, B);
-_DEFPIN_ARM(42, 6, B);
-_DEFPIN_ARM(43, 7, B);
-_DEFPIN_ARM(45, 8, B);
-_DEFPIN_ARM(46, 9, B);
-_DEFPIN_ARM(21, 10, B);
-_DEFPIN_ARM(22, 11, B);
-
-_DEFPIN_ARM(2, 13, C);	// PC13 - PC15
-_DEFPIN_ARM(3, 14, C);
-_DEFPIN_ARM(4, 15, C);
+_FL_DEFPIN(10, 0, A);	// PA0 - PA7
+_FL_DEFPIN(11, 1, A);
+_FL_DEFPIN(12, 2, A);
+_FL_DEFPIN(13, 3, A);
+_FL_DEFPIN(14, 4, A);
+_FL_DEFPIN(15, 5, A);
+_FL_DEFPIN(16, 6, A);
+_FL_DEFPIN(17, 7, A);
+_FL_DEFPIN(29, 8, A);	// PA8 - PA15
+_FL_DEFPIN(30, 9, A);
+_FL_DEFPIN(31, 10, A);
+_FL_DEFPIN(32, 11, A);
+_FL_DEFPIN(33, 12, A);
+_FL_DEFPIN(34, 13, A);
+_FL_DEFPIN(37, 14, A);
+_FL_DEFPIN(38, 15, A);
+
+_FL_DEFPIN(18, 0, B);	// PB0 - PB11
+_FL_DEFPIN(19, 1, B);
+_FL_DEFPIN(20, 2, B);
+_FL_DEFPIN(39, 3, B);
+_FL_DEFPIN(40, 4, B);
+_FL_DEFPIN(41, 5, B);
+_FL_DEFPIN(42, 6, B);
+_FL_DEFPIN(43, 7, B);
+_FL_DEFPIN(45, 8, B);
+_FL_DEFPIN(46, 9, B);
+_FL_DEFPIN(21, 10, B);
+_FL_DEFPIN(22, 11, B);
+
+_FL_DEFPIN(2, 13, C);	// PC13 - PC15
+_FL_DEFPIN(3, 14, C);
+_FL_DEFPIN(4, 15, C);
 
 #define SPI_DATA BOARD_SPI1_MOSI_PIN
 #define SPI_CLOCK BOARD_SPI1_SCK_PIN
diff --git a/platforms/avr/fastpin_avr.h b/platforms/avr/fastpin_avr.h
index 4e25cf8d4f..a8df76c406 100644
--- a/platforms/avr/fastpin_avr.h
+++ b/platforms/avr/fastpin_avr.h
@@ -48,11 +48,54 @@ template<uint8_t PIN, uint8_t _MASK, typename _PORT, typename _DDR, typename _PI
 typedef volatile uint8_t & reg8_t;
 #define _R(T) struct __gen_struct_ ## T
 #define _RD8(T) struct __gen_struct_ ## T { static inline reg8_t r() { return T; }};
-#define _IO(L) _RD8(DDR ## L); _RD8(PORT ## L); _RD8(PIN ## L);
-#define _DEFPIN_AVR(_PIN, MASK, L) template<> class FastPin<_PIN> : public _AVRPIN<_PIN, MASK, _R(PORT ## L), _R(DDR ## L), _R(PIN ## L)> {};
+#define _FL_IO(L,C) _RD8(DDR ## L); _RD8(PORT ## L); _RD8(PIN ## L); _FL_DEFINE_PORT3(L, C, _R(PORT ## L));
+#define _FL_DEFPIN(_PIN, BIT, L) template<> class FastPin<_PIN> : public _AVRPIN<_PIN, 1<<BIT, _R(PORT ## L), _R(DDR ## L), _R(PIN ## L)> {};
+
+// Pre-do all the port definitions
+#ifdef PORTA
+  _FL_IO(A,0)
+#endif
+#ifdef PORTB
+  _FL_IO(B,1)
+#endif
+#ifdef PORTC
+  _FL_IO(C,2)
+#endif
+#ifdef PORTD
+  _FL_IO(D,3)
+#endif
+#ifdef PORTE
+  _FL_IO(E,4)
+#endif
+#ifdef PORTF
+  _FL_IO(F,5)
+#endif
+#ifdef PORTG
+  _FL_IO(G,6)
+#endif
+#ifdef PORTH
+  _FL_IO(H,7)
+#endif
+#ifdef PORTI
+  _FL_IO(I,8)
+#endif
+#ifdef PORTJ
+  _FL_IO(J,9)
+#endif
+#ifdef PORTK
+  _FL_IO(K,10)
+#endif
+#ifdef PORTL
+  _FL_IO(L,11)
+#endif
+#ifdef PORTM
+  _FL_IO(M,12)
+#endif
+#ifdef PORTN
+  _FL_IO(N,13)
+#endif
 
 #if defined(__AVR_ATtiny85__) || defined(__AVR_ATtiny45__) || defined(__AVR_ATtiny25__)
-_IO(B);
 
 #if defined(__AVR_ATtiny25__)
 #pragma message "ATtiny25 has very limited storage. This library could use up to more than 100% of its flash size"
@@ -60,60 +103,55 @@ _IO(B);
 
 #define MAX_PIN 5
 
-_DEFPIN_AVR(0, 0x01, B); _DEFPIN_AVR(1, 0x02, B); _DEFPIN_AVR(2, 0x04, B); _DEFPIN_AVR(3, 0x08, B);
-_DEFPIN_AVR(4, 0x10, B); _DEFPIN_AVR(5, 0x20, B);
+_FL_DEFPIN(0, 0, B); _FL_DEFPIN(1, 1, B); _FL_DEFPIN(2, 2, B); _FL_DEFPIN(3, 3, B);
+_FL_DEFPIN(4, 4, B); _FL_DEFPIN(5, 5, B);
 
 #define HAS_HARDWARE_PIN_SUPPORT 1
 
 #elif defined(__AVR_ATtiny841__) || defined(__AVR_ATtiny441__)
 #define MAX_PIN 11
-_IO(A); _IO(B);
 
-_DEFPIN_AVR(0, 0x01, B); _DEFPIN_AVR(1, 0x02, B); _DEFPIN_AVR(2, 0x04, B);
-_DEFPIN_AVR(3, 0x80, A); _DEFPIN_AVR(4, 0x40, A); _DEFPIN_AVR(5, 0x20, A);
-_DEFPIN_AVR(6, 0x10, A); _DEFPIN_AVR(7, 0x08, A); _DEFPIN_AVR(8, 0x04, A);
-_DEFPIN_AVR(9, 0x02, A); _DEFPIN_AVR(10, 0x01, A); _DEFPIN_AVR(11, 0x08, B);
+_FL_DEFPIN(0, 0, B); _FL_DEFPIN(1, 1, B); _FL_DEFPIN(2, 2, B);
+_FL_DEFPIN(3, 7, A); _FL_DEFPIN(4, 6, A); _FL_DEFPIN(5, 5, A);
+_FL_DEFPIN(6, 4, A); _FL_DEFPIN(7, 3, A); _FL_DEFPIN(8, 2, A);
+_FL_DEFPIN(9, 1, A); _FL_DEFPIN(10, 0, A); _FL_DEFPIN(11, 3, B);
 
 #define HAS_HARDWARE_PIN_SUPPORT 1
 
 #elif defined(ARDUINO_AVR_DIGISPARK) // digispark pin layout
 #define MAX_PIN 5
 #define HAS_HARDWARE_PIN_SUPPORT 1
-_IO(A); _IO(B);
 
-_DEFPIN_AVR(0, 0x01, B); _DEFPIN_AVR(1, 0x02, B); _DEFPIN_AVR(2, 0x04, B);
-_DEFPIN_AVR(3, 0x80, A); _DEFPIN_AVR(4, 0x40, A); _DEFPIN_AVR(5, 0x20, A);
+_FL_DEFPIN(0, 0, B); _FL_DEFPIN(1, 1, B); _FL_DEFPIN(2, 2, B);
+_FL_DEFPIN(3, 7, A); _FL_DEFPIN(4, 6, A); _FL_DEFPIN(5, 5, A);
 
-#elif defined(__AVR_ATtiny24__) || defined(__AVR_ATtiny44__) || defined(__AVR_ATtiny84__) 
-_IO(A); _IO(B);
+#elif defined(__AVR_ATtiny24__) || defined(__AVR_ATtiny44__) || defined(__AVR_ATtiny84__)
 
 #define MAX_PIN 10
 
-_DEFPIN_AVR(0, 0x01, A); _DEFPIN_AVR(1, 0x02, A); _DEFPIN_AVR(2, 0x04, A); _DEFPIN_AVR(3, 0x08, A);
-_DEFPIN_AVR(4, 0x10, A); _DEFPIN_AVR(5, 0x20, A); _DEFPIN_AVR(6, 0x40, A); _DEFPIN_AVR(7, 0x80, A);
-_DEFPIN_AVR(8, 0x04, B); _DEFPIN_AVR(9, 0x02, B); _DEFPIN_AVR(10, 0x01, B);
+_FL_DEFPIN(0, 0, A); _FL_DEFPIN(1, 1, A); _FL_DEFPIN(2, 2, A); _FL_DEFPIN(3, 3, A);
+_FL_DEFPIN(4, 4, A); _FL_DEFPIN(5, 5, A); _FL_DEFPIN(6, 6, A); _FL_DEFPIN(7, 7, A);
+_FL_DEFPIN(8, 2, B); _FL_DEFPIN(9, 1, B); _FL_DEFPIN(10, 0, B);
 
 #define HAS_HARDWARE_PIN_SUPPORT 1
 
 #elif defined(ARDUINO_AVR_DIGISPARKPRO)
 
-_IO(A); _IO(B);
 #define MAX_PIN 12
 
-_DEFPIN_AVR(0, 0x01, B); _DEFPIN_AVR(1, 0x02, B); _DEFPIN_AVR(2, 0x04, B); _DEFPIN_AVR(3, 0x20, B);
-_DEFPIN_AVR(4, 0x08, B); _DEFPIN_AVR(5, 0x80, A); _DEFPIN_AVR(6, 0x01, A); _DEFPIN_AVR(7, 0x02, A);
-_DEFPIN_AVR(8, 0x04, A); _DEFPIN_AVR(9, 0x08, A); _DEFPIN_AVR(10, 0x10, A); _DEFPIN_AVR(11, 0x20, A);
-_DEFPIN_AVR(12, 0x40, A);
+_FL_DEFPIN(0, 0, B); _FL_DEFPIN(1, 1, B); _FL_DEFPIN(2, 2, B); _FL_DEFPIN(3, 5, B);
+_FL_DEFPIN(4, 3, B); _FL_DEFPIN(5, 7, A); _FL_DEFPIN(6, 0, A); _FL_DEFPIN(7, 1, A);
+_FL_DEFPIN(8, 2, A); _FL_DEFPIN(9, 3, A); _FL_DEFPIN(10, 4, A); _FL_DEFPIN(11, 5, A);
+_FL_DEFPIN(12, 6, A);
 
 #elif defined(__AVR_ATtiny167__) || defined(__AVR_ATtiny87__)
-_IO(A); _IO(B);
 
 #define MAX_PIN 15
 
-_DEFPIN_AVR(0, 0x01, A);  _DEFPIN_AVR(1, 0x02, A);   _DEFPIN_AVR(2, 0x04, A);  _DEFPIN_AVR(3, 0x08, A);
-_DEFPIN_AVR(4, 0x10, A);  _DEFPIN_AVR(5, 0x20, A);   _DEFPIN_AVR(6, 0x40, A);  _DEFPIN_AVR(7, 0x80, A);
-_DEFPIN_AVR(8, 0x01, B);  _DEFPIN_AVR(9, 0x02, B);   _DEFPIN_AVR(10, 0x04, B); _DEFPIN_AVR(11, 0x08, B);
-_DEFPIN_AVR(12, 0x10, B); _DEFPIN_AVR(13, 0x20, B); _DEFPIN_AVR(14, 0x40, B); _DEFPIN_AVR(15, 0x80, B);
+_FL_DEFPIN(0, 0, A);  _FL_DEFPIN(1, 1, A);   _FL_DEFPIN(2, 2, A);  _FL_DEFPIN(3, 3, A);
+_FL_DEFPIN(4, 4, A);  _FL_DEFPIN(5, 5, A);   _FL_DEFPIN(6, 6, A);  _FL_DEFPIN(7, 7, A);
+_FL_DEFPIN(8, 0, B);  _FL_DEFPIN(9, 1, B);   _FL_DEFPIN(10, 2, B); _FL_DEFPIN(11, 3, B);
+_FL_DEFPIN(12, 4, B); _FL_DEFPIN(13, 5, B); _FL_DEFPIN(14, 6, B); _FL_DEFPIN(15, 7, B);
 
 #define SPI_DATA 4
 #define SPI_CLOCK 5
@@ -122,17 +160,15 @@ _DEFPIN_AVR(12, 0x10, B); _DEFPIN_AVR(13, 0x20, B); _DEFPIN_AVR(14, 0x40, B); _D
 #define HAS_HARDWARE_PIN_SUPPORT 1
 #elif defined(ARDUINO_HOODLOADER2) && (defined(__AVR_ATmega32U2__) || defined(__AVR_ATmega16U2__) || defined(__AVR_ATmega8U2__)) || defined(__AVR_AT90USB82__) || defined(__AVR_AT90USB162__)
 
-_IO(D); _IO(B); _IO(C);
-
 #define MAX_PIN 20
 
-_DEFPIN_AVR( 0, 0x01, B); _DEFPIN_AVR( 1, 0x02, B); _DEFPIN_AVR( 2, 0x04, B); _DEFPIN_AVR( 3, 0x08, B);
-_DEFPIN_AVR( 4, 0x10, B); _DEFPIN_AVR( 5, 0x20, B); _DEFPIN_AVR( 6, 0x40, B); _DEFPIN_AVR( 7, 0x80, B);
+_FL_DEFPIN( 0, 0, B); _FL_DEFPIN( 1, 1, B); _FL_DEFPIN( 2, 2, B); _FL_DEFPIN( 3, 3, B);
+_FL_DEFPIN( 4, 4, B); _FL_DEFPIN( 5, 5, B); _FL_DEFPIN( 6, 6, B); _FL_DEFPIN( 7, 7, B);
 
-_DEFPIN_AVR( 8, 0x80, C); _DEFPIN_AVR( 9, 0x40, C); _DEFPIN_AVR( 10, 0x20,C); _DEFPIN_AVR( 11, 0x10, C);
-_DEFPIN_AVR( 12, 0x04, C); _DEFPIN_AVR( 13, 0x01, D); _DEFPIN_AVR( 14, 0x02, D); _DEFPIN_AVR(15, 0x04, D);
-_DEFPIN_AVR( 16, 0x08, D); _DEFPIN_AVR( 17, 0x10, D); _DEFPIN_AVR( 18, 0x20, D); _DEFPIN_AVR( 19, 0x40, D);
-_DEFPIN_AVR( 20, 0x80, D);
+_FL_DEFPIN( 8, 7, C); _FL_DEFPIN( 9, 6, C); _FL_DEFPIN( 10, 5,C); _FL_DEFPIN( 11, 4, C);
+_FL_DEFPIN( 12, 2, C); _FL_DEFPIN( 13, 0, D); _FL_DEFPIN( 14, 1, D); _FL_DEFPIN(15, 2, D);
+_FL_DEFPIN( 16, 3, D); _FL_DEFPIN( 17, 4, D); _FL_DEFPIN( 18, 5, D); _FL_DEFPIN( 19, 6, D);
+_FL_DEFPIN( 20, 7, D);
 
 #define HAS_HARDWARE_PIN_SUPPORT 1
 // #define SPI_DATA 2
@@ -141,15 +177,12 @@ _DEFPIN_AVR( 20, 0x80, D);
 
 #elif defined(IS_BEAN)
 
-// Accelerated port definitions for arduino avrs
-_IO(D); _IO(B); _IO(C);
-
 #define MAX_PIN 19
-_DEFPIN_AVR( 0, 0x40, D); _DEFPIN_AVR( 1, 0x02, B); _DEFPIN_AVR( 2, 0x04, B); _DEFPIN_AVR( 3, 0x08, B);
-_DEFPIN_AVR( 4, 0x10, B); _DEFPIN_AVR( 5, 0x20, B); _DEFPIN_AVR( 6, 0x01, D); _DEFPIN_AVR( 7, 0x80, D);
-_DEFPIN_AVR( 8, 0x01, B); _DEFPIN_AVR( 9, 0x02, D); _DEFPIN_AVR(10, 0x04, D); _DEFPIN_AVR(11, 0x08, D);
-_DEFPIN_AVR(12, 0x10, D); _DEFPIN_AVR(13, 0x20, D); _DEFPIN_AVR(14, 0x01, C); _DEFPIN_AVR(15, 0x02, C);
-_DEFPIN_AVR(16, 0x04, C); _DEFPIN_AVR(17, 0x08, C); _DEFPIN_AVR(18, 0x10, C); _DEFPIN_AVR(19, 0x20, C);
+_FL_DEFPIN( 0, 6, D); _FL_DEFPIN( 1, 1, B); _FL_DEFPIN( 2, 2, B); _FL_DEFPIN( 3, 3, B);
+_FL_DEFPIN( 4, 4, B); _FL_DEFPIN( 5, 5, B); _FL_DEFPIN( 6, 0, D); _FL_DEFPIN( 7, 7, D);
+_FL_DEFPIN( 8, 0, B); _FL_DEFPIN( 9, 1, D); _FL_DEFPIN(10, 2, D); _FL_DEFPIN(11, 3, D);
+_FL_DEFPIN(12, 4, D); _FL_DEFPIN(13, 5, D); _FL_DEFPIN(14, 0, C); _FL_DEFPIN(15, 1, C);
+_FL_DEFPIN(16, 2, C); _FL_DEFPIN(17, 3, C); _FL_DEFPIN(18, 4, C); _FL_DEFPIN(19, 5, C);
 
 #define SPI_DATA 3
 #define SPI_CLOCK 5
@@ -163,15 +196,13 @@ _DEFPIN_AVR(16, 0x04, C); _DEFPIN_AVR(17, 0x08, C); _DEFPIN_AVR(18, 0x10, C); _D
 #endif
 
 #elif defined(__AVR_ATmega328P__) || defined(__AVR_ATmega328PB__) || defined(__AVR_ATmega328__) || defined(__AVR_ATmega168__) || defined(__AVR_ATmega168P__) || defined(__AVR_ATmega8__)
-// Accelerated port definitions for arduino avrs
-_IO(D); _IO(B); _IO(C);
 
 #define MAX_PIN 19
-_DEFPIN_AVR( 0, 0x01, D); _DEFPIN_AVR( 1, 0x02, D); _DEFPIN_AVR( 2, 0x04, D); _DEFPIN_AVR( 3, 0x08, D);
-_DEFPIN_AVR( 4, 0x10, D); _DEFPIN_AVR( 5, 0x20, D); _DEFPIN_AVR( 6, 0x40, D); _DEFPIN_AVR( 7, 0x80, D);
-_DEFPIN_AVR( 8, 0x01, B); _DEFPIN_AVR( 9, 0x02, B); _DEFPIN_AVR(10, 0x04, B); _DEFPIN_AVR(11, 0x08, B);
-_DEFPIN_AVR(12, 0x10, B); _DEFPIN_AVR(13, 0x20, B); _DEFPIN_AVR(14, 0x01, C); _DEFPIN_AVR(15, 0x02, C);
-_DEFPIN_AVR(16, 0x04, C); _DEFPIN_AVR(17, 0x08, C); _DEFPIN_AVR(18, 0x10, C); _DEFPIN_AVR(19, 0x20, C);
+_FL_DEFPIN( 0, 0, D); _FL_DEFPIN( 1, 1, D); _FL_DEFPIN( 2, 2, D); _FL_DEFPIN( 3, 3, D);
+_FL_DEFPIN( 4, 4, D); _FL_DEFPIN( 5, 5, D); _FL_DEFPIN( 6, 6, D); _FL_DEFPIN( 7, 7, D);
+_FL_DEFPIN( 8, 0, B); _FL_DEFPIN( 9, 1, B); _FL_DEFPIN(10, 2, B); _FL_DEFPIN(11, 3, B);
+_FL_DEFPIN(12, 4, B); _FL_DEFPIN(13, 5, B); _FL_DEFPIN(14, 0, C); _FL_DEFPIN(15, 1, C);
+_FL_DEFPIN(16, 2, C); _FL_DEFPIN(17, 3, C); _FL_DEFPIN(18, 4, C); _FL_DEFPIN(19, 5, C);
 
 #define SPI_DATA 11
 #define SPI_CLOCK 13
@@ -186,17 +217,15 @@ _DEFPIN_AVR(16, 0x04, C); _DEFPIN_AVR(17, 0x08, C); _DEFPIN_AVR(18, 0x10, C); _D
 
 #elif defined(__AVR_ATmega1284P__) || defined(__AVR_ATmega644P__)
 
-_IO(A); _IO(B); _IO(C); _IO(D);
-
 #define MAX_PIN 31
-_DEFPIN_AVR(0, 1<<0, B); _DEFPIN_AVR(1, 1<<1, B); _DEFPIN_AVR(2, 1<<2, B); _DEFPIN_AVR(3, 1<<3, B);
-_DEFPIN_AVR(4, 1<<4, B); _DEFPIN_AVR(5, 1<<5, B); _DEFPIN_AVR(6, 1<<6, B); _DEFPIN_AVR(7, 1<<7, B);
-_DEFPIN_AVR(8, 1<<0, D); _DEFPIN_AVR(9, 1<<1, D); _DEFPIN_AVR(10, 1<<2, D); _DEFPIN_AVR(11, 1<<3, D);
-_DEFPIN_AVR(12, 1<<4, D); _DEFPIN_AVR(13, 1<<5, D); _DEFPIN_AVR(14, 1<<6, D); _DEFPIN_AVR(15, 1<<7, D);
-_DEFPIN_AVR(16, 1<<0, C); _DEFPIN_AVR(17, 1<<1, C); _DEFPIN_AVR(18, 1<<2, C); _DEFPIN_AVR(19, 1<<3, C);
-_DEFPIN_AVR(20, 1<<4, C); _DEFPIN_AVR(21, 1<<5, C); _DEFPIN_AVR(22, 1<<6, C); _DEFPIN_AVR(23, 1<<7, C);
-_DEFPIN_AVR(24, 1<<0, A); _DEFPIN_AVR(25, 1<<1, A); _DEFPIN_AVR(26, 1<<2, A); _DEFPIN_AVR(27, 1<<3, A);
-_DEFPIN_AVR(28, 1<<4, A); _DEFPIN_AVR(29, 1<<5, A); _DEFPIN_AVR(30, 1<<6, A); _DEFPIN_AVR(31, 1<<7, A);
+_FL_DEFPIN(0, 0, B); _FL_DEFPIN(1, 1, B); _FL_DEFPIN(2, 2, B); _FL_DEFPIN(3, 3, B);
+_FL_DEFPIN(4, 4, B); _FL_DEFPIN(5, 5, B); _FL_DEFPIN(6, 6, B); _FL_DEFPIN(7, 7, B);
+_FL_DEFPIN(8, 0, D); _FL_DEFPIN(9, 1, D); _FL_DEFPIN(10, 2, D); _FL_DEFPIN(11, 3, D);
+_FL_DEFPIN(12, 4, D); _FL_DEFPIN(13, 5, D); _FL_DEFPIN(14, 6, D); _FL_DEFPIN(15, 7, D);
+_FL_DEFPIN(16, 0, C); _FL_DEFPIN(17, 1, C); _FL_DEFPIN(18, 2, C); _FL_DEFPIN(19, 3, C);
+_FL_DEFPIN(20, 4, C); _FL_DEFPIN(21, 5, C); _FL_DEFPIN(22, 6, C); _FL_DEFPIN(23, 7, C);
+_FL_DEFPIN(24, 0, A); _FL_DEFPIN(25, 1, A); _FL_DEFPIN(26, 2, A); _FL_DEFPIN(27, 3, A);
+_FL_DEFPIN(28, 4, A); _FL_DEFPIN(29, 5, A); _FL_DEFPIN(30, 6, A); _FL_DEFPIN(31, 7, A);
 
 #define SPI_DATA 5
 #define SPI_CLOCK 7
@@ -207,17 +236,14 @@ _DEFPIN_AVR(28, 1<<4, A); _DEFPIN_AVR(29, 1<<5, A); _DEFPIN_AVR(30, 1<<6, A); _D
 #elif  defined(__AVR_ATmega128RFA1__) || defined(__AVR_ATmega256RFR2__)
 
 // AKA the Pinoccio
-
-_IO(A); _IO(B); _IO(C); _IO(D); _IO(E); _IO(F);
-
-_DEFPIN_AVR( 0, 1<<0, E); _DEFPIN_AVR( 1, 1<<1, E); _DEFPIN_AVR( 2, 1<<7, B); _DEFPIN_AVR( 3, 1<<3, E);
-_DEFPIN_AVR( 4, 1<<4, E); _DEFPIN_AVR( 5, 1<<5, E); _DEFPIN_AVR( 6, 1<<2, E); _DEFPIN_AVR( 7, 1<<6, E);
-_DEFPIN_AVR( 8, 1<<5, D); _DEFPIN_AVR( 9, 1<<0, B); _DEFPIN_AVR(10, 1<<2, B); _DEFPIN_AVR(11, 1<<3, B);
-_DEFPIN_AVR(12, 1<<1, B); _DEFPIN_AVR(13, 1<<2, D); _DEFPIN_AVR(14, 1<<3, D); _DEFPIN_AVR(15, 1<<0, D);
-_DEFPIN_AVR(16, 1<<1, D); _DEFPIN_AVR(17, 1<<4, D); _DEFPIN_AVR(18, 1<<7, E); _DEFPIN_AVR(19, 1<<6, D);
-_DEFPIN_AVR(20, 1<<7, D); _DEFPIN_AVR(21, 1<<4, B); _DEFPIN_AVR(22, 1<<5, B); _DEFPIN_AVR(23, 1<<6, B);
-_DEFPIN_AVR(24, 1<<0, F); _DEFPIN_AVR(25, 1<<1, F); _DEFPIN_AVR(26, 1<<2, F); _DEFPIN_AVR(27, 1<<3, F);
-_DEFPIN_AVR(28, 1<<4, F); _DEFPIN_AVR(29, 1<<5, F); _DEFPIN_AVR(30, 1<<6, F); _DEFPIN_AVR(31, 1<<7, F);
+_FL_DEFPIN( 0, 0, E); _FL_DEFPIN( 1, 1, E); _FL_DEFPIN( 2, 7, B); _FL_DEFPIN( 3, 3, E);
+_FL_DEFPIN( 4, 4, E); _FL_DEFPIN( 5, 5, E); _FL_DEFPIN( 6, 2, E); _FL_DEFPIN( 7, 6, E);
+_FL_DEFPIN( 8, 5, D); _FL_DEFPIN( 9, 0, B); _FL_DEFPIN(10, 2, B); _FL_DEFPIN(11, 3, B);
+_FL_DEFPIN(12, 1, B); _FL_DEFPIN(13, 2, D); _FL_DEFPIN(14, 3, D); _FL_DEFPIN(15, 0, D);
+_FL_DEFPIN(16, 1, D); _FL_DEFPIN(17, 4, D); _FL_DEFPIN(18, 7, E); _FL_DEFPIN(19, 6, D);
+_FL_DEFPIN(20, 7, D); _FL_DEFPIN(21, 4, B); _FL_DEFPIN(22, 5, B); _FL_DEFPIN(23, 6, B);
+_FL_DEFPIN(24, 0, F); _FL_DEFPIN(25, 1, F); _FL_DEFPIN(26, 2, F); _FL_DEFPIN(27, 3, F);
+_FL_DEFPIN(28, 4, F); _FL_DEFPIN(29, 5, F); _FL_DEFPIN(30, 6, F); _FL_DEFPIN(31, 7, F);
 
 #define SPI_DATA 10
 #define SPI_CLOCK 12
@@ -228,28 +254,25 @@ _DEFPIN_AVR(28, 1<<4, F); _DEFPIN_AVR(29, 1<<5, F); _DEFPIN_AVR(30, 1<<6, F); _D
 
 #elif defined(__AVR_ATmega1280__) || defined(__AVR_ATmega2560__)
 // megas
-
-_IO(A); _IO(B); _IO(C); _IO(D); _IO(E); _IO(F); _IO(G); _IO(H); _IO(J); _IO(K); _IO(L);
-
 #define MAX_PIN 69
-_DEFPIN_AVR(0, 1, E); _DEFPIN_AVR(1, 2, E); _DEFPIN_AVR(2, 16, E); _DEFPIN_AVR(3, 32, E);
-_DEFPIN_AVR(4, 32, G); _DEFPIN_AVR(5, 8, E); _DEFPIN_AVR(6, 8, H); _DEFPIN_AVR(7, 16, H);
-_DEFPIN_AVR(8, 32, H); _DEFPIN_AVR(9, 64, H); _DEFPIN_AVR(10, 16, B); _DEFPIN_AVR(11, 32, B);
-_DEFPIN_AVR(12, 64, B); _DEFPIN_AVR(13, 128, B); _DEFPIN_AVR(14, 2, J); _DEFPIN_AVR(15, 1, J);
-_DEFPIN_AVR(16, 2, H); _DEFPIN_AVR(17, 1, H); _DEFPIN_AVR(18, 8, D); _DEFPIN_AVR(19, 4, D);
-_DEFPIN_AVR(20, 2, D); _DEFPIN_AVR(21, 1, D); _DEFPIN_AVR(22, 1, A); _DEFPIN_AVR(23, 2, A);
-_DEFPIN_AVR(24, 4, A); _DEFPIN_AVR(25, 8, A); _DEFPIN_AVR(26, 16, A); _DEFPIN_AVR(27, 32, A);
-_DEFPIN_AVR(28, 64, A); _DEFPIN_AVR(29, 128, A); _DEFPIN_AVR(30, 128, C); _DEFPIN_AVR(31, 64, C);
-_DEFPIN_AVR(32, 32, C); _DEFPIN_AVR(33, 16, C); _DEFPIN_AVR(34, 8, C); _DEFPIN_AVR(35, 4, C);
-_DEFPIN_AVR(36, 2, C); _DEFPIN_AVR(37, 1, C); _DEFPIN_AVR(38, 128, D); _DEFPIN_AVR(39, 4, G);
-_DEFPIN_AVR(40, 2, G); _DEFPIN_AVR(41, 1, G); _DEFPIN_AVR(42, 128, L); _DEFPIN_AVR(43, 64, L);
-_DEFPIN_AVR(44, 32, L); _DEFPIN_AVR(45, 16, L); _DEFPIN_AVR(46, 8, L); _DEFPIN_AVR(47, 4, L);
-_DEFPIN_AVR(48, 2, L); _DEFPIN_AVR(49, 1, L); _DEFPIN_AVR(50, 8, B); _DEFPIN_AVR(51, 4, B);
-_DEFPIN_AVR(52, 2, B); _DEFPIN_AVR(53, 1, B); _DEFPIN_AVR(54, 1, F); _DEFPIN_AVR(55, 2, F);
-_DEFPIN_AVR(56, 4, F); _DEFPIN_AVR(57, 8, F); _DEFPIN_AVR(58, 16, F); _DEFPIN_AVR(59, 32, F);
-_DEFPIN_AVR(60, 64, F); _DEFPIN_AVR(61, 128, F); _DEFPIN_AVR(62, 1, K); _DEFPIN_AVR(63, 2, K);
-_DEFPIN_AVR(64, 4, K); _DEFPIN_AVR(65, 8, K); _DEFPIN_AVR(66, 16, K); _DEFPIN_AVR(67, 32, K);
-_DEFPIN_AVR(68, 64, K); _DEFPIN_AVR(69, 128, K);
+_FL_DEFPIN(0, 0, E); _FL_DEFPIN(1, 1, E); _FL_DEFPIN(2, 4, E); _FL_DEFPIN(3, 5, E);
+_FL_DEFPIN(4, 5, G); _FL_DEFPIN(5, 3, E); _FL_DEFPIN(6, 3, H); _FL_DEFPIN(7, 4, H);
+_FL_DEFPIN(8, 5, H); _FL_DEFPIN(9, 6, H); _FL_DEFPIN(10, 4, B); _FL_DEFPIN(11, 5, B);
+_FL_DEFPIN(12, 6, B); _FL_DEFPIN(13, 7, B); _FL_DEFPIN(14, 1, J); _FL_DEFPIN(15, 0, J);
+_FL_DEFPIN(16, 1, H); _FL_DEFPIN(17, 0, H); _FL_DEFPIN(18, 3, D); _FL_DEFPIN(19, 2, D);
+_FL_DEFPIN(20, 1, D); _FL_DEFPIN(21, 0, D); _FL_DEFPIN(22, 0, A); _FL_DEFPIN(23, 1, A);
+_FL_DEFPIN(24, 2, A); _FL_DEFPIN(25, 3, A); _FL_DEFPIN(26, 4, A); _FL_DEFPIN(27, 5, A);
+_FL_DEFPIN(28, 6, A); _FL_DEFPIN(29, 7, A); _FL_DEFPIN(30, 7, C); _FL_DEFPIN(31, 6, C);
+_FL_DEFPIN(32, 5, C); _FL_DEFPIN(33, 4, C); _FL_DEFPIN(34, 3, C); _FL_DEFPIN(35, 2, C);
+_FL_DEFPIN(36, 1, C); _FL_DEFPIN(37, 0, C); _FL_DEFPIN(38, 7, D); _FL_DEFPIN(39, 2, G);
+_FL_DEFPIN(40, 1, G); _FL_DEFPIN(41, 0, G); _FL_DEFPIN(42, 7, L); _FL_DEFPIN(43, 6, L);
+_FL_DEFPIN(44, 5, L); _FL_DEFPIN(45, 4, L); _FL_DEFPIN(46, 3, L); _FL_DEFPIN(47, 2, L);
+_FL_DEFPIN(48, 1, L); _FL_DEFPIN(49, 0, L); _FL_DEFPIN(50, 3, B); _FL_DEFPIN(51, 2, B);
+_FL_DEFPIN(52, 1, B); _FL_DEFPIN(53, 0, B); _FL_DEFPIN(54, 0, F); _FL_DEFPIN(55, 1, F);
+_FL_DEFPIN(56, 2, F); _FL_DEFPIN(57, 3, F); _FL_DEFPIN(58, 4, F); _FL_DEFPIN(59, 5, F);
+_FL_DEFPIN(60, 6, F); _FL_DEFPIN(61, 7, F); _FL_DEFPIN(62, 0, K); _FL_DEFPIN(63, 1, K);
+_FL_DEFPIN(64, 2, K); _FL_DEFPIN(65, 3, K); _FL_DEFPIN(66, 4, K); _FL_DEFPIN(67, 5, K);
+_FL_DEFPIN(68, 6, K); _FL_DEFPIN(69, 7, K);
 
 #define SPI_DATA 51
 #define SPI_CLOCK 52
@@ -261,15 +284,13 @@ _DEFPIN_AVR(68, 64, K); _DEFPIN_AVR(69, 128, K);
 #elif defined(__AVR_ATmega32U4__) && defined(CORE_TEENSY)
 
 // teensy defs
-_IO(B); _IO(C); _IO(D); _IO(E); _IO(F);
-
 #define MAX_PIN 23
-_DEFPIN_AVR(0, 1, B); _DEFPIN_AVR(1, 2, B); _DEFPIN_AVR(2, 4, B); _DEFPIN_AVR(3, 8, B);
-_DEFPIN_AVR(4, 128, B); _DEFPIN_AVR(5, 1, D); _DEFPIN_AVR(6, 2, D); _DEFPIN_AVR(7, 4, D);
-_DEFPIN_AVR(8, 8, D); _DEFPIN_AVR(9, 64, C); _DEFPIN_AVR(10, 128, C); _DEFPIN_AVR(11, 64, D);
-_DEFPIN_AVR(12, 128, D); _DEFPIN_AVR(13, 16, B); _DEFPIN_AVR(14, 32, B); _DEFPIN_AVR(15, 64, B);
-_DEFPIN_AVR(16, 128, F); _DEFPIN_AVR(17, 64, F); _DEFPIN_AVR(18, 32, F); _DEFPIN_AVR(19, 16, F);
-_DEFPIN_AVR(20, 2, F); _DEFPIN_AVR(21, 1, F); _DEFPIN_AVR(22, 16, D); _DEFPIN_AVR(23, 32, D);
+_FL_DEFPIN(0, 0, B); _FL_DEFPIN(1, 1, B); _FL_DEFPIN(2, 2, B); _FL_DEFPIN(3, 3, B);
+_FL_DEFPIN(4, 7, B); _FL_DEFPIN(5, 0, D); _FL_DEFPIN(6, 1, D); _FL_DEFPIN(7, 2, D);
+_FL_DEFPIN(8, 3, D); _FL_DEFPIN(9, 6, C); _FL_DEFPIN(10, 7, C); _FL_DEFPIN(11, 6, D);
+_FL_DEFPIN(12, 7, D); _FL_DEFPIN(13, 4, B); _FL_DEFPIN(14, 5, B); _FL_DEFPIN(15, 6, B);
+_FL_DEFPIN(16, 7, F); _FL_DEFPIN(17, 6, F); _FL_DEFPIN(18, 5, F); _FL_DEFPIN(19, 4, F);
+_FL_DEFPIN(20, 1, F); _FL_DEFPIN(21, 0, F); _FL_DEFPIN(22, 4, D); _FL_DEFPIN(23, 5, D);
 
 #define SPI_DATA 2
 #define SPI_CLOCK 1
@@ -283,22 +304,19 @@ _DEFPIN_AVR(20, 2, F); _DEFPIN_AVR(21, 1, F); _DEFPIN_AVR(22, 16, D); _DEFPIN_AV
 
 #elif defined(__AVR_AT90USB646__) || defined(__AVR_AT90USB1286__)
 // teensy++ 2 defs
-
-_IO(A); _IO(B); _IO(C); _IO(D); _IO(E); _IO(F);
-
 #define MAX_PIN 45
-_DEFPIN_AVR(0, 1, D); _DEFPIN_AVR(1, 2, D); _DEFPIN_AVR(2, 4, D); _DEFPIN_AVR(3, 8, D);
-_DEFPIN_AVR(4, 16, D); _DEFPIN_AVR(5, 32, D); _DEFPIN_AVR(6, 64, D); _DEFPIN_AVR(7, 128, D);
-_DEFPIN_AVR(8, 1, E); _DEFPIN_AVR(9, 2, E); _DEFPIN_AVR(10, 1, C); _DEFPIN_AVR(11, 2, C);
-_DEFPIN_AVR(12, 4, C); _DEFPIN_AVR(13, 8, C); _DEFPIN_AVR(14, 16, C); _DEFPIN_AVR(15, 32, C);
-_DEFPIN_AVR(16, 64, C); _DEFPIN_AVR(17, 128, C); _DEFPIN_AVR(18, 64, E); _DEFPIN_AVR(19, 128, E);
-_DEFPIN_AVR(20, 1, B); _DEFPIN_AVR(21, 2, B); _DEFPIN_AVR(22, 4, B); _DEFPIN_AVR(23, 8, B);
-_DEFPIN_AVR(24, 16, B); _DEFPIN_AVR(25, 32, B); _DEFPIN_AVR(26, 64, B); _DEFPIN_AVR(27, 128, B);
-_DEFPIN_AVR(28, 1, A); _DEFPIN_AVR(29, 2, A); _DEFPIN_AVR(30, 4, A); _DEFPIN_AVR(31, 8, A);
-_DEFPIN_AVR(32, 16, A); _DEFPIN_AVR(33, 32, A); _DEFPIN_AVR(34, 64, A); _DEFPIN_AVR(35, 128, A);
-_DEFPIN_AVR(36, 16, E); _DEFPIN_AVR(37, 32, E); _DEFPIN_AVR(38, 1, F); _DEFPIN_AVR(39, 2, F);
-_DEFPIN_AVR(40, 4, F); _DEFPIN_AVR(41, 8, F); _DEFPIN_AVR(42, 16, F); _DEFPIN_AVR(43, 32, F);
-_DEFPIN_AVR(44, 64, F); _DEFPIN_AVR(45, 128, F);
+_FL_DEFPIN(0, 0, D); _FL_DEFPIN(1, 1, D); _FL_DEFPIN(2, 2, D); _FL_DEFPIN(3, 3, D);
+_FL_DEFPIN(4, 4, D); _FL_DEFPIN(5, 5, D); _FL_DEFPIN(6, 6, D); _FL_DEFPIN(7, 7, D);
+_FL_DEFPIN(8, 0, E); _FL_DEFPIN(9, 1, E); _FL_DEFPIN(10, 0, C); _FL_DEFPIN(11, 1, C);
+_FL_DEFPIN(12, 2, C); _FL_DEFPIN(13, 3, C); _FL_DEFPIN(14, 4, C); _FL_DEFPIN(15, 5, C);
+_FL_DEFPIN(16, 6, C); _FL_DEFPIN(17, 7, C); _FL_DEFPIN(18, 6, E); _FL_DEFPIN(19, 7, E);
+_FL_DEFPIN(20, 0, B); _FL_DEFPIN(21, 1, B); _FL_DEFPIN(22, 2, B); _FL_DEFPIN(23, 3, B);
+_FL_DEFPIN(24, 4, B); _FL_DEFPIN(25, 5, B); _FL_DEFPIN(26, 6, B); _FL_DEFPIN(27, 7, B);
+_FL_DEFPIN(28, 0, A); _FL_DEFPIN(29, 1, A); _FL_DEFPIN(30, 2, A); _FL_DEFPIN(31, 3, A);
+_FL_DEFPIN(32, 4, A); _FL_DEFPIN(33, 5, A); _FL_DEFPIN(34, 6, A); _FL_DEFPIN(35, 7, A);
+_FL_DEFPIN(36, 4, E); _FL_DEFPIN(37, 5, E); _FL_DEFPIN(38, 0, F); _FL_DEFPIN(39, 1, F);
+_FL_DEFPIN(40, 2, F); _FL_DEFPIN(41, 3, F); _FL_DEFPIN(42, 4, F); _FL_DEFPIN(43, 5, F);
+_FL_DEFPIN(44, 6, F); _FL_DEFPIN(45, 7, F);
 
 #define SPI_DATA 22
 #define SPI_CLOCK 21
@@ -314,17 +332,15 @@ _DEFPIN_AVR(44, 64, F); _DEFPIN_AVR(45, 128, F);
 #elif defined(__AVR_ATmega32U4__)
 
 // leonard defs
-_IO(B); _IO(C); _IO(D); _IO(E); _IO(F);
-
 #define MAX_PIN 30
-_DEFPIN_AVR(0, 4, D); _DEFPIN_AVR(1, 8, D); _DEFPIN_AVR(2, 2, D); _DEFPIN_AVR(3, 1, D);
-_DEFPIN_AVR(4, 16, D); _DEFPIN_AVR(5, 64, C); _DEFPIN_AVR(6, 128, D); _DEFPIN_AVR(7, 64, E);
-_DEFPIN_AVR(8, 16, B); _DEFPIN_AVR(9, 32, B); _DEFPIN_AVR(10, 64, B); _DEFPIN_AVR(11, 128, B);
-_DEFPIN_AVR(12, 64, D); _DEFPIN_AVR(13, 128, C); _DEFPIN_AVR(14, 8, B); _DEFPIN_AVR(15, 2, B);
-_DEFPIN_AVR(16, 4, B); _DEFPIN_AVR(17, 1, B); _DEFPIN_AVR(18, 128, F); _DEFPIN_AVR(19, 64, F);
-_DEFPIN_AVR(20, 32, F); _DEFPIN_AVR(21, 16, F); _DEFPIN_AVR(22, 2, F); _DEFPIN_AVR(23, 1, F);
-_DEFPIN_AVR(24, 16, D); _DEFPIN_AVR(25, 128, D); _DEFPIN_AVR(26, 16, B); _DEFPIN_AVR(27, 32, B);
-_DEFPIN_AVR(28, 64, B); _DEFPIN_AVR(29, 64, D); _DEFPIN_AVR(30, 32, D);
+_FL_DEFPIN(0, 2, D); _FL_DEFPIN(1, 3, D); _FL_DEFPIN(2, 1, D); _FL_DEFPIN(3, 0, D);
+_FL_DEFPIN(4, 4, D); _FL_DEFPIN(5, 6, C); _FL_DEFPIN(6, 7, D); _FL_DEFPIN(7, 6, E);
+_FL_DEFPIN(8, 4, B); _FL_DEFPIN(9, 5, B); _FL_DEFPIN(10, 6, B); _FL_DEFPIN(11, 7, B);
+_FL_DEFPIN(12, 6, D); _FL_DEFPIN(13, 7, C); _FL_DEFPIN(14, 3, B); _FL_DEFPIN(15, 1, B);
+_FL_DEFPIN(16, 2, B); _FL_DEFPIN(17, 0, B); _FL_DEFPIN(18, 7, F); _FL_DEFPIN(19, 6, F);
+_FL_DEFPIN(20, 5, F); _FL_DEFPIN(21, 4, F); _FL_DEFPIN(22, 1, F); _FL_DEFPIN(23, 0, F);
+_FL_DEFPIN(24, 4, D); _FL_DEFPIN(25, 7, D); _FL_DEFPIN(26, 4, B); _FL_DEFPIN(27, 5, B);
+_FL_DEFPIN(28, 6, B); _FL_DEFPIN(29, 6, D); _FL_DEFPIN(30, 5, D);
 
 #define SPI_DATA 16
 #define SPI_CLOCK 15
diff --git a/platforms/esp/32/fastpin_esp32.h b/platforms/esp/32/fastpin_esp32.h
index fd03d5c813..d54d7fee67 100644
--- a/platforms/esp/32/fastpin_esp32.h
+++ b/platforms/esp/32/fastpin_esp32.h
@@ -11,7 +11,7 @@ template<uint8_t PIN, uint32_t MASK> class _ESPPIN {
   inline static void setOutput() { pinMode(PIN, OUTPUT); }
   inline static void setInput() { pinMode(PIN, INPUT); }
 
-  inline static void hi() __attribute__ ((always_inline)) { 
+  inline static void hi() __attribute__ ((always_inline)) {
       if (PIN < 32) GPIO.out_w1ts = MASK;
       else GPIO.out1_w1ts.val = MASK;
   }
@@ -28,9 +28,9 @@ template<uint8_t PIN, uint32_t MASK> class _ESPPIN {
 
   inline static void strobe() __attribute__ ((always_inline)) { toggle(); toggle(); }
 
-  inline static void toggle() __attribute__ ((always_inline)) { 
-      if(PIN < 32) { GPIO.out ^= MASK; } 
-      else { GPIO.out1.val ^=MASK; } 
+  inline static void toggle() __attribute__ ((always_inline)) {
+      if(PIN < 32) { GPIO.out ^= MASK; }
+      else { GPIO.out1.val ^=MASK; }
   }
 
   inline static void hi(register port_ptr_t port) __attribute__ ((always_inline)) { hi(); }
@@ -52,7 +52,7 @@ template<uint8_t PIN, uint32_t MASK> class _ESPPIN {
       else return &GPIO.out1.val;
   }
 
-  inline static port_ptr_t sport() __attribute__ ((always_inline)) { 
+  inline static port_ptr_t sport() __attribute__ ((always_inline)) {
       if (PIN < 32) return &GPIO.out_w1ts;
       else return &GPIO.out1_w1ts.val;
   }
@@ -70,46 +70,45 @@ template<uint8_t PIN, uint32_t MASK> class _ESPPIN {
   }
 };
 
-#define _DEFPIN_ESP32(PIN)  template<> class FastPin<PIN> : public _ESPPIN<PIN, ((uint32_t)1 << PIN)> {};
-#define _DEFPIN_32_33_ESP32(PIN) template<> class FastPin<PIN> : public _ESPPIN<PIN, ((uint32_t)1 << (PIN-32))> {};
+#define _FL_DEFPIN(PIN)  template<> class FastPin<PIN> : public _ESPPIN<PIN, ((PIN<32)?((uint32_t)1 << PIN):((uint32_t)1 << (PIN-32)))> {};
 
-_DEFPIN_ESP32(0);
-_DEFPIN_ESP32(1); // WARNING: Using TX causes flashiness when uploading
-_DEFPIN_ESP32(2); 
-_DEFPIN_ESP32(3); // WARNING: Using RX causes flashiness when uploading
-_DEFPIN_ESP32(4);
-_DEFPIN_ESP32(5);
+_FL_DEFPIN(0);
+_FL_DEFPIN(1); // WARNING: Using TX causes flashiness when uploading
+_FL_DEFPIN(2);
+_FL_DEFPIN(3); // WARNING: Using RX causes flashiness when uploading
+_FL_DEFPIN(4);
+_FL_DEFPIN(5);
 
 // -- These pins are not safe to use:
-// _DEFPIN_ESP32(6,6); _DEFPIN_ESP32(7,7); _DEFPIN_ESP32(8,8); 
-// _DEFPIN_ESP32(9,9); _DEFPIN_ESP32(10,10); _DEFPIN_ESP32(11,11); 
+// _FL_DEFPIN(6); _FL_DEFPIN(7); _FL_DEFPIN(8);
+// _FL_DEFPIN(9); _FL_DEFPIN(10); _FL_DEFPIN(11);
 
-_DEFPIN_ESP32(12);
-_DEFPIN_ESP32(13);
-_DEFPIN_ESP32(14);
-_DEFPIN_ESP32(15);
-_DEFPIN_ESP32(16);
-_DEFPIN_ESP32(17);
-_DEFPIN_ESP32(18);
-_DEFPIN_ESP32(19);
+_FL_DEFPIN(12);
+_FL_DEFPIN(13);
+_FL_DEFPIN(14);
+_FL_DEFPIN(15);
+_FL_DEFPIN(16);
+_FL_DEFPIN(17);
+_FL_DEFPIN(18);
+_FL_DEFPIN(19);
 
-// No pin 20 : _DEFPIN_ESP32(20,20); 
+// No pin 20 : _FL_DEFPIN(20);
 
-_DEFPIN_ESP32(21); // Works, but note that GPIO21 is I2C SDA
-_DEFPIN_ESP32(22); // Works, but note that GPIO22 is I2C SCL
-_DEFPIN_ESP32(23); 
+_FL_DEFPIN(21); // Works, but note that GPIO21 is I2C SDA
+_FL_DEFPIN(22); // Works, but note that GPIO22 is I2C SCL
+_FL_DEFPIN(23);
 
-// No pin 24 : _DEFPIN_ESP32(24,24); 
+// No pin 24 : _FL_DEFPIN(24);
 
-_DEFPIN_ESP32(25);
-_DEFPIN_ESP32(26);
-_DEFPIN_ESP32(27); 
+_FL_DEFPIN(25);
+_FL_DEFPIN(26);
+_FL_DEFPIN(27);
 
-// No pin 28-31: _DEFPIN_ESP32(28,28); _DEFPIN_ESP32(29,29); _DEFPIN_ESP32(30,30); _DEFPIN_ESP32(31,31);
+// No pin 28-31: _FL_DEFPIN(28); _FL_DEFPIN(29); _FL_DEFPIN(30); _FL_DEFPIN(31);
 
 // Need special handling for pins > 31
-_DEFPIN_32_33_ESP32(32); 
-_DEFPIN_32_33_ESP32(33);
+_FL_DEFPIN(32);
+_FL_DEFPIN(33);
 
 #define HAS_HARDWARE_PIN_SUPPORT
 
diff --git a/platforms/esp/8266/fastpin_esp8266.h b/platforms/esp/8266/fastpin_esp8266.h
index b8095b8597..69085bf9a2 100644
--- a/platforms/esp/8266/fastpin_esp8266.h
+++ b/platforms/esp/8266/fastpin_esp8266.h
@@ -42,40 +42,40 @@ template<uint8_t PIN, uint32_t MASK> class _ESPPIN {
   inline static bool isset() __attribute__ ((always_inline)) { return (PIN < 16) ? (GPO & MASK) : (GP16O & MASK); }
 };
 
-#define _DEFPIN_ESP8266(PIN, REAL_PIN) template<> class FastPin<PIN> : public _ESPPIN<REAL_PIN, (1<<(REAL_PIN & 0xFF))> {};
+#define _FL_DEFPIN(PIN, REAL_PIN) template<> class FastPin<PIN> : public _ESPPIN<REAL_PIN, (1<<(REAL_PIN & 0xFF))> {};
 
 
 #ifdef FASTLED_ESP8266_RAW_PIN_ORDER
 #define MAX_PIN 16
-_DEFPIN_ESP8266(0,0); _DEFPIN_ESP8266(1,1); _DEFPIN_ESP8266(2,2); _DEFPIN_ESP8266(3,3);
-_DEFPIN_ESP8266(4,4); _DEFPIN_ESP8266(5,5);
+_FL_DEFPIN(0,0); _FL_DEFPIN(1,1); _FL_DEFPIN(2,2); _FL_DEFPIN(3,3);
+_FL_DEFPIN(4,4); _FL_DEFPIN(5,5);
 
 // These pins should be disabled, as they always cause WDT resets
-// _DEFPIN_ESP8266(6,6); _DEFPIN_ESP8266(7,7);
-// _DEFPIN_ESP8266(8,8); _DEFPIN_ESP8266(9,9); _DEFPIN_ESP8266(10,10); _DEFPIN_ESP8266(11,11);
+// _FL_DEFPIN(6,6); _FL_DEFPIN(7,7);
+// _FL_DEFPIN(8,8); _FL_DEFPIN(9,9); _FL_DEFPIN(10,10); _FL_DEFPIN(11,11);
 
-_DEFPIN_ESP8266(12,12); _DEFPIN_ESP8266(13,13); _DEFPIN_ESP8266(14,14); _DEFPIN_ESP8266(15,15);
-_DEFPIN_ESP8266(16,16);
+_FL_DEFPIN(12,12); _FL_DEFPIN(13,13); _FL_DEFPIN(14,14); _FL_DEFPIN(15,15);
+_FL_DEFPIN(16,16);
 
 #define PORTA_FIRST_PIN 12
 #elif defined(FASTLED_ESP8266_D1_PIN_ORDER)
 #define MAX_PIN 15
-_DEFPIN_ESP8266(0,3);
-_DEFPIN_ESP8266(1,1);
-_DEFPIN_ESP8266(2,16);
-_DEFPIN_ESP8266(3,5);
-_DEFPIN_ESP8266(4,4);
-_DEFPIN_ESP8266(5,14);
-_DEFPIN_ESP8266(6,12);
-_DEFPIN_ESP8266(7,13);
-_DEFPIN_ESP8266(8,0);
-_DEFPIN_ESP8266(9,2);
-_DEFPIN_ESP8266(10,15);
-_DEFPIN_ESP8266(11,13);
-_DEFPIN_ESP8266(12,12);
-_DEFPIN_ESP8266(13,14);
-_DEFPIN_ESP8266(14,4);
-_DEFPIN_ESP8266(15,5);
+_FL_DEFPIN(0,3);
+_FL_DEFPIN(1,1);
+_FL_DEFPIN(2,16);
+_FL_DEFPIN(3,5);
+_FL_DEFPIN(4,4);
+_FL_DEFPIN(5,14);
+_FL_DEFPIN(6,12);
+_FL_DEFPIN(7,13);
+_FL_DEFPIN(8,0);
+_FL_DEFPIN(9,2);
+_FL_DEFPIN(10,15);
+_FL_DEFPIN(11,13);
+_FL_DEFPIN(12,12);
+_FL_DEFPIN(13,14);
+_FL_DEFPIN(14,4);
+_FL_DEFPIN(15,5);
 
 #define PORTA_FIRST_PIN 12
 
@@ -83,16 +83,16 @@ _DEFPIN_ESP8266(15,5);
 #define MAX_PIN 10
 
 // This seems to be the standard Dxx pin mapping on most of the esp boards that i've found
-_DEFPIN_ESP8266(0,16); _DEFPIN_ESP8266(1,5); _DEFPIN_ESP8266(2,4); _DEFPIN_ESP8266(3,0);
-_DEFPIN_ESP8266(4,2); _DEFPIN_ESP8266(5,14); _DEFPIN_ESP8266(6,12); _DEFPIN_ESP8266(7,13);
-_DEFPIN_ESP8266(8,15); _DEFPIN_ESP8266(9,3); _DEFPIN_ESP8266(10,1);
+_FL_DEFPIN(0,16); _FL_DEFPIN(1,5); _FL_DEFPIN(2,4); _FL_DEFPIN(3,0);
+_FL_DEFPIN(4,2); _FL_DEFPIN(5,14); _FL_DEFPIN(6,12); _FL_DEFPIN(7,13);
+_FL_DEFPIN(8,15); _FL_DEFPIN(9,3); _FL_DEFPIN(10,1);
 
 #define PORTA_FIRST_PIN 6
 
 // The rest of the pins - these are generally not available
-// _DEFPIN_ESP8266(11,6);
-// _DEFPIN_ESP8266(12,7); _DEFPIN_ESP8266(13,8); _DEFPIN_ESP8266(14,9); _DEFPIN_ESP8266(15,10);
-// _DEFPIN_ESP8266(16,11);
+// _FL_DEFPIN(11,6);
+// _FL_DEFPIN(12,7); _FL_DEFPIN(13,8); _FL_DEFPIN(14,9); _FL_DEFPIN(15,10);
+// _FL_DEFPIN(16,11);
 
 #endif
 

From 6110d35f72214bb0bdb87bf37a030bc1769317b2 Mon Sep 17 00:00:00 2001
From: Daniel Garcia <dgarcia@dgarcia.net>
Date: Sun, 25 Aug 2019 17:07:54 -0700
Subject: [PATCH 096/204] Fix #865 - use the atmega1284/644 definitions for
 atmega32 as well

---
 platforms/avr/fastpin_avr.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/platforms/avr/fastpin_avr.h b/platforms/avr/fastpin_avr.h
index a8df76c406..5b39aa098d 100644
--- a/platforms/avr/fastpin_avr.h
+++ b/platforms/avr/fastpin_avr.h
@@ -215,7 +215,7 @@ _FL_DEFPIN(16, 2, C); _FL_DEFPIN(17, 3, C); _FL_DEFPIN(18, 4, C); _FL_DEFPIN(19,
 #define SPI_UART0_CLOCK 4
 #endif
 
-#elif defined(__AVR_ATmega1284P__) || defined(__AVR_ATmega644P__)
+#elif defined(__AVR_ATmega1284P__) || defined(__AVR_ATmega644P__)  || defined(__AVR_ATmega32__)
 
 #define MAX_PIN 31
 _FL_DEFPIN(0, 0, B); _FL_DEFPIN(1, 1, B); _FL_DEFPIN(2, 2, B); _FL_DEFPIN(3, 3, B);

From be47737f0481ab02362987fb46ffe10660d64495 Mon Sep 17 00:00:00 2001
From: Daniel Garcia <dgarcia@dgarcia.net>
Date: Sun, 25 Aug 2019 17:11:16 -0700
Subject: [PATCH 097/204] Spinning FastLED 3.3.2

---
 FastLED.h          | 6 +++---
 library.json       | 2 +-
 library.properties | 2 +-
 release_notes.md   | 4 +++-
 4 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/FastLED.h b/FastLED.h
index cafbdb7815..d2d4b64997 100644
--- a/FastLED.h
+++ b/FastLED.h
@@ -8,12 +8,12 @@
 #define FASTLED_HAS_PRAGMA_MESSAGE
 #endif
 
-#define FASTLED_VERSION 3003001
+#define FASTLED_VERSION 3003002
 #ifndef FASTLED_INTERNAL
 #  ifdef FASTLED_HAS_PRAGMA_MESSAGE
-#    pragma message "FastLED version 3.003.001"
+#    pragma message "FastLED version 3.003.002"
 #  else
-#    warning FastLED version 3.003.001  (Not really a warning, just telling you here.)
+#    warning FastLED version 3.003.002  (Not really a warning, just telling you here.)
 #  endif
 #endif
 
diff --git a/library.json b/library.json
index b95708ab64..c7075be799 100644
--- a/library.json
+++ b/library.json
@@ -18,7 +18,7 @@
         "type": "git",
         "url": "https://github.com/FastLED/FastLED.git"
     },
-    "version": "3.3.1",
+    "version": "3.3.2",
     "license": "MIT",
     "homepage": "http://fastled.io",
     "frameworks": "arduino",
diff --git a/library.properties b/library.properties
index b6f22eb893..31175c5506 100644
--- a/library.properties
+++ b/library.properties
@@ -1,5 +1,5 @@
 name=FastLED
-version=3.3.1
+version=3.3.2
 author=Daniel Garcia
 maintainer=Daniel Garcia <dgarcia@fastled.io>
 sentence=Multi-platform library for controlling dozens of different types of LEDs along with optimized math, effect, and noise functions.
diff --git a/release_notes.md b/release_notes.md
index 158737c5ad..cf9ce1a365 100644
--- a/release_notes.md
+++ b/release_notes.md
@@ -1,7 +1,9 @@
-FastLED 3.3.2pre
+FastLED 3.3.2
 =============
 
 * Fix APA102 compile error #870 
+* Normalize pin definition macros so that we can have an .ino file that can be used to output what pin/port mappings should be for a platform
+* Add defnition for ATmega32
 
 FastLED 3.3.1
 =============

From 8b31b643e42d8a53a3df9abc96bf45f00adf52df Mon Sep 17 00:00:00 2001
From: Dan Garcia <dangarcia@apple.com>
Date: Mon, 26 Aug 2019 11:45:43 -0700
Subject: [PATCH 098/204] Should fix the build error in #873

---
 platforms/arm/stm32/fastpin_arm_stm32.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/platforms/arm/stm32/fastpin_arm_stm32.h b/platforms/arm/stm32/fastpin_arm_stm32.h
index c80390f3c4..274d0f60da 100644
--- a/platforms/arm/stm32/fastpin_arm_stm32.h
+++ b/platforms/arm/stm32/fastpin_arm_stm32.h
@@ -58,11 +58,11 @@ template<uint8_t PIN, uint8_t _BIT, uint32_t _MASK, typename _GPIO> class _ARMPI
 #if defined(STM32F10X_MD)
   #define _R(T) struct __gen_struct_ ## T
   #define _RD32(T) struct __gen_struct_ ## T { static __attribute__((always_inline)) inline volatile GPIO_TypeDef * r() { return T; } };
-  #define _FL_IO(L,C) _RD32(GPIO ## L);  __FL_DEFINE_PORT3(L, C, _R(GPIO ## L));
+  #define _FL_IO(L,C) _RD32(GPIO ## L);  _FL_DEFINE_PORT3(L, C, _R(GPIO ## L));
 #elif defined(__STM32F1__)
   #define _R(T) struct __gen_struct_ ## T
   #define _RD32(T) struct __gen_struct_ ## T { static __attribute__((always_inline)) inline gpio_reg_map* r() { return T->regs; } };
-  #define _FL_IO(L,C) _RD32(GPIO ## L); __FL_DEFINE_PORT3(L, C, _R(GPIO ## L));
+  #define _FL_IO(L,C) _RD32(GPIO ## L); _FL_DEFINE_PORT3(L, C, _R(GPIO ## L));
 #else
  #error "Platform not supported"
 #endif

From 53696c7c18053efb0c761d0a591663f483af1abb Mon Sep 17 00:00:00 2001
From: Gabriel Levy <gabelevy@gmail.com>
Date: Tue, 17 Sep 2019 17:36:46 -0700
Subject: [PATCH 099/204] Fix truncated SPI_DATA_RATE template parameter to
 addLeds with Teensy 4

---
 FastLED.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/FastLED.h b/FastLED.h
index d2d4b64997..05e8530ca9 100644
--- a/FastLED.h
+++ b/FastLED.h
@@ -220,7 +220,7 @@ class CFastLED {
 	/// @tparam RGB_ORDER - the rgb ordering for the leds (e.g. what order red, green, and blue data is written out in)
 	/// @tparam SPI_DATA_RATE - the data rate to drive the SPI clock at, defined using DATA_RATE_MHZ or DATA_RATE_KHZ macros
 	/// @returns a reference to the added controller
-	template<ESPIChipsets CHIPSET,  uint8_t DATA_PIN, uint8_t CLOCK_PIN, EOrder RGB_ORDER, uint8_t SPI_DATA_RATE > CLEDController &addLeds(struct CRGB *data, int nLedsOrOffset, int nLedsIfOffset = 0) {
+	template<ESPIChipsets CHIPSET,  uint8_t DATA_PIN, uint8_t CLOCK_PIN, EOrder RGB_ORDER, uint32_t SPI_DATA_RATE > CLEDController &addLeds(struct CRGB *data, int nLedsOrOffset, int nLedsIfOffset = 0) {
 		switch(CHIPSET) {
 			case LPD6803: { static LPD6803Controller<DATA_PIN, CLOCK_PIN, RGB_ORDER, SPI_DATA_RATE> c; return addLeds(&c, data, nLedsOrOffset, nLedsIfOffset); }
 			case LPD8806: { static LPD8806Controller<DATA_PIN, CLOCK_PIN, RGB_ORDER, SPI_DATA_RATE> c; return addLeds(&c, data, nLedsOrOffset, nLedsIfOffset); }
@@ -271,7 +271,7 @@ class CFastLED {
 		return addLeds<CHIPSET, SPI_DATA, SPI_CLOCK, RGB_ORDER>(data, nLedsOrOffset, nLedsIfOffset);
 	}
 
-	template<ESPIChipsets CHIPSET, EOrder RGB_ORDER, uint8_t SPI_DATA_RATE> static CLEDController &addLeds(struct CRGB *data, int nLedsOrOffset, int nLedsIfOffset = 0) {
+	template<ESPIChipsets CHIPSET, EOrder RGB_ORDER, uint32_t SPI_DATA_RATE> static CLEDController &addLeds(struct CRGB *data, int nLedsOrOffset, int nLedsIfOffset = 0) {
 		return addLeds<CHIPSET, SPI_DATA, SPI_CLOCK, RGB_ORDER, SPI_DATA_RATE>(data, nLedsOrOffset, nLedsIfOffset);
 	}
 

From 0f915c295f7a1d7486e3c29ff3b04947bb65e892 Mon Sep 17 00:00:00 2001
From: Jason Tranchida <jason.tranchida@oculus.com>
Date: Fri, 28 Jun 2019 20:40:57 -0700
Subject: [PATCH 100/204] Fix compilation for nRF5 boards with a single GPIO
 port

nrf52832, nrf52810, and nrf52811 have a single GPIO port, and do not define NRF_P1_BASE.  If NRF_P1_BASE is not defined, do not define the generated structures required to access GPIO port 1.
---
 platforms/arm/nrf52/fastpin_arm_nrf52.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/platforms/arm/nrf52/fastpin_arm_nrf52.h b/platforms/arm/nrf52/fastpin_arm_nrf52.h
index 7526000445..e035663018 100644
--- a/platforms/arm/nrf52/fastpin_arm_nrf52.h
+++ b/platforms/arm/nrf52/fastpin_arm_nrf52.h
@@ -77,11 +77,14 @@ struct __generated_struct_NRF_P0 {
         return NRF_P0_BASE;
     }
 };
+// Not all NRF52 chips have two ports.  Only define if P1 is present.
+#if defined(NRF_P1_BASE)
 struct __generated_struct_NRF_P1 {
     FASTLED_NRF52_INLINE_ATTRIBUTE constexpr static uintptr_t r() {
         return NRF_P1_BASE;
     }
 };
+#endif
 
 
 // The actual class template can then use a typename, for what is essentially a constexpr NRF_GPIO_Type*

From de86febbfc005285b2541b84c035a94c836d2f0d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marc=20Br=C3=BCckner?= <marc@ma-br.de>
Date: Sat, 19 Oct 2019 01:52:17 +0200
Subject: [PATCH 101/204] CPixelView operator-() gives a resulting pixelview
 pointing to wrong data

In the operator implementation the `leds` pointer is already shifted to the opposite end of the data. Using the constructor with start and end index, however, does the shift again. A simple fix is to remove the shift from the operator.
---
 pixelset.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pixelset.h b/pixelset.h
index 097df6d67a..9c69176eee 100644
--- a/pixelset.h
+++ b/pixelset.h
@@ -64,7 +64,7 @@ class CPixelView {
   /// Not sure i want this? inline CPixelView operator()(int end) { return CPixelView(leds, 0, end); }
 
   /// Return the reverse ordering of this set
-  inline CPixelView operator-() { return CPixelView(leds + len - dir, len - dir, 0); }
+  inline CPixelView operator-() { return CPixelView(leds, len - dir, 0); }
 
   /// Return a pointer to the first element in this set
   inline operator PIXEL_TYPE* () const { return leds; }

From bc273371fd28390906c7bee51ade27618d5a74d0 Mon Sep 17 00:00:00 2001
From: Charlie Mooney <cmooney3@gmail.com>
Date: Sun, 20 Oct 2019 15:44:56 -0700
Subject: [PATCH 102/204] Add atmega16 support to fastpin_avr (Mightycore
 Arduino)

The Atmega16 has the same pinout as the Atmega32 when using the MightyCore board
in arduino.  This commit just adds it to fastpin_avr.

Tested on my Atmega16 board, and appears to work like a charm

Signed-off-by: Charlie Mooney <cmooney3@gmail.com>
---
 platforms/avr/fastpin_avr.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/platforms/avr/fastpin_avr.h b/platforms/avr/fastpin_avr.h
index 5b39aa098d..956e00a9d5 100644
--- a/platforms/avr/fastpin_avr.h
+++ b/platforms/avr/fastpin_avr.h
@@ -215,7 +215,7 @@ _FL_DEFPIN(16, 2, C); _FL_DEFPIN(17, 3, C); _FL_DEFPIN(18, 4, C); _FL_DEFPIN(19,
 #define SPI_UART0_CLOCK 4
 #endif
 
-#elif defined(__AVR_ATmega1284P__) || defined(__AVR_ATmega644P__)  || defined(__AVR_ATmega32__)
+#elif defined(__AVR_ATmega1284P__) || defined(__AVR_ATmega644P__) || defined(__AVR_ATmega32__) || defined(__AVR_ATmega16__)
 
 #define MAX_PIN 31
 _FL_DEFPIN(0, 0, B); _FL_DEFPIN(1, 1, B); _FL_DEFPIN(2, 2, B); _FL_DEFPIN(3, 3, B);

From 316e1bc7a481950aa5cf493c408085e2e353e00b Mon Sep 17 00:00:00 2001
From: Pasindu Sandeepa <38062478+PasinduSan@users.noreply.github.com>
Date: Tue, 29 Oct 2019 08:51:06 +0530
Subject: [PATCH 103/204] updated

---
 pixelset.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pixelset.h b/pixelset.h
index 097df6d67a..a2af7cc47f 100644
--- a/pixelset.h
+++ b/pixelset.h
@@ -7,9 +7,9 @@
 #include <stdlib.h>
 #endif
 
-/// Represents a set of CRGB led objects.  Provides the [] array operator, and works like a normal array in that case.
-/// This should be kept in sync with the set of functions provided by CRGB as well as functions in colorutils.  Note
-/// that a pixel set is a window into another set of led data, it is not its own set of led data.
+/////  Represents a set of CRGB led objects.  Provides the [] array operator, and works like a normal array in that case.
+/////  This should be kept in sync with the set of functions provided by CRGB as well as functions in colorutils.  Note
+/////  that a pixel set is a window into another set of led data, it is not its own set of led data.
 template<class PIXEL_TYPE>
 class CPixelView {
 public:

From 1e3f3c78ac05e4413e591f2a3a9f3421788ad5e6 Mon Sep 17 00:00:00 2001
From: Nick Pisarro <infinityminusnine@gmail.com>
Date: Thu, 31 Oct 2019 11:40:32 -0700
Subject: [PATCH 104/204] Convert rand multiplication constants to shifts and
 adds.

Since many popular low-cost chipsets do not have a native multiply operation,
this calculation can be executed much more efficiently using only add and left shift.
---
 lib8tion/random8.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/lib8tion/random8.h b/lib8tion/random8.h
index ba60cf5755..ab9ac27e52 100644
--- a/lib8tion/random8.h
+++ b/lib8tion/random8.h
@@ -12,13 +12,16 @@
 #define FASTLED_RAND16_2053  ((uint16_t)(2053))
 #define FASTLED_RAND16_13849 ((uint16_t)(13849))
 
+// equivalent to x * 2053
+#define APPLY_FASTLED_RAND16_2053(x) (x << 11) + (x << 2) + x
+
 /// random number seed
 extern uint16_t rand16seed;// = RAND16_SEED;
 
 /// Generate an 8-bit random number
 LIB8STATIC uint8_t random8()
 {
-    rand16seed = (rand16seed * FASTLED_RAND16_2053) + FASTLED_RAND16_13849;
+    rand16seed = APPLY_FASTLED_RAND16_2053(rand16seed) + FASTLED_RAND16_13849;
     // return the sum of the high and low bytes, for better
     //  mixing and non-sequential correlation
     return (uint8_t)(((uint8_t)(rand16seed & 0xFF)) +
@@ -28,7 +31,7 @@ LIB8STATIC uint8_t random8()
 /// Generate a 16 bit random number
 LIB8STATIC uint16_t random16()
 {
-    rand16seed = (rand16seed * FASTLED_RAND16_2053) + FASTLED_RAND16_13849;
+    rand16seed = APPLY_FASTLED_RAND16_2053(rand16seed) + FASTLED_RAND16_13849;
     return rand16seed;
 }
 

From 1b32d0316ad790fdd178bca02f1af71c0ce15ef8 Mon Sep 17 00:00:00 2001
From: Kurt Eckhardt <kurte@rockisland.com>
Date: Fri, 22 Nov 2019 05:26:55 -0800
Subject: [PATCH 105/204] Support WS2812Serial Library on Teensy T4

Recently it was reported that the WS2812Serial library (github.com/PaulStoffregen/WS2812Serial) had not been ported over to work on the new Teensy T4, so I thought I would take a look.

I added the T4 support, which is now pending in a Pull Request.  During that I also found that there were errors in one of the core header files for the T4, which I also fixed, which is now pending in another Pull Request (github.com/PaulStoffregen/Cores)

After that was working, it was pointed out that the FastLED sample in the WS2812Serial library did not compile. So...

More details in the forum Thread:
https://forum.pjrc.com/threads/58442-Non-Blocking-WS2812-LED-Library-and-Teensy-4-0
---
 platforms/arm/mxrt1062/fastled_arm_mxrt1062.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/platforms/arm/mxrt1062/fastled_arm_mxrt1062.h b/platforms/arm/mxrt1062/fastled_arm_mxrt1062.h
index 0814c7fad4..5098af335a 100644
--- a/platforms/arm/mxrt1062/fastled_arm_mxrt1062.h
+++ b/platforms/arm/mxrt1062/fastled_arm_mxrt1062.h
@@ -3,6 +3,9 @@
 
 #include "fastpin_arm_mxrt1062.h"
 #include "fastspi_arm_mxrt1062.h"
+#include "../k20/octows2811_controller.h"
+#include "../k20/ws2812serial_controller.h"
+#include "../k20/smartmatrix_t3.h"
 #include "clockless_arm_mxrt1062.h"
 #include "block_clockless_arm_mxrt1062.h"
 

From 5b5376459c989a6e0af8d18ebcd75c90a80c6b4b Mon Sep 17 00:00:00 2001
From: Chris Sharp <sharpchris@gmail.com>
Date: Mon, 16 Dec 2019 19:04:42 -0500
Subject: [PATCH 106/204] Update MultiArrays.ino

Correction so that the Blue LEDs are properly blacked out
---
 examples/Multiple/MultiArrays/MultiArrays.ino | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/Multiple/MultiArrays/MultiArrays.ino b/examples/Multiple/MultiArrays/MultiArrays.ino
index 50241c394b..9d3cbb6b7f 100644
--- a/examples/Multiple/MultiArrays/MultiArrays.ino
+++ b/examples/Multiple/MultiArrays/MultiArrays.ino
@@ -33,7 +33,7 @@ void loop() {
     // clear our current dot before we move on
     redLeds[i] = CRGB::Black;
     greenLeds[i] = CRGB::Black;
-    blueLeds[i] = CRGB::Blue;
+    blueLeds[i] = CRGB::Black;
     delay(100);
   }
 
@@ -46,7 +46,7 @@ void loop() {
     // clear our current dot before we move on
     redLeds[i] = CRGB::Black;
     greenLeds[i] = CRGB::Black;
-    blueLeds[i] = CRGB::Blue;
+    blueLeds[i] = CRGB::Black;
     delay(100);
   }
 }

From 23e99173e8286e7bcddc5c2e5f9248f2209c0c14 Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Wed, 18 Dec 2019 17:23:42 -0500
Subject: [PATCH 107/204] Added optional timing instrumentation to watch for
 dropped interrupts

---
 platforms/esp/32/clockless_rmt_esp32.h | 74 ++++++++++++++++++++++----
 1 file changed, 65 insertions(+), 9 deletions(-)

diff --git a/platforms/esp/32/clockless_rmt_esp32.h b/platforms/esp/32/clockless_rmt_esp32.h
index 6368bc9328..26d7cbac96 100644
--- a/platforms/esp/32/clockless_rmt_esp32.h
+++ b/platforms/esp/32/clockless_rmt_esp32.h
@@ -114,6 +114,13 @@ __attribute__ ((always_inline)) inline static uint32_t __clock_cycles() {
 #define FASTLED_HAS_CLOCKLESS 1
 #define NUM_COLOR_CHANNELS 3
 
+// -- Set to true to print debugging information about timing
+//    Useful for finding out if timing is being messed up by other things
+//    on the processor (WiFi, for example)
+#ifndef FASTLED_RMT_SHOW_TIMER
+#define FASTLED_RMT_SHOW_TIMER false
+#endif
+
 // -- Configuration constants
 #define DIVIDER             2 /* 4, 8 still seem to work, but timings become marginal */
 #define MAX_PULSES         32 /* A channel has a 64 "pulse" buffer - we use half per pass */
@@ -192,6 +199,11 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
     rmt_item32_t * mBuffer;
     uint16_t       mBufferSize;
 
+    // -- Timing information for debugging
+    uint32_t       mLastFillTime;
+    uint32_t       mTotalTime;
+    uint32_t       mTimeCount;
+
 public:
 
     void init()
@@ -228,9 +240,6 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
 
     void initRMT()
     {
-        // -- Only need to do this once
-        if (gInitialized) return;
-
         for (int i = 0; i < FASTLED_RMT_MAX_CHANNELS; i++) {
             gOnChannel[i] = NULL;
 
@@ -278,13 +287,47 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         gInitialized = true;
     }
 
+    virtual void IRAM_ATTR initTimer()
+    {
+        mLastFillTime = 0;
+        mTotalTime = 0;
+        mTimeCount = 0;
+    }
+
+    virtual void IRAM_ATTR updateTimer()
+    {
+        uint32_t current = __clock_cycles();
+        if (mLastFillTime != 0) {
+            mTotalTime += (current - mLastFillTime);
+            mTimeCount++;
+        }
+        mLastFillTime = current;
+    }
+
+    virtual void IRAM_ATTR printTimer()
+    {
+        if (mTimeCount > 0) {
+            uint32_t ave = mTotalTime / mTimeCount;
+            Serial.print("Controller on pin ");
+            Serial.print(mPin);
+            Serial.print(" : ");
+            Serial.print(ave);
+            Serial.print(" cycles per fill with ");
+            Serial.print(mTimeCount);
+            Serial.println(" fills");
+        }
+    }
+
     // -- Show pixels
     //    This is the main entry point for the controller.
-    virtual void showPixels(PixelController<RGB_ORDER> & pixels)
+    virtual void IRAM_ATTR showPixels(PixelController<RGB_ORDER> & pixels)
     {
         if (gNumStarted == 0) {
             // -- First controller: make sure everything is set up
-            initRMT();
+            // -- Only need to do this once
+            if ( ! gInitialized) {
+                initRMT();
+            }
             xSemaphoreTake(gTX_sem, portMAX_DELAY);
         }
 
@@ -296,7 +339,10 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
             //    variable in the calling function, and this data structure
             //    needs to outlive this call to showPixels.
             (*mPixels) = pixels;
-        }        
+        }
+
+        // -- Keep track of timing between buffer fills, for debugging
+        initTimer();
 
         // -- Keep track of the number of strips we've seen
         gNumStarted++;
@@ -319,6 +365,13 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
             xSemaphoreTake(gTX_sem, portMAX_DELAY);
             xSemaphoreGive(gTX_sem);
 
+            if (FASTLED_RMT_SHOW_TIMER) {
+                for (int i = 0; i < gNumControllers; i++) {
+                    ClocklessController * pController = static_cast<ClocklessController*>(gControllers[i]);
+                    pController->printTimer();
+                }
+            }
+
             // -- Reset the counters
             gNumStarted = 0;
             gNumDone = 0;
@@ -417,6 +470,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
             // -- Fill both halves of the buffer
             fillHalfRMTBuffer();
             fillHalfRMTBuffer();
+            updateTimer();
 
             // -- Turn on the interrupts
             rmt_set_tx_intr_en(mRMT_channel, true);
@@ -432,7 +486,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
     //    handler (below), or as a callback from the built-in
     //    interrupt handler. It is static because we don't know which
     //    controller is done until we look it up.
-    static void doneOnChannel(rmt_channel_t channel, void * arg)
+    static void IRAM_ATTR doneOnChannel(rmt_channel_t channel, void * arg)
     {
         ClocklessController * controller = static_cast<ClocklessController*>(gOnChannel[channel]);
         portBASE_TYPE HPTaskAwoken = 0;
@@ -479,6 +533,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
                     // -- Refill the half of the buffer that we just finished,
                     //    allowing the other half to proceed.
                     ClocklessController * controller = static_cast<ClocklessController*>(gOnChannel[channel]);
+                    controller->updateTimer();
                     controller->fillHalfRMTBuffer();
                 } else {
                     // -- Transmission is complete on this channel
@@ -513,7 +568,8 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
             byte = 0;
         }
 
-        mCurColor = (mCurColor + 1) % NUM_COLOR_CHANNELS;
+        mCurColor++;
+        if (mCurColor == NUM_COLOR_CHANNELS) mCurColor = 0;
 
         return byte;
     }
@@ -563,7 +619,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         if (mCurPulse >= MAX_PULSES*2) {
             mRMT_mem_ptr = & (RMTMEM.chan[mRMT_channel].data32[0].val);
             mCurPulse = 0;
-        }            
+        }
     }
 };
 

From ac97d9f1efc5028cf83501821062a788266efe9c Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Thu, 26 Dec 2019 19:09:45 -0500
Subject: [PATCH 108/204] Rewriting of the fill routine with the goal of
 reducing latency and vulnerability to interrupts. One part is making the fill
 routine faster by only pushing one pixel worth of data at a time. The second
 part is waiting until all the buffers are full before starting

---
 platforms/esp/32/clockless_rmt_esp32.h | 170 +++++++++++++++----------
 1 file changed, 101 insertions(+), 69 deletions(-)

diff --git a/platforms/esp/32/clockless_rmt_esp32.h b/platforms/esp/32/clockless_rmt_esp32.h
index 26d7cbac96..f75fbce93e 100644
--- a/platforms/esp/32/clockless_rmt_esp32.h
+++ b/platforms/esp/32/clockless_rmt_esp32.h
@@ -123,7 +123,8 @@ __attribute__ ((always_inline)) inline static uint32_t __clock_cycles() {
 
 // -- Configuration constants
 #define DIVIDER             2 /* 4, 8 still seem to work, but timings become marginal */
-#define MAX_PULSES         32 /* A channel has a 64 "pulse" buffer - we use half per pass */
+#define MAX_PULSES         64 /* A channel has a 64 "pulse" buffer */
+#define PULSES_PER_FILL    24 /* One pixel's worth of pulses */
 
 // -- Convert ESP32 CPU cycles to RMT device cycles, taking into account the divider
 #define F_CPU_RMT                   (  80000000L)
@@ -199,11 +200,6 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
     rmt_item32_t * mBuffer;
     uint16_t       mBufferSize;
 
-    // -- Timing information for debugging
-    uint32_t       mLastFillTime;
-    uint32_t       mTotalTime;
-    uint32_t       mTimeCount;
-
 public:
 
     void init()
@@ -262,10 +258,10 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
             if (FASTLED_RMT_BUILTIN_DRIVER) {
                 rmt_driver_install(rmt_channel_t(i), 0, 0);
             } else {
-                // -- Set up the RMT to send 1/2 of the pulse buffer and then
+                // -- Set up the RMT to send 1 pixel of the pulse buffer and then
                 //    generate an interrupt. When we get this interrupt we
-                //    fill the other half in preparation (kind of like double-buffering)
-                rmt_set_tx_thr_intr_en(rmt_channel_t(i), true, MAX_PULSES);
+                //    fill the other part in preparation (kind of like double-buffering)
+                rmt_set_tx_thr_intr_en(rmt_channel_t(i), true, PULSES_PER_FILL);
             }
         }
 
@@ -281,43 +277,12 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
             //    strips, so it delegates to the refill function for each
             //    specific instantiation of ClocklessController.
             if (gRMT_intr_handle == NULL)
-                esp_intr_alloc(ETS_RMT_INTR_SOURCE, 0, interruptHandler, 0, &gRMT_intr_handle);
+                esp_intr_alloc(ETS_RMT_INTR_SOURCE, ESP_INTR_FLAG_LEVEL3, interruptHandler, 0, &gRMT_intr_handle);
         }
 
         gInitialized = true;
     }
 
-    virtual void IRAM_ATTR initTimer()
-    {
-        mLastFillTime = 0;
-        mTotalTime = 0;
-        mTimeCount = 0;
-    }
-
-    virtual void IRAM_ATTR updateTimer()
-    {
-        uint32_t current = __clock_cycles();
-        if (mLastFillTime != 0) {
-            mTotalTime += (current - mLastFillTime);
-            mTimeCount++;
-        }
-        mLastFillTime = current;
-    }
-
-    virtual void IRAM_ATTR printTimer()
-    {
-        if (mTimeCount > 0) {
-            uint32_t ave = mTotalTime / mTimeCount;
-            Serial.print("Controller on pin ");
-            Serial.print(mPin);
-            Serial.print(" : ");
-            Serial.print(ave);
-            Serial.print(" cycles per fill with ");
-            Serial.print(mTimeCount);
-            Serial.println(" fills");
-        }
-    }
-
     // -- Show pixels
     //    This is the main entry point for the controller.
     virtual void IRAM_ATTR showPixels(PixelController<RGB_ORDER> & pixels)
@@ -341,9 +306,6 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
             (*mPixels) = pixels;
         }
 
-        // -- Keep track of timing between buffer fills, for debugging
-        initTimer();
-
         // -- Keep track of the number of strips we've seen
         gNumStarted++;
 
@@ -359,19 +321,18 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
                 channel++;
             }
 
+            // -- Start them all
+            for (int i = 0; i < channel; i++) {
+                ClocklessController * pController = static_cast<ClocklessController*>(gControllers[i]);
+                rmt_tx_start(pController->mRMT_channel, true);
+            }
+
             // -- Wait here while the rest of the data is sent. The interrupt handler
             //    will keep refilling the RMT buffers until it is all sent; then it
             //    gives the semaphore back.
             xSemaphoreTake(gTX_sem, portMAX_DELAY);
             xSemaphoreGive(gTX_sem);
 
-            if (FASTLED_RMT_SHOW_TIMER) {
-                for (int i = 0; i < gNumControllers; i++) {
-                    ClocklessController * pController = static_cast<ClocklessController*>(gControllers[i]);
-                    pController->printTimer();
-                }
-            }
-
             // -- Reset the counters
             gNumStarted = 0;
             gNumDone = 0;
@@ -383,7 +344,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
     //    This function is only used when the user chooses to use the
     //    built-in RMT driver, which needs all of the RMT pulses
     //    up-front.
-    virtual void convertAllPixelData(PixelController<RGB_ORDER> & pixels)
+    void convertAllPixelData(PixelController<RGB_ORDER> & pixels)
     {
         // -- Compute the pulse values for the whole strip at once.
         //    Requires a large buffer
@@ -458,25 +419,18 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         } else {
             // -- Use our custom driver to send the data incrementally
 
-            // -- Turn on the interrupts
-            rmt_set_tx_intr_en(mRMT_channel, true);
-        
             // -- Initialize the counters that keep track of where we are in
             //    the pixel data.
             mRMT_mem_ptr = & (RMTMEM.chan[mRMT_channel].data32[0].val);
             mCurPulse = 0;
             mCurColor = 0;
 
-            // -- Fill both halves of the buffer
-            fillHalfRMTBuffer();
-            fillHalfRMTBuffer();
-            updateTimer();
+            // -- Store 2 pixels worth of data (two "buffers" full)
+            fillNext();
+            fillNext();
 
             // -- Turn on the interrupts
             rmt_set_tx_intr_en(mRMT_channel, true);
-            
-            // -- Start the RMT TX operation
-            rmt_tx_start(mRMT_channel, true);
         }
     }
 
@@ -504,8 +458,12 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         } else {
             // -- Otherwise, if there are still controllers waiting, then
             //    start the next one on this channel
-            if (gNext < gNumControllers)
+            if (gNext < gNumControllers) {
                 startNext(channel);
+                // -- Start the RMT TX operation
+                //    (I'm not sure if this is necessary here)
+                rmt_tx_start(controller->mRMT_channel, true);
+            }
         }
     }
     
@@ -533,8 +491,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
                     // -- Refill the half of the buffer that we just finished,
                     //    allowing the other half to proceed.
                     ClocklessController * controller = static_cast<ClocklessController*>(gOnChannel[channel]);
-                    controller->updateTimer();
-                    controller->fillHalfRMTBuffer();
+                    controller->fillNext();
                 } else {
                     // -- Transmission is complete on this channel
                     if (intr_st & BIT(tx_done_bit)) {
@@ -546,6 +503,58 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         }
     }
 
+    // -- Fill RMT buffer
+    //    Puts one pixel's worth of data into the next 24 slots in the RMT memory
+    void IRAM_ATTR fillNext()
+    {
+        if (mPixels->has(1)) {
+            uint32_t t1 = __clock_cycles();
+            
+            uint32_t one_val = mOne.val;
+            uint32_t zero_val = mZero.val;
+
+            // -- Get a pixel's worth of data
+            uint8_t byte0 = mPixels->loadAndScale0();
+            uint8_t byte1 = mPixels->loadAndScale1();
+            uint8_t byte2 = mPixels->loadAndScale2();
+            mPixels->advanceData();
+            mPixels->stepDithering();
+
+            // -- Fill 24 slots in the RMT memory
+            register uint32_t pixel = byte0 << 24 | byte1 << 16 | byte2 << 8;
+
+            // -- Use locals for speed
+            volatile register uint32_t * pItem =  mRMT_mem_ptr;
+            register uint16_t curPulse = mCurPulse;
+            
+            // Shift bits out, MSB first, setting RMTMEM.chan[n].data32[x] to the 
+            // rmt_item32_t value corresponding to the buffered bit value
+            for (register uint32_t j = 0; j < 24; j++) {
+                uint32_t val = (pixel & 0x80000000L) ? one_val : zero_val;
+                *pItem++ = val;
+                // Replaces: RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = val;
+
+                pixel <<= 1;
+                curPulse++;
+
+                if (curPulse == MAX_PULSES) {
+                    pItem = & (RMTMEM.chan[mRMT_channel].data32[0].val);
+                    curPulse = 0;
+                }
+            }
+
+            // -- Store the new values back into the object
+            mCurPulse = curPulse;
+            mRMT_mem_ptr = pItem;
+        } else {
+            // -- No more data; signal to the RMT we are done
+            for (uint32_t j = 0; j < 8; j++) {
+                * mRMT_mem_ptr++ = 0;
+            }
+        }   
+    }
+
+    // NO LONGER USED
     uint8_t IRAM_ATTR getNextByte() __attribute__ ((always_inline))
     {
         uint8_t byte;
@@ -574,12 +583,14 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         return byte;
     }
 
+
+    // NO LONGER USED
     // -- Fill the RMT buffer
     //    This function fills the next 32 slots in the RMT write
     //    buffer with pixel data. It also handles the case where the
     //    pixel data is exhausted, so we need to fill the RMT buffer
     //    with zeros to signal that it's done.
-    virtual void IRAM_ATTR fillHalfRMTBuffer()
+    void IRAM_ATTR fillHalfRMTBuffer()
     {
         uint32_t one_val = mOne.val;
         uint32_t zero_val = mZero.val;
@@ -587,10 +598,31 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         // -- Convert (up to) 32 bits of the raw pixel data into
         //    into RMT pulses that encode the zeros and ones.
         int pulses = 0;
-        uint32_t byteval;
+        register uint32_t byteval;
         while (pulses < 32 && mPixels->has(1)) {
             // -- Get one byte
-            byteval = getNextByte();
+            // -- Cycle through the color channels
+            switch (mCurColor) {
+            case 0: 
+                byteval = mPixels->loadAndScale0();
+                break;
+            case 1: 
+                byteval = mPixels->loadAndScale1();
+                break;
+            case 2: 
+                byteval = mPixels->loadAndScale2();
+                mPixels->advanceData();
+                mPixels->stepDithering();
+                break;
+            default:
+                // -- This is bad!
+                byteval = 0;
+            }
+
+            mCurColor++;
+            if (mCurColor == NUM_COLOR_CHANNELS) mCurColor = 0;
+        
+            // byteval = getNextByte();
             byteval <<= 24;
             // Shift bits out, MSB first, setting RMTMEM.chan[n].data32[x] to the 
             // rmt_item32_t value corresponding to the buffered bit value
@@ -616,7 +648,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         }
         
         // -- When we have filled the back half the buffer, reset the position to the first half
-        if (mCurPulse >= MAX_PULSES*2) {
+        if (mCurPulse == MAX_PULSES) {
             mRMT_mem_ptr = & (RMTMEM.chan[mRMT_channel].data32[0].val);
             mCurPulse = 0;
         }

From db30aab5de14decef17dfb98ab96ad89722683b5 Mon Sep 17 00:00:00 2001
From: Greg Mathews <gregdmathews@gmail.com>
Date: Fri, 3 Jan 2020 08:58:25 -0800
Subject: [PATCH 109/204] Underscore was missing

This was causing it to not compile on the Adafruit Feather nRF52840 Express for me.
---
 platforms/arm/nrf52/fastpin_arm_nrf52.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/platforms/arm/nrf52/fastpin_arm_nrf52.h b/platforms/arm/nrf52/fastpin_arm_nrf52.h
index 7526000445..b761cfa859 100644
--- a/platforms/arm/nrf52/fastpin_arm_nrf52.h
+++ b/platforms/arm/nrf52/fastpin_arm_nrf52.h
@@ -290,7 +290,7 @@ template <uint32_t _MASK, typename _PORT, uint8_t _PORT_NUMBER, uint8_t _PIN_NUM
 //     _FL_DEFPIN(47, 47, 1);
 //
 
-#define FL_DEFPIN(ARDUINO_PIN, BOARD_PIN, BOARD_PORT)    \
+#define _FL_DEFPIN(ARDUINO_PIN, BOARD_PIN, BOARD_PORT)    \
     template<> class FastPin<ARDUINO_PIN> :              \
     public _ARMPIN<                                      \
         1u << (BOARD_PIN & 31u),                         \

From 9ab13502aaef22f92329741e2cbac0310ce60120 Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Tue, 7 Jan 2020 22:06:58 -0500
Subject: [PATCH 110/204] Fixing the conflicts in RMT code

---
 platforms/esp/32/clockless_rmt_esp32.h | 14 --------------
 1 file changed, 14 deletions(-)

diff --git a/platforms/esp/32/clockless_rmt_esp32.h b/platforms/esp/32/clockless_rmt_esp32.h
index 1496dc19e6..de5b7c9840 100644
--- a/platforms/esp/32/clockless_rmt_esp32.h
+++ b/platforms/esp/32/clockless_rmt_esp32.h
@@ -123,12 +123,8 @@ __attribute__ ((always_inline)) inline static uint32_t __clock_cycles() {
 
 // -- Configuration constants
 #define DIVIDER             2 /* 4, 8 still seem to work, but timings become marginal */
-<<<<<<< HEAD
 #define MAX_PULSES         64 /* A channel has a 64 "pulse" buffer */
 #define PULSES_PER_FILL    24 /* One pixel's worth of pulses */
-=======
-#define MAX_PULSES         32 /* A channel has a 64 "pulse" buffer - we use half per pass */
->>>>>>> upstream/master
 
 // -- Convert ESP32 CPU cycles to RMT device cycles, taking into account the divider
 #define F_CPU_RMT                   (  80000000L)
@@ -507,7 +503,6 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         }
     }
 
-<<<<<<< HEAD
     // -- Fill RMT buffer
     //    Puts one pixel's worth of data into the next 24 slots in the RMT memory
     void IRAM_ATTR fillNext()
@@ -560,8 +555,6 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
     }
 
     // NO LONGER USED
-=======
->>>>>>> upstream/master
     uint8_t IRAM_ATTR getNextByte() __attribute__ ((always_inline))
     {
         uint8_t byte;
@@ -655,17 +648,10 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         }
         
         // -- When we have filled the back half the buffer, reset the position to the first half
-<<<<<<< HEAD
         if (mCurPulse == MAX_PULSES) {
             mRMT_mem_ptr = & (RMTMEM.chan[mRMT_channel].data32[0].val);
             mCurPulse = 0;
         }
-=======
-        if (mCurPulse >= MAX_PULSES*2) {
-            mRMT_mem_ptr = & (RMTMEM.chan[mRMT_channel].data32[0].val);
-            mCurPulse = 0;
-        }            
->>>>>>> upstream/master
     }
 };
 

From 22339d873defdc317b988dc3a536143bd4443325 Mon Sep 17 00:00:00 2001
From: Martin Falatic <martin@falatic.com>
Date: Tue, 3 Sep 2019 00:51:03 -0700
Subject: [PATCH 111/204] Make examples consistent and document the likely
 non-RGB types

Resolves #878, #877, #928
---
 examples/Blink/Blink.ino               | 71 ++++++++++++++---------
 examples/FirstLight/FirstLight.ino     | 78 +++++++++++++++-----------
 examples/RGBCalibrate/RGBCalibrate.ino | 63 ++++++++++++++-------
 3 files changed, 132 insertions(+), 80 deletions(-)

diff --git a/examples/Blink/Blink.ino b/examples/Blink/Blink.ino
index 3364869421..443896ec07 100644
--- a/examples/Blink/Blink.ino
+++ b/examples/Blink/Blink.ino
@@ -3,9 +3,10 @@
 // How many leds in your strip?
 #define NUM_LEDS 1
 
-// For led chips like Neopixels, which have a data line, ground, and power, you just
+// For led chips like WS2812, which have a data line, ground, and power, you just
 // need to define DATA_PIN.  For led chipsets that are SPI based (four wires - data, clock,
 // ground, and power), like the LPD8806 define both DATA_PIN and CLOCK_PIN
+// Clock pin only needed for SPI based chipsets when not using hardware SPI
 #define DATA_PIN 3
 #define CLOCK_PIN 13
 
@@ -13,33 +14,47 @@
 CRGB leds[NUM_LEDS];
 
 void setup() { 
-      // Uncomment/edit one of the following lines for your leds arrangement.
-      // FastLED.addLeds<TM1803, DATA_PIN, RGB>(leds, NUM_LEDS);
-      // FastLED.addLeds<TM1804, DATA_PIN, RGB>(leds, NUM_LEDS);
-      // FastLED.addLeds<TM1809, DATA_PIN, RGB>(leds, NUM_LEDS);
-      // FastLED.addLeds<WS2811, DATA_PIN, RGB>(leds, NUM_LEDS);
-      // FastLED.addLeds<WS2812, DATA_PIN, RGB>(leds, NUM_LEDS);
-      // FastLED.addLeds<WS2812B, DATA_PIN, RGB>(leds, NUM_LEDS);
-  	  FastLED.addLeds<NEOPIXEL, DATA_PIN>(leds, NUM_LEDS);
-      // FastLED.addLeds<APA104, DATA_PIN, RGB>(leds, NUM_LEDS);
-      // FastLED.addLeds<UCS1903, DATA_PIN, RGB>(leds, NUM_LEDS);
-      // FastLED.addLeds<UCS1903B, DATA_PIN, RGB>(leds, NUM_LEDS);
-      // FastLED.addLeds<GW6205, DATA_PIN, RGB>(leds, NUM_LEDS);
-      // FastLED.addLeds<GW6205_400, DATA_PIN, RGB>(leds, NUM_LEDS);
-      
-      // FastLED.addLeds<WS2801, RGB>(leds, NUM_LEDS);
-      // FastLED.addLeds<SM16716, RGB>(leds, NUM_LEDS);
-      // FastLED.addLeds<LPD8806, RGB>(leds, NUM_LEDS);
-      // FastLED.addLeds<P9813, RGB>(leds, NUM_LEDS);
-      // FastLED.addLeds<APA102, RGB>(leds, NUM_LEDS);
-      // FastLED.addLeds<DOTSTAR, RGB>(leds, NUM_LEDS);
-
-      // FastLED.addLeds<WS2801, DATA_PIN, CLOCK_PIN, RGB>(leds, NUM_LEDS);
-      // FastLED.addLeds<SM16716, DATA_PIN, CLOCK_PIN, RGB>(leds, NUM_LEDS);
-      // FastLED.addLeds<LPD8806, DATA_PIN, CLOCK_PIN, RGB>(leds, NUM_LEDS);
-      // FastLED.addLeds<P9813, DATA_PIN, CLOCK_PIN, RGB>(leds, NUM_LEDS);
-      // FastLED.addLeds<APA102, DATA_PIN, CLOCK_PIN, RGB>(leds, NUM_LEDS);
-      // FastLED.addLeds<DOTSTAR, DATA_PIN, CLOCK_PIN, RGB>(leds, NUM_LEDS);
+    // Uncomment/edit one of the following lines for your leds arrangement.
+    // ## Clockless types ##
+    FastLED.addLeds<NEOPIXEL, DATA_PIN>(leds, NUM_LEDS);  // GRB ordering is assumed
+    // FastLED.addLeds<SM16703, DATA_PIN, RGB>(leds, NUM_LEDS);
+    // FastLED.addLeds<TM1829, DATA_PIN, RGB>(leds, NUM_LEDS);
+    // FastLED.addLeds<TM1812, DATA_PIN, RGB>(leds, NUM_LEDS);
+    // FastLED.addLeds<TM1809, DATA_PIN, RGB>(leds, NUM_LEDS);
+    // FastLED.addLeds<TM1804, DATA_PIN, RGB>(leds, NUM_LEDS);
+    // FastLED.addLeds<TM1803, DATA_PIN, RGB>(leds, NUM_LEDS);
+    // FastLED.addLeds<UCS1903, DATA_PIN, RGB>(leds, NUM_LEDS);
+    // FastLED.addLeds<UCS1903B, DATA_PIN, RGB>(leds, NUM_LEDS);
+    // FastLED.addLeds<UCS1904, DATA_PIN, RGB>(leds, NUM_LEDS);
+    // FastLED.addLeds<UCS2903, DATA_PIN, RGB>(leds, NUM_LEDS);
+    // FastLED.addLeds<WS2812, DATA_PIN, RGB>(leds, NUM_LEDS);  // GRB ordering is typical
+    // FastLED.addLeds<WS2852, DATA_PIN, RGB>(leds, NUM_LEDS);  // GRB ordering is typical
+    // FastLED.addLeds<WS2812B, DATA_PIN, RGB>(leds, NUM_LEDS);  // GRB ordering is typical
+    // FastLED.addLeds<GS1903, DATA_PIN, RGB>(leds, NUM_LEDS);
+    // FastLED.addLeds<SK6812, DATA_PIN, RGB>(leds, NUM_LEDS);  // GRB ordering is typical
+    // FastLED.addLeds<SK6822, DATA_PIN, RGB>(leds, NUM_LEDS);
+    // FastLED.addLeds<APA106, DATA_PIN, RGB>(leds, NUM_LEDS);
+    // FastLED.addLeds<PL9823, DATA_PIN, RGB>(leds, NUM_LEDS);
+    // FastLED.addLeds<SK6822, DATA_PIN, RGB>(leds, NUM_LEDS);
+    // FastLED.addLeds<WS2811, DATA_PIN, RGB>(leds, NUM_LEDS);
+    // FastLED.addLeds<WS2813, DATA_PIN, RGB>(leds, NUM_LEDS);
+    // FastLED.addLeds<APA104, DATA_PIN, RGB>(leds, NUM_LEDS);
+    // FastLED.addLeds<WS2811_400, DATA_PIN, RGB>(leds, NUM_LEDS);
+    // FastLED.addLeds<GE8822, DATA_PIN, RGB>(leds, NUM_LEDS);
+    // FastLED.addLeds<GW6205, DATA_PIN, RGB>(leds, NUM_LEDS);
+    // FastLED.addLeds<GW6205_400, DATA_PIN, RGB>(leds, NUM_LEDS);
+    // FastLED.addLeds<LPD1886, DATA_PIN, RGB>(leds, NUM_LEDS);
+    // FastLED.addLeds<LPD1886_8BIT, DATA_PIN, RGB>(leds, NUM_LEDS);
+    // ## Clocked (SPI) types ##
+    // FastLED.addLeds<LPD6803, DATA_PIN, CLOCK_PIN, RGB>(leds, NUM_LEDS);  // GRB ordering is typical
+    // FastLED.addLeds<LPD8806, DATA_PIN, CLOCK_PIN, RGB>(leds, NUM_LEDS);  // GRB ordering is typical
+    // FastLED.addLeds<WS2801, DATA_PIN, CLOCK_PIN, RGB>(leds, NUM_LEDS);
+    // FastLED.addLeds<WS2803, DATA_PIN, CLOCK_PIN, RGB>(leds, NUM_LEDS);
+    // FastLED.addLeds<SM16716, DATA_PIN, CLOCK_PIN, RGB>(leds, NUM_LEDS);
+    // FastLED.addLeds<P9813, DATA_PIN, CLOCK_PIN, RGB>(leds, NUM_LEDS);  // BGR ordering is typical
+    // FastLED.addLeds<DOTSTAR, DATA_PIN, CLOCK_PIN, RGB>(leds, NUM_LEDS);  // BGR ordering is typical
+    // FastLED.addLeds<APA102, DATA_PIN, CLOCK_PIN, RGB>(leds, NUM_LEDS);  // BGR ordering is typical
+    // FastLED.addLeds<SK9822, DATA_PIN, CLOCK_PIN, RGB>(leds, NUM_LEDS);  // BGR ordering is typical
 }
 
 void loop() { 
diff --git a/examples/FirstLight/FirstLight.ino b/examples/FirstLight/FirstLight.ino
index 9f561b08a2..8eaf4e231b 100644
--- a/examples/FirstLight/FirstLight.ino
+++ b/examples/FirstLight/FirstLight.ino
@@ -14,11 +14,12 @@
 // How many leds are in the strip?
 #define NUM_LEDS 60
 
-// Data pin that led data will be written out over
-#define DATA_PIN 3
-
+// For led chips like WS2812, which have a data line, ground, and power, you just
+// need to define DATA_PIN.  For led chipsets that are SPI based (four wires - data, clock,
+// ground, and power), like the LPD8806 define both DATA_PIN and CLOCK_PIN
 // Clock pin only needed for SPI based chipsets when not using hardware SPI
-//#define CLOCK_PIN 8
+#define DATA_PIN 3
+#define CLOCK_PIN 13
 
 // This is an array of leds.  One item for each led in your strip.
 CRGB leds[NUM_LEDS];
@@ -28,34 +29,47 @@ void setup() {
 	// sanity check delay - allows reprogramming if accidently blowing power w/leds
    	delay(2000);
 
-      // Uncomment one of the following lines for your leds arrangement.
-      // FastLED.addLeds<TM1803, DATA_PIN, RGB>(leds, NUM_LEDS);
-      // FastLED.addLeds<TM1804, DATA_PIN, RGB>(leds, NUM_LEDS);
-      // FastLED.addLeds<TM1809, DATA_PIN, RGB>(leds, NUM_LEDS);
-      FastLED.addLeds<WS2811, DATA_PIN, RGB>(leds, NUM_LEDS);
-      // FastLED.addLeds<WS2812, DATA_PIN, RGB>(leds, NUM_LEDS);
-      // FastLED.addLeds<WS2812B, DATA_PIN, RGB>(leds, NUM_LEDS);
-      // FastLED.addLeds<NEOPIXEL, DATA_PIN>(leds, NUM_LEDS);
-      // FastLED.addLeds<APA104, DATA_PIN>(leds, NUM_LEDS);
-      // FastLED.addLeds<WS2811_400, DATA_PIN, RGB>(leds, NUM_LEDS);
-      // FastLED.addLeds<GW6205, DATA_PIN, RGB>(leds, NUM_LEDS);
-      // FastLED.addLeds<GW6205_400, DATA_PIN, RGB>(leds, NUM_LEDS);
-      // FastLED.addLeds<UCS1903, DATA_PIN, RGB>(leds, NUM_LEDS);
-      // FastLED.addLeds<UCS1903B, DATA_PIN, RGB>(leds, NUM_LEDS);
-
-      // FastLED.addLeds<WS2801, RGB>(leds, NUM_LEDS);
-      // FastLED.addLeds<SM16716, RGB>(leds, NUM_LEDS);
-      // FastLED.addLeds<LPD8806, RGB>(leds, NUM_LEDS);
-      // FastLED.addLeds<P9813, RGB>(leds, NUM_LEDS);
-      // FastLED.addLeds<APA102, RGB>(leds, NUM_LEDS);
-      // FastLED.addLeds<DOTSTAR, RGB>(leds, NUM_LEDS);
-      
-      // FastLED.addLeds<WS2801, DATA_PIN, CLOCK_PIN, RGB>(leds, NUM_LEDS);
-      // FastLED.addLeds<SM16716, DATA_PIN, CLOCK_PIN, RGB>(leds, NUM_LEDS);
-      // FastLED.addLeds<LPD8806, DATA_PIN, CLOCK_PIN, RGB>(leds, NUM_LEDS);
-      // FastLED.addLeds<P9813, DATA_PIN, CLOCK_PIN, RGB>(leds, NUM_LEDS);
-      // FastLED.addLeds<APA102, DATA_PIN, CLOCK_PIN, RGB>(leds, NUM_LEDS);
-      // FastLED.addLeds<DOTSTAR, DATA_PIN, CLOCK_PIN, RGB>(leds, NUM_LEDS);
+    // Uncomment/edit one of the following lines for your leds arrangement.
+    // ## Clockless types ##
+    // FastLED.addLeds<NEOPIXEL, DATA_PIN>(leds, NUM_LEDS);  // GRB ordering is assumed
+    // FastLED.addLeds<SM16703, DATA_PIN, RGB>(leds, NUM_LEDS);
+    // FastLED.addLeds<TM1829, DATA_PIN, RGB>(leds, NUM_LEDS);
+    // FastLED.addLeds<TM1812, DATA_PIN, RGB>(leds, NUM_LEDS);
+    // FastLED.addLeds<TM1809, DATA_PIN, RGB>(leds, NUM_LEDS);
+    // FastLED.addLeds<TM1804, DATA_PIN, RGB>(leds, NUM_LEDS);
+    // FastLED.addLeds<TM1803, DATA_PIN, RGB>(leds, NUM_LEDS);
+    // FastLED.addLeds<UCS1903, DATA_PIN, RGB>(leds, NUM_LEDS);
+    // FastLED.addLeds<UCS1903B, DATA_PIN, RGB>(leds, NUM_LEDS);
+    // FastLED.addLeds<UCS1904, DATA_PIN, RGB>(leds, NUM_LEDS);
+    // FastLED.addLeds<UCS2903, DATA_PIN, RGB>(leds, NUM_LEDS);
+    // FastLED.addLeds<WS2812, DATA_PIN, RGB>(leds, NUM_LEDS);  // GRB ordering is typical
+    // FastLED.addLeds<WS2852, DATA_PIN, RGB>(leds, NUM_LEDS);  // GRB ordering is typical
+    // FastLED.addLeds<WS2812B, DATA_PIN, RGB>(leds, NUM_LEDS);  // GRB ordering is typical
+    // FastLED.addLeds<GS1903, DATA_PIN, RGB>(leds, NUM_LEDS);
+    // FastLED.addLeds<SK6812, DATA_PIN, RGB>(leds, NUM_LEDS);  // GRB ordering is typical
+    // FastLED.addLeds<SK6822, DATA_PIN, RGB>(leds, NUM_LEDS);
+    // FastLED.addLeds<APA106, DATA_PIN, RGB>(leds, NUM_LEDS);
+    // FastLED.addLeds<PL9823, DATA_PIN, RGB>(leds, NUM_LEDS);
+    // FastLED.addLeds<SK6822, DATA_PIN, RGB>(leds, NUM_LEDS);
+    FastLED.addLeds<WS2811, DATA_PIN, RGB>(leds, NUM_LEDS);
+    // FastLED.addLeds<WS2813, DATA_PIN, RGB>(leds, NUM_LEDS);
+    // FastLED.addLeds<APA104, DATA_PIN, RGB>(leds, NUM_LEDS);
+    // FastLED.addLeds<WS2811_400, DATA_PIN, RGB>(leds, NUM_LEDS);
+    // FastLED.addLeds<GE8822, DATA_PIN, RGB>(leds, NUM_LEDS);
+    // FastLED.addLeds<GW6205, DATA_PIN, RGB>(leds, NUM_LEDS);
+    // FastLED.addLeds<GW6205_400, DATA_PIN, RGB>(leds, NUM_LEDS);
+    // FastLED.addLeds<LPD1886, DATA_PIN, RGB>(leds, NUM_LEDS);
+    // FastLED.addLeds<LPD1886_8BIT, DATA_PIN, RGB>(leds, NUM_LEDS);
+    // ## Clocked (SPI) types ##
+    // FastLED.addLeds<LPD6803, DATA_PIN, CLOCK_PIN, RGB>(leds, NUM_LEDS);  // GRB ordering is typical
+    // FastLED.addLeds<LPD8806, DATA_PIN, CLOCK_PIN, RGB>(leds, NUM_LEDS);  // GRB ordering is typical
+    // FastLED.addLeds<WS2801, DATA_PIN, CLOCK_PIN, RGB>(leds, NUM_LEDS);
+    // FastLED.addLeds<WS2803, DATA_PIN, CLOCK_PIN, RGB>(leds, NUM_LEDS);
+    // FastLED.addLeds<SM16716, DATA_PIN, CLOCK_PIN, RGB>(leds, NUM_LEDS);
+    // FastLED.addLeds<P9813, DATA_PIN, CLOCK_PIN, RGB>(leds, NUM_LEDS);  // BGR ordering is typical
+    // FastLED.addLeds<DOTSTAR, DATA_PIN, CLOCK_PIN, RGB>(leds, NUM_LEDS);  // BGR ordering is typical
+    // FastLED.addLeds<APA102, DATA_PIN, CLOCK_PIN, RGB>(leds, NUM_LEDS);  // BGR ordering is typical
+    // FastLED.addLeds<SK9822, DATA_PIN, CLOCK_PIN, RGB>(leds, NUM_LEDS);  // BGR ordering is typical
 }
 
 // This function runs over and over, and is where you do the magic to light
diff --git a/examples/RGBCalibrate/RGBCalibrate.ino b/examples/RGBCalibrate/RGBCalibrate.ino
index 72e21507b2..5ff33805a2 100644
--- a/examples/RGBCalibrate/RGBCalibrate.ino
+++ b/examples/RGBCalibrate/RGBCalibrate.ino
@@ -23,12 +23,14 @@
 //
 //////////////////////////////////////////////////
 
-#define NUM_LEDS 6
+#define NUM_LEDS 7
 
-// Data pin that led data will be written out over
-#define DATA_PIN 6
+// For led chips like WS2812, which have a data line, ground, and power, you just
+// need to define DATA_PIN.  For led chipsets that are SPI based (four wires - data, clock,
+// ground, and power), like the LPD8806 define both DATA_PIN and CLOCK_PIN
 // Clock pin only needed for SPI based chipsets when not using hardware SPI
-//#define CLOCK_PIN 8
+#define DATA_PIN 3
+#define CLOCK_PIN 13
 
 CRGB leds[NUM_LEDS];
 
@@ -36,27 +38,48 @@ void setup() {
     // sanity check delay - allows reprogramming if accidently blowing power w/leds
     delay(2000);
 
-    // Uncomment one of the following lines for your leds arrangement.
-    // FastLED.addLeds<TM1803, DATA_PIN, RGB>(leds, NUM_LEDS);
-    // FastLED.addLeds<TM1804, DATA_PIN, RGB>(leds, NUM_LEDS);
+    // Uncomment/edit one of the following lines for your leds arrangement.
+    // ## Clockless types ##
+    // FastLED.addLeds<SM16703, DATA_PIN, RGB>(leds, NUM_LEDS);
+    // FastLED.addLeds<TM1829, DATA_PIN, RGB>(leds, NUM_LEDS);
+    // FastLED.addLeds<TM1812, DATA_PIN, RGB>(leds, NUM_LEDS);
     // FastLED.addLeds<TM1809, DATA_PIN, RGB>(leds, NUM_LEDS);
+    // FastLED.addLeds<TM1804, DATA_PIN, RGB>(leds, NUM_LEDS);
+    // FastLED.addLeds<TM1803, DATA_PIN, RGB>(leds, NUM_LEDS);
+    // FastLED.addLeds<UCS1903, DATA_PIN, RGB>(leds, NUM_LEDS);
+    // FastLED.addLeds<UCS1903B, DATA_PIN, RGB>(leds, NUM_LEDS);
+    // FastLED.addLeds<UCS1904, DATA_PIN, RGB>(leds, NUM_LEDS);
+    // FastLED.addLeds<UCS2903, DATA_PIN, RGB>(leds, NUM_LEDS);
+    // FastLED.addLeds<WS2812, DATA_PIN, RGB>(leds, NUM_LEDS);  // GRB ordering is typical
+    // FastLED.addLeds<WS2852, DATA_PIN, RGB>(leds, NUM_LEDS);  // GRB ordering is typical
+    // FastLED.addLeds<WS2812B, DATA_PIN, RGB>(leds, NUM_LEDS);  // GRB ordering is typical
+    // FastLED.addLeds<GS1903, DATA_PIN, RGB>(leds, NUM_LEDS);
+    // FastLED.addLeds<SK6812, DATA_PIN, RGB>(leds, NUM_LEDS);  // GRB ordering is typical
+    // FastLED.addLeds<SK6822, DATA_PIN, RGB>(leds, NUM_LEDS);
+    // FastLED.addLeds<APA106, DATA_PIN, RGB>(leds, NUM_LEDS);
+    // FastLED.addLeds<PL9823, DATA_PIN, RGB>(leds, NUM_LEDS);
+    // FastLED.addLeds<SK6822, DATA_PIN, RGB>(leds, NUM_LEDS);
     // FastLED.addLeds<WS2811, DATA_PIN, RGB>(leds, NUM_LEDS);
-    // FastLED.addLeds<WS2812, DATA_PIN, RGB>(leds, NUM_LEDS);
-    // FastLED.addLeds<WS2812B, DATA_PIN, GRB>(leds, NUM_LEDS);
-    // FastLED.setBrightness(CRGB(255,255,255));
+    // FastLED.addLeds<WS2813, DATA_PIN, RGB>(leds, NUM_LEDS);
+    // FastLED.addLeds<APA104, DATA_PIN, RGB>(leds, NUM_LEDS);
+    // FastLED.addLeds<WS2811_400, DATA_PIN, RGB>(leds, NUM_LEDS);
+    // FastLED.addLeds<GE8822, DATA_PIN, RGB>(leds, NUM_LEDS);
     // FastLED.addLeds<GW6205, DATA_PIN, RGB>(leds, NUM_LEDS);
     // FastLED.addLeds<GW6205_400, DATA_PIN, RGB>(leds, NUM_LEDS);
-    // FastLED.addLeds<UCS1903, DATA_PIN, RGB>(leds, NUM_LEDS);
-    // FastLED.addLeds<UCS1903B, DATA_PIN, RGB>(leds, NUM_LEDS);
-
-    // FastLED.addLeds<WS2801, RGB>(leds, NUM_LEDS);
-    // FastLED.addLeds<SM16716, RGB>(leds, NUM_LEDS);
-    // FastLED.addLeds<LPD8806, 9, 10, RGB>(leds, NUM_LEDS);
-    FastLED.addLeds<LPD6803, RGB>(leds, NUM_LEDS);
-
+    // FastLED.addLeds<LPD1886, DATA_PIN, RGB>(leds, NUM_LEDS);
+    // FastLED.addLeds<LPD1886_8BIT, DATA_PIN, RGB>(leds, NUM_LEDS);
+    // ## Clocked (SPI) types ##
+    // FastLED.addLeds<LPD6803, DATA_PIN, CLOCK_PIN, RGB>(leds, NUM_LEDS);  // GRB ordering is typical
+    // FastLED.addLeds<LPD8806, DATA_PIN, CLOCK_PIN, RGB>(leds, NUM_LEDS);  // GRB ordering is typical
     // FastLED.addLeds<WS2801, DATA_PIN, CLOCK_PIN, RGB>(leds, NUM_LEDS);
+    // FastLED.addLeds<WS2803, DATA_PIN, CLOCK_PIN, RGB>(leds, NUM_LEDS);
     // FastLED.addLeds<SM16716, DATA_PIN, CLOCK_PIN, RGB>(leds, NUM_LEDS);
-    // FastLED.addLeds<LPD8806, DATA_PIN, CLOCK_PIN, RGB>(leds, NUM_LEDS);
+    // FastLED.addLeds<P9813, DATA_PIN, CLOCK_PIN, RGB>(leds, NUM_LEDS);  // BGR ordering is typical
+    // FastLED.addLeds<DOTSTAR, DATA_PIN, CLOCK_PIN, RGB>(leds, NUM_LEDS);  // BGR ordering is typical
+    // FastLED.addLeds<APA102, DATA_PIN, CLOCK_PIN, RGB>(leds, NUM_LEDS);  // BGR ordering is typical
+    // FastLED.addLeds<SK9822, DATA_PIN, CLOCK_PIN, RGB>(leds, NUM_LEDS);  // BGR ordering is typical
+
+    // FastLED.setBrightness(CRGB(255,255,255));
 }
 
 void loop() {
@@ -69,4 +92,4 @@ void loop() {
     leds[6] = CRGB(0,0,0);
     FastLED.show();
     delay(1000);
-}
\ No newline at end of file
+}

From f140cdaaa5e34796cfcf9ed3a54d61328e78cc5b Mon Sep 17 00:00:00 2001
From: Ampt <Ampt@users.noreply.github.com>
Date: Wed, 8 Jan 2020 17:34:03 -0600
Subject: [PATCH 112/204] Fix Nano 33 IOT pindef

The pins here were based on an early draft of the Nano 33 IOT package where the pin definitions were mislabeled in the comments - https://github.com/arduino/ArduinoCore-samd/blob/08629f90b1f803017cc526c4cbbe3eaaccc8f062/variants/nano_33_iot/variant.cpp

This has recently been fixed -
https://github.com/arduino/ArduinoCore-samd/blob/master/variants/nano_33_iot/variant.cpp

This change uses the correct pin definitions.
---
 platforms/arm/d21/fastpin_arm_d21.h | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/platforms/arm/d21/fastpin_arm_d21.h b/platforms/arm/d21/fastpin_arm_d21.h
index 84b0738c3b..3974a74136 100644
--- a/platforms/arm/d21/fastpin_arm_d21.h
+++ b/platforms/arm/d21/fastpin_arm_d21.h
@@ -171,17 +171,17 @@ _FL_DEFPIN( 20,  6, 0); _FL_DEFPIN( 21,  7, 0);
 
 #elif defined(ARDUINO_SAMD_NANO_33_IOT)
 
-#define MAX_PIN 25
-_FL_DEFPIN(  0, 11, 0); _FL_DEFPIN(  1, 10, 0); _FL_DEFPIN(  2, 14, 0); _FL_DEFPIN(  3,  9, 0);
-_FL_DEFPIN(  4,  8, 0); _FL_DEFPIN(  5, 15, 0); _FL_DEFPIN(  6, 20, 0); _FL_DEFPIN(  7, 21, 0);
-_FL_DEFPIN(  8,  6, 0); _FL_DEFPIN(  9,  7, 0); _FL_DEFPIN( 10, 18, 0); _FL_DEFPIN( 11, 16, 0);
-_FL_DEFPIN( 12, 19, 0); _FL_DEFPIN( 13, 17, 0); _FL_DEFPIN( 14,  2, 0); _FL_DEFPIN( 15,  8, 1);
-_FL_DEFPIN( 16,  9, 1); _FL_DEFPIN( 17,  4, 0); _FL_DEFPIN( 18,  5, 0); _FL_DEFPIN( 19,  2, 1);
-_FL_DEFPIN( 20, 22, 0); _FL_DEFPIN( 21, 23, 0); _FL_DEFPIN( 22, 12, 0); _FL_DEFPIN( 23, 10, 1);
-_FL_DEFPIN( 24, 11, 1);
-
-#define SPI_DATA 23
-#define SPI_CLOCK 24
+#define MAX_PIN 26
+_FL_DEFPIN(  0, 23, 1); _FL_DEFPIN(  1, 22, 1); _FL_DEFPIN(  2, 10, 1); _FL_DEFPIN(  3, 11, 1);
+_FL_DEFPIN(  4,  7, 0); _FL_DEFPIN(  5,  5, 0); _FL_DEFPIN(  6,  4, 0); _FL_DEFPIN(  7,  6, 0);
+_FL_DEFPIN(  8, 18, 0); _FL_DEFPIN(  9, 20, 0); _FL_DEFPIN( 10, 21, 0); _FL_DEFPIN( 11, 16, 0);
+_FL_DEFPIN( 12, 19, 0); _FL_DEFPIN( 13, 17, 0); _FL_DEFPIN( 14,  2, 0); _FL_DEFPIN( 15,  2, 1);
+_FL_DEFPIN( 16, 11, 1); _FL_DEFPIN( 17, 10, 0); _FL_DEFPIN( 18,  8, 1); _FL_DEFPIN( 19,  9, 1);
+_FL_DEFPIN( 20,  9, 0); _FL_DEFPIN( 21,  3, 1); _FL_DEFPIN( 22, 12, 0); _FL_DEFPIN( 23, 13, 0);
+_FL_DEFPIN( 24, 14, 0); _FL_DEFPIN( 25, 15, 0);
+
+#define SPI_DATA 22
+#define SPI_CLOCK 25
 
 #define HAS_HARDWARE_PIN_SUPPORT 1
 

From 9935457511dd2f1b8fa456c91807133b4ef5caaa Mon Sep 17 00:00:00 2001
From: Andrew Morgan <andrew@amorgan.xyz>
Date: Tue, 21 Jan 2020 17:28:43 +0000
Subject: [PATCH 113/204] Add platform pin mappings for Arduino MKR WiFi 1010

---
 platforms/arm/d21/fastpin_arm_d21.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/platforms/arm/d21/fastpin_arm_d21.h b/platforms/arm/d21/fastpin_arm_d21.h
index 3974a74136..fac0185806 100644
--- a/platforms/arm/d21/fastpin_arm_d21.h
+++ b/platforms/arm/d21/fastpin_arm_d21.h
@@ -154,7 +154,7 @@ _FL_DEFPIN( 20,  8, 0); _FL_DEFPIN( 21,  9, 0); _FL_DEFPIN( 22, 10, 0); _FL_DEFP
 
 #define HAS_HARDWARE_PIN_SUPPORT 1
 
-#elif defined(ARDUINO_SAMD_MKR1000)
+#elif defined(ARDUINO_SAMD_MKR1000) || defined(ARDUINO_SAMD_MKRWIFI1010)
 
 #define MAX_PIN 22
 _FL_DEFPIN(  0, 22, 0); _FL_DEFPIN(  1, 23, 0); _FL_DEFPIN(  2, 10, 0); _FL_DEFPIN(  3, 11, 0);

From 602a8577762a367d3d766c99662745660e0c6feb Mon Sep 17 00:00:00 2001
From: 8633brown <toddbrown.8633@yahoo.co.uk>
Date: Fri, 24 Jan 2020 16:10:37 -0700
Subject: [PATCH 114/204] add include for arduino library re:
 https://github.com/arduino/Arduino/wiki/Arduino-IDE-1.5:-Library-specification

---
 library.properties | 1 +
 1 file changed, 1 insertion(+)

diff --git a/library.properties b/library.properties
index 31175c5506..1cb2d08a96 100644
--- a/library.properties
+++ b/library.properties
@@ -7,3 +7,4 @@ paragraph=Multi-platform library for controlling dozens of different types of LE
 category=Display
 url=https://github.com/FastLED/FastLED
 architectures=*
+includes=FastLED.h

From 6baac0180da1036c589cc515a7a331fed56b46cc Mon Sep 17 00:00:00 2001
From: 8633brown <31827535+8633brown@users.noreply.github.com>
Date: Sun, 26 Jan 2020 14:23:57 -0700
Subject: [PATCH 115/204] wrong comment location

---
 power_mgt.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/power_mgt.h b/power_mgt.h
index f156f7a251..6871881859 100644
--- a/power_mgt.h
+++ b/power_mgt.h
@@ -21,11 +21,11 @@ FASTLED_NAMESPACE_BEGIN
 /// @deprecated - use FastLED.setMaxPowerInVoltsAndMilliamps()
 void set_max_power_in_volts_and_milliamps( uint8_t volts, uint32_t milliamps);
 /// Set the maximum power used in watts
+/// @deprecated - use FastLED.setMaxPowerInMilliWatts
 void set_max_power_in_milliwatts( uint32_t powerInmW);
 
-/// Select a ping with an led that will be flashed to indicate that power management
+/// Select a pin with an led that will be flashed to indicate that power management
 /// is pulling down the brightness
-/// @deprecated - use FastLED.setMaxPowerInMilliWatts
 void set_max_power_indicator_LED( uint8_t pinNumber); // zero = no indicator LED
 
 

From 9c5e2ed3515b04ef3506dd19ffd51f37aeeea74c Mon Sep 17 00:00:00 2001
From: Mark Kriegsman <1334634+kriegsman@users.noreply.github.com>
Date: Tue, 28 Jan 2020 16:38:08 -0500
Subject: [PATCH 116/204] Explicitly check for __AVR__ before including AVR
 code.  Give a more meaningful error message if the platform is not
 recognized.

---
 led_sysdefs.h | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/led_sysdefs.h b/led_sysdefs.h
index 27da24a043..04afef9a97 100644
--- a/led_sysdefs.h
+++ b/led_sysdefs.h
@@ -32,9 +32,22 @@
 #include "platforms/esp/8266/led_sysdefs_esp8266.h"
 #elif defined(ESP32)
 #include "platforms/esp/32/led_sysdefs_esp32.h"
-#else
+#elif defined(__AVR__)
 // AVR platforms
 #include "platforms/avr/led_sysdefs_avr.h"
+#else
+//
+// We got here because we don't recognize the platform that you're
+// trying to compile for: it's not AVR, or an ESP or ARM that we recognize.
+//
+// If you're reading this because you got the error below,
+// and if this new platform is just a minor variant of an
+// existing supported ARM platform, you may be able to add
+// a new 'defined(XXX)' selector in the appropriate code above.
+//
+// If this platform is a new microcontroller, see "PORTING.md".
+//
+#error "This platform isn't recognized by FastLED... yet.  See comments in FastLED/led_sysdefs.h for options."
 #endif
 
 #ifndef FASTLED_NAMESPACE_BEGIN

From b7e967263943a56b246d2c7b81e85d99a1c94322 Mon Sep 17 00:00:00 2001
From: Mark Kriegsman <1334634+kriegsman@users.noreply.github.com>
Date: Tue, 28 Jan 2020 16:49:44 -0500
Subject: [PATCH 117/204] Make switch/case 'fallthrough' explicit, to silence
 some compiler warnings on AVR

---
 platforms/avr/clockless_trinket.h | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/platforms/avr/clockless_trinket.h b/platforms/avr/clockless_trinket.h
index 69f33d6aff..312e9a880f 100644
--- a/platforms/avr/clockless_trinket.h
+++ b/platforms/avr/clockless_trinket.h
@@ -322,6 +322,9 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 #define DADVANCE 3
 #define DUSE (0xFF - (DADVANCE-1))
 
+// Silence compiler warnings about switch/case that is explicitly intended to fall through.
+#define FL_FALLTHROUGH __attribute__ ((fallthrough));
+
 	// This method is made static to force making register Y available to use for data on AVR - if the method is non-static, then
 	// gcc will use register Y for the this pointer.
 	static void /*__attribute__((optimize("O0")))*/  /*__attribute__ ((always_inline))*/  showRGBInternal(PixelController<RGB_ORDER> & pixels)  {
@@ -403,9 +406,9 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 				HI1 _D1(1) QLO2(b0, 1) RORSC14(b1,7) 	_D2(4)	LO1 RORCLC2(b1) 	_D3(2)
 				HI1 _D1(1) QLO2(b0, 0)
 				switch(XTRA0) {
-					case 4: _D2(0) LO1 _D3(0) HI1 _D1(1) QLO2(b0,0)
-					case 3: _D2(0) LO1 _D3(0) HI1 _D1(1) QLO2(b0,0)
-					case 2: _D2(0) LO1 _D3(0) HI1 _D1(1) QLO2(b0,0)
+					case 4: _D2(0) LO1 _D3(0) HI1 _D1(1) QLO2(b0,0)  FL_FALLTHROUGH
+					case 3: _D2(0) LO1 _D3(0) HI1 _D1(1) QLO2(b0,0)  FL_FALLTHROUGH
+					case 2: _D2(0) LO1 _D3(0) HI1 _D1(1) QLO2(b0,0)  FL_FALLTHROUGH
 					case 1: _D2(0) LO1 _D3(0) HI1 _D1(1) QLO2(b0,0)
 				}
 				MOV_ADDDE14(b0,b1,d1,e1) _D2(4) LO1 _D3(0)
@@ -419,9 +422,9 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 				HI1 _D1(1) QLO2(b0, 1) RORSC24(b1,7) 	_D2(4)	LO1 RORCLC2(b1) 	_D3(2)
 				HI1 _D1(1) QLO2(b0, 0)
 				switch(XTRA0) {
-					case 4: _D2(0) LO1 _D3(0) HI1 _D1(1) QLO2(b0,0)
-					case 3: _D2(0) LO1 _D3(0) HI1 _D1(1) QLO2(b0,0)
-					case 2: _D2(0) LO1 _D3(0) HI1 _D1(1) QLO2(b0,0)
+					case 4: _D2(0) LO1 _D3(0) HI1 _D1(1) QLO2(b0,0)  FL_FALLTHROUGH
+					case 3: _D2(0) LO1 _D3(0) HI1 _D1(1) QLO2(b0,0)  FL_FALLTHROUGH
+					case 2: _D2(0) LO1 _D3(0) HI1 _D1(1) QLO2(b0,0)  FL_FALLTHROUGH
 					case 1: _D2(0) LO1 _D3(0) HI1 _D1(1) QLO2(b0,0)
 				}
 
@@ -438,9 +441,9 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 				HI1 _D1(1) QLO2(b0, 1) RORSC04(b1,7) 	_D2(4)	LO1 RORCLC2(b1) 	_D3(2)
 				HI1 _D1(1) QLO2(b0, 0)
 				switch(XTRA0) {
-					case 4: _D2(0) LO1 _D3(0) HI1 _D1(1) QLO2(b0,0)
-					case 3: _D2(0) LO1 _D3(0) HI1 _D1(1) QLO2(b0,0)
-					case 2: _D2(0) LO1 _D3(0) HI1 _D1(1) QLO2(b0,0)
+					case 4: _D2(0) LO1 _D3(0) HI1 _D1(1) QLO2(b0,0)  FL_FALLTHROUGH
+					case 3: _D2(0) LO1 _D3(0) HI1 _D1(1) QLO2(b0,0)  FL_FALLTHROUGH
+					case 2: _D2(0) LO1 _D3(0) HI1 _D1(1) QLO2(b0,0)  FL_FALLTHROUGH
 					case 1: _D2(0) LO1 _D3(0) HI1 _D1(1) QLO2(b0,0)
 				}
 				MOV_ADDDE04(b0,b1,d0,e0) _D2(4) LO1 _D3(5)

From 5608ecf7771274a9724d72bc848c33e0026dad7b Mon Sep 17 00:00:00 2001
From: Mark Kriegsman <1334634+kriegsman@users.noreply.github.com>
Date: Wed, 29 Jan 2020 19:55:42 -0500
Subject: [PATCH 118/204] Prevent divide-by-zero, fixes #881.  Thanks to
 @limpens and @andjoeg for help with this.

---
 FastLED.cpp | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/FastLED.cpp b/FastLED.cpp
index 94f2302124..bfcb73c4b4 100644
--- a/FastLED.cpp
+++ b/FastLED.cpp
@@ -208,9 +208,12 @@ void CFastLED::countFPS(int nFrames) {
   static uint32_t lastframe = 0; // millis();
 
   if(br++ >= nFrames) {
-		uint32_t now = millis();
-		now -= lastframe;
-		m_nFPS = (br * 1000) / now;
+      uint32_t now = millis();
+      now -= lastframe;
+      if( now == 0 ) {
+          now = 1; // prevent division by zero below
+      }
+      m_nFPS = (br * 1000) / now;
     br = 0;
     lastframe = millis();
   }

From ced802e5a418e4b3e5f9f5e7872d9c66e42b5d8d Mon Sep 17 00:00:00 2001
From: Mark Kriegsman <1334634+kriegsman@users.noreply.github.com>
Date: Thu, 30 Jan 2020 13:50:54 -0500
Subject: [PATCH 119/204] Adding Pacifica (ocean waves), Pride2015 (rainbows),
 and TwinkleFox (holiday lights) to the examples directory.

---
 examples/Pacifica/Pacifica.ino     | 152 ++++++++++++
 examples/Pride2015/Pride2015.ino   |  82 ++++++
 examples/TwinkleFox/TwinkleFox.ino | 383 +++++++++++++++++++++++++++++
 3 files changed, 617 insertions(+)
 create mode 100644 examples/Pacifica/Pacifica.ino
 create mode 100644 examples/Pride2015/Pride2015.ino
 create mode 100644 examples/TwinkleFox/TwinkleFox.ino

diff --git a/examples/Pacifica/Pacifica.ino b/examples/Pacifica/Pacifica.ino
new file mode 100644
index 0000000000..29ac9fcc74
--- /dev/null
+++ b/examples/Pacifica/Pacifica.ino
@@ -0,0 +1,152 @@
+//
+//  "Pacifica"
+//  Gentle, blue-green ocean waves.
+//  December 2019, Mark Kriegsman and Mary Corey March.
+//  For Dan.
+//
+
+#define FASTLED_ALLOW_INTERRUPTS 0
+#include <FastLED.h>
+FASTLED_USING_NAMESPACE
+
+#define DATA_PIN            3
+#define NUM_LEDS            60
+#define MAX_POWER_MILLIAMPS 500
+#define LED_TYPE            WS2812B
+#define COLOR_ORDER         GRB
+
+//////////////////////////////////////////////////////////////////////////
+
+CRGB leds[NUM_LEDS];
+
+void setup() {
+  delay( 3000); // 3 second delay for boot recovery, and a moment of silence
+  FastLED.addLeds<LED_TYPE,DATA_PIN,COLOR_ORDER>(leds, NUM_LEDS)
+        .setCorrection( TypicalLEDStrip );
+  FastLED.setMaxPowerInVoltsAndMilliamps( 5, MAX_POWER_MILLIAMPS);
+}
+
+void loop()
+{
+  EVERY_N_MILLISECONDS( 20) {
+    pacifica_loop();
+    FastLED.show();
+  }
+}
+
+//////////////////////////////////////////////////////////////////////////
+//
+// The code for this animation is more complicated than other examples, and 
+// while it is "ready to run", and documented in general, it is probably not 
+// the best starting point for learning.  Nevertheless, it does illustrate some
+// useful techniques.
+//
+//////////////////////////////////////////////////////////////////////////
+//
+// In this animation, there are four "layers" of waves of light.  
+//
+// Each layer moves independently, and each is scaled separately.
+//
+// All four wave layers are added together on top of each other, and then 
+// another filter is applied that adds "whitecaps" of brightness where the 
+// waves line up with each other more.  Finally, another pass is taken
+// over the led array to 'deepen' (dim) the blues and greens.
+//
+// The speed and scale and motion of each layer vary slowly within independent 
+// hand-chosen ranges, which is why the code has a lot of low-speed 'beatsin8' functions
+// with a lot of oddly specific numeric ranges.
+//
+// These three custom blue-green color palettes were inspired by the colors found in
+// the waters off the southern coast of California, https://goo.gl/maps/QQgd97jjHesHZVxQ7
+//
+CRGBPalette16 pacifica_palette_1 = 
+    { 0x000507, 0x000409, 0x00030B, 0x00030D, 0x000210, 0x000212, 0x000114, 0x000117, 
+      0x000019, 0x00001C, 0x000026, 0x000031, 0x00003B, 0x000046, 0x14554B, 0x28AA50 };
+CRGBPalette16 pacifica_palette_2 = 
+    { 0x000507, 0x000409, 0x00030B, 0x00030D, 0x000210, 0x000212, 0x000114, 0x000117, 
+      0x000019, 0x00001C, 0x000026, 0x000031, 0x00003B, 0x000046, 0x0C5F52, 0x19BE5F };
+CRGBPalette16 pacifica_palette_3 = 
+    { 0x000208, 0x00030E, 0x000514, 0x00061A, 0x000820, 0x000927, 0x000B2D, 0x000C33, 
+      0x000E39, 0x001040, 0x001450, 0x001860, 0x001C70, 0x002080, 0x1040BF, 0x2060FF };
+
+
+void pacifica_loop()
+{
+  // Increment the four "color index start" counters, one for each wave layer.
+  // Each is incremented at a different speed, and the speeds vary over time.
+  static uint16_t sCIStart1, sCIStart2, sCIStart3, sCIStart4;
+  static uint32_t sLastms = 0;
+  uint32_t ms = GET_MILLIS();
+  uint32_t deltams = ms - sLastms;
+  sLastms = ms;
+  uint16_t speedfactor1 = beatsin16(3, 179, 269);
+  uint16_t speedfactor2 = beatsin16(4, 179, 269);
+  uint32_t deltams1 = (deltams * speedfactor1) / 256;
+  uint32_t deltams2 = (deltams * speedfactor2) / 256;
+  uint32_t deltams21 = (deltams1 + deltams2) / 2;
+  sCIStart1 += (deltams1 * beatsin88(1011,10,13));
+  sCIStart2 -= (deltams21 * beatsin88(777,8,11));
+  sCIStart3 -= (deltams1 * beatsin88(501,5,7));
+  sCIStart4 -= (deltams2 * beatsin88(257,4,6));
+
+  // Clear out the LED array to a dim background blue-green
+  fill_solid( leds, NUM_LEDS, CRGB( 2, 6, 10));
+
+  // Render each of four layers, with different scales and speeds, that vary over time
+  pacifica_one_layer( pacifica_palette_1, sCIStart1, beatsin16( 3, 11 * 256, 14 * 256), beatsin8( 10, 70, 130), 0-beat16( 301) );
+  pacifica_one_layer( pacifica_palette_2, sCIStart2, beatsin16( 4,  6 * 256,  9 * 256), beatsin8( 17, 40,  80), beat16( 401) );
+  pacifica_one_layer( pacifica_palette_3, sCIStart3, 6 * 256, beatsin8( 9, 10,38), 0-beat16(503));
+  pacifica_one_layer( pacifica_palette_3, sCIStart4, 5 * 256, beatsin8( 8, 10,28), beat16(601));
+
+  // Add brighter 'whitecaps' where the waves line up more
+  pacifica_add_whitecaps();
+
+  // Deepen the blues and greens a bit
+  pacifica_deepen_colors();
+}
+
+// Add one layer of waves into the led array
+void pacifica_one_layer( CRGBPalette16& p, uint16_t cistart, uint16_t wavescale, uint8_t bri, uint16_t ioff)
+{
+  uint16_t ci = cistart;
+  uint16_t waveangle = ioff;
+  uint16_t wavescale_half = (wavescale / 2) + 20;
+  for( uint16_t i = 0; i < NUM_LEDS; i++) {
+    waveangle += 250;
+    uint16_t s16 = sin16( waveangle ) + 32768;
+    uint16_t cs = scale16( s16 , wavescale_half ) + wavescale_half;
+    ci += cs;
+    uint16_t sindex16 = sin16( ci) + 32768;
+    uint8_t sindex8 = scale16( sindex16, 240);
+    CRGB c = ColorFromPalette( p, sindex8, bri, LINEARBLEND);
+    leds[i] += c;
+  }
+}
+
+// Add extra 'white' to areas where the four layers of light have lined up brightly
+void pacifica_add_whitecaps()
+{
+  uint8_t basethreshold = beatsin8( 9, 55, 65);
+  uint8_t wave = beat8( 7 );
+  
+  for( uint16_t i = 0; i < NUM_LEDS; i++) {
+    uint8_t threshold = scale8( sin8( wave), 20) + basethreshold;
+    wave += 7;
+    uint8_t l = leds[i].getAverageLight();
+    if( l > threshold) {
+      uint8_t overage = l - threshold;
+      uint8_t overage2 = qadd8( overage, overage);
+      leds[i] += CRGB( overage, overage2, qadd8( overage2, overage2));
+    }
+  }
+}
+
+// Deepen the blues and greens
+void pacifica_deepen_colors()
+{
+  for( uint16_t i = 0; i < NUM_LEDS; i++) {
+    leds[i].blue = scale8( leds[i].blue,  145); 
+    leds[i].green= scale8( leds[i].green, 200); 
+    leds[i] |= CRGB( 2, 5, 7);
+  }
+}
diff --git a/examples/Pride2015/Pride2015.ino b/examples/Pride2015/Pride2015.ino
new file mode 100644
index 0000000000..0fbd3a5bce
--- /dev/null
+++ b/examples/Pride2015/Pride2015.ino
@@ -0,0 +1,82 @@
+#include "FastLED.h"
+
+// Pride2015
+// Animated, ever-changing rainbows.
+// by Mark Kriegsman
+
+#if FASTLED_VERSION < 3001000
+#error "Requires FastLED 3.1 or later; check github for latest code."
+#endif
+
+#define DATA_PIN    3
+//#define CLK_PIN   4
+#define LED_TYPE    WS2811
+#define COLOR_ORDER GRB
+#define NUM_LEDS    200
+#define BRIGHTNESS  255
+
+CRGB leds[NUM_LEDS];
+
+
+void setup() {
+  delay(3000); // 3 second delay for recovery
+  
+  // tell FastLED about the LED strip configuration
+  FastLED.addLeds<LED_TYPE,DATA_PIN,COLOR_ORDER>(leds, NUM_LEDS)
+    .setCorrection(TypicalLEDStrip)
+    .setDither(BRIGHTNESS < 255);
+
+  // set master brightness control
+  FastLED.setBrightness(BRIGHTNESS);
+}
+
+
+void loop()
+{
+  pride();
+  FastLED.show();  
+}
+
+
+// This function draws rainbows with an ever-changing,
+// widely-varying set of parameters.
+void pride() 
+{
+  static uint16_t sPseudotime = 0;
+  static uint16_t sLastMillis = 0;
+  static uint16_t sHue16 = 0;
+ 
+  uint8_t sat8 = beatsin88( 87, 220, 250);
+  uint8_t brightdepth = beatsin88( 341, 96, 224);
+  uint16_t brightnessthetainc16 = beatsin88( 203, (25 * 256), (40 * 256));
+  uint8_t msmultiplier = beatsin88(147, 23, 60);
+
+  uint16_t hue16 = sHue16;//gHue * 256;
+  uint16_t hueinc16 = beatsin88(113, 1, 3000);
+  
+  uint16_t ms = millis();
+  uint16_t deltams = ms - sLastMillis ;
+  sLastMillis  = ms;
+  sPseudotime += deltams * msmultiplier;
+  sHue16 += deltams * beatsin88( 400, 5,9);
+  uint16_t brightnesstheta16 = sPseudotime;
+  
+  for( uint16_t i = 0 ; i < NUM_LEDS; i++) {
+    hue16 += hueinc16;
+    uint8_t hue8 = hue16 / 256;
+
+    brightnesstheta16  += brightnessthetainc16;
+    uint16_t b16 = sin16( brightnesstheta16  ) + 32768;
+
+    uint16_t bri16 = (uint32_t)((uint32_t)b16 * (uint32_t)b16) / 65536;
+    uint8_t bri8 = (uint32_t)(((uint32_t)bri16) * brightdepth) / 65536;
+    bri8 += (255 - brightdepth);
+    
+    CRGB newcolor = CHSV( hue8, sat8, bri8);
+    
+    uint16_t pixelnumber = i;
+    pixelnumber = (NUM_LEDS-1) - pixelnumber;
+    
+    nblend( leds[pixelnumber], newcolor, 64);
+  }
+}
diff --git a/examples/TwinkleFox/TwinkleFox.ino b/examples/TwinkleFox/TwinkleFox.ino
new file mode 100644
index 0000000000..4821139bd6
--- /dev/null
+++ b/examples/TwinkleFox/TwinkleFox.ino
@@ -0,0 +1,383 @@
+#include "FastLED.h"
+
+#if defined(FASTLED_VERSION) && (FASTLED_VERSION < 3001000)
+#warning "Requires FastLED 3.1 or later; check github for latest code."
+#endif
+
+
+#define NUM_LEDS      100
+#define LED_TYPE   WS2811
+#define COLOR_ORDER   GRB
+#define DATA_PIN        3
+//#define CLK_PIN       4
+#define VOLTS          12
+#define MAX_MA       4000
+
+//  TwinkleFOX: Twinkling 'holiday' lights that fade in and out.
+//  Colors are chosen from a palette; a few palettes are provided.
+//
+//  This December 2015 implementation improves on the December 2014 version
+//  in several ways:
+//  - smoother fading, compatible with any colors and any palettes
+//  - easier control of twinkle speed and twinkle density
+//  - supports an optional 'background color'
+//  - takes even less RAM: zero RAM overhead per pixel
+//  - illustrates a couple of interesting techniques (uh oh...)
+//
+//  The idea behind this (new) implementation is that there's one
+//  basic, repeating pattern that each pixel follows like a waveform:
+//  The brightness rises from 0..255 and then falls back down to 0.
+//  The brightness at any given point in time can be determined as
+//  a function of time, for example:
+//    brightness = sine( time ); // a sine wave of brightness over time
+//
+//  So the way this implementation works is that every pixel follows
+//  the exact same wave function over time.  In this particular case,
+//  I chose a sawtooth triangle wave (triwave8) rather than a sine wave,
+//  but the idea is the same: brightness = triwave8( time ).  
+//  
+//  Of course, if all the pixels used the exact same wave form, and 
+//  if they all used the exact same 'clock' for their 'time base', all
+//  the pixels would brighten and dim at once -- which does not look
+//  like twinkling at all.
+//
+//  So to achieve random-looking twinkling, each pixel is given a 
+//  slightly different 'clock' signal.  Some of the clocks run faster, 
+//  some run slower, and each 'clock' also has a random offset from zero.
+//  The net result is that the 'clocks' for all the pixels are always out 
+//  of sync from each other, producing a nice random distribution
+//  of twinkles.
+//
+//  The 'clock speed adjustment' and 'time offset' for each pixel
+//  are generated randomly.  One (normal) approach to implementing that
+//  would be to randomly generate the clock parameters for each pixel 
+//  at startup, and store them in some arrays.  However, that consumes
+//  a great deal of precious RAM, and it turns out to be totally
+//  unnecessary!  If the random number generator is 'seeded' with the
+//  same starting value every time, it will generate the same sequence
+//  of values every time.  So the clock adjustment parameters for each
+//  pixel are 'stored' in a pseudo-random number generator!  The PRNG 
+//  is reset, and then the first numbers out of it are the clock 
+//  adjustment parameters for the first pixel, the second numbers out
+//  of it are the parameters for the second pixel, and so on.
+//  In this way, we can 'store' a stable sequence of thousands of
+//  random clock adjustment parameters in literally two bytes of RAM.
+//
+//  There's a little bit of fixed-point math involved in applying the
+//  clock speed adjustments, which are expressed in eighths.  Each pixel's
+//  clock speed ranges from 8/8ths of the system clock (i.e. 1x) to
+//  23/8ths of the system clock (i.e. nearly 3x).
+//
+//  On a basic Arduino Uno or Leonardo, this code can twinkle 300+ pixels
+//  smoothly at over 50 updates per second.
+//
+//  -Mark Kriegsman, December 2015
+
+CRGBArray<NUM_LEDS> leds;
+
+// Overall twinkle speed.
+// 0 (VERY slow) to 8 (VERY fast).  
+// 4, 5, and 6 are recommended, default is 4.
+#define TWINKLE_SPEED 4
+
+// Overall twinkle density.
+// 0 (NONE lit) to 8 (ALL lit at once).  
+// Default is 5.
+#define TWINKLE_DENSITY 5
+
+// How often to change color palettes.
+#define SECONDS_PER_PALETTE  30
+// Also: toward the bottom of the file is an array 
+// called "ActivePaletteList" which controls which color
+// palettes are used; you can add or remove color palettes
+// from there freely.
+
+// Background color for 'unlit' pixels
+// Can be set to CRGB::Black if desired.
+CRGB gBackgroundColor = CRGB::Black; 
+// Example of dim incandescent fairy light background color
+// CRGB gBackgroundColor = CRGB(CRGB::FairyLight).nscale8_video(16);
+
+// If AUTO_SELECT_BACKGROUND_COLOR is set to 1,
+// then for any palette where the first two entries 
+// are the same, a dimmed version of that color will
+// automatically be used as the background color.
+#define AUTO_SELECT_BACKGROUND_COLOR 0
+
+// If COOL_LIKE_INCANDESCENT is set to 1, colors will 
+// fade out slightly 'reddened', similar to how
+// incandescent bulbs change color as they dim down.
+#define COOL_LIKE_INCANDESCENT 1
+
+
+CRGBPalette16 gCurrentPalette;
+CRGBPalette16 gTargetPalette;
+
+void setup() {
+  delay( 3000 ); //safety startup delay
+  FastLED.setMaxPowerInVoltsAndMilliamps( VOLTS, MAX_MA);
+  FastLED.addLeds<LED_TYPE,DATA_PIN,COLOR_ORDER>(leds, NUM_LEDS)
+    .setCorrection(TypicalLEDStrip);
+
+  chooseNextColorPalette(gTargetPalette);
+}
+
+
+void loop()
+{
+  EVERY_N_SECONDS( SECONDS_PER_PALETTE ) { 
+    chooseNextColorPalette( gTargetPalette ); 
+  }
+  
+  EVERY_N_MILLISECONDS( 10 ) {
+    nblendPaletteTowardPalette( gCurrentPalette, gTargetPalette, 12);
+  }
+
+  drawTwinkles( leds);
+  
+  FastLED.show();
+}
+
+
+//  This function loops over each pixel, calculates the 
+//  adjusted 'clock' that this pixel should use, and calls 
+//  "computeOneTwinkle" on each pixel.  It then displays
+//  either the twinkle color or the background color, 
+//  whichever is brighter.
+void drawTwinkles( CRGBSet& L)
+{
+  // "PRNG16" is the pseudorandom number generator
+  // It MUST be reset to the same starting value each time
+  // this function is called, so that the sequence of 'random'
+  // numbers that it generates is (paradoxically) stable.
+  uint16_t PRNG16 = 11337;
+  
+  uint32_t clock32 = millis();
+
+  // Set up the background color, "bg".
+  // if AUTO_SELECT_BACKGROUND_COLOR == 1, and the first two colors of
+  // the current palette are identical, then a deeply faded version of
+  // that color is used for the background color
+  CRGB bg;
+  if( (AUTO_SELECT_BACKGROUND_COLOR == 1) &&
+      (gCurrentPalette[0] == gCurrentPalette[1] )) {
+    bg = gCurrentPalette[0];
+    uint8_t bglight = bg.getAverageLight();
+    if( bglight > 64) {
+      bg.nscale8_video( 16); // very bright, so scale to 1/16th
+    } else if( bglight > 16) {
+      bg.nscale8_video( 64); // not that bright, so scale to 1/4th
+    } else {
+      bg.nscale8_video( 86); // dim, scale to 1/3rd.
+    }
+  } else {
+    bg = gBackgroundColor; // just use the explicitly defined background color
+  }
+
+  uint8_t backgroundBrightness = bg.getAverageLight();
+  
+  for( CRGB& pixel: L) {
+    PRNG16 = (uint16_t)(PRNG16 * 2053) + 1384; // next 'random' number
+    uint16_t myclockoffset16= PRNG16; // use that number as clock offset
+    PRNG16 = (uint16_t)(PRNG16 * 2053) + 1384; // next 'random' number
+    // use that number as clock speed adjustment factor (in 8ths, from 8/8ths to 23/8ths)
+    uint8_t myspeedmultiplierQ5_3 =  ((((PRNG16 & 0xFF)>>4) + (PRNG16 & 0x0F)) & 0x0F) + 0x08;
+    uint32_t myclock30 = (uint32_t)((clock32 * myspeedmultiplierQ5_3) >> 3) + myclockoffset16;
+    uint8_t  myunique8 = PRNG16 >> 8; // get 'salt' value for this pixel
+
+    // We now have the adjusted 'clock' for this pixel, now we call
+    // the function that computes what color the pixel should be based
+    // on the "brightness = f( time )" idea.
+    CRGB c = computeOneTwinkle( myclock30, myunique8);
+
+    uint8_t cbright = c.getAverageLight();
+    int16_t deltabright = cbright - backgroundBrightness;
+    if( deltabright >= 32 || (!bg)) {
+      // If the new pixel is significantly brighter than the background color, 
+      // use the new color.
+      pixel = c;
+    } else if( deltabright > 0 ) {
+      // If the new pixel is just slightly brighter than the background color,
+      // mix a blend of the new color and the background color
+      pixel = blend( bg, c, deltabright * 8);
+    } else { 
+      // if the new pixel is not at all brighter than the background color,
+      // just use the background color.
+      pixel = bg;
+    }
+  }
+}
+
+
+//  This function takes a time in pseudo-milliseconds,
+//  figures out brightness = f( time ), and also hue = f( time )
+//  The 'low digits' of the millisecond time are used as 
+//  input to the brightness wave function.  
+//  The 'high digits' are used to select a color, so that the color
+//  does not change over the course of the fade-in, fade-out
+//  of one cycle of the brightness wave function.
+//  The 'high digits' are also used to determine whether this pixel
+//  should light at all during this cycle, based on the TWINKLE_DENSITY.
+CRGB computeOneTwinkle( uint32_t ms, uint8_t salt)
+{
+  uint16_t ticks = ms >> (8-TWINKLE_SPEED);
+  uint8_t fastcycle8 = ticks;
+  uint16_t slowcycle16 = (ticks >> 8) + salt;
+  slowcycle16 += sin8( slowcycle16);
+  slowcycle16 =  (slowcycle16 * 2053) + 1384;
+  uint8_t slowcycle8 = (slowcycle16 & 0xFF) + (slowcycle16 >> 8);
+  
+  uint8_t bright = 0;
+  if( ((slowcycle8 & 0x0E)/2) < TWINKLE_DENSITY) {
+    bright = attackDecayWave8( fastcycle8);
+  }
+
+  uint8_t hue = slowcycle8 - salt;
+  CRGB c;
+  if( bright > 0) {
+    c = ColorFromPalette( gCurrentPalette, hue, bright, NOBLEND);
+    if( COOL_LIKE_INCANDESCENT == 1 ) {
+      coolLikeIncandescent( c, fastcycle8);
+    }
+  } else {
+    c = CRGB::Black;
+  }
+  return c;
+}
+
+
+// This function is like 'triwave8', which produces a 
+// symmetrical up-and-down triangle sawtooth waveform, except that this
+// function produces a triangle wave with a faster attack and a slower decay:
+//
+//     / \ 
+//    /     \ 
+//   /         \ 
+//  /             \ 
+//
+
+uint8_t attackDecayWave8( uint8_t i)
+{
+  if( i < 86) {
+    return i * 3;
+  } else {
+    i -= 86;
+    return 255 - (i + (i/2));
+  }
+}
+
+// This function takes a pixel, and if it's in the 'fading down'
+// part of the cycle, it adjusts the color a little bit like the 
+// way that incandescent bulbs fade toward 'red' as they dim.
+void coolLikeIncandescent( CRGB& c, uint8_t phase)
+{
+  if( phase < 128) return;
+
+  uint8_t cooling = (phase - 128) >> 4;
+  c.g = qsub8( c.g, cooling);
+  c.b = qsub8( c.b, cooling * 2);
+}
+
+// A mostly red palette with green accents and white trim.
+// "CRGB::Gray" is used as white to keep the brightness more uniform.
+const TProgmemRGBPalette16 RedGreenWhite_p FL_PROGMEM =
+{  CRGB::Red, CRGB::Red, CRGB::Red, CRGB::Red, 
+   CRGB::Red, CRGB::Red, CRGB::Red, CRGB::Red, 
+   CRGB::Red, CRGB::Red, CRGB::Gray, CRGB::Gray, 
+   CRGB::Green, CRGB::Green, CRGB::Green, CRGB::Green };
+
+// A mostly (dark) green palette with red berries.
+#define Holly_Green 0x00580c
+#define Holly_Red   0xB00402
+const TProgmemRGBPalette16 Holly_p FL_PROGMEM =
+{  Holly_Green, Holly_Green, Holly_Green, Holly_Green, 
+   Holly_Green, Holly_Green, Holly_Green, Holly_Green, 
+   Holly_Green, Holly_Green, Holly_Green, Holly_Green, 
+   Holly_Green, Holly_Green, Holly_Green, Holly_Red 
+};
+
+// A red and white striped palette
+// "CRGB::Gray" is used as white to keep the brightness more uniform.
+const TProgmemRGBPalette16 RedWhite_p FL_PROGMEM =
+{  CRGB::Red,  CRGB::Red,  CRGB::Red,  CRGB::Red, 
+   CRGB::Gray, CRGB::Gray, CRGB::Gray, CRGB::Gray,
+   CRGB::Red,  CRGB::Red,  CRGB::Red,  CRGB::Red, 
+   CRGB::Gray, CRGB::Gray, CRGB::Gray, CRGB::Gray };
+
+// A mostly blue palette with white accents.
+// "CRGB::Gray" is used as white to keep the brightness more uniform.
+const TProgmemRGBPalette16 BlueWhite_p FL_PROGMEM =
+{  CRGB::Blue, CRGB::Blue, CRGB::Blue, CRGB::Blue, 
+   CRGB::Blue, CRGB::Blue, CRGB::Blue, CRGB::Blue, 
+   CRGB::Blue, CRGB::Blue, CRGB::Blue, CRGB::Blue, 
+   CRGB::Blue, CRGB::Gray, CRGB::Gray, CRGB::Gray };
+
+// A pure "fairy light" palette with some brightness variations
+#define HALFFAIRY ((CRGB::FairyLight & 0xFEFEFE) / 2)
+#define QUARTERFAIRY ((CRGB::FairyLight & 0xFCFCFC) / 4)
+const TProgmemRGBPalette16 FairyLight_p FL_PROGMEM =
+{  CRGB::FairyLight, CRGB::FairyLight, CRGB::FairyLight, CRGB::FairyLight, 
+   HALFFAIRY,        HALFFAIRY,        CRGB::FairyLight, CRGB::FairyLight, 
+   QUARTERFAIRY,     QUARTERFAIRY,     CRGB::FairyLight, CRGB::FairyLight, 
+   CRGB::FairyLight, CRGB::FairyLight, CRGB::FairyLight, CRGB::FairyLight };
+
+// A palette of soft snowflakes with the occasional bright one
+const TProgmemRGBPalette16 Snow_p FL_PROGMEM =
+{  0x304048, 0x304048, 0x304048, 0x304048,
+   0x304048, 0x304048, 0x304048, 0x304048,
+   0x304048, 0x304048, 0x304048, 0x304048,
+   0x304048, 0x304048, 0x304048, 0xE0F0FF };
+
+// A palette reminiscent of large 'old-school' C9-size tree lights
+// in the five classic colors: red, orange, green, blue, and white.
+#define C9_Red    0xB80400
+#define C9_Orange 0x902C02
+#define C9_Green  0x046002
+#define C9_Blue   0x070758
+#define C9_White  0x606820
+const TProgmemRGBPalette16 RetroC9_p FL_PROGMEM =
+{  C9_Red,    C9_Orange, C9_Red,    C9_Orange,
+   C9_Orange, C9_Red,    C9_Orange, C9_Red,
+   C9_Green,  C9_Green,  C9_Green,  C9_Green,
+   C9_Blue,   C9_Blue,   C9_Blue,
+   C9_White
+};
+
+// A cold, icy pale blue palette
+#define Ice_Blue1 0x0C1040
+#define Ice_Blue2 0x182080
+#define Ice_Blue3 0x5080C0
+const TProgmemRGBPalette16 Ice_p FL_PROGMEM =
+{
+  Ice_Blue1, Ice_Blue1, Ice_Blue1, Ice_Blue1,
+  Ice_Blue1, Ice_Blue1, Ice_Blue1, Ice_Blue1,
+  Ice_Blue1, Ice_Blue1, Ice_Blue1, Ice_Blue1,
+  Ice_Blue2, Ice_Blue2, Ice_Blue2, Ice_Blue3
+};
+
+
+// Add or remove palette names from this list to control which color
+// palettes are used, and in what order.
+const TProgmemRGBPalette16* ActivePaletteList[] = {
+  &RetroC9_p,
+  &BlueWhite_p,
+  &RainbowColors_p,
+  &FairyLight_p,
+  &RedGreenWhite_p,
+  &PartyColors_p,
+  &RedWhite_p,
+  &Snow_p,
+  &Holly_p,
+  &Ice_p  
+};
+
+
+// Advance to the next color palette in the list (above).
+void chooseNextColorPalette( CRGBPalette16& pal)
+{
+  const uint8_t numberOfPalettes = sizeof(ActivePaletteList) / sizeof(ActivePaletteList[0]);
+  static uint8_t whichPalette = -1; 
+  whichPalette = addmod8( whichPalette, 1, numberOfPalettes);
+
+  pal = *(ActivePaletteList[whichPalette]);
+}

From fefc5f563a33963b9998f0475ad050813b30adcb Mon Sep 17 00:00:00 2001
From: Mark Kriegsman <1334634+kriegsman@users.noreply.github.com>
Date: Sun, 2 Feb 2020 23:25:05 -0500
Subject: [PATCH 120/204] Rev. 3.3.3 to improve board support, add example
 animations, fix bugs, and to pass the torch.

---
 FastLED.h          |  4 ++--
 library.json       | 17 ++++++++++++++++-
 library.properties |  2 +-
 release_notes.md   | 10 ++++++++++
 4 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/FastLED.h b/FastLED.h
index 05e8530ca9..d7b6375a86 100644
--- a/FastLED.h
+++ b/FastLED.h
@@ -11,9 +11,9 @@
 #define FASTLED_VERSION 3003002
 #ifndef FASTLED_INTERNAL
 #  ifdef FASTLED_HAS_PRAGMA_MESSAGE
-#    pragma message "FastLED version 3.003.002"
+#    pragma message "FastLED version 3.003.003"
 #  else
-#    warning FastLED version 3.003.002  (Not really a warning, just telling you here.)
+#    warning FastLED version 3.003.003  (Not really a warning, just telling you here.)
 #  endif
 #endif
 
diff --git a/library.json b/library.json
index c7075be799..bcb24070f2 100644
--- a/library.json
+++ b/library.json
@@ -12,13 +12,28 @@
             "name": "Mark Kriegsman",
             "url": "https://github.com/kriegsman",
             "maintainer": true
+        },
+        {
+            "name": "Sam Guyer",
+            "url": "https://github.com/samguyer",
+            "maintainer": true
+        },
+        {
+            "name": "Jason Coon",
+            "url": "https://github.com/jasoncoon",
+            "maintainer": true
+        },
+        {
+            "name": "Josh Huber",
+            "url": "https://github.com/uberjay",
+            "maintainer": true
         }
     ],
     "repository": {
         "type": "git",
         "url": "https://github.com/FastLED/FastLED.git"
     },
-    "version": "3.3.2",
+    "version": "3.3.3",
     "license": "MIT",
     "homepage": "http://fastled.io",
     "frameworks": "arduino",
diff --git a/library.properties b/library.properties
index 1cb2d08a96..2ebea658bb 100644
--- a/library.properties
+++ b/library.properties
@@ -1,5 +1,5 @@
 name=FastLED
-version=3.3.2
+version=3.3.3
 author=Daniel Garcia
 maintainer=Daniel Garcia <dgarcia@fastled.io>
 sentence=Multi-platform library for controlling dozens of different types of LEDs along with optimized math, effect, and noise functions.
diff --git a/release_notes.md b/release_notes.md
index cf9ce1a365..9c38eedf76 100644
--- a/release_notes.md
+++ b/release_notes.md
@@ -1,3 +1,13 @@
+FastLED 3.3.3
+=============
+
+* Improved support for ESP32, Teensy4, ATmega16, nRF52, and ARM STM32.  
+* Added animation examples: "TwinkleFox" holiday lights, "Pride2015" moving rainbows, and "Pacifica" gentle ocean waves 
+* Fixed a few bugs including a rare divide-by-zero crash
+* Cleaned up code and examples a bit
+* Said our sad farewells to FastLED founder Daniel Garcia, who we lost in a tragic accident on September 2nd, 2019.  Dan's beautiful code and warm kindness have been at the heart of the library, and our community, for ten years.  FastLED will continue with help from all across the FastLED world, and Dan's spirit will be with us whenever the lights shine and glow.  Thank you, Dan, for everything.
+
+
 FastLED 3.3.2
 =============
 

From 49580d5c8d2bc280a58b6eea10949a5ff13e36b1 Mon Sep 17 00:00:00 2001
From: Ethan Johnston <rocketstrong600@gmail.com>
Date: Tue, 4 Feb 2020 01:36:57 +1000
Subject: [PATCH 121/204] fix for older gcc versions

fixes clockless_trinket.h:326:24: error: expected primary-expression before '__attribute__' on older gcc versions
added endif
---
 platforms/avr/clockless_trinket.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/platforms/avr/clockless_trinket.h b/platforms/avr/clockless_trinket.h
index 312e9a880f..8b7b88b98e 100644
--- a/platforms/avr/clockless_trinket.h
+++ b/platforms/avr/clockless_trinket.h
@@ -323,7 +323,11 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 #define DUSE (0xFF - (DADVANCE-1))
 
 // Silence compiler warnings about switch/case that is explicitly intended to fall through.
-#define FL_FALLTHROUGH __attribute__ ((fallthrough));
+#if defined(__GNUC__) && __GNUC__ >= 7
+ #define FL_FALLTHROUGH __attribute__ ((fallthrough));
+#else
+ #define FL_FALLTHROUGH ((void)0);
+#endif /* __GNUC__ >= 7 */
 
 	// This method is made static to force making register Y available to use for data on AVR - if the method is non-static, then
 	// gcc will use register Y for the this pointer.

From 6da0636e19ef310064959492b1b2e6ebf39e0132 Mon Sep 17 00:00:00 2001
From: Rory Hayes <rorosaurus@gmail.com>
Date: Thu, 6 Feb 2020 15:57:09 -0800
Subject: [PATCH 122/204] clarify comment for FASTLED_RMT_BUILTIN_DRIVER

it took me a second to realize there needs to be a value for this define or compilation fails. updated the comment to clarify for future visitors :)
---
 platforms/esp/32/clockless_rmt_esp32.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/platforms/esp/32/clockless_rmt_esp32.h b/platforms/esp/32/clockless_rmt_esp32.h
index de5b7c9840..7c4a6e487e 100644
--- a/platforms/esp/32/clockless_rmt_esp32.h
+++ b/platforms/esp/32/clockless_rmt_esp32.h
@@ -49,7 +49,7 @@
  * co-exist. To switch to this mode, add the following directive
  * before you include FastLED.h:
  *
- *      #define FASTLED_RMT_BUILTIN_DRIVER
+ *      #define FASTLED_RMT_BUILTIN_DRIVER 1
  *
  * There may be a performance penalty for using this mode. We need to
  * compute the RMT signal for the entire LED strip ahead of time,

From 3fb73d4ddf4d20e16e013cd931189a30e9c55b4a Mon Sep 17 00:00:00 2001
From: jackw01 <jackw01@users.noreply.github.com>
Date: Thu, 6 Feb 2020 19:04:52 -0800
Subject: [PATCH 123/204] overload array subscript operator for CHSV structs
 like it is for CRGB

---
 pixeltypes.h | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/pixeltypes.h b/pixeltypes.h
index ff327fd9a8..f4e57061e5 100644
--- a/pixeltypes.h
+++ b/pixeltypes.h
@@ -38,6 +38,18 @@ struct CHSV {
 		uint8_t raw[3];
 	};
 
+    /// Array access operator to index into the chsv object
+	inline uint8_t& operator[] (uint8_t x) __attribute__((always_inline))
+    {
+        return raw[x];
+    }
+
+    /// Array access operator to index into the chsv object
+    inline const uint8_t& operator[] (uint8_t x) const __attribute__((always_inline))
+    {
+        return raw[x];
+    }
+
     /// default values are UNITIALIZED
     inline CHSV() __attribute__((always_inline))
     {
@@ -106,7 +118,7 @@ struct CRGB {
 		uint8_t raw[3];
 	};
 
-  /// Array access operator to index into the crgb object
+    /// Array access operator to index into the crgb object
 	inline uint8_t& operator[] (uint8_t x) __attribute__((always_inline))
     {
         return raw[x];
@@ -478,7 +490,7 @@ struct CRGB {
         uint8_t max = red;
         if( green > max) max = green;
         if( blue > max) max = blue;
-        
+
         // stop div/0 when color is black
         if(max > 0) {
             uint16_t factor = ((uint16_t)(limit) * 256) / max;

From 264b444fc065ea4bd9f28abe3e1868df403c31d8 Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Sat, 8 Feb 2020 23:22:09 -0500
Subject: [PATCH 124/204] Two changes to improve stability. First, added a min
 wait that forces calls to show() to be at least 50 microseconds apart.
 Second, added optional calls to ESP-IDF functions that force flash operations
 to wait until show() is complete.

---
 platforms/esp/32/clockless_rmt_esp32.h | 28 ++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/platforms/esp/32/clockless_rmt_esp32.h b/platforms/esp/32/clockless_rmt_esp32.h
index de5b7c9840..bc18589e00 100644
--- a/platforms/esp/32/clockless_rmt_esp32.h
+++ b/platforms/esp/32/clockless_rmt_esp32.h
@@ -58,6 +58,13 @@
  * represented by a 32-bit pulse specification, so it is a 32X blow-up
  * in memory use.
  *
+ * NEW: Use of Flash memory on the ESP32 can interfere with the timing
+ *      of pixel output. The ESP-IDF system code disables all other
+ *      code running on *either* core during these operations. To prevent
+ *      this from happening, define this flag. It will force flash
+ *      operations to wait until the show() is done.
+ *
+ * #define FASTLED_ESP32_FLASH_LOCK
  *
  * Based on public domain code created 19 Nov 2016 by Chris Osborn <fozztexx@fozztexx.com>
  * http://insentricity.com *
@@ -101,6 +108,9 @@ extern "C" {
 
 #include "esp_log.h"
 
+extern void spi_flash_op_lock(void);
+extern void spi_flash_op_unlock(void);
+
 #ifdef __cplusplus
 }
 #endif
@@ -200,6 +210,9 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
     rmt_item32_t * mBuffer;
     uint16_t       mBufferSize;
 
+    // -- Make sure we can't call show() too quickly
+    CMinWait<50>   mWait;
+
 public:
 
     void init()
@@ -294,6 +307,11 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
                 initRMT();
             }
             xSemaphoreTake(gTX_sem, portMAX_DELAY);
+
+#ifdef FASTLED_ESP32_FLASH_LOCK
+            // -- Make sure no flash operations happen right now
+            spi_flash_op_lock();
+#endif
         }
 
         if (FASTLED_RMT_BUILTIN_DRIVER)
@@ -321,6 +339,9 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
                 channel++;
             }
 
+            // -- Make sure it's been at least 50 microseconds since last show
+            mWait.wait();
+
             // -- Start them all
             for (int i = 0; i < channel; i++) {
                 ClocklessController * pController = static_cast<ClocklessController*>(gControllers[i]);
@@ -333,10 +354,17 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
             xSemaphoreTake(gTX_sem, portMAX_DELAY);
             xSemaphoreGive(gTX_sem);
 
+            mWait.mark();
+
             // -- Reset the counters
             gNumStarted = 0;
             gNumDone = 0;
             gNext = 0;
+
+#ifdef FASTLED_ESP32_FLASH_LOCK
+            // -- Release the lock on flash operations
+            spi_flash_op_unlock();
+#endif
         }
     }
 

From 5387b844f44b9d0554b5c374f974f75b33a23eb8 Mon Sep 17 00:00:00 2001
From: Ethan Johnston <rocketstrong600@gmail.com>
Date: Mon, 10 Feb 2020 08:29:38 +1000
Subject: [PATCH 125/204] Update clockless_trinket.h

---
 platforms/avr/clockless_trinket.h | 28 ++++++++++++----------------
 1 file changed, 12 insertions(+), 16 deletions(-)

diff --git a/platforms/avr/clockless_trinket.h b/platforms/avr/clockless_trinket.h
index 8b7b88b98e..7384a03c7c 100644
--- a/platforms/avr/clockless_trinket.h
+++ b/platforms/avr/clockless_trinket.h
@@ -323,14 +323,10 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 #define DUSE (0xFF - (DADVANCE-1))
 
 // Silence compiler warnings about switch/case that is explicitly intended to fall through.
-#if defined(__GNUC__) && __GNUC__ >= 7
- #define FL_FALLTHROUGH __attribute__ ((fallthrough));
-#else
- #define FL_FALLTHROUGH ((void)0);
-#endif /* __GNUC__ >= 7 */
+//#define FL_FALLTHROUGH __attribute__ ((fallthrough));
 
-	// This method is made static to force making register Y available to use for data on AVR - if the method is non-static, then
-	// gcc will use register Y for the this pointer.
+// This method is made static to force making register Y available to use for data on AVR - if the method is non-static, then
+// gcc will use register Y for the this pointer.
 	static void /*__attribute__((optimize("O0")))*/  /*__attribute__ ((always_inline))*/  showRGBInternal(PixelController<RGB_ORDER> & pixels)  {
 		uint8_t *data = (uint8_t*)pixels.mData;
 		data_ptr_t port = FastPin<DATA_PIN>::port();
@@ -410,9 +406,9 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 				HI1 _D1(1) QLO2(b0, 1) RORSC14(b1,7) 	_D2(4)	LO1 RORCLC2(b1) 	_D3(2)
 				HI1 _D1(1) QLO2(b0, 0)
 				switch(XTRA0) {
-					case 4: _D2(0) LO1 _D3(0) HI1 _D1(1) QLO2(b0,0)  FL_FALLTHROUGH
-					case 3: _D2(0) LO1 _D3(0) HI1 _D1(1) QLO2(b0,0)  FL_FALLTHROUGH
-					case 2: _D2(0) LO1 _D3(0) HI1 _D1(1) QLO2(b0,0)  FL_FALLTHROUGH
+					case 4: _D2(0) LO1 _D3(0) HI1 _D1(1) QLO2(b0,0)  /* fall through */
+					case 3: _D2(0) LO1 _D3(0) HI1 _D1(1) QLO2(b0,0)  /* fall through */
+					case 2: _D2(0) LO1 _D3(0) HI1 _D1(1) QLO2(b0,0)  /* fall through */
 					case 1: _D2(0) LO1 _D3(0) HI1 _D1(1) QLO2(b0,0)
 				}
 				MOV_ADDDE14(b0,b1,d1,e1) _D2(4) LO1 _D3(0)
@@ -426,9 +422,9 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 				HI1 _D1(1) QLO2(b0, 1) RORSC24(b1,7) 	_D2(4)	LO1 RORCLC2(b1) 	_D3(2)
 				HI1 _D1(1) QLO2(b0, 0)
 				switch(XTRA0) {
-					case 4: _D2(0) LO1 _D3(0) HI1 _D1(1) QLO2(b0,0)  FL_FALLTHROUGH
-					case 3: _D2(0) LO1 _D3(0) HI1 _D1(1) QLO2(b0,0)  FL_FALLTHROUGH
-					case 2: _D2(0) LO1 _D3(0) HI1 _D1(1) QLO2(b0,0)  FL_FALLTHROUGH
+					case 4: _D2(0) LO1 _D3(0) HI1 _D1(1) QLO2(b0,0)  /* fall through */
+					case 3: _D2(0) LO1 _D3(0) HI1 _D1(1) QLO2(b0,0)  /* fall through */
+					case 2: _D2(0) LO1 _D3(0) HI1 _D1(1) QLO2(b0,0)  /* fall through */
 					case 1: _D2(0) LO1 _D3(0) HI1 _D1(1) QLO2(b0,0)
 				}
 
@@ -445,9 +441,9 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 				HI1 _D1(1) QLO2(b0, 1) RORSC04(b1,7) 	_D2(4)	LO1 RORCLC2(b1) 	_D3(2)
 				HI1 _D1(1) QLO2(b0, 0)
 				switch(XTRA0) {
-					case 4: _D2(0) LO1 _D3(0) HI1 _D1(1) QLO2(b0,0)  FL_FALLTHROUGH
-					case 3: _D2(0) LO1 _D3(0) HI1 _D1(1) QLO2(b0,0)  FL_FALLTHROUGH
-					case 2: _D2(0) LO1 _D3(0) HI1 _D1(1) QLO2(b0,0)  FL_FALLTHROUGH
+					case 4: _D2(0) LO1 _D3(0) HI1 _D1(1) QLO2(b0,0)  /* fall through */
+					case 3: _D2(0) LO1 _D3(0) HI1 _D1(1) QLO2(b0,0)  /* fall through */
+					case 2: _D2(0) LO1 _D3(0) HI1 _D1(1) QLO2(b0,0)  /* fall through */
 					case 1: _D2(0) LO1 _D3(0) HI1 _D1(1) QLO2(b0,0)
 				}
 				MOV_ADDDE04(b0,b1,d0,e0) _D2(4) LO1 _D3(5)

From 63b254ea0eaabc4aa8926acdb6dd6cac32b98e43 Mon Sep 17 00:00:00 2001
From: Nick Pisarro <infinityminusnine@gmail.com>
Date: Thu, 31 Oct 2019 11:40:32 -0700
Subject: [PATCH 126/204] Condition on attiny flag

---
 lib8tion/random8.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/lib8tion/random8.h b/lib8tion/random8.h
index ab9ac27e52..d834abd51d 100644
--- a/lib8tion/random8.h
+++ b/lib8tion/random8.h
@@ -12,8 +12,11 @@
 #define FASTLED_RAND16_2053  ((uint16_t)(2053))
 #define FASTLED_RAND16_13849 ((uint16_t)(13849))
 
-// equivalent to x * 2053
+#if defined(LIB8_ATTINY)
 #define APPLY_FASTLED_RAND16_2053(x) (x << 11) + (x << 2) + x
+#else
+#define APPLY_FASTLED_RAND16_2053(x) (x * FASTLED_RAND16_2053)
+#endif
 
 /// random number seed
 extern uint16_t rand16seed;// = RAND16_SEED;

From c12ba9e2c7c2aa073ed86e8dc2ed2795ced21fcb Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Mon, 10 Feb 2020 18:53:04 -0500
Subject: [PATCH 127/204] Changes the #define to have a value (mek suggestion)

---
 platforms/esp/32/clockless_rmt_esp32.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/platforms/esp/32/clockless_rmt_esp32.h b/platforms/esp/32/clockless_rmt_esp32.h
index bc18589e00..b8cd1f42dd 100644
--- a/platforms/esp/32/clockless_rmt_esp32.h
+++ b/platforms/esp/32/clockless_rmt_esp32.h
@@ -64,7 +64,7 @@
  *      this from happening, define this flag. It will force flash
  *      operations to wait until the show() is done.
  *
- * #define FASTLED_ESP32_FLASH_LOCK
+ * #define FASTLED_ESP32_FLASH_LOCK 1
  *
  * Based on public domain code created 19 Nov 2016 by Chris Osborn <fozztexx@fozztexx.com>
  * http://insentricity.com *
@@ -308,7 +308,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
             }
             xSemaphoreTake(gTX_sem, portMAX_DELAY);
 
-#ifdef FASTLED_ESP32_FLASH_LOCK
+#if FASTLED_ESP32_FLASH_LOCK == 1
             // -- Make sure no flash operations happen right now
             spi_flash_op_lock();
 #endif
@@ -361,7 +361,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
             gNumDone = 0;
             gNext = 0;
 
-#ifdef FASTLED_ESP32_FLASH_LOCK
+#if FASTLED_ESP32_FLASH_LOCK == 1
             // -- Release the lock on flash operations
             spi_flash_op_unlock();
 #endif

From c52b4db163e75c3dcd001d8b301529d6e6709136 Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Thu, 5 Mar 2020 19:18:02 -0500
Subject: [PATCH 128/204] Added an enforced minimum wait of 50 microseconds
 between calls to show

---
 platforms/esp/32/clockless_i2s_esp32.h | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/platforms/esp/32/clockless_i2s_esp32.h b/platforms/esp/32/clockless_i2s_esp32.h
index a4d15ba750..263062d449 100644
--- a/platforms/esp/32/clockless_i2s_esp32.h
+++ b/platforms/esp/32/clockless_i2s_esp32.h
@@ -199,7 +199,10 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
     // -- Save the pixel controller
     PixelController<RGB_ORDER> * mPixels;
     
-public:
+    // -- Make sure we can't call show() too quickly
+    CMinWait<50>   mWait;
+
+ public:
 
     void init()
     {
@@ -574,6 +577,9 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
             fillBuffer();
             fillBuffer();
             
+            // -- Make sure it's been at least 50 microseconds since last show
+            mWait.wait();
+
             i2sStart();
             
             // -- Wait here while the rest of the data is sent. The interrupt handler
@@ -584,6 +590,8 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
             
             i2sStop();
             
+            mWait.mark();
+
             // -- Reset the counters
             gNumStarted = 0;
         }

From 3c9c3463c11b364af2b17304ebe165a0dc38d73f Mon Sep 17 00:00:00 2001
From: Marc MERLIN <marc_soft@merlins.org>
Date: Tue, 24 Mar 2020 10:56:13 -0700
Subject: [PATCH 129/204] Fix GCC warnings on memmove8.

---
 colorutils.h | 36 ++++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/colorutils.h b/colorutils.h
index 4fcf394047..92868434a9 100644
--- a/colorutils.h
+++ b/colorutils.h
@@ -452,11 +452,11 @@ class CHSVPalette16 {
 
     CHSVPalette16( const CHSVPalette16& rhs)
     {
-        memmove8( &(entries[0]), &(rhs.entries[0]), sizeof( entries));
+        memmove8( (void *) &(entries[0]), &(rhs.entries[0]), sizeof( entries));
     }
     CHSVPalette16& operator=( const CHSVPalette16& rhs)
     {
-        memmove8( &(entries[0]), &(rhs.entries[0]), sizeof( entries));
+        memmove8( (void *) &(entries[0]), &(rhs.entries[0]), sizeof( entries));
         return *this;
     }
 
@@ -555,11 +555,11 @@ class CHSVPalette256 {
 
     CHSVPalette256( const CHSVPalette256& rhs)
     {
-        memmove8( &(entries[0]), &(rhs.entries[0]), sizeof( entries));
+        memmove8( (void *) &(entries[0]), &(rhs.entries[0]), sizeof( entries));
     }
     CHSVPalette256& operator=( const CHSVPalette256& rhs)
     {
-        memmove8( &(entries[0]), &(rhs.entries[0]), sizeof( entries));
+        memmove8( (void *) &(entries[0]), &(rhs.entries[0]), sizeof( entries));
         return *this;
     }
 
@@ -660,20 +660,20 @@ class CRGBPalette16 {
 
     CRGBPalette16( const CRGBPalette16& rhs)
     {
-        memmove8( &(entries[0]), &(rhs.entries[0]), sizeof( entries));
+        memmove8( (void *) &(entries[0]), &(rhs.entries[0]), sizeof( entries));
     }
     CRGBPalette16( const CRGB rhs[16])
     {
-        memmove8( &(entries[0]), &(rhs[0]), sizeof( entries));
+        memmove8( (void *) &(entries[0]), &(rhs[0]), sizeof( entries));
     }
     CRGBPalette16& operator=( const CRGBPalette16& rhs)
     {
-        memmove8( &(entries[0]), &(rhs.entries[0]), sizeof( entries));
+        memmove8( (void *) &(entries[0]), &(rhs.entries[0]), sizeof( entries));
         return *this;
     }
     CRGBPalette16& operator=( const CRGB rhs[16])
     {
-        memmove8( &(entries[0]), &(rhs[0]), sizeof( entries));
+        memmove8( (void *) &(entries[0]), &(rhs[0]), sizeof( entries));
         return *this;
     }
 
@@ -928,11 +928,11 @@ class CHSVPalette32 {
     
     CHSVPalette32( const CHSVPalette32& rhs)
     {
-        memmove8( &(entries[0]), &(rhs.entries[0]), sizeof( entries));
+        memmove8( (void *) &(entries[0]), &(rhs.entries[0]), sizeof( entries));
     }
     CHSVPalette32& operator=( const CHSVPalette32& rhs)
     {
-        memmove8( &(entries[0]), &(rhs.entries[0]), sizeof( entries));
+        memmove8( (void *) &(entries[0]), &(rhs.entries[0]), sizeof( entries));
         return *this;
     }
     
@@ -1034,20 +1034,20 @@ class CRGBPalette32 {
     
     CRGBPalette32( const CRGBPalette32& rhs)
     {
-        memmove8( &(entries[0]), &(rhs.entries[0]), sizeof( entries));
+        memmove8( (void *) &(entries[0]), &(rhs.entries[0]), sizeof( entries));
     }
     CRGBPalette32( const CRGB rhs[32])
     {
-        memmove8( &(entries[0]), &(rhs[0]), sizeof( entries));
+        memmove8( (void *) &(entries[0]), &(rhs[0]), sizeof( entries));
     }
     CRGBPalette32& operator=( const CRGBPalette32& rhs)
     {
-        memmove8( &(entries[0]), &(rhs.entries[0]), sizeof( entries));
+        memmove8( (void *) &(entries[0]), &(rhs.entries[0]), sizeof( entries));
         return *this;
     }
     CRGBPalette32& operator=( const CRGB rhs[32])
     {
-        memmove8( &(entries[0]), &(rhs[0]), sizeof( entries));
+        memmove8( (void *) &(entries[0]), &(rhs[0]), sizeof( entries));
         return *this;
     }
     
@@ -1322,20 +1322,20 @@ class CRGBPalette256 {
 
     CRGBPalette256( const CRGBPalette256& rhs)
     {
-        memmove8( &(entries[0]), &(rhs.entries[0]), sizeof( entries));
+        memmove8( (void *) &(entries[0]), &(rhs.entries[0]), sizeof( entries));
     }
     CRGBPalette256( const CRGB rhs[256])
     {
-        memmove8( &(entries[0]), &(rhs[0]), sizeof( entries));
+        memmove8( (void *) &(entries[0]), &(rhs[0]), sizeof( entries));
     }
     CRGBPalette256& operator=( const CRGBPalette256& rhs)
     {
-        memmove8( &(entries[0]), &(rhs.entries[0]), sizeof( entries));
+        memmove8( (void *) &(entries[0]), &(rhs.entries[0]), sizeof( entries));
         return *this;
     }
     CRGBPalette256& operator=( const CRGB rhs[256])
     {
-        memmove8( &(entries[0]), &(rhs[0]), sizeof( entries));
+        memmove8( (void *) &(entries[0]), &(rhs[0]), sizeof( entries));
         return *this;
     }
 

From 15b30d55307a02302c93d3163da13cd54d08c2f0 Mon Sep 17 00:00:00 2001
From: Uri Shaked <uri@urishaked.com>
Date: Wed, 25 Mar 2020 21:27:57 +0200
Subject: [PATCH 130/204] Fix small typo in comment
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

statemetns  → statements
---
 platforms/avr/clockless_trinket.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/platforms/avr/clockless_trinket.h b/platforms/avr/clockless_trinket.h
index 7384a03c7c..824553feff 100644
--- a/platforms/avr/clockless_trinket.h
+++ b/platforms/avr/clockless_trinket.h
@@ -167,7 +167,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 	}
 #define USE_ASM_MACROS
 
-// The variables that our various asm statemetns use.  The same block of variables needs to be declared for
+// The variables that our various asm statements use.  The same block of variables needs to be declared for
 // all the asm blocks because GCC is pretty stupid and it would clobber variables happily or optimize code away too aggressively
 #define ASM_VARS : /* write variables */				\
 				[count] "+x" (count),					\

From 1550a942ba69272e10e4ca56fd51dd9f074e1671 Mon Sep 17 00:00:00 2001
From: 5chmidti <44101708+5chmidti@users.noreply.github.com>
Date: Fri, 27 Mar 2020 23:03:22 +0100
Subject: [PATCH 131/204] use default copy constructor and copy assignments for
 CRGB and CHSV

---
 pixeltypes.h | 31 ++++---------------------------
 1 file changed, 4 insertions(+), 27 deletions(-)

diff --git a/pixeltypes.h b/pixeltypes.h
index f4e57061e5..4abba01ca2 100644
--- a/pixeltypes.h
+++ b/pixeltypes.h
@@ -62,20 +62,9 @@ struct CHSV {
     }
 
     /// allow copy construction
-    inline CHSV(const CHSV& rhs) __attribute__((always_inline))
-    {
-        h = rhs.h;
-        s = rhs.s;
-        v = rhs.v;
-    }
+    inline CHSV(const CHSV& rhs) __attribute__((always_inline)) = default;
 
-    inline CHSV& operator= (const CHSV& rhs) __attribute__((always_inline))
-    {
-        h = rhs.h;
-        s = rhs.s;
-        v = rhs.v;
-        return *this;
-    }
+    inline CHSV& operator= (const CHSV& rhs) __attribute__((always_inline)) = default;
 
     inline CHSV& setHSV(uint8_t ih, uint8_t is, uint8_t iv) __attribute__((always_inline))
     {
@@ -162,13 +151,7 @@ struct CRGB {
     }
 
     /// allow copy construction
-	inline CRGB(const CRGB& rhs) __attribute__((always_inline))
-    {
-        r = rhs.r;
-        g = rhs.g;
-        b = rhs.b;
-    }
-
+	inline CRGB(const CRGB& rhs) __attribute__((always_inline)) = default;
     /// allow construction from HSV color
 	inline CRGB(const CHSV& rhs) __attribute__((always_inline))
     {
@@ -176,13 +159,7 @@ struct CRGB {
     }
 
     /// allow assignment from one RGB struct to another
-	inline CRGB& operator= (const CRGB& rhs) __attribute__((always_inline))
-    {
-        r = rhs.r;
-        g = rhs.g;
-        b = rhs.b;
-        return *this;
-    }
+	inline CRGB& operator= (const CRGB& rhs) __attribute__((always_inline)) = default;
 
     /// allow assignment from 32-bit (really 24-bit) 0xRRGGBB color code
 	inline CRGB& operator= (const uint32_t colorcode) __attribute__((always_inline))

From 91311f510b3ed0bcaf5b50f4ee564120f57a74e4 Mon Sep 17 00:00:00 2001
From: 5kft <5kft@users.noreply.github.com>
Date: Wed, 1 Apr 2020 18:35:36 -0700
Subject: [PATCH 132/204] added platform support for the Seeeduino XIAO board

---
 platforms/arm/d21/fastpin_arm_d21.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/platforms/arm/d21/fastpin_arm_d21.h b/platforms/arm/d21/fastpin_arm_d21.h
index fac0185806..cbfe5dbb2f 100644
--- a/platforms/arm/d21/fastpin_arm_d21.h
+++ b/platforms/arm/d21/fastpin_arm_d21.h
@@ -97,6 +97,17 @@ _FL_DEFPIN(17,  4, 0);   _FL_DEFPIN(18,  5, 0);   _FL_DEFPIN(19, 6, 0);
 
 #define HAS_HARDWARE_PIN_SUPPORT 1
 
+#elif defined(SEEED_XIAO_M0)
+
+#define MAX_PIN 10
+_FL_DEFPIN( 0, 2,0); _FL_DEFPIN( 1, 4,0); _FL_DEFPIN( 2,10,0); _FL_DEFPIN( 3,11,0);
+_FL_DEFPIN( 4, 8,0); _FL_DEFPIN( 5, 9,0); _FL_DEFPIN( 6, 8,1); _FL_DEFPIN( 7, 9,1);
+_FL_DEFPIN( 8, 7,0); _FL_DEFPIN( 9, 5,0); _FL_DEFPIN(10, 6,0);
+
+#define SPI_DATA 9
+#define SPI_CLOCK 8
+
+#define HAS_HARDWARE_PIN_SUPPORT 1
 
 #elif defined(ARDUINO_SAMD_ZERO)
 

From bc3c6cdb5998b33a8360580c83e41ee037dae883 Mon Sep 17 00:00:00 2001
From: Paul <5690545+PaulZC@users.noreply.github.com>
Date: Tue, 7 Apr 2020 10:22:48 +0100
Subject: [PATCH 133/204] Initial commit. Bit banging appears to be working!

---
 led_sysdefs.h                           |   3 +
 lib8tion.h                              |  38 +++++-
 platforms.h                             |   2 +
 platforms/apollo3/clockless_apollo3.h   |  14 +++
 platforms/apollo3/fastled_apollo3.h     |   8 ++
 platforms/apollo3/fastpin_apollo3.h     | 151 ++++++++++++++++++++++++
 platforms/apollo3/fastspi_apollo3.h     |  18 +++
 platforms/apollo3/led_sysdefs_apollo3.h |  39 ++++++
 8 files changed, 270 insertions(+), 3 deletions(-)
 create mode 100644 platforms/apollo3/clockless_apollo3.h
 create mode 100644 platforms/apollo3/fastled_apollo3.h
 create mode 100644 platforms/apollo3/fastpin_apollo3.h
 create mode 100644 platforms/apollo3/fastspi_apollo3.h
 create mode 100644 platforms/apollo3/led_sysdefs_apollo3.h

diff --git a/led_sysdefs.h b/led_sysdefs.h
index 04afef9a97..1301a1a034 100644
--- a/led_sysdefs.h
+++ b/led_sysdefs.h
@@ -35,6 +35,9 @@
 #elif defined(__AVR__)
 // AVR platforms
 #include "platforms/avr/led_sysdefs_avr.h"
+#elif defined(ARDUINO_ARCH_APOLLO3)
+// Apollo3 platforms (e.g. the Ambiq Micro Apollo3 Blue as used by the SparkFun Artemis platforms)
+#include "platforms/apollo3/led_sysdefs_apollo3.h"
 #else
 //
 // We got here because we don't recognize the platform that you're
diff --git a/lib8tion.h b/lib8tion.h
index 24c5d0abae..62db2b1d3e 100644
--- a/lib8tion.h
+++ b/lib8tion.h
@@ -181,7 +181,7 @@ Lib8tion is pronounced like 'libation': lie-BAY-shun
 #if !defined(__AVR__)
 #include <string.h>
 // for memmove, memcpy, and memset if not defined here
-#endif
+#endif // end of !defined(__AVR__)
 
 #if defined(__arm__)
 
@@ -195,8 +195,31 @@ Lib8tion is pronounced like 'libation': lie-BAY-shun
 // Generic ARM
 #define QADD8_C 1
 #define QADD7_C 1
-#endif
+#endif // end of defined(FASTLED_TEENSY3)
+
+#define QSUB8_C 1
+#define SCALE8_C 1
+#define SCALE16BY8_C 1
+#define SCALE16_C 1
+#define ABS8_C 1
+#define MUL8_C 1
+#define QMUL8_C 1
+#define ADD8_C 1
+#define SUB8_C 1
+#define EASE8_C 1
+#define AVG8_C 1
+#define AVG7_C 1
+#define AVG16_C 1
+#define AVG15_C 1
+#define BLEND8_C 1
 
+// end of #if defined(__arm__)
+
+#elif defined(ARDUINO_ARCH_APOLLO3)
+
+// Default to using the standard C functions for now
+#define QADD8_C 1
+#define QADD7_C 1
 #define QSUB8_C 1
 #define SCALE8_C 1
 #define SCALE16BY8_C 1
@@ -213,6 +236,7 @@ Lib8tion is pronounced like 'libation': lie-BAY-shun
 #define AVG15_C 1
 #define BLEND8_C 1
 
+// end of #elif defined(ARDUINO_ARCH_APOLLO3)
 
 #elif defined(__AVR__)
 
@@ -274,7 +298,9 @@ Lib8tion is pronounced like 'libation': lie-BAY-shun
 #define QMUL8_AVRASM 0
 #define EASE8_AVRASM 0
 #define BLEND8_AVRASM 0
-#endif
+#endif // end of !defined(LIB8_ATTINY)
+
+// end of #elif defined(__AVR__)
 
 #else
 
@@ -811,6 +837,9 @@ template<class T, int F, int I> class q {
 #ifdef FASTLED_ARM
   int operator*(int v) { return (v*i) + ((v*f)>>F); }
 #endif
+#ifdef FASTLED_APOLLO3
+  int operator*(int v) { return (v*i) + ((v*f)>>F); }
+#endif
 };
 
 template<class T, int F, int I> static uint32_t operator*(uint32_t v, q<T,F,I> & q) { return q * v; }
@@ -820,6 +849,9 @@ template<class T, int F, int I> static int16_t operator*(int16_t v, q<T,F,I> & q
 #ifdef FASTLED_ARM
 template<class T, int F, int I> static int operator*(int v, q<T,F,I> & q) { return q * v; }
 #endif
+#ifdef FASTLED_APOLLO3
+template<class T, int F, int I> static int operator*(int v, q<T,F,I> & q) { return q * v; }
+#endif
 
 /// A 4.4 integer (4 bits integer, 4 bits fraction)
 typedef q<uint8_t, 4,4> q44;
diff --git a/platforms.h b/platforms.h
index f66599fd3f..7969c9e4de 100644
--- a/platforms.h
+++ b/platforms.h
@@ -34,6 +34,8 @@
 #include "platforms/esp/8266/fastled_esp8266.h"
 #elif defined(ESP32)
 #include "platforms/esp/32/fastled_esp32.h"
+#elif defined(ARDUINO_ARCH_APOLLO3)
+#include "platforms/apollo3/fastled_apollo3.h"
 #else
 // AVR platforms
 #include "platforms/avr/fastled_avr.h"
diff --git a/platforms/apollo3/clockless_apollo3.h b/platforms/apollo3/clockless_apollo3.h
new file mode 100644
index 0000000000..5ed0e27d60
--- /dev/null
+++ b/platforms/apollo3/clockless_apollo3.h
@@ -0,0 +1,14 @@
+#ifndef __INC_CLOCKLESS_APOLLO3_H
+#define __INC_CLOCKLESS_APOLLO3_H
+
+FASTLED_NAMESPACE_BEGIN
+
+#if defined(FASTLED_APOLLO3)
+
+
+
+#endif
+
+FASTLED_NAMESPACE_END
+
+#endif
diff --git a/platforms/apollo3/fastled_apollo3.h b/platforms/apollo3/fastled_apollo3.h
new file mode 100644
index 0000000000..47f68ed459
--- /dev/null
+++ b/platforms/apollo3/fastled_apollo3.h
@@ -0,0 +1,8 @@
+#ifndef __INC_FASTLED_APOLLO3_H
+#define __INC_FASTLED_APOLLO3_H
+
+#include "fastpin_apollo3.h"
+//#include "fastspi_apollo3.h"
+//#include "clockless_apollo3.h"
+
+#endif
diff --git a/platforms/apollo3/fastpin_apollo3.h b/platforms/apollo3/fastpin_apollo3.h
new file mode 100644
index 0000000000..712786c675
--- /dev/null
+++ b/platforms/apollo3/fastpin_apollo3.h
@@ -0,0 +1,151 @@
+#ifndef __INC_FASTPIN_APOLLO3_H
+#define __INC_FASTPIN_APOLLO3_H
+
+FASTLED_NAMESPACE_BEGIN
+
+#if defined(FASTLED_FORCE_SOFTWARE_PINS)
+#warning "Software pin support forced, pin access will be slightly slower."
+#define NO_HARDWARE_PIN_SUPPORT
+#undef HAS_HARDWARE_PIN_SUPPORT
+
+#else
+
+template<uint8_t PIN> class _APOLLO3PIN {
+
+public:
+  typedef volatile uint32_t * port_ptr_t;
+  typedef uint32_t port_t;
+
+  inline static void setOutput() { pinMode(PIN, OUTPUT); }
+  inline static void setInput() { pinMode(PIN, INPUT); }
+
+  inline static void hi() __attribute__ ((always_inline)) { digitalWrite(PIN, HIGH); }
+  inline static void lo() __attribute__ ((always_inline)) { digitalWrite(PIN, LOW); }
+  inline static void set(register port_t val) __attribute__ ((always_inline)) { digitalWrite(PIN, val); }
+
+  inline static void strobe() __attribute__ ((always_inline)) { toggle(); toggle(); }
+
+  inline static void toggle() __attribute__ ((always_inline)) { if(digitalRead(PIN)) { lo(); } else { hi(); } }
+
+  inline static void hi(register port_ptr_t port) __attribute__ ((always_inline)) { hi(); }
+  inline static void lo(register port_ptr_t port) __attribute__ ((always_inline)) { lo(); }
+  inline static void fastset(register port_ptr_t port, register port_t val) __attribute__ ((always_inline)) { digitalWrite(PIN, val); }
+
+  inline static port_t hival() __attribute__ ((always_inline)) { return 0; }
+  inline static port_t loval() __attribute__ ((always_inline)) { return 0; }
+  inline static port_ptr_t port() __attribute__ ((always_inline)) { return NULL; }
+  inline static port_t mask() __attribute__ ((always_inline)) { return 0; }
+};
+
+#define _FL_DEFPIN(PIN) template<> class FastPin<PIN> : public _APOLLO3PIN<PIN> {};
+
+// Actual pin definitions
+#if defined(ARDUINO_SFE_EDGE)
+
+#define MAX_PIN 50
+_FL_DEFPIN(0); _FL_DEFPIN(1); _FL_DEFPIN(3); _FL_DEFPIN(4);
+_FL_DEFPIN(5); _FL_DEFPIN(6); _FL_DEFPIN(7); _FL_DEFPIN(8); _FL_DEFPIN(9);
+_FL_DEFPIN(10); _FL_DEFPIN(11); _FL_DEFPIN(12); _FL_DEFPIN(13); _FL_DEFPIN(14);
+_FL_DEFPIN(15); _FL_DEFPIN(17);
+_FL_DEFPIN(20); _FL_DEFPIN(21); _FL_DEFPIN(22); _FL_DEFPIN(23); _FL_DEFPIN(24);
+_FL_DEFPIN(25); _FL_DEFPIN(26); _FL_DEFPIN(27); _FL_DEFPIN(28); _FL_DEFPIN(29);
+_FL_DEFPIN(33);
+_FL_DEFPIN(36); _FL_DEFPIN(37); _FL_DEFPIN(38); _FL_DEFPIN(39);
+_FL_DEFPIN(40); _FL_DEFPIN(42); _FL_DEFPIN(43); _FL_DEFPIN(44);
+_FL_DEFPIN(46); _FL_DEFPIN(47); _FL_DEFPIN(48); _FL_DEFPIN(49);
+
+#define HAS_HARDWARE_PIN_SUPPORT 1
+
+#elif defined(ARDUINO_SFE_EDGE2)
+
+#define MAX_PIN 50
+_FL_DEFPIN(0);
+_FL_DEFPIN(5); _FL_DEFPIN(6); _FL_DEFPIN(7); _FL_DEFPIN(8); _FL_DEFPIN(9);
+_FL_DEFPIN(11); _FL_DEFPIN(12); _FL_DEFPIN(13); _FL_DEFPIN(14);
+_FL_DEFPIN(15); _FL_DEFPIN(16); _FL_DEFPIN(17); _FL_DEFPIN(18); _FL_DEFPIN(19);
+_FL_DEFPIN(20); _FL_DEFPIN(21); _FL_DEFPIN(23);
+_FL_DEFPIN(25); _FL_DEFPIN(26); _FL_DEFPIN(27); _FL_DEFPIN(28); _FL_DEFPIN(29);
+_FL_DEFPIN(31); _FL_DEFPIN(32); _FL_DEFPIN(33); _FL_DEFPIN(34);
+_FL_DEFPIN(35); _FL_DEFPIN(37); _FL_DEFPIN(39);
+_FL_DEFPIN(40); _FL_DEFPIN(41); _FL_DEFPIN(42); _FL_DEFPIN(43); _FL_DEFPIN(44);
+_FL_DEFPIN(45); _FL_DEFPIN(48); _FL_DEFPIN(49);
+
+#define HAS_HARDWARE_PIN_SUPPORT 1
+
+#elif defined(ARDUINO_AM_AP3_SFE_BB_ARTEMIS)
+
+#define MAX_PIN 32
+_FL_DEFPIN(0); _FL_DEFPIN(1); _FL_DEFPIN(2); _FL_DEFPIN(3); _FL_DEFPIN(4);
+_FL_DEFPIN(5); _FL_DEFPIN(6); _FL_DEFPIN(7); _FL_DEFPIN(8); _FL_DEFPIN(9);
+_FL_DEFPIN(10); _FL_DEFPIN(11); _FL_DEFPIN(12); _FL_DEFPIN(13); _FL_DEFPIN(14);
+_FL_DEFPIN(15); _FL_DEFPIN(16); _FL_DEFPIN(17); _FL_DEFPIN(18); _FL_DEFPIN(19);
+_FL_DEFPIN(20); _FL_DEFPIN(21); _FL_DEFPIN(22); _FL_DEFPIN(23); _FL_DEFPIN(24);
+_FL_DEFPIN(25); _FL_DEFPIN(26); _FL_DEFPIN(27); _FL_DEFPIN(28); _FL_DEFPIN(29);
+_FL_DEFPIN(30); _FL_DEFPIN(31);
+
+#define SPI_DATA  MOSI
+#define SPI_CLOCK SCK
+
+#define HAS_HARDWARE_PIN_SUPPORT 1
+
+#elif defined(ARDUINO_AM_AP3_SFE_BB_ARTEMIS_NANO)
+
+#define MAX_PIN 24
+_FL_DEFPIN(0); _FL_DEFPIN(1); _FL_DEFPIN(2); _FL_DEFPIN(3); _FL_DEFPIN(4);
+_FL_DEFPIN(5); _FL_DEFPIN(6); _FL_DEFPIN(7); _FL_DEFPIN(8); _FL_DEFPIN(9);
+_FL_DEFPIN(10); _FL_DEFPIN(11); _FL_DEFPIN(12); _FL_DEFPIN(13); _FL_DEFPIN(14);
+_FL_DEFPIN(15); _FL_DEFPIN(16); _FL_DEFPIN(17); _FL_DEFPIN(18); _FL_DEFPIN(19);
+_FL_DEFPIN(20); _FL_DEFPIN(21); _FL_DEFPIN(22); _FL_DEFPIN(23);
+
+#define SPI_DATA  MOSI
+#define SPI_CLOCK SCK
+
+#define HAS_HARDWARE_PIN_SUPPORT 1
+
+#elif defined(ARDUINO_AM_AP3_SFE_THING_PLUS)
+
+#define MAX_PIN 29
+_FL_DEFPIN(0); _FL_DEFPIN(1); _FL_DEFPIN(2); _FL_DEFPIN(3); _FL_DEFPIN(4);
+_FL_DEFPIN(5); _FL_DEFPIN(6); _FL_DEFPIN(7); _FL_DEFPIN(8); _FL_DEFPIN(9);
+_FL_DEFPIN(10); _FL_DEFPIN(11); _FL_DEFPIN(12); _FL_DEFPIN(13); _FL_DEFPIN(14);
+_FL_DEFPIN(15); _FL_DEFPIN(16); _FL_DEFPIN(17); _FL_DEFPIN(18); _FL_DEFPIN(19);
+_FL_DEFPIN(20); _FL_DEFPIN(21); _FL_DEFPIN(22); _FL_DEFPIN(23); _FL_DEFPIN(24);
+_FL_DEFPIN(25); _FL_DEFPIN(26); _FL_DEFPIN(27); _FL_DEFPIN(28);
+
+#define SPI_DATA  MOSI
+#define SPI_CLOCK SCK
+
+#define HAS_HARDWARE_PIN_SUPPORT 1
+
+#elif defined(ARDUINO_AM_AP3_SFE_BB_ARTEMIS_ATP) || defined(ARDUINO_SFE_ARTEMIS)
+
+#define MAX_PIN 50 // AP3_VARIANT_NUM_PINS
+// Pin definitions taken from (e.g.) C:\Users\...\AppData\Local\Arduino15\packages\SparkFun\hardware\apollo3\1.0.30\variants\redboard_artemis_atp\config\variant.cpp
+// ap3_variant_pinmap maps pins to pads. FastLED expects pin numbers so we'll use those.
+_FL_DEFPIN(0); _FL_DEFPIN(1); _FL_DEFPIN(2); _FL_DEFPIN(3); _FL_DEFPIN(4);
+_FL_DEFPIN(5); _FL_DEFPIN(6); _FL_DEFPIN(7); _FL_DEFPIN(8); _FL_DEFPIN(9);
+_FL_DEFPIN(10); _FL_DEFPIN(11); _FL_DEFPIN(12); _FL_DEFPIN(13); _FL_DEFPIN(14);
+_FL_DEFPIN(15); _FL_DEFPIN(16); _FL_DEFPIN(17); _FL_DEFPIN(18); _FL_DEFPIN(19);
+_FL_DEFPIN(20); _FL_DEFPIN(21); _FL_DEFPIN(22); _FL_DEFPIN(23); _FL_DEFPIN(24);
+_FL_DEFPIN(25); _FL_DEFPIN(26); _FL_DEFPIN(27); _FL_DEFPIN(28); _FL_DEFPIN(29);
+_FL_DEFPIN(31); _FL_DEFPIN(32); _FL_DEFPIN(33); _FL_DEFPIN(34);
+_FL_DEFPIN(35); _FL_DEFPIN(36); _FL_DEFPIN(37); _FL_DEFPIN(38); _FL_DEFPIN(39);
+_FL_DEFPIN(40); _FL_DEFPIN(41); _FL_DEFPIN(42); _FL_DEFPIN(43); _FL_DEFPIN(44);
+_FL_DEFPIN(45); _FL_DEFPIN(47); _FL_DEFPIN(48); _FL_DEFPIN(49);
+
+//#define SPI_DATA  MOSI
+//#define SPI_CLOCK SCK
+
+#define HAS_HARDWARE_PIN_SUPPORT 1
+
+#else
+
+#error "Unrecognised APOLLO3 board!"
+
+#endif
+
+#endif // FASTLED_FORCE_SOFTWARE_PINS
+
+FASTLED_NAMESPACE_END
+
+#endif // __INC_FASTPIN_APOLLO3_H
diff --git a/platforms/apollo3/fastspi_apollo3.h b/platforms/apollo3/fastspi_apollo3.h
new file mode 100644
index 0000000000..97b5b82536
--- /dev/null
+++ b/platforms/apollo3/fastspi_apollo3.h
@@ -0,0 +1,18 @@
+#ifndef __INC_FASTSPI_APOLLO3_H
+#define __INC_FASTSPI_APOLLO3_H
+
+FASTLED_NAMESPACE_BEGIN
+
+#if defined(AVR_HARDWARE_SPI)
+
+
+#else
+	
+// #define FASTLED_FORCE_SOFTWARE_SPI
+
+#endif
+
+FASTLED_NAMESPACE_END;
+
+
+#endif
diff --git a/platforms/apollo3/led_sysdefs_apollo3.h b/platforms/apollo3/led_sysdefs_apollo3.h
new file mode 100644
index 0000000000..7b66f7786a
--- /dev/null
+++ b/platforms/apollo3/led_sysdefs_apollo3.h
@@ -0,0 +1,39 @@
+#ifndef __INC_LED_SYSDEFS_APOLLO3_H
+#define __INC_LED_SYSDEFS_APOLLO3_H
+
+#define FASTLED_APOLLO3
+
+#ifndef INTERRUPT_THRESHOLD
+#define INTERRUPT_THRESHOLD 1
+#endif
+
+// Default to allowing interrupts
+#ifndef FASTLED_ALLOW_INTERRUPTS
+#define FASTLED_ALLOW_INTERRUPTS 1
+#endif
+
+#if FASTLED_ALLOW_INTERRUPTS == 1
+#define FASTLED_ACCURATE_CLOCK
+#endif
+
+#ifndef F_CPU
+#define F_CPU 48000000
+#endif
+
+// Default to NOT using PROGMEM
+#ifndef FASTLED_USE_PROGMEM
+#define FASTLED_USE_PROGMEM 0
+#endif
+
+// data type defs
+typedef volatile       uint8_t RoReg; /**< Read only 8-bit register (volatile const unsigned int) */
+typedef volatile       uint8_t RwReg; /**< Read-Write 8-bit register (volatile unsigned int) */
+
+#define FASTLED_NO_PINMAP
+
+// reusing/abusing cli/sei defs for due
+// These should be fine for the Apollo3. It has its own defines in cmsis_gcc.h
+#define cli()  __disable_irq();  __disable_fault_irq();
+#define sei() __enable_irq();  __enable_fault_irq();
+
+#endif

From 4d77ee9464003327fb2c9bea0afebe3b8200cf1e Mon Sep 17 00:00:00 2001
From: Paul <5690545+PaulZC@users.noreply.github.com>
Date: Tue, 7 Apr 2020 11:24:04 +0100
Subject: [PATCH 134/204] Added fastgpio functionality.

---
 platforms/apollo3/fastpin_apollo3.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/platforms/apollo3/fastpin_apollo3.h b/platforms/apollo3/fastpin_apollo3.h
index 712786c675..e901d0e4ad 100644
--- a/platforms/apollo3/fastpin_apollo3.h
+++ b/platforms/apollo3/fastpin_apollo3.h
@@ -16,20 +16,20 @@ template<uint8_t PIN> class _APOLLO3PIN {
   typedef volatile uint32_t * port_ptr_t;
   typedef uint32_t port_t;
 
-  inline static void setOutput() { pinMode(PIN, OUTPUT); }
-  inline static void setInput() { pinMode(PIN, INPUT); }
+  inline static void setOutput() { pinMode(PIN, OUTPUT); am_hal_gpio_fastgpio_enable(PIN); }
+  inline static void setInput() { am_hal_gpio_fastgpio_disable(PIN); pinMode(PIN, INPUT); }
 
-  inline static void hi() __attribute__ ((always_inline)) { digitalWrite(PIN, HIGH); }
-  inline static void lo() __attribute__ ((always_inline)) { digitalWrite(PIN, LOW); }
-  inline static void set(register port_t val) __attribute__ ((always_inline)) { digitalWrite(PIN, val); }
+  inline static void hi() __attribute__ ((always_inline)) { am_hal_gpio_fastgpio_set(PIN); } // { digitalWrite(PIN, HIGH); }
+  inline static void lo() __attribute__ ((always_inline)) { am_hal_gpio_fastgpio_clr(PIN); } // { digitalWrite(PIN, LOW); }
+  inline static void set(register port_t val) __attribute__ ((always_inline)) { if(val) { am_hal_gpio_fastgpio_set(PIN); } else { am_hal_gpio_fastgpio_clr(PIN); } } // { digitalWrite(PIN, val); }
 
   inline static void strobe() __attribute__ ((always_inline)) { toggle(); toggle(); }
 
-  inline static void toggle() __attribute__ ((always_inline)) { if(digitalRead(PIN)) { lo(); } else { hi(); } }
+  inline static void toggle() __attribute__ ((always_inline)) { if( am_hal_gpio_fastgpio_read(PIN)) { lo(); } else { hi(); } }
 
   inline static void hi(register port_ptr_t port) __attribute__ ((always_inline)) { hi(); }
   inline static void lo(register port_ptr_t port) __attribute__ ((always_inline)) { lo(); }
-  inline static void fastset(register port_ptr_t port, register port_t val) __attribute__ ((always_inline)) { digitalWrite(PIN, val); }
+  inline static void fastset(register port_ptr_t port, register port_t val) __attribute__ ((always_inline)) { set(val); }
 
   inline static port_t hival() __attribute__ ((always_inline)) { return 0; }
   inline static port_t loval() __attribute__ ((always_inline)) { return 0; }

From 88200eb07c6aade620292637fb53955a93d661b4 Mon Sep 17 00:00:00 2001
From: Paul <5690545+PaulZC@users.noreply.github.com>
Date: Tue, 7 Apr 2020 11:28:00 +0100
Subject: [PATCH 135/204] Removing clockless

---
 platforms/apollo3/clockless_apollo3.h | 14 --------------
 platforms/apollo3/fastled_apollo3.h   |  1 -
 2 files changed, 15 deletions(-)
 delete mode 100644 platforms/apollo3/clockless_apollo3.h

diff --git a/platforms/apollo3/clockless_apollo3.h b/platforms/apollo3/clockless_apollo3.h
deleted file mode 100644
index 5ed0e27d60..0000000000
--- a/platforms/apollo3/clockless_apollo3.h
+++ /dev/null
@@ -1,14 +0,0 @@
-#ifndef __INC_CLOCKLESS_APOLLO3_H
-#define __INC_CLOCKLESS_APOLLO3_H
-
-FASTLED_NAMESPACE_BEGIN
-
-#if defined(FASTLED_APOLLO3)
-
-
-
-#endif
-
-FASTLED_NAMESPACE_END
-
-#endif
diff --git a/platforms/apollo3/fastled_apollo3.h b/platforms/apollo3/fastled_apollo3.h
index 47f68ed459..9ed935cca9 100644
--- a/platforms/apollo3/fastled_apollo3.h
+++ b/platforms/apollo3/fastled_apollo3.h
@@ -3,6 +3,5 @@
 
 #include "fastpin_apollo3.h"
 //#include "fastspi_apollo3.h"
-//#include "clockless_apollo3.h"
 
 #endif

From 7972c889655ece77beb4fd7fd5dc2d36dd45b13f Mon Sep 17 00:00:00 2001
From: Paul <5690545+PaulZC@users.noreply.github.com>
Date: Tue, 7 Apr 2020 11:37:15 +0100
Subject: [PATCH 136/204] Revert "Removing clockless"

This reverts commit 88200eb07c6aade620292637fb53955a93d661b4.
---
 platforms/apollo3/clockless_apollo3.h | 14 ++++++++++++++
 platforms/apollo3/fastled_apollo3.h   |  1 +
 2 files changed, 15 insertions(+)
 create mode 100644 platforms/apollo3/clockless_apollo3.h

diff --git a/platforms/apollo3/clockless_apollo3.h b/platforms/apollo3/clockless_apollo3.h
new file mode 100644
index 0000000000..5ed0e27d60
--- /dev/null
+++ b/platforms/apollo3/clockless_apollo3.h
@@ -0,0 +1,14 @@
+#ifndef __INC_CLOCKLESS_APOLLO3_H
+#define __INC_CLOCKLESS_APOLLO3_H
+
+FASTLED_NAMESPACE_BEGIN
+
+#if defined(FASTLED_APOLLO3)
+
+
+
+#endif
+
+FASTLED_NAMESPACE_END
+
+#endif
diff --git a/platforms/apollo3/fastled_apollo3.h b/platforms/apollo3/fastled_apollo3.h
index 9ed935cca9..47f68ed459 100644
--- a/platforms/apollo3/fastled_apollo3.h
+++ b/platforms/apollo3/fastled_apollo3.h
@@ -3,5 +3,6 @@
 
 #include "fastpin_apollo3.h"
 //#include "fastspi_apollo3.h"
+//#include "clockless_apollo3.h"
 
 #endif

From 02e3091d281dcc4a81458337c882915840016009 Mon Sep 17 00:00:00 2001
From: Paul <5690545+PaulZC@users.noreply.github.com>
Date: Tue, 7 Apr 2020 13:16:40 +0100
Subject: [PATCH 137/204] FastGPIO SPI : work in progress

---
 fastspi.h                             |   7 +-
 platforms/apollo3/clockless_apollo3.h |  54 ++++++++++++
 platforms/apollo3/fastled_apollo3.h   |   4 +-
 platforms/apollo3/fastpin_apollo3.h   |   4 +-
 platforms/apollo3/fastspi_apollo3.h   | 113 ++++++++++++++++++++++++--
 5 files changed, 171 insertions(+), 11 deletions(-)

diff --git a/fastspi.h b/fastspi.h
index 38e8eabf07..2bf5d6d0ff 100644
--- a/fastspi.h
+++ b/fastspi.h
@@ -51,7 +51,12 @@ class SPIOutput : public NRF52SPIOutput<_DATA_PIN, _CLOCK_PIN, _SPI_CLOCK_DIVIDE
 
 #if defined(SPI_DATA) && defined(SPI_CLOCK)
 
-#if defined(FASTLED_TEENSY3) && defined(ARM_HARDWARE_SPI)
+#if defined(FASTLED_APOLLO3)
+
+template<uint32_t SPI_SPEED>
+class SPIOutput<SPI_DATA, SPI_CLOCK, SPI_SPEED> : public APOLLO3HardwareSPIOutput<SPI_DATA, SPI_CLOCK, SPI_SPEED> {};
+
+#elif defined(FASTLED_TEENSY3) && defined(ARM_HARDWARE_SPI)
 
 template<uint32_t SPI_SPEED>
 class SPIOutput<SPI_DATA, SPI_CLOCK, SPI_SPEED> : public ARMHardwareSPIOutput<SPI_DATA, SPI_CLOCK, SPI_SPEED, 0x4002C000> {};
diff --git a/platforms/apollo3/clockless_apollo3.h b/platforms/apollo3/clockless_apollo3.h
index 5ed0e27d60..7791056434 100644
--- a/platforms/apollo3/clockless_apollo3.h
+++ b/platforms/apollo3/clockless_apollo3.h
@@ -5,7 +5,61 @@ FASTLED_NAMESPACE_BEGIN
 
 #if defined(FASTLED_APOLLO3)
 
+/*
 
+#define FASTLED_HAS_CLOCKLESS 1
+
+template <uint8_t DATA_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = RGB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 50>
+class ClocklessController : public CPixelLEDController<RGB_ORDER> {
+  typedef typename FastPinBB<DATA_PIN>::port_ptr_t data_ptr_t;
+  typedef typename FastPinBB<DATA_PIN>::port_t data_t;
+
+//  data_t mPinMask;
+//  data_ptr_t mPort;
+  CMinWait<WAIT_TIME> mWait;
+public:
+  virtual void init() {
+    FastPinBB<DATA_PIN>::setOutput();
+    mPinMask = FastPinBB<DATA_PIN>::mask();
+    mPort = FastPinBB<DATA_PIN>::port();
+  }
+
+	virtual uint16_t getMaxRefreshRate() const { return 400; }
+
+  virtual void showPixels(PixelController<RGB_ORDER> & pixels) {
+    mWait.wait();
+    cli();
+    if(!showRGBInternal(pixels)) {
+      sei(); delayMicroseconds(WAIT_TIME); cli();
+      showRGBInternal(pixels);
+    }
+    sei();
+    mWait.mark();
+  }
+
+  // This method is made static to force making register Y available to use for data on AVR - if the method is non-static, then
+  // gcc will use register Y for the this pointer.
+  static uint32_t showRGBInternal(PixelController<RGB_ORDER> pixels) {
+    struct M0ClocklessData data;
+    data.d[0] = pixels.d[0];
+    data.d[1] = pixels.d[1];
+    data.d[2] = pixels.d[2];
+    data.s[0] = pixels.mScale[0];
+    data.s[1] = pixels.mScale[1];
+    data.s[2] = pixels.mScale[2];
+    data.e[0] = pixels.e[0];
+    data.e[1] = pixels.e[1];
+    data.e[2] = pixels.e[2];
+    data.adj = pixels.mAdvance;
+
+    typename FastPin<DATA_PIN>::port_ptr_t portBase = FastPin<DATA_PIN>::port();
+    return showLedData<8,4,T1,T2,T3,RGB_ORDER, WAIT_TIME>(portBase, FastPin<DATA_PIN>::mask(), pixels.mData, pixels.mLen, &data);
+  }
+
+
+};
+
+*/
 
 #endif
 
diff --git a/platforms/apollo3/fastled_apollo3.h b/platforms/apollo3/fastled_apollo3.h
index 47f68ed459..4c727dd0c5 100644
--- a/platforms/apollo3/fastled_apollo3.h
+++ b/platforms/apollo3/fastled_apollo3.h
@@ -2,7 +2,7 @@
 #define __INC_FASTLED_APOLLO3_H
 
 #include "fastpin_apollo3.h"
-//#include "fastspi_apollo3.h"
-//#include "clockless_apollo3.h"
+#include "fastspi_apollo3.h"
+#include "clockless_apollo3.h"
 
 #endif
diff --git a/platforms/apollo3/fastpin_apollo3.h b/platforms/apollo3/fastpin_apollo3.h
index e901d0e4ad..63b4b7626e 100644
--- a/platforms/apollo3/fastpin_apollo3.h
+++ b/platforms/apollo3/fastpin_apollo3.h
@@ -133,8 +133,8 @@ _FL_DEFPIN(35); _FL_DEFPIN(36); _FL_DEFPIN(37); _FL_DEFPIN(38); _FL_DEFPIN(39);
 _FL_DEFPIN(40); _FL_DEFPIN(41); _FL_DEFPIN(42); _FL_DEFPIN(43); _FL_DEFPIN(44);
 _FL_DEFPIN(45); _FL_DEFPIN(47); _FL_DEFPIN(48); _FL_DEFPIN(49);
 
-//#define SPI_DATA  MOSI
-//#define SPI_CLOCK SCK
+#define SPI_DATA  MOSI
+#define SPI_CLOCK SCK
 
 #define HAS_HARDWARE_PIN_SUPPORT 1
 
diff --git a/platforms/apollo3/fastspi_apollo3.h b/platforms/apollo3/fastspi_apollo3.h
index 97b5b82536..12ca9e0680 100644
--- a/platforms/apollo3/fastspi_apollo3.h
+++ b/platforms/apollo3/fastspi_apollo3.h
@@ -1,18 +1,119 @@
 #ifndef __INC_FASTSPI_APOLLO3_H
 #define __INC_FASTSPI_APOLLO3_H
 
+//#include "FastLED.h"
+
 FASTLED_NAMESPACE_BEGIN
 
-#if defined(AVR_HARDWARE_SPI)
+template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint32_t SPI_CLOCK_DIVIDER>
+class APOLLO3HardwareSPIOutput {
+	Selectable *m_pSelect;
 
+public:
+	APOLLO3HardwareSPIOutput() { m_pSelect = NULL; }
+	APOLLO3HArdwareSPIOutput(Selectable *pSelect) { m_pSelect = pSelect; }
 
-#else
-	
-// #define FASTLED_FORCE_SOFTWARE_SPI
+	// set the object representing the selectable
+	void setSelect(Selectable *pSelect) { /* TODO */ }
 
-#endif
+	// initialize the SPI subssytem
+	void init() {
+		pinMode(_DATA_PIN, OUTPUT); am_hal_gpio_fastgpio_enable(_DATA_PIN);
+		pinMode(_CLOCK_PIN, OUTPUT); am_hal_gpio_fastgpio_enable(_CLOCK_PIN);
+		//enableBurstMode(); //Optional. Go to 96MHz. Roughly doubles the speed of shiftOut and fastShiftOut
+		enableFastShift(_DATA_PIN, _CLOCK_PIN);
+	}
+
+	// latch the CS select
+	void inline select() { /* TODO */ }
+
+	// release the CS select
+	void inline release() { /* TODO */ }
+
+	// wait until all queued up data has been written
+	static void waitFully() { /* TODO */ }
+
+	// write a byte out via SPI (returns immediately on writing register)
+	static void writeByte(uint8_t b) {
+		fastShiftOut(_DATA_PIN, _CLOCK_PIN, MSBFIRST, b);
+	}
+
+	// write a word out via SPI (returns immediately on writing register)
+	static void writeWord(uint16_t w) {
+		writeByte((uint8_t)((w >> 8) && 0xff));
+		writeByte((uint8_t)(w && 0xff));
+	}
+
+	// A raw set of writing byte values, assumes setup/init/waiting done elsewhere
+	static void writeBytesValueRaw(uint8_t value, int len) {
+		while(len--) { writeByte(value); }
+	}
+
+	// A full cycle of writing a value for len bytes, including select, release, and waiting
+	void writeBytesValue(uint8_t value, int len) {
+		select(); writeBytesValueRaw(value, len); release();
+	}
+
+	// A full cycle of writing a value for len bytes, including select, release, and waiting
+	template <class D> void writeBytes(register uint8_t *data, int len) {
+		uint8_t *end = data + len;
+		select();
+		// could be optimized to write 16bit words out instead of 8bit bytes
+		while(data != end) {
+			writeByte(D::adjust(*data++));
+		}
+		D::postBlock(len);
+		waitFully();
+		release();
+	}
+
+	// A full cycle of writing a value for len bytes, including select, release, and waiting
+	void writeBytes(register uint8_t *data, int len) { writeBytes<DATA_NOP>(data, len); }
+
+	// write a single bit out, which bit from the passed in byte is determined by template parameter
+	template <uint8_t BIT> inline static void writeBit(uint8_t b) {
+		waitFully();
+		if(b & (1 << BIT)) {
+			FastPin<_DATA_PIN>::hi();
+		} else {
+			FastPin<_DATA_PIN>::lo();
+		}
+
+		FastPin<_CLOCK_PIN>::hi();
+		FastPin<_CLOCK_PIN>::lo();
+	}
+
+	// write a block of uint8_ts out in groups of three.  len is the total number of uint8_ts to write out.  The template
+	// parameters indicate how many uint8_ts to skip at the beginning and/or end of each grouping
+	template <uint8_t FLAGS, class D, EOrder RGB_ORDER> void writePixels(PixelController<RGB_ORDER> pixels) {
+		select();
+
+		int len = pixels.mLen; // unused?
+
+		if(FLAGS & FLAG_START_BIT) {
+			while(pixels.has(1)) {
+				writeBit<0>(1);
+				writeByte(D::adjust(pixels.loadAndScale0()));
+				writeByte(D::adjust(pixels.loadAndScale1()));
+				writeByte(D::adjust(pixels.loadAndScale2()));
+				pixels.advanceData();
+				pixels.stepDithering();
+			}
+		} else {
+			while(pixels.has(1)) {
+				writeByte(D::adjust(pixels.loadAndScale0()));
+				writeByte(D::adjust(pixels.loadAndScale1()));
+				writeByte(D::adjust(pixels.loadAndScale2()));
+				pixels.advanceData();
+				pixels.stepDithering();
+			}
+		}
+		D::postBlock(len);
+		release();
+	}
 
-FASTLED_NAMESPACE_END;
+};
 
+FASTLED_NAMESPACE_END
 
 #endif

From 19db4dffcf3e15583053fb13d5fd0746368e5d04 Mon Sep 17 00:00:00 2001
From: Paul <5690545+PaulZC@users.noreply.github.com>
Date: Tue, 7 Apr 2020 16:11:32 +0100
Subject: [PATCH 138/204] SPI now works but is currently no faster than bit
 banging...

---
 platforms/apollo3/fastpin_apollo3.h | 25 +++++++++------
 platforms/apollo3/fastspi_apollo3.h | 47 ++++++++++++++---------------
 2 files changed, 39 insertions(+), 33 deletions(-)

diff --git a/platforms/apollo3/fastpin_apollo3.h b/platforms/apollo3/fastpin_apollo3.h
index 63b4b7626e..a489ff7802 100644
--- a/platforms/apollo3/fastpin_apollo3.h
+++ b/platforms/apollo3/fastpin_apollo3.h
@@ -82,9 +82,10 @@ _FL_DEFPIN(15); _FL_DEFPIN(16); _FL_DEFPIN(17); _FL_DEFPIN(18); _FL_DEFPIN(19);
 _FL_DEFPIN(20); _FL_DEFPIN(21); _FL_DEFPIN(22); _FL_DEFPIN(23); _FL_DEFPIN(24);
 _FL_DEFPIN(25); _FL_DEFPIN(26); _FL_DEFPIN(27); _FL_DEFPIN(28); _FL_DEFPIN(29);
 _FL_DEFPIN(30); _FL_DEFPIN(31);
-
-#define SPI_DATA  MOSI
-#define SPI_CLOCK SCK
+//These two lines are commented out as dedicates SPI support using fastShiftOut produces
+//glitchy results that is slower than bit banging. TO DO: implement 'proper' SPI functionality
+//#define SPI_DATA MOSI
+//#define SPI_CLOCK SCK
 
 #define HAS_HARDWARE_PIN_SUPPORT 1
 
@@ -97,8 +98,10 @@ _FL_DEFPIN(10); _FL_DEFPIN(11); _FL_DEFPIN(12); _FL_DEFPIN(13); _FL_DEFPIN(14);
 _FL_DEFPIN(15); _FL_DEFPIN(16); _FL_DEFPIN(17); _FL_DEFPIN(18); _FL_DEFPIN(19);
 _FL_DEFPIN(20); _FL_DEFPIN(21); _FL_DEFPIN(22); _FL_DEFPIN(23);
 
-#define SPI_DATA  MOSI
-#define SPI_CLOCK SCK
+//These two lines are commented out as dedicates SPI support using fastShiftOut produces
+//glitchy results that is slower than bit banging. TO DO: implement 'proper' SPI functionality
+//#define SPI_DATA MOSI
+//#define SPI_CLOCK SCK
 
 #define HAS_HARDWARE_PIN_SUPPORT 1
 
@@ -112,8 +115,10 @@ _FL_DEFPIN(15); _FL_DEFPIN(16); _FL_DEFPIN(17); _FL_DEFPIN(18); _FL_DEFPIN(19);
 _FL_DEFPIN(20); _FL_DEFPIN(21); _FL_DEFPIN(22); _FL_DEFPIN(23); _FL_DEFPIN(24);
 _FL_DEFPIN(25); _FL_DEFPIN(26); _FL_DEFPIN(27); _FL_DEFPIN(28);
 
-#define SPI_DATA  MOSI
-#define SPI_CLOCK SCK
+//These two lines are commented out as dedicates SPI support using fastShiftOut produces
+//glitchy results that is slower than bit banging. TO DO: implement 'proper' SPI functionality
+//#define SPI_DATA MOSI
+//#define SPI_CLOCK SCK
 
 #define HAS_HARDWARE_PIN_SUPPORT 1
 
@@ -133,8 +138,10 @@ _FL_DEFPIN(35); _FL_DEFPIN(36); _FL_DEFPIN(37); _FL_DEFPIN(38); _FL_DEFPIN(39);
 _FL_DEFPIN(40); _FL_DEFPIN(41); _FL_DEFPIN(42); _FL_DEFPIN(43); _FL_DEFPIN(44);
 _FL_DEFPIN(45); _FL_DEFPIN(47); _FL_DEFPIN(48); _FL_DEFPIN(49);
 
-#define SPI_DATA  MOSI
-#define SPI_CLOCK SCK
+//These two lines are commented out as dedicates SPI support using fastShiftOut produces
+//glitchy results that is slower than bit banging. TO DO: implement 'proper' SPI functionality
+//#define SPI_DATA MOSI
+//#define SPI_CLOCK SCK
 
 #define HAS_HARDWARE_PIN_SUPPORT 1
 
diff --git a/platforms/apollo3/fastspi_apollo3.h b/platforms/apollo3/fastspi_apollo3.h
index 12ca9e0680..a17a165d2e 100644
--- a/platforms/apollo3/fastspi_apollo3.h
+++ b/platforms/apollo3/fastspi_apollo3.h
@@ -1,7 +1,7 @@
 #ifndef __INC_FASTSPI_APOLLO3_H
 #define __INC_FASTSPI_APOLLO3_H
 
-//#include "FastLED.h"
+#include "FastLED.h"
 
 FASTLED_NAMESPACE_BEGIN
 
@@ -11,15 +11,13 @@ class APOLLO3HardwareSPIOutput {
 
 public:
 	APOLLO3HardwareSPIOutput() { m_pSelect = NULL; }
-	APOLLO3HArdwareSPIOutput(Selectable *pSelect) { m_pSelect = pSelect; }
+	APOLLO3HardwareSPIOutput(Selectable *pSelect) { m_pSelect = pSelect; }
 
 	// set the object representing the selectable
-	void setSelect(Selectable *pSelect) { /* TODO */ }
+	void setSelect(Selectable *pSelect) { m_pSelect = pSelect; }
 
 	// initialize the SPI subssytem
 	void init() {
-		pinMode(_DATA_PIN, OUTPUT); am_hal_gpio_fastgpio_enable(_DATA_PIN);
-		pinMode(_CLOCK_PIN, OUTPUT); am_hal_gpio_fastgpio_enable(_CLOCK_PIN);
 		//enableBurstMode(); //Optional. Go to 96MHz. Roughly doubles the speed of shiftOut and fastShiftOut
 		enableFastShift(_DATA_PIN, _CLOCK_PIN);
 	}
@@ -40,8 +38,8 @@ class APOLLO3HardwareSPIOutput {
 
 	// write a word out via SPI (returns immediately on writing register)
 	static void writeWord(uint16_t w) {
-		writeByte((uint8_t)((w >> 8) && 0xff));
-		writeByte((uint8_t)(w && 0xff));
+		writeByte((uint8_t)((w >> 8) & 0xff));
+		writeByte((uint8_t)(w & 0xff));
 	}
 
 	// A raw set of writing byte values, assumes setup/init/waiting done elsewhere
@@ -51,20 +49,22 @@ class APOLLO3HardwareSPIOutput {
 
 	// A full cycle of writing a value for len bytes, including select, release, and waiting
 	void writeBytesValue(uint8_t value, int len) {
-		select(); writeBytesValueRaw(value, len); release();
+		//select();
+		writeBytesValueRaw(value, len);
+		//release();
 	}
 
 	// A full cycle of writing a value for len bytes, including select, release, and waiting
 	template <class D> void writeBytes(register uint8_t *data, int len) {
 		uint8_t *end = data + len;
-		select();
+		//select();
 		// could be optimized to write 16bit words out instead of 8bit bytes
 		while(data != end) {
 			writeByte(D::adjust(*data++));
 		}
 		D::postBlock(len);
-		waitFully();
-		release();
+		//waitFully();
+		//release();
 	}
 
 	// A full cycle of writing a value for len bytes, including select, release, and waiting
@@ -72,7 +72,7 @@ class APOLLO3HardwareSPIOutput {
 
 	// write a single bit out, which bit from the passed in byte is determined by template parameter
 	template <uint8_t BIT> inline static void writeBit(uint8_t b) {
-		waitFully();
+		//waitFully();
 		if(b & (1 << BIT)) {
 			FastPin<_DATA_PIN>::hi();
 		} else {
@@ -86,30 +86,29 @@ class APOLLO3HardwareSPIOutput {
 	// write a block of uint8_ts out in groups of three.  len is the total number of uint8_ts to write out.  The template
 	// parameters indicate how many uint8_ts to skip at the beginning and/or end of each grouping
 	template <uint8_t FLAGS, class D, EOrder RGB_ORDER> void writePixels(PixelController<RGB_ORDER> pixels) {
-		select();
+		//select();
 
-		int len = pixels.mLen; // unused?
+		int len = pixels.mLen;
 
-		if(FLAGS & FLAG_START_BIT) {
-			while(pixels.has(1)) {
+		//select();
+		while(pixels.has(1)) {
+			if(FLAGS & FLAG_START_BIT) {
 				writeBit<0>(1);
 				writeByte(D::adjust(pixels.loadAndScale0()));
 				writeByte(D::adjust(pixels.loadAndScale1()));
 				writeByte(D::adjust(pixels.loadAndScale2()));
-				pixels.advanceData();
-				pixels.stepDithering();
-			}
-		} else {
-			while(pixels.has(1)) {
+			} else {
 				writeByte(D::adjust(pixels.loadAndScale0()));
 				writeByte(D::adjust(pixels.loadAndScale1()));
 				writeByte(D::adjust(pixels.loadAndScale2()));
-				pixels.advanceData();
-				pixels.stepDithering();
 			}
+
+			pixels.advanceData();
+			pixels.stepDithering();
 		}
 		D::postBlock(len);
-		release();
+		//waitFully();
+		//release();
 	}
 
 };

From 47ff74f9f2a014f17ef23e0ef35ed1dc1049a4a5 Mon Sep 17 00:00:00 2001
From: Paul <5690545+PaulZC@users.noreply.github.com>
Date: Wed, 8 Apr 2020 16:37:34 +0100
Subject: [PATCH 139/204] Update clockless_apollo3.h

---
 platforms/apollo3/clockless_apollo3.h | 374 +++++++++++++++++++++++---
 1 file changed, 341 insertions(+), 33 deletions(-)

diff --git a/platforms/apollo3/clockless_apollo3.h b/platforms/apollo3/clockless_apollo3.h
index 7791056434..44587c7566 100644
--- a/platforms/apollo3/clockless_apollo3.h
+++ b/platforms/apollo3/clockless_apollo3.h
@@ -5,61 +5,369 @@ FASTLED_NAMESPACE_BEGIN
 
 #if defined(FASTLED_APOLLO3)
 
-/*
+#include "ap3_analog.h"
 
 #define FASTLED_HAS_CLOCKLESS 1
 
-template <uint8_t DATA_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = RGB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 50>
+template <int DATA_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = RGB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 50>
 class ClocklessController : public CPixelLEDController<RGB_ORDER> {
-  typedef typename FastPinBB<DATA_PIN>::port_ptr_t data_ptr_t;
-  typedef typename FastPinBB<DATA_PIN>::port_t data_t;
+	typedef typename FastPin<DATA_PIN>::port_ptr_t data_ptr_t;
+	typedef typename FastPin<DATA_PIN>::port_t data_t;
 
-//  data_t mPinMask;
-//  data_ptr_t mPort;
   CMinWait<WAIT_TIME> mWait;
+
 public:
-  virtual void init() {
-    FastPinBB<DATA_PIN>::setOutput();
-    mPinMask = FastPinBB<DATA_PIN>::mask();
-    mPort = FastPinBB<DATA_PIN>::port();
-  }
+	virtual void init() {
+		FastPin<DATA_PIN>::setOutput();
+	}
 
 	virtual uint16_t getMaxRefreshRate() const { return 400; }
 
-  virtual void showPixels(PixelController<RGB_ORDER> & pixels) {
+protected:
+
+	virtual void showPixels(PixelController<RGB_ORDER> & pixels) {
     mWait.wait();
-    cli();
-    if(!showRGBInternal(pixels)) {
+		if(!showRGBInternal(pixels)) {
       sei(); delayMicroseconds(WAIT_TIME); cli();
       showRGBInternal(pixels);
     }
-    sei();
     mWait.mark();
   }
 
-  // This method is made static to force making register Y available to use for data on AVR - if the method is non-static, then
-  // gcc will use register Y for the this pointer.
-  static uint32_t showRGBInternal(PixelController<RGB_ORDER> pixels) {
-    struct M0ClocklessData data;
-    data.d[0] = pixels.d[0];
-    data.d[1] = pixels.d[1];
-    data.d[2] = pixels.d[2];
-    data.s[0] = pixels.mScale[0];
-    data.s[1] = pixels.mScale[1];
-    data.s[2] = pixels.mScale[2];
-    data.e[0] = pixels.e[0];
-    data.e[1] = pixels.e[1];
-    data.e[2] = pixels.e[2];
-    data.adj = pixels.mAdvance;
-
-    typename FastPin<DATA_PIN>::port_ptr_t portBase = FastPin<DATA_PIN>::port();
-    return showLedData<8,4,T1,T2,T3,RGB_ORDER, WAIT_TIME>(portBase, FastPin<DATA_PIN>::mask(), pixels.mData, pixels.mLen, &data);
+	template<int BITS> __attribute__ ((always_inline)) inline static void writeBits(register uint8_t & b)  {
+    uint32_t clk = AM_HAL_CTIMER_HFRC_12MHZ;
+    uint32_t fw = 0;
+    uint32_t th = 0;
+    fw = (T1+T2+T3) * 10000 / 12;
+
+		for(register uint32_t i = BITS-1; i > 0; i--) {
+			if(b&0x80) {
+        th = T3 * 10000 / 12;
+			} else {
+        th = (T2 + T3) * 1000 / 12;
+			}
+      ap3_pwm_output_once(DATA_PIN, th, fw, clk);
+			b <<= 1;
+		}
+
+    if(b&0x80) {
+      th = T3 * 10000 / 12;
+    } else {
+      th = (T2 + T3) * 10000 / 12;
+    }
+    ap3_pwm_output_once(DATA_PIN, th, fw, clk);
+	}
+
+	static uint32_t showRGBInternal(PixelController<RGB_ORDER> pixels) {
+		// Setup the pixel controller and load/scale the first byte
+		pixels.preStepFirstByteDithering();
+		register uint8_t b = pixels.loadAndScale0();
+
+		cli();
+
+		while(pixels.has(1)) {
+			pixels.stepDithering();
+
+			#if (FASTLED_ALLOW_INTERRUPTS == 1)
+			cli();
+			#endif
+
+			// Write first byte, read next byte
+			writeBits<8+XTRA0>(b);
+			b = pixels.loadAndScale1();
+
+			// Write second byte, read 3rd byte
+			writeBits<8+XTRA0>(b);
+			b = pixels.loadAndScale2();
+
+			// Write third byte, read 1st byte of next pixel
+			writeBits<8+XTRA0>(b);
+			b = pixels.advanceAndLoadAndScale0();
+
+			#if (FASTLED_ALLOW_INTERRUPTS == 1)
+			sei();
+			#endif
+		};
+
+		sei();
+		return 1;
+	}
+
+  static bool ap3_pwm_is_running(uint32_t ui32TimerNumber, uint32_t ui32TimerSegment)
+  {
+    volatile uint32_t *pui32ConfigReg;
+    bool is_enabled = false;
+
+    //
+    // Find the correct control register.
+    //
+    pui32ConfigReg = (uint32_t *)CTIMERADDRn(CTIMER, ui32TimerNumber, CTRL0);
+
+    //
+    // Begin critical section while config registers are read and modified.
+    //
+    AM_CRITICAL_BEGIN
+
+    //
+    // Read the current value.
+    //
+    uint32_t ui32ConfigVal = *pui32ConfigReg;
+
+    //
+    // Check the "enable bit"
+    //
+    if (ui32ConfigVal & (CTIMER_CTRL0_TMRA0EN_Msk | CTIMER_CTRL0_TMRB0EN_Msk))
+    {
+        is_enabled = true;
+    }
+
+    //
+    // Done with critical section.
+    //
+    AM_CRITICAL_END
+
+    return is_enabled;
   }
 
+  static void ap3_pwm_wait_for_pulse(uint32_t timer, uint32_t segment, uint32_t output, uint32_t margin)
+  {
+
+      volatile uint32_t *pui32CompareReg;
+      volatile uint32_t ctimer_val;
+      uint32_t cmpr0;
 
+      // Only wait if the ctimer is running to avoid a deadlock
+      if (ap3_pwm_is_running(timer, segment))
+      {
+
+          // Get the comapre register address
+          if (segment == AM_HAL_CTIMER_TIMERA)
+          {
+              if (output == AM_HAL_CTIMER_OUTPUT_NORMAL)
+              {
+                  pui32CompareReg = (uint32_t *)CTIMERADDRn(CTIMER, timer, CMPRA0);
+              }
+              else
+              {
+                  pui32CompareReg = (uint32_t *)CTIMERADDRn(CTIMER, timer, CMPRAUXA0);
+              }
+          }
+          else
+          {
+              if (output == AM_HAL_CTIMER_OUTPUT_NORMAL)
+              {
+                  pui32CompareReg = (uint32_t *)CTIMERADDRn(CTIMER, timer, CMPRB0);
+              }
+              else
+              {
+                  pui32CompareReg = (uint32_t *)CTIMERADDRn(CTIMER, timer, CMPRAUXB0);
+              }
+          }
+
+          // Get the compare value
+          cmpr0 = ((uint32_t)(*(pui32CompareReg)) & 0x0000FFFF);
+
+          if (cmpr0)
+          { // Only wait when cmpr0 is greater than 0 to avoid an infinite while loop
+              // Wait for the timer value to be less than the compare value so that it is safe to change
+              ctimer_val = am_hal_ctimer_read(timer, segment);
+              while ((ctimer_val + 0) >= cmpr0)
+              {
+                  ctimer_val = am_hal_ctimer_read(timer, segment);
+              }
+          }
+      }
+  }
+
+  #define CTXPADNUM(ctx) ((CTx_tbl[ctx] >> 0) & 0x3f)
+  #define CTXPADFNC(ctx) ((CTx_tbl[ctx] >> 8) & 0x7)
+  #define CTX(pad, fn) ((fn << 8) | (pad << 0))
+  #define OUTC(timB, timN, N2) ((N2 << 4) | (timB << 3) | (timN << 0))
+  #define OUTCTIMN(ctx, n) (outcfg_tbl[ctx][n] & (0x7 << 0))
+  #define OUTCTIMB(ctx, n) (outcfg_tbl[ctx][n] & (0x1 << 3))
+  #define OUTCO2(ctx, n) (outcfg_tbl[ctx][n] & (0x1 << 4))
+
+  static ap3_err_t ap3_pwm_output_once(uint8_t pin, uint32_t th, uint32_t fw, uint32_t clk)
+  {
+    static const uint16_t CTx_tbl[32] =
+    {
+            CTX(12, 2), CTX(25, 2), CTX(13, 2), CTX(26, 2), CTX(18, 2), // 0 - 4
+            CTX(27, 2), CTX(19, 2), CTX(28, 2), CTX(5, 7), CTX(29, 2),  // 5 - 9
+            CTX(6, 5), CTX(30, 2), CTX(22, 2), CTX(31, 2), CTX(23, 2),  // 10 - 14
+            CTX(32, 2), CTX(42, 2), CTX(4, 6), CTX(43, 2), CTX(7, 7),   // 15 - 19
+            CTX(44, 2), CTX(24, 5), CTX(45, 2), CTX(33, 6), CTX(46, 2), // 20 - 24
+            CTX(39, 2), CTX(47, 2), CTX(35, 5), CTX(48, 2), CTX(37, 7), // 25 - 29
+            CTX(49, 2), CTX(11, 2)                                      // 30 - 31
+    };
+
+    static const uint8_t outcfg_tbl[32][4] =
+        {
+            {OUTC(0, 0, 0), OUTC(1, 2, 1), OUTC(0, 5, 1), OUTC(0, 6, 0)}, // CTX0:  A0OUT,  B2OUT2, A5OUT2, A6OUT
+            {OUTC(0, 0, 1), OUTC(0, 0, 0), OUTC(0, 5, 0), OUTC(1, 7, 1)}, // CTX1:  A0OUT2, A0OUT,  A5OUT,  B7OUT2
+            {OUTC(1, 0, 0), OUTC(1, 1, 1), OUTC(1, 6, 1), OUTC(0, 7, 0)}, // CTX2:  B0OUT,  B1OUT2, B6OUT2, A7OUT
+            {OUTC(1, 0, 1), OUTC(1, 0, 0), OUTC(0, 1, 0), OUTC(0, 6, 0)}, // CTX3:  B0OUT2, B0OUT,  A1OUT,  A6OUT
+            {OUTC(0, 1, 0), OUTC(0, 2, 1), OUTC(0, 5, 1), OUTC(1, 5, 0)}, // CTX4:  A1OUT,  A2OUT2, A5OUT2, B5OUT
+            {OUTC(0, 1, 1), OUTC(0, 1, 0), OUTC(1, 6, 0), OUTC(0, 7, 0)}, // CTX5:  A1OUT2, A1OUT,  B6OUT,  A7OUT
+            {OUTC(1, 1, 0), OUTC(0, 1, 0), OUTC(1, 5, 1), OUTC(1, 7, 0)}, // CTX6:  B1OUT,  A1OUT,  B5OUT2, B7OUT
+            {OUTC(1, 1, 1), OUTC(1, 1, 0), OUTC(1, 5, 0), OUTC(0, 7, 0)}, // CTX7:  B1OUT2, B1OUT,  B5OUT,  A7OUT
+            {OUTC(0, 2, 0), OUTC(0, 3, 1), OUTC(0, 4, 1), OUTC(1, 6, 0)}, // CTX8:  A2OUT,  A3OUT2, A4OUT2, B6OUT
+            {OUTC(0, 2, 1), OUTC(0, 2, 0), OUTC(0, 4, 0), OUTC(1, 0, 0)}, // CTX9:  A2OUT2, A2OUT,  A4OUT,  B0OUT
+            {OUTC(1, 2, 0), OUTC(1, 3, 1), OUTC(1, 4, 1), OUTC(0, 6, 0)}, // CTX10: B2OUT,  B3OUT2, B4OUT2, A6OUT
+            {OUTC(1, 2, 1), OUTC(1, 2, 0), OUTC(1, 4, 0), OUTC(1, 5, 1)}, // CTX11: B2OUT2, B2OUT,  B4OUT,  B5OUT2
+            {OUTC(0, 3, 0), OUTC(1, 1, 0), OUTC(1, 0, 1), OUTC(1, 6, 1)}, // CTX12: A3OUT,  B1OUT,  B0OUT2, B6OUT2
+            {OUTC(0, 3, 1), OUTC(0, 3, 0), OUTC(0, 6, 0), OUTC(1, 4, 1)}, // CTX13: A3OUT2, A3OUT,  A6OUT,  B4OUT2
+            {OUTC(1, 3, 0), OUTC(1, 1, 0), OUTC(1, 7, 1), OUTC(0, 7, 0)}, // CTX14: B3OUT,  B1OUT,  B7OUT2, A7OUT
+            {OUTC(1, 3, 1), OUTC(1, 3, 0), OUTC(0, 7, 0), OUTC(0, 4, 1)}, // CTX15: B3OUT2, B3OUT,  A7OUT,  A4OUT2
+            {OUTC(0, 4, 0), OUTC(0, 0, 0), OUTC(0, 0, 1), OUTC(1, 3, 1)}, // CTX16: A4OUT,  A0OUT,  A0OUT2, B3OUT2
+            {OUTC(0, 4, 1), OUTC(1, 7, 0), OUTC(0, 4, 0), OUTC(0, 1, 1)}, // CTX17: A4OUT2, B7OUT,  A4OUT,  A1OUT2
+            {OUTC(1, 4, 0), OUTC(1, 0, 0), OUTC(0, 0, 0), OUTC(0, 3, 1)}, // CTX18: B4OUT,  B0OUT,  A0OUT,  A3OUT2
+            {OUTC(1, 4, 1), OUTC(0, 2, 0), OUTC(1, 4, 0), OUTC(1, 1, 1)}, // CTX19: B4OUT2, A2OUT,  B4OUT,  B1OUT2
+            {OUTC(0, 5, 0), OUTC(0, 1, 0), OUTC(0, 1, 1), OUTC(1, 2, 1)}, // CTX20: A5OUT,  A1OUT,  A1OUT2, B2OUT2
+            {OUTC(0, 5, 1), OUTC(0, 1, 0), OUTC(1, 5, 0), OUTC(0, 0, 1)}, // CTX21: A5OUT2, A1OUT,  B5OUT,  A0OUT2
+            {OUTC(1, 5, 0), OUTC(0, 6, 0), OUTC(0, 1, 0), OUTC(0, 2, 1)}, // CTX22: B5OUT,  A6OUT,  A1OUT,  A2OUT2
+            {OUTC(1, 5, 1), OUTC(0, 7, 0), OUTC(0, 5, 0), OUTC(1, 0, 1)}, // CTX23: B5OUT2, A7OUT,  A5OUT,  B0OUT2
+            {OUTC(0, 6, 0), OUTC(0, 2, 0), OUTC(0, 1, 0), OUTC(1, 1, 1)}, // CTX24: A6OUT,  A2OUT,  A1OUT,  B1OUT2
+            {OUTC(1, 4, 1), OUTC(1, 2, 0), OUTC(0, 6, 0), OUTC(0, 2, 1)}, // CTX25: B4OUT2, B2OUT,  A6OUT,  A2OUT2
+            {OUTC(1, 6, 0), OUTC(1, 2, 0), OUTC(0, 5, 0), OUTC(0, 1, 1)}, // CTX26: B6OUT,  B2OUT,  A5OUT,  A1OUT2
+            {OUTC(1, 6, 1), OUTC(0, 1, 0), OUTC(1, 6, 0), OUTC(1, 2, 1)}, // CTX27: B6OUT2, A1OUT,  B6OUT,  B2OUT2
+            {OUTC(0, 7, 0), OUTC(0, 3, 0), OUTC(0, 5, 1), OUTC(1, 0, 1)}, // CTX28: A7OUT,  A3OUT,  A5OUT2, B0OUT2
+            {OUTC(1, 5, 1), OUTC(0, 1, 0), OUTC(0, 7, 0), OUTC(0, 3, 1)}, // CTX29: B5OUT2, A1OUT,  A7OUT,  A3OUT2
+            {OUTC(1, 7, 0), OUTC(1, 3, 0), OUTC(0, 4, 1), OUTC(0, 0, 1)}, // CTX30: B7OUT,  B3OUT,  A4OUT2, A0OUT2
+            {OUTC(1, 7, 1), OUTC(0, 6, 0), OUTC(1, 7, 0), OUTC(1, 3, 1)}, // CTX31: B7OUT2, A6OUT,  B7OUT,  B3OUT2
+    };
+
+      // handle configuration, if necessary
+      ap3_err_t retval = AP3_OK;
+
+      if (fw > 0)
+      { // reduce fw so that the user's desired value is the period
+          fw--;
+      }
+
+      ap3_gpio_pad_t pad = ap3_gpio_pin2pad(pin);
+      if ((pad == AP3_GPIO_PAD_UNUSED) || (pad >= AP3_GPIO_MAX_PADS))
+      {
+          return AP3_INVALID_ARG;
+      }
+
+      uint32_t timer = 0;
+      uint32_t segment = AM_HAL_CTIMER_TIMERA;
+      uint32_t output = AM_HAL_CTIMER_OUTPUT_NORMAL;
+
+      uint8_t ctx = 0;
+      for (ctx = 0; ctx < 32; ctx++)
+      {
+          if (CTXPADNUM(ctx) == pad)
+          {
+              break;
+          }
+      }
+      if (ctx >= 32)
+      {
+          return AP3_ERR; // could not find pad in CTx table
+      }
+      // Now use CTx index to get configuration information
+
+      // Now, for the given pad, determine the above values
+      if ((pad == 39) || (pad == 37))
+      {
+          // pads 39 and 37 must be handled differently to avoid conflicting with other pins
+          if (pad == 39)
+          {
+              // 39
+              timer = 6;
+              segment = AM_HAL_CTIMER_TIMERA;
+              output = AM_HAL_CTIMER_OUTPUT_SECONDARY;
+          }
+          else
+          {
+              // 37
+              timer = 7;
+              segment = AM_HAL_CTIMER_TIMERA;
+              output = AM_HAL_CTIMER_OUTPUT_SECONDARY;
+          }
+      }
+      else
+      { // Use the 0th index of the outcfg_tbl to select the functions
+          timer = OUTCTIMN(ctx, 0);
+          if (OUTCTIMB(ctx, 0))
+          {
+              segment = AM_HAL_CTIMER_TIMERB;
+          }
+          if (OUTCO2(ctx, 0))
+          {
+              output = AM_HAL_CTIMER_OUTPUT_SECONDARY;
+          }
+      }
+
+      // Ensure that th is not greater than the fw
+      if (th > fw)
+      {
+          th = fw;
+      }
+
+      // Test for AM_HAL_CTIMER_OUTPUT_FORCE0 or AM_HAL_CTIMER_OUTPUT_FORCE1
+      bool set_periods = true;
+      if ((th == 0) || (fw == 0))
+      {
+          output = AM_HAL_CTIMER_OUTPUT_FORCE0;
+          set_periods = false; // disable setting periods when going into a forced mode
+      }
+      else if (th == fw)
+      {
+          output = AM_HAL_CTIMER_OUTPUT_FORCE1;
+          set_periods = false; // disable setting periods when going into a forced mode
+      }
+
+      // Wait until after high pulse to change the state (avoids inversion)
+      ap3_pwm_wait_for_pulse(timer, segment, output, 10);
+
+      // Configure the pin
+      am_hal_ctimer_output_config(timer,
+                                  segment,
+                                  pad,
+                                  output,
+                                  AM_HAL_GPIO_PIN_DRIVESTRENGTH_12MA); //
+
+      // Configure the pulse mode with our clock source
+      am_hal_ctimer_config_single(timer,
+                                  segment,
+                                  // (AM_HAL_CTIMER_FN_PWM_REPEAT | AP3_ANALOG_CLK | AM_HAL_CTIMER_INT_ENABLE) );
+                                  (AM_HAL_CTIMER_FN_PWM_ONCE | clk));
+
+      if (set_periods)
+      {
+          // If this pad uses secondary output:
+          if (output == AM_HAL_CTIMER_OUTPUT_SECONDARY)
+          {
+              // Need to explicitly enable compare registers 2/3
+              uint32_t *pui32ConfigReg = NULL;
+              pui32ConfigReg = (uint32_t *)CTIMERADDRn(CTIMER, timer, AUX0);
+              uint32_t ui32WriteVal = AM_REGVAL(pui32ConfigReg);
+              uint32_t ui32ConfigVal = (1 << CTIMER_AUX0_TMRA0EN23_Pos); // using CTIMER_AUX0_TMRA0EN23_Pos because for now this number is common to all CTimer instances
+              if (segment == AM_HAL_CTIMER_TIMERB)
+              {
+                  ui32ConfigVal = ((ui32ConfigVal & 0xFFFF) << 16);
+              }
+              ui32WriteVal = (ui32WriteVal & ~(segment)) | ui32ConfigVal;
+              AM_REGVAL(pui32ConfigReg) = ui32WriteVal;
+
+              // then set the duty cycle with the 'aux' function
+              am_hal_ctimer_aux_period_set(timer, segment, fw, th);
+          }
+          else
+          {
+              // Otherwise simply set the primary duty cycle
+              am_hal_ctimer_period_set(timer, segment, fw, th);
+          }
+
+          am_hal_ctimer_start(timer, segment); // Start the timer only when there are periods to compare to
+      }
+
+      return AP3_OK;
+  }
 };
 
-*/
 
 #endif
 

From dcd774f73a843a23e40e140ebe1e5c6dfcbbf4b8 Mon Sep 17 00:00:00 2001
From: Paul <5690545+PaulZC@users.noreply.github.com>
Date: Wed, 8 Apr 2020 21:33:10 +0100
Subject: [PATCH 140/204] Trying to use systick for clockless

---
 platforms/apollo3/clockless_apollo3.h   | 344 ++++--------------------
 platforms/apollo3/fastpin_apollo3.h     |   6 +-
 platforms/apollo3/led_sysdefs_apollo3.h |   4 +-
 3 files changed, 55 insertions(+), 299 deletions(-)

diff --git a/platforms/apollo3/clockless_apollo3.h b/platforms/apollo3/clockless_apollo3.h
index 44587c7566..145471f765 100644
--- a/platforms/apollo3/clockless_apollo3.h
+++ b/platforms/apollo3/clockless_apollo3.h
@@ -5,7 +5,12 @@ FASTLED_NAMESPACE_BEGIN
 
 #if defined(FASTLED_APOLLO3)
 
-#include "ap3_analog.h"
+//#include "ap3_analog.h"
+#include "am_hal_systick.h"
+
+#ifndef SYSTICK_MAX_TICKS
+#define SYSTICK_MAX_TICKS ((1 << 24)-1)
+#endif
 
 #define FASTLED_HAS_CLOCKLESS 1
 
@@ -19,69 +24,93 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 public:
 	virtual void init() {
 		FastPin<DATA_PIN>::setOutput();
+    FastPin<DATA_PIN>::lo();
+    am_hal_clkgen_control(AM_HAL_CLKGEN_CONTROL_SYSCLK_MAX, 0);
+    am_hal_systick_load(0x00FFFFFF);
+    am_hal_systick_int_enable();
+    am_hal_interrupt_master_enable();
+    am_hal_systick_start();
 	}
 
+  // extern "C" void am_systick_isr(void)
+  // {
+  //   am_hal_systick_reset();
+  // }
+
 	virtual uint16_t getMaxRefreshRate() const { return 400; }
 
 protected:
 
 	virtual void showPixels(PixelController<RGB_ORDER> & pixels) {
-    mWait.wait();
+    //mWait.wait();
 		if(!showRGBInternal(pixels)) {
-      sei(); delayMicroseconds(WAIT_TIME); cli();
-      showRGBInternal(pixels);
+      //sei(); delayMicroseconds(WAIT_TIME); cli();
+      //showRGBInternal(pixels);
     }
-    mWait.mark();
+    //mWait.mark();
   }
 
-	template<int BITS> __attribute__ ((always_inline)) inline static void writeBits(register uint8_t & b)  {
-    uint32_t clk = AM_HAL_CTIMER_HFRC_12MHZ;
-    uint32_t fw = 0;
-    uint32_t th = 0;
-    fw = (T1+T2+T3) * 10000 / 12;
-
+	template<int BITS> __attribute__ ((always_inline)) inline static void writeBits(register uint32_t & next_mark, register uint8_t & b)  {
 		for(register uint32_t i = BITS-1; i > 0; i--) {
+      while(am_hal_systick_count() < next_mark);
+      next_mark = am_hal_systick_count() + 10;//(T1+T2+T3);
+      //if (next_mark > SYSTICK_MAX_TICKS) { next_mark = next_mark - SYSTICK_MAX_TICKS; }
+      FastPin<DATA_PIN>::hi();
 			if(b&0x80) {
-        th = T3 * 10000 / 12;
+        while((next_mark - am_hal_systick_count()) > 5);//(T3+(200*(F_CPU/24000000))));
+        FastPin<DATA_PIN>::lo();
 			} else {
-        th = (T2 + T3) * 1000 / 12;
+        while((next_mark - am_hal_systick_count()) > 7);//(T2+T3+(200*(F_CPU/24000000))));
+        FastPin<DATA_PIN>::lo();
 			}
-      ap3_pwm_output_once(DATA_PIN, th, fw, clk);
 			b <<= 1;
 		}
 
+    while(am_hal_systick_count() < next_mark);
+    next_mark = am_hal_systick_count() + 10;//(T1+T2+T3);
+    //if (next_mark > SYSTICK_MAX_TICKS) { next_mark = next_mark - SYSTICK_MAX_TICKS; }
+    FastPin<DATA_PIN>::hi();
     if(b&0x80) {
-      th = T3 * 10000 / 12;
+      while((next_mark - am_hal_systick_count()) > 5);//(T3+(200*(F_CPU/24000000))));
+      FastPin<DATA_PIN>::lo();
     } else {
-      th = (T2 + T3) * 10000 / 12;
+      while((next_mark - am_hal_systick_count()) > 7);//(T2+T3+(200*(F_CPU/24000000))));
+      FastPin<DATA_PIN>::lo();
     }
-    ap3_pwm_output_once(DATA_PIN, th, fw, clk);
 	}
 
 	static uint32_t showRGBInternal(PixelController<RGB_ORDER> pixels) {
+    FastPin<DATA_PIN>::lo();
+
 		// Setup the pixel controller and load/scale the first byte
 		pixels.preStepFirstByteDithering();
 		register uint8_t b = pixels.loadAndScale0();
 
 		cli();
+    register uint32_t next_mark = am_hal_systick_count() + 10;//(T1+T2+T3);
+    if (next_mark > SYSTICK_MAX_TICKS) { next_mark = next_mark - SYSTICK_MAX_TICKS; }
 
 		while(pixels.has(1)) {
 			pixels.stepDithering();
 
 			#if (FASTLED_ALLOW_INTERRUPTS == 1)
 			cli();
+      // if interrupts took longer than 45µs, punt on the current frame
+			if(am_hal_systick_count() > next_mark) {
+				if((am_hal_systick_count() - next_mark) > ((WAIT_TIME-INTERRUPT_THRESHOLD)*CLKS_PER_US)) { sei(); return 0; }
+			}
 			#endif
 
 			// Write first byte, read next byte
-			writeBits<8+XTRA0>(b);
+			writeBits<8+XTRA0>(next_mark, b);
 			b = pixels.loadAndScale1();
 
 			// Write second byte, read 3rd byte
-			writeBits<8+XTRA0>(b);
+			writeBits<8+XTRA0>(next_mark, b);
 			b = pixels.loadAndScale2();
 
 			// Write third byte, read 1st byte of next pixel
-			writeBits<8+XTRA0>(b);
+			writeBits<8+XTRA0>(next_mark, b);
 			b = pixels.advanceAndLoadAndScale0();
 
 			#if (FASTLED_ALLOW_INTERRUPTS == 1)
@@ -90,282 +119,9 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 		};
 
 		sei();
-		return 1;
+		return (am_hal_systick_count());
 	}
 
-  static bool ap3_pwm_is_running(uint32_t ui32TimerNumber, uint32_t ui32TimerSegment)
-  {
-    volatile uint32_t *pui32ConfigReg;
-    bool is_enabled = false;
-
-    //
-    // Find the correct control register.
-    //
-    pui32ConfigReg = (uint32_t *)CTIMERADDRn(CTIMER, ui32TimerNumber, CTRL0);
-
-    //
-    // Begin critical section while config registers are read and modified.
-    //
-    AM_CRITICAL_BEGIN
-
-    //
-    // Read the current value.
-    //
-    uint32_t ui32ConfigVal = *pui32ConfigReg;
-
-    //
-    // Check the "enable bit"
-    //
-    if (ui32ConfigVal & (CTIMER_CTRL0_TMRA0EN_Msk | CTIMER_CTRL0_TMRB0EN_Msk))
-    {
-        is_enabled = true;
-    }
-
-    //
-    // Done with critical section.
-    //
-    AM_CRITICAL_END
-
-    return is_enabled;
-  }
-
-  static void ap3_pwm_wait_for_pulse(uint32_t timer, uint32_t segment, uint32_t output, uint32_t margin)
-  {
-
-      volatile uint32_t *pui32CompareReg;
-      volatile uint32_t ctimer_val;
-      uint32_t cmpr0;
-
-      // Only wait if the ctimer is running to avoid a deadlock
-      if (ap3_pwm_is_running(timer, segment))
-      {
-
-          // Get the comapre register address
-          if (segment == AM_HAL_CTIMER_TIMERA)
-          {
-              if (output == AM_HAL_CTIMER_OUTPUT_NORMAL)
-              {
-                  pui32CompareReg = (uint32_t *)CTIMERADDRn(CTIMER, timer, CMPRA0);
-              }
-              else
-              {
-                  pui32CompareReg = (uint32_t *)CTIMERADDRn(CTIMER, timer, CMPRAUXA0);
-              }
-          }
-          else
-          {
-              if (output == AM_HAL_CTIMER_OUTPUT_NORMAL)
-              {
-                  pui32CompareReg = (uint32_t *)CTIMERADDRn(CTIMER, timer, CMPRB0);
-              }
-              else
-              {
-                  pui32CompareReg = (uint32_t *)CTIMERADDRn(CTIMER, timer, CMPRAUXB0);
-              }
-          }
-
-          // Get the compare value
-          cmpr0 = ((uint32_t)(*(pui32CompareReg)) & 0x0000FFFF);
-
-          if (cmpr0)
-          { // Only wait when cmpr0 is greater than 0 to avoid an infinite while loop
-              // Wait for the timer value to be less than the compare value so that it is safe to change
-              ctimer_val = am_hal_ctimer_read(timer, segment);
-              while ((ctimer_val + 0) >= cmpr0)
-              {
-                  ctimer_val = am_hal_ctimer_read(timer, segment);
-              }
-          }
-      }
-  }
-
-  #define CTXPADNUM(ctx) ((CTx_tbl[ctx] >> 0) & 0x3f)
-  #define CTXPADFNC(ctx) ((CTx_tbl[ctx] >> 8) & 0x7)
-  #define CTX(pad, fn) ((fn << 8) | (pad << 0))
-  #define OUTC(timB, timN, N2) ((N2 << 4) | (timB << 3) | (timN << 0))
-  #define OUTCTIMN(ctx, n) (outcfg_tbl[ctx][n] & (0x7 << 0))
-  #define OUTCTIMB(ctx, n) (outcfg_tbl[ctx][n] & (0x1 << 3))
-  #define OUTCO2(ctx, n) (outcfg_tbl[ctx][n] & (0x1 << 4))
-
-  static ap3_err_t ap3_pwm_output_once(uint8_t pin, uint32_t th, uint32_t fw, uint32_t clk)
-  {
-    static const uint16_t CTx_tbl[32] =
-    {
-            CTX(12, 2), CTX(25, 2), CTX(13, 2), CTX(26, 2), CTX(18, 2), // 0 - 4
-            CTX(27, 2), CTX(19, 2), CTX(28, 2), CTX(5, 7), CTX(29, 2),  // 5 - 9
-            CTX(6, 5), CTX(30, 2), CTX(22, 2), CTX(31, 2), CTX(23, 2),  // 10 - 14
-            CTX(32, 2), CTX(42, 2), CTX(4, 6), CTX(43, 2), CTX(7, 7),   // 15 - 19
-            CTX(44, 2), CTX(24, 5), CTX(45, 2), CTX(33, 6), CTX(46, 2), // 20 - 24
-            CTX(39, 2), CTX(47, 2), CTX(35, 5), CTX(48, 2), CTX(37, 7), // 25 - 29
-            CTX(49, 2), CTX(11, 2)                                      // 30 - 31
-    };
-
-    static const uint8_t outcfg_tbl[32][4] =
-        {
-            {OUTC(0, 0, 0), OUTC(1, 2, 1), OUTC(0, 5, 1), OUTC(0, 6, 0)}, // CTX0:  A0OUT,  B2OUT2, A5OUT2, A6OUT
-            {OUTC(0, 0, 1), OUTC(0, 0, 0), OUTC(0, 5, 0), OUTC(1, 7, 1)}, // CTX1:  A0OUT2, A0OUT,  A5OUT,  B7OUT2
-            {OUTC(1, 0, 0), OUTC(1, 1, 1), OUTC(1, 6, 1), OUTC(0, 7, 0)}, // CTX2:  B0OUT,  B1OUT2, B6OUT2, A7OUT
-            {OUTC(1, 0, 1), OUTC(1, 0, 0), OUTC(0, 1, 0), OUTC(0, 6, 0)}, // CTX3:  B0OUT2, B0OUT,  A1OUT,  A6OUT
-            {OUTC(0, 1, 0), OUTC(0, 2, 1), OUTC(0, 5, 1), OUTC(1, 5, 0)}, // CTX4:  A1OUT,  A2OUT2, A5OUT2, B5OUT
-            {OUTC(0, 1, 1), OUTC(0, 1, 0), OUTC(1, 6, 0), OUTC(0, 7, 0)}, // CTX5:  A1OUT2, A1OUT,  B6OUT,  A7OUT
-            {OUTC(1, 1, 0), OUTC(0, 1, 0), OUTC(1, 5, 1), OUTC(1, 7, 0)}, // CTX6:  B1OUT,  A1OUT,  B5OUT2, B7OUT
-            {OUTC(1, 1, 1), OUTC(1, 1, 0), OUTC(1, 5, 0), OUTC(0, 7, 0)}, // CTX7:  B1OUT2, B1OUT,  B5OUT,  A7OUT
-            {OUTC(0, 2, 0), OUTC(0, 3, 1), OUTC(0, 4, 1), OUTC(1, 6, 0)}, // CTX8:  A2OUT,  A3OUT2, A4OUT2, B6OUT
-            {OUTC(0, 2, 1), OUTC(0, 2, 0), OUTC(0, 4, 0), OUTC(1, 0, 0)}, // CTX9:  A2OUT2, A2OUT,  A4OUT,  B0OUT
-            {OUTC(1, 2, 0), OUTC(1, 3, 1), OUTC(1, 4, 1), OUTC(0, 6, 0)}, // CTX10: B2OUT,  B3OUT2, B4OUT2, A6OUT
-            {OUTC(1, 2, 1), OUTC(1, 2, 0), OUTC(1, 4, 0), OUTC(1, 5, 1)}, // CTX11: B2OUT2, B2OUT,  B4OUT,  B5OUT2
-            {OUTC(0, 3, 0), OUTC(1, 1, 0), OUTC(1, 0, 1), OUTC(1, 6, 1)}, // CTX12: A3OUT,  B1OUT,  B0OUT2, B6OUT2
-            {OUTC(0, 3, 1), OUTC(0, 3, 0), OUTC(0, 6, 0), OUTC(1, 4, 1)}, // CTX13: A3OUT2, A3OUT,  A6OUT,  B4OUT2
-            {OUTC(1, 3, 0), OUTC(1, 1, 0), OUTC(1, 7, 1), OUTC(0, 7, 0)}, // CTX14: B3OUT,  B1OUT,  B7OUT2, A7OUT
-            {OUTC(1, 3, 1), OUTC(1, 3, 0), OUTC(0, 7, 0), OUTC(0, 4, 1)}, // CTX15: B3OUT2, B3OUT,  A7OUT,  A4OUT2
-            {OUTC(0, 4, 0), OUTC(0, 0, 0), OUTC(0, 0, 1), OUTC(1, 3, 1)}, // CTX16: A4OUT,  A0OUT,  A0OUT2, B3OUT2
-            {OUTC(0, 4, 1), OUTC(1, 7, 0), OUTC(0, 4, 0), OUTC(0, 1, 1)}, // CTX17: A4OUT2, B7OUT,  A4OUT,  A1OUT2
-            {OUTC(1, 4, 0), OUTC(1, 0, 0), OUTC(0, 0, 0), OUTC(0, 3, 1)}, // CTX18: B4OUT,  B0OUT,  A0OUT,  A3OUT2
-            {OUTC(1, 4, 1), OUTC(0, 2, 0), OUTC(1, 4, 0), OUTC(1, 1, 1)}, // CTX19: B4OUT2, A2OUT,  B4OUT,  B1OUT2
-            {OUTC(0, 5, 0), OUTC(0, 1, 0), OUTC(0, 1, 1), OUTC(1, 2, 1)}, // CTX20: A5OUT,  A1OUT,  A1OUT2, B2OUT2
-            {OUTC(0, 5, 1), OUTC(0, 1, 0), OUTC(1, 5, 0), OUTC(0, 0, 1)}, // CTX21: A5OUT2, A1OUT,  B5OUT,  A0OUT2
-            {OUTC(1, 5, 0), OUTC(0, 6, 0), OUTC(0, 1, 0), OUTC(0, 2, 1)}, // CTX22: B5OUT,  A6OUT,  A1OUT,  A2OUT2
-            {OUTC(1, 5, 1), OUTC(0, 7, 0), OUTC(0, 5, 0), OUTC(1, 0, 1)}, // CTX23: B5OUT2, A7OUT,  A5OUT,  B0OUT2
-            {OUTC(0, 6, 0), OUTC(0, 2, 0), OUTC(0, 1, 0), OUTC(1, 1, 1)}, // CTX24: A6OUT,  A2OUT,  A1OUT,  B1OUT2
-            {OUTC(1, 4, 1), OUTC(1, 2, 0), OUTC(0, 6, 0), OUTC(0, 2, 1)}, // CTX25: B4OUT2, B2OUT,  A6OUT,  A2OUT2
-            {OUTC(1, 6, 0), OUTC(1, 2, 0), OUTC(0, 5, 0), OUTC(0, 1, 1)}, // CTX26: B6OUT,  B2OUT,  A5OUT,  A1OUT2
-            {OUTC(1, 6, 1), OUTC(0, 1, 0), OUTC(1, 6, 0), OUTC(1, 2, 1)}, // CTX27: B6OUT2, A1OUT,  B6OUT,  B2OUT2
-            {OUTC(0, 7, 0), OUTC(0, 3, 0), OUTC(0, 5, 1), OUTC(1, 0, 1)}, // CTX28: A7OUT,  A3OUT,  A5OUT2, B0OUT2
-            {OUTC(1, 5, 1), OUTC(0, 1, 0), OUTC(0, 7, 0), OUTC(0, 3, 1)}, // CTX29: B5OUT2, A1OUT,  A7OUT,  A3OUT2
-            {OUTC(1, 7, 0), OUTC(1, 3, 0), OUTC(0, 4, 1), OUTC(0, 0, 1)}, // CTX30: B7OUT,  B3OUT,  A4OUT2, A0OUT2
-            {OUTC(1, 7, 1), OUTC(0, 6, 0), OUTC(1, 7, 0), OUTC(1, 3, 1)}, // CTX31: B7OUT2, A6OUT,  B7OUT,  B3OUT2
-    };
-
-      // handle configuration, if necessary
-      ap3_err_t retval = AP3_OK;
-
-      if (fw > 0)
-      { // reduce fw so that the user's desired value is the period
-          fw--;
-      }
-
-      ap3_gpio_pad_t pad = ap3_gpio_pin2pad(pin);
-      if ((pad == AP3_GPIO_PAD_UNUSED) || (pad >= AP3_GPIO_MAX_PADS))
-      {
-          return AP3_INVALID_ARG;
-      }
-
-      uint32_t timer = 0;
-      uint32_t segment = AM_HAL_CTIMER_TIMERA;
-      uint32_t output = AM_HAL_CTIMER_OUTPUT_NORMAL;
-
-      uint8_t ctx = 0;
-      for (ctx = 0; ctx < 32; ctx++)
-      {
-          if (CTXPADNUM(ctx) == pad)
-          {
-              break;
-          }
-      }
-      if (ctx >= 32)
-      {
-          return AP3_ERR; // could not find pad in CTx table
-      }
-      // Now use CTx index to get configuration information
-
-      // Now, for the given pad, determine the above values
-      if ((pad == 39) || (pad == 37))
-      {
-          // pads 39 and 37 must be handled differently to avoid conflicting with other pins
-          if (pad == 39)
-          {
-              // 39
-              timer = 6;
-              segment = AM_HAL_CTIMER_TIMERA;
-              output = AM_HAL_CTIMER_OUTPUT_SECONDARY;
-          }
-          else
-          {
-              // 37
-              timer = 7;
-              segment = AM_HAL_CTIMER_TIMERA;
-              output = AM_HAL_CTIMER_OUTPUT_SECONDARY;
-          }
-      }
-      else
-      { // Use the 0th index of the outcfg_tbl to select the functions
-          timer = OUTCTIMN(ctx, 0);
-          if (OUTCTIMB(ctx, 0))
-          {
-              segment = AM_HAL_CTIMER_TIMERB;
-          }
-          if (OUTCO2(ctx, 0))
-          {
-              output = AM_HAL_CTIMER_OUTPUT_SECONDARY;
-          }
-      }
-
-      // Ensure that th is not greater than the fw
-      if (th > fw)
-      {
-          th = fw;
-      }
-
-      // Test for AM_HAL_CTIMER_OUTPUT_FORCE0 or AM_HAL_CTIMER_OUTPUT_FORCE1
-      bool set_periods = true;
-      if ((th == 0) || (fw == 0))
-      {
-          output = AM_HAL_CTIMER_OUTPUT_FORCE0;
-          set_periods = false; // disable setting periods when going into a forced mode
-      }
-      else if (th == fw)
-      {
-          output = AM_HAL_CTIMER_OUTPUT_FORCE1;
-          set_periods = false; // disable setting periods when going into a forced mode
-      }
-
-      // Wait until after high pulse to change the state (avoids inversion)
-      ap3_pwm_wait_for_pulse(timer, segment, output, 10);
-
-      // Configure the pin
-      am_hal_ctimer_output_config(timer,
-                                  segment,
-                                  pad,
-                                  output,
-                                  AM_HAL_GPIO_PIN_DRIVESTRENGTH_12MA); //
-
-      // Configure the pulse mode with our clock source
-      am_hal_ctimer_config_single(timer,
-                                  segment,
-                                  // (AM_HAL_CTIMER_FN_PWM_REPEAT | AP3_ANALOG_CLK | AM_HAL_CTIMER_INT_ENABLE) );
-                                  (AM_HAL_CTIMER_FN_PWM_ONCE | clk));
-
-      if (set_periods)
-      {
-          // If this pad uses secondary output:
-          if (output == AM_HAL_CTIMER_OUTPUT_SECONDARY)
-          {
-              // Need to explicitly enable compare registers 2/3
-              uint32_t *pui32ConfigReg = NULL;
-              pui32ConfigReg = (uint32_t *)CTIMERADDRn(CTIMER, timer, AUX0);
-              uint32_t ui32WriteVal = AM_REGVAL(pui32ConfigReg);
-              uint32_t ui32ConfigVal = (1 << CTIMER_AUX0_TMRA0EN23_Pos); // using CTIMER_AUX0_TMRA0EN23_Pos because for now this number is common to all CTimer instances
-              if (segment == AM_HAL_CTIMER_TIMERB)
-              {
-                  ui32ConfigVal = ((ui32ConfigVal & 0xFFFF) << 16);
-              }
-              ui32WriteVal = (ui32WriteVal & ~(segment)) | ui32ConfigVal;
-              AM_REGVAL(pui32ConfigReg) = ui32WriteVal;
-
-              // then set the duty cycle with the 'aux' function
-              am_hal_ctimer_aux_period_set(timer, segment, fw, th);
-          }
-          else
-          {
-              // Otherwise simply set the primary duty cycle
-              am_hal_ctimer_period_set(timer, segment, fw, th);
-          }
-
-          am_hal_ctimer_start(timer, segment); // Start the timer only when there are periods to compare to
-      }
-
-      return AP3_OK;
-  }
 };
 
 
diff --git a/platforms/apollo3/fastpin_apollo3.h b/platforms/apollo3/fastpin_apollo3.h
index a489ff7802..a30ef0c598 100644
--- a/platforms/apollo3/fastpin_apollo3.h
+++ b/platforms/apollo3/fastpin_apollo3.h
@@ -19,9 +19,9 @@ template<uint8_t PIN> class _APOLLO3PIN {
   inline static void setOutput() { pinMode(PIN, OUTPUT); am_hal_gpio_fastgpio_enable(PIN); }
   inline static void setInput() { am_hal_gpio_fastgpio_disable(PIN); pinMode(PIN, INPUT); }
 
-  inline static void hi() __attribute__ ((always_inline)) { am_hal_gpio_fastgpio_set(PIN); } // { digitalWrite(PIN, HIGH); }
-  inline static void lo() __attribute__ ((always_inline)) { am_hal_gpio_fastgpio_clr(PIN); } // { digitalWrite(PIN, LOW); }
-  inline static void set(register port_t val) __attribute__ ((always_inline)) { if(val) { am_hal_gpio_fastgpio_set(PIN); } else { am_hal_gpio_fastgpio_clr(PIN); } } // { digitalWrite(PIN, val); }
+  inline static void hi() __attribute__ ((always_inline)) { am_hal_gpio_fastgpio_set(PIN); }
+  inline static void lo() __attribute__ ((always_inline)) { am_hal_gpio_fastgpio_clr(PIN); }
+  inline static void set(register port_t val) __attribute__ ((always_inline)) { if(val) { am_hal_gpio_fastgpio_set(PIN); } else { am_hal_gpio_fastgpio_clr(PIN); } }
 
   inline static void strobe() __attribute__ ((always_inline)) { toggle(); toggle(); }
 
diff --git a/platforms/apollo3/led_sysdefs_apollo3.h b/platforms/apollo3/led_sysdefs_apollo3.h
index 7b66f7786a..d7eeb57ad5 100644
--- a/platforms/apollo3/led_sysdefs_apollo3.h
+++ b/platforms/apollo3/led_sysdefs_apollo3.h
@@ -33,7 +33,7 @@ typedef volatile       uint8_t RwReg; /**< Read-Write 8-bit register (volatile u
 
 // reusing/abusing cli/sei defs for due
 // These should be fine for the Apollo3. It has its own defines in cmsis_gcc.h
-#define cli()  __disable_irq();  __disable_fault_irq();
-#define sei() __enable_irq();  __enable_fault_irq();
+#define cli() __disable_irq();  //__disable_fault_irq();
+#define sei() __enable_irq();  //__enable_fault_irq();
 
 #endif

From 1baa14e5db58ffbd0d3dc03d9f69c3b7d3b1af00 Mon Sep 17 00:00:00 2001
From: Paul <5690545+PaulZC@users.noreply.github.com>
Date: Thu, 9 Apr 2020 13:07:31 +0100
Subject: [PATCH 141/204] Clockless updates using SysTick

---
 platforms.cpp                         |  18 +++-
 platforms/apollo3/clockless_apollo3.h | 119 ++++++++++++++++++--------
 2 files changed, 98 insertions(+), 39 deletions(-)

diff --git a/platforms.cpp b/platforms.cpp
index 47a0088314..29b0c4b260 100644
--- a/platforms.cpp
+++ b/platforms.cpp
@@ -15,7 +15,7 @@
     #ifdef __cplusplus
         extern "C" {
     #endif
-            // NOTE: Update platforms.cpp in root of FastLED library if this changes        
+            // NOTE: Update platforms.cpp in root of FastLED library if this changes
             #if defined(FASTLED_NRF52_ENABLE_PWM_INSTANCE0)
                 void PWM0_IRQHandler(void) { isrCount++; PWM_Arbiter<0>::isr_handler(); }
             #endif
@@ -34,7 +34,21 @@
 
 #endif // defined(NRF52_SERIES)
 
-
+// ISR for the APOLLO3 SysTick
+#if defined(FASTLED_APOLLO3)
+
+  // SysTick Interrupt Service Routine
+  #ifdef __cplusplus
+    extern "C" {
+  #endif
+    void SysTick_Handler(void) {
+      // We don't actually need to do anything in the ISR. There just needs to be one!
+    }
+  #ifdef __cplusplus
+    }
+  #endif
+
+#endif // defined(FASTLED_APOLLO3)
 
 // FASTLED_NAMESPACE_BEGIN
 // FASTLED_NAMESPACE_END
diff --git a/platforms/apollo3/clockless_apollo3.h b/platforms/apollo3/clockless_apollo3.h
index 145471f765..d07cf9e027 100644
--- a/platforms/apollo3/clockless_apollo3.h
+++ b/platforms/apollo3/clockless_apollo3.h
@@ -5,14 +5,14 @@ FASTLED_NAMESPACE_BEGIN
 
 #if defined(FASTLED_APOLLO3)
 
-//#include "ap3_analog.h"
-#include "am_hal_systick.h"
+#define FASTLED_HAS_CLOCKLESS 1
 
-#ifndef SYSTICK_MAX_TICKS
-#define SYSTICK_MAX_TICKS ((1 << 24)-1)
-#endif
+#define DO_RGBW // Uncomment this line to enable support for (e.g.) SK6812RGBW that need extra white bits
 
-#define FASTLED_HAS_CLOCKLESS 1
+#ifdef DO_RGBW
+// Set all the white LEDs to this level. (What were you expecting for free!? ;-)
+#define WHITE_LEVEL 1
+#endif
 
 template <int DATA_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = RGB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 50>
 class ClocklessController : public CPixelLEDController<RGB_ORDER> {
@@ -23,62 +23,96 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 
 public:
 	virtual void init() {
+		// Initialize everything
+		// This is _very_ SparkFun Artemis / Ambiq Micro Apollo3 Blue specific!
+
+		// Configure DATA_PIN for FastGPIO (settings are in fastpin_apollo3.h)
 		FastPin<DATA_PIN>::setOutput();
     FastPin<DATA_PIN>::lo();
-    am_hal_clkgen_control(AM_HAL_CLKGEN_CONTROL_SYSCLK_MAX, 0);
-    am_hal_systick_load(0x00FFFFFF);
-    am_hal_systick_int_enable();
-    am_hal_interrupt_master_enable();
-    am_hal_systick_start();
+
+		// Make sure the system clock is running at the full 48MHz
+	  am_hal_clkgen_control(AM_HAL_CLKGEN_CONTROL_SYSCLK_MAX, 0);
+
+	  // Make sure interrupts are enabled
+	  am_hal_interrupt_master_enable();
+
+	  // Enable SysTick Interrupts in the NVIC
+	  NVIC_EnableIRQ(SysTick_IRQn);
+
+		// SysTick is 24-bit and counts down (not up)
+
+	  // Stop the SysTick (just in case it is already running).
+	  // This clears the ENABLE bit in the SysTick Control and Status Register (SYST_CSR).
+	  // In Ambiq naming convention: the control register is SysTick->CTRL
+	  am_hal_systick_stop();
+
+	  // Call SysTick_Config
+	  // This is defined in core_cm4.h
+	  // It loads the specified LOAD value into the SysTick Reload Value Register (SYST_RVR)
+	  // In Ambiq naming convention: the reload register is SysTick->LOAD
+	  // It sets the SysTick interrupt priority
+	  // It clears the SysTick Current Value Register (SYST_CVR)
+	  // In Ambiq naming convention: the current value register is SysTick->VAL
+	  // Finally it sets these bits in the SysTick Control and Status Register (SYST_CSR):
+	  // CLKSOURCE: SysTick uses the processor clock
+	  // TICKINT: When the count reaches zero, the SysTick exception (interrupt) is changed to pending
+	  // ENABLE: Enables the counter
+	  // SysTick_Config returns 0 if successful. 1 indicates a failure (the LOAD value was invalid).
+	  SysTick_Config(0xFFFFFFUL); // The LOAD value needs to be 24-bit
 	}
 
-  // extern "C" void am_systick_isr(void)
-  // {
-  //   am_hal_systick_reset();
-  // }
+	virtual uint16_t getMaxRefreshRate() const { return 400; } // This can probably be increased?
 
-	virtual uint16_t getMaxRefreshRate() const { return 400; }
+	static void SysTick_Handler(void) {
+		// We don't actually need to do anything in the ISR. There just needs to be one!
+	}
 
 protected:
 
 	virtual void showPixels(PixelController<RGB_ORDER> & pixels) {
-    //mWait.wait();
+    mWait.wait();
 		if(!showRGBInternal(pixels)) {
-      //sei(); delayMicroseconds(WAIT_TIME); cli();
-      //showRGBInternal(pixels);
+      sei(); delayMicroseconds(WAIT_TIME); cli();
+      showRGBInternal(pixels);
     }
-    //mWait.mark();
+    mWait.mark();
   }
 
+	// SysTick counts down not up and is 24-bit, so let's ex-or it so it appears to count up
+	#define am_hal_systick_count_inverted() (am_hal_systick_count() ^ 0xFFFFFF)
+
 	template<int BITS> __attribute__ ((always_inline)) inline static void writeBits(register uint32_t & next_mark, register uint8_t & b)  {
-		for(register uint32_t i = BITS-1; i > 0; i--) {
-      while(am_hal_systick_count() < next_mark);
-      next_mark = am_hal_systick_count() + 10;//(T1+T2+T3);
-      //if (next_mark > SYSTICK_MAX_TICKS) { next_mark = next_mark - SYSTICK_MAX_TICKS; }
+		// SysTick counts down (not up) and is 24-bit
+		for(register uint32_t i = BITS-1; i > 0; i--) { // We could speed this up by using Bit Banding
+      while(am_hal_systick_count_inverted() < next_mark);
+      next_mark = (am_hal_systick_count_inverted() + T1+T2+T3) & 0xFFFFFF;
+			// (This will glitch when next_mark would normally exceed 0xFFFFFF)
       FastPin<DATA_PIN>::hi();
 			if(b&0x80) {
-        while((next_mark - am_hal_systick_count()) > 5);//(T3+(200*(F_CPU/24000000))));
+        while((next_mark - am_hal_systick_count_inverted()) > (T3));//+(2*(F_CPU/24000000))));
         FastPin<DATA_PIN>::lo();
 			} else {
-        while((next_mark - am_hal_systick_count()) > 7);//(T2+T3+(200*(F_CPU/24000000))));
+        while((next_mark - am_hal_systick_count_inverted()) > (T2+T3));//+(2*(F_CPU/24000000))));
         FastPin<DATA_PIN>::lo();
 			}
 			b <<= 1;
 		}
 
-    while(am_hal_systick_count() < next_mark);
-    next_mark = am_hal_systick_count() + 10;//(T1+T2+T3);
-    //if (next_mark > SYSTICK_MAX_TICKS) { next_mark = next_mark - SYSTICK_MAX_TICKS; }
+    while(am_hal_systick_count_inverted() < next_mark);
+    next_mark = (am_hal_systick_count_inverted() + T1+T2+T3) & 0xFFFFFF;
+		// (This will glitch when next_mark would normally exceed 0xFFFFFF)
     FastPin<DATA_PIN>::hi();
     if(b&0x80) {
-      while((next_mark - am_hal_systick_count()) > 5);//(T3+(200*(F_CPU/24000000))));
+      while((next_mark - am_hal_systick_count_inverted()) > (T3));//+(2*(F_CPU/24000000))));
       FastPin<DATA_PIN>::lo();
     } else {
-      while((next_mark - am_hal_systick_count()) > 7);//(T2+T3+(200*(F_CPU/24000000))));
+      while((next_mark - am_hal_systick_count_inverted()) > (T2+T3));//+(2*(F_CPU/24000000))));
       FastPin<DATA_PIN>::lo();
     }
 	}
 
+	// This method is made static to force making register Y available to use for data on AVR - if the method is non-static, then
+	// gcc will use register Y for the this pointer.
 	static uint32_t showRGBInternal(PixelController<RGB_ORDER> pixels) {
     FastPin<DATA_PIN>::lo();
 
@@ -86,18 +120,23 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 		pixels.preStepFirstByteDithering();
 		register uint8_t b = pixels.loadAndScale0();
 
+		// The SysTick ISR appears not be working so let's manually reload the SysTick VAL
+		//am_hal_systick_load(0xFFFFFF);
+
 		cli();
-    register uint32_t next_mark = am_hal_systick_count() + 10;//(T1+T2+T3);
-    if (next_mark > SYSTICK_MAX_TICKS) { next_mark = next_mark - SYSTICK_MAX_TICKS; }
+		// Calculate next_mark (the time of the next DATA_PIN transition)
+		// SysTick counts down (not up) and is 24-bit so let's use the inverted version and mask it to 24 bits
+		// (This will glitch when next_mark would normally exceed 0xFFFFFF)
+    register uint32_t next_mark = (am_hal_systick_count_inverted() + T1+T2+T3) & 0xFFFFFF;
 
 		while(pixels.has(1)) {
 			pixels.stepDithering();
 
 			#if (FASTLED_ALLOW_INTERRUPTS == 1)
 			cli();
-      // if interrupts took longer than 45µs, punt on the current frame
-			if(am_hal_systick_count() > next_mark) {
-				if((am_hal_systick_count() - next_mark) > ((WAIT_TIME-INTERRUPT_THRESHOLD)*CLKS_PER_US)) { sei(); return 0; }
+			if(am_hal_systick_count_inverted() > next_mark) { // Have we already missed the next_mark?
+				// If we have exceeded next_mark by an excessive amount, then bail (return 0)
+				if((am_hal_systick_count_inverted() - next_mark) > ((WAIT_TIME-INTERRUPT_THRESHOLD)*CLKS_PER_US)) { sei(); return 0; }
 			}
 			#endif
 
@@ -113,13 +152,19 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 			writeBits<8+XTRA0>(next_mark, b);
 			b = pixels.advanceAndLoadAndScale0();
 
+			// Write the extra white bits if the RGBW strip needs them
+			#ifdef DO_RGBW
+			register uint8_t white_level = WHITE_LEVEL;
+			writeBits<8+XTRA0>(next_mark, white_level);
+			#endif
+
 			#if (FASTLED_ALLOW_INTERRUPTS == 1)
 			sei();
 			#endif
 		};
 
 		sei();
-		return (am_hal_systick_count());
+		return (am_hal_systick_count_inverted());
 	}
 
 };

From 9f3f693e47fa7134f5d2449e34a0a3216e400bfa Mon Sep 17 00:00:00 2001
From: Paul <5690545+PaulZC@users.noreply.github.com>
Date: Thu, 9 Apr 2020 17:09:11 +0100
Subject: [PATCH 142/204] Clockless is working nicely!

---
 platforms.cpp                           | 16 ------
 platforms/apollo3/clockless_apollo3.h   | 75 ++++++++++---------------
 platforms/apollo3/led_sysdefs_apollo3.h |  4 +-
 3 files changed, 32 insertions(+), 63 deletions(-)

diff --git a/platforms.cpp b/platforms.cpp
index 29b0c4b260..511cdf7913 100644
--- a/platforms.cpp
+++ b/platforms.cpp
@@ -34,21 +34,5 @@
 
 #endif // defined(NRF52_SERIES)
 
-// ISR for the APOLLO3 SysTick
-#if defined(FASTLED_APOLLO3)
-
-  // SysTick Interrupt Service Routine
-  #ifdef __cplusplus
-    extern "C" {
-  #endif
-    void SysTick_Handler(void) {
-      // We don't actually need to do anything in the ISR. There just needs to be one!
-    }
-  #ifdef __cplusplus
-    }
-  #endif
-
-#endif // defined(FASTLED_APOLLO3)
-
 // FASTLED_NAMESPACE_BEGIN
 // FASTLED_NAMESPACE_END
diff --git a/platforms/apollo3/clockless_apollo3.h b/platforms/apollo3/clockless_apollo3.h
index d07cf9e027..b0b86c3154 100644
--- a/platforms/apollo3/clockless_apollo3.h
+++ b/platforms/apollo3/clockless_apollo3.h
@@ -7,13 +7,6 @@ FASTLED_NAMESPACE_BEGIN
 
 #define FASTLED_HAS_CLOCKLESS 1
 
-#define DO_RGBW // Uncomment this line to enable support for (e.g.) SK6812RGBW that need extra white bits
-
-#ifdef DO_RGBW
-// Set all the white LEDs to this level. (What were you expecting for free!? ;-)
-#define WHITE_LEVEL 1
-#endif
-
 template <int DATA_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = RGB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 50>
 class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 	typedef typename FastPin<DATA_PIN>::port_ptr_t data_ptr_t;
@@ -63,10 +56,6 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 
 	virtual uint16_t getMaxRefreshRate() const { return 400; } // This can probably be increased?
 
-	static void SysTick_Handler(void) {
-		// We don't actually need to do anything in the ISR. There just needs to be one!
-	}
-
 protected:
 
 	virtual void showPixels(PixelController<RGB_ORDER> & pixels) {
@@ -78,35 +67,34 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
     mWait.mark();
   }
 
-	// SysTick counts down not up and is 24-bit, so let's ex-or it so it appears to count up
-	#define am_hal_systick_count_inverted() (am_hal_systick_count() ^ 0xFFFFFF)
-
 	template<int BITS> __attribute__ ((always_inline)) inline static void writeBits(register uint32_t & next_mark, register uint8_t & b)  {
 		// SysTick counts down (not up) and is 24-bit
 		for(register uint32_t i = BITS-1; i > 0; i--) { // We could speed this up by using Bit Banding
-      while(am_hal_systick_count_inverted() < next_mark);
-      next_mark = (am_hal_systick_count_inverted() + T1+T2+T3) & 0xFFFFFF;
-			// (This will glitch when next_mark would normally exceed 0xFFFFFF)
-      FastPin<DATA_PIN>::hi();
+      while(am_hal_systick_count() > next_mark) { ; } // Wait for the remainder of this cycle to complete
+			// Calculate next_mark (the time of the next DATA_PIN transition) by subtracting T1+T2+T3
+			// SysTick counts down (not up) and is 24-bit
+			next_mark = (am_hal_systick_count() - (T1+T2+T3)) & 0xFFFFFFUL;
+			FastPin<DATA_PIN>::hi();
 			if(b&0x80) {
-        while((next_mark - am_hal_systick_count_inverted()) > (T3));//+(2*(F_CPU/24000000))));
+        while((am_hal_systick_count() - next_mark) > (T3)) { ; }
         FastPin<DATA_PIN>::lo();
 			} else {
-        while((next_mark - am_hal_systick_count_inverted()) > (T2+T3));//+(2*(F_CPU/24000000))));
+        while((am_hal_systick_count() - next_mark) > (T2+T3)) { ; }
         FastPin<DATA_PIN>::lo();
 			}
 			b <<= 1;
 		}
 
-    while(am_hal_systick_count_inverted() < next_mark);
-    next_mark = (am_hal_systick_count_inverted() + T1+T2+T3) & 0xFFFFFF;
-		// (This will glitch when next_mark would normally exceed 0xFFFFFF)
-    FastPin<DATA_PIN>::hi();
+    while(am_hal_systick_count() > next_mark) { ; }// Wait for the remainder of this cycle to complete
+		// Calculate next_mark (the time of the next DATA_PIN transition) by subtracting T1+T2+T3
+		// SysTick counts down (not up) and is 24-bit
+		next_mark = (am_hal_systick_count() - (T1+T2+T3)) & 0xFFFFFFUL;
+		FastPin<DATA_PIN>::hi();
     if(b&0x80) {
-      while((next_mark - am_hal_systick_count_inverted()) > (T3));//+(2*(F_CPU/24000000))));
+      while((am_hal_systick_count() - next_mark) > (T3)) { ; }
       FastPin<DATA_PIN>::lo();
     } else {
-      while((next_mark - am_hal_systick_count_inverted()) > (T2+T3));//+(2*(F_CPU/24000000))));
+      while((am_hal_systick_count() - next_mark) > (T2+T3)) { ; }
       FastPin<DATA_PIN>::lo();
     }
 	}
@@ -114,29 +102,28 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 	// This method is made static to force making register Y available to use for data on AVR - if the method is non-static, then
 	// gcc will use register Y for the this pointer.
 	static uint32_t showRGBInternal(PixelController<RGB_ORDER> pixels) {
-    FastPin<DATA_PIN>::lo();
 
 		// Setup the pixel controller and load/scale the first byte
 		pixels.preStepFirstByteDithering();
 		register uint8_t b = pixels.loadAndScale0();
 
-		// The SysTick ISR appears not be working so let's manually reload the SysTick VAL
-		//am_hal_systick_load(0xFFFFFF);
-
 		cli();
-		// Calculate next_mark (the time of the next DATA_PIN transition)
-		// SysTick counts down (not up) and is 24-bit so let's use the inverted version and mask it to 24 bits
-		// (This will glitch when next_mark would normally exceed 0xFFFFFF)
-    register uint32_t next_mark = (am_hal_systick_count_inverted() + T1+T2+T3) & 0xFFFFFF;
 
-		while(pixels.has(1)) {
+		// Calculate next_mark (the time of the next DATA_PIN transition) by subtracting T1+T2+T3
+		// SysTick counts down (not up) and is 24-bit
+		// The subtraction could underflow (wrap round) so let's mask the result to 24 bits
+		register uint32_t next_mark = (am_hal_systick_count() - (T1+T2+T3)) & 0xFFFFFFUL;
+
+		while(pixels.has(1)) { // Keep going for as long as we have pixels
 			pixels.stepDithering();
 
 			#if (FASTLED_ALLOW_INTERRUPTS == 1)
 			cli();
-			if(am_hal_systick_count_inverted() > next_mark) { // Have we already missed the next_mark?
+
+			// Have we already missed the next_mark?
+			if(am_hal_systick_count() < next_mark) {
 				// If we have exceeded next_mark by an excessive amount, then bail (return 0)
-				if((am_hal_systick_count_inverted() - next_mark) > ((WAIT_TIME-INTERRUPT_THRESHOLD)*CLKS_PER_US)) { sei(); return 0; }
+				if((next_mark - am_hal_systick_count()) > ((WAIT_TIME-INTERRUPT_THRESHOLD)*CLKS_PER_US)) { sei(); return 0; }
 			}
 			#endif
 
@@ -152,19 +139,17 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 			writeBits<8+XTRA0>(next_mark, b);
 			b = pixels.advanceAndLoadAndScale0();
 
-			// Write the extra white bits if the RGBW strip needs them
-			#ifdef DO_RGBW
-			register uint8_t white_level = WHITE_LEVEL;
-			writeBits<8+XTRA0>(next_mark, white_level);
-			#endif
-
 			#if (FASTLED_ALLOW_INTERRUPTS == 1)
 			sei();
 			#endif
-		};
+		}; // end of while(pixels.has(1))
+
+		// Unfortunately SysTick relies on interrupts to reload it once it reaches zero
+		// So we had better reload it here instead...
+		am_hal_systick_load(0xFFFFFFUL);
 
 		sei();
-		return (am_hal_systick_count_inverted());
+		return (1);
 	}
 
 };
diff --git a/platforms/apollo3/led_sysdefs_apollo3.h b/platforms/apollo3/led_sysdefs_apollo3.h
index d7eeb57ad5..be74e24de8 100644
--- a/platforms/apollo3/led_sysdefs_apollo3.h
+++ b/platforms/apollo3/led_sysdefs_apollo3.h
@@ -26,8 +26,8 @@
 #endif
 
 // data type defs
-typedef volatile       uint8_t RoReg; /**< Read only 8-bit register (volatile const unsigned int) */
-typedef volatile       uint8_t RwReg; /**< Read-Write 8-bit register (volatile unsigned int) */
+typedef volatile uint8_t RoReg; /**< Read only 8-bit register (volatile const unsigned int) */
+typedef volatile uint8_t RwReg; /**< Read-Write 8-bit register (volatile unsigned int) */
 
 #define FASTLED_NO_PINMAP
 

From 68b9bfbb23d3a3343eb1446cc5bbebfe075a50f4 Mon Sep 17 00:00:00 2001
From: Paul <5690545+PaulZC@users.noreply.github.com>
Date: Fri, 10 Apr 2020 13:17:41 +0100
Subject: [PATCH 143/204] Better SPI functionality - but pauses between
 bytes...

---
 platforms/apollo3/clockless_apollo3.h | 55 ++++++++++++++++++--------
 platforms/apollo3/fastpin_apollo3.h   | 53 +++++++++++++++++--------
 platforms/apollo3/fastspi_apollo3.h   | 56 +++++++++++++++++++--------
 3 files changed, 114 insertions(+), 50 deletions(-)

diff --git a/platforms/apollo3/clockless_apollo3.h b/platforms/apollo3/clockless_apollo3.h
index b0b86c3154..4f9aaac7d9 100644
--- a/platforms/apollo3/clockless_apollo3.h
+++ b/platforms/apollo3/clockless_apollo3.h
@@ -5,6 +5,22 @@ FASTLED_NAMESPACE_BEGIN
 
 #if defined(FASTLED_APOLLO3)
 
+//*****************************************************************************
+//
+// Code taken from Ambiq Micro's am_hal_systick.c
+// and converted to inline static for speed
+//
+//! @brief Get the current count value in the SYSTICK.
+//!
+//! This function gets the current count value in the systick timer.
+//!
+//! @return Current count value.
+//
+//*****************************************************************************
+__attribute__ ((always_inline)) inline static uint32_t __am_hal_systick_count() {
+	return SysTick->VAL;
+}
+
 #define FASTLED_HAS_CLOCKLESS 1
 
 template <int DATA_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = RGB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 50>
@@ -27,25 +43,25 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 	  am_hal_clkgen_control(AM_HAL_CLKGEN_CONTROL_SYSCLK_MAX, 0);
 
 	  // Make sure interrupts are enabled
-	  am_hal_interrupt_master_enable();
+	  //am_hal_interrupt_master_enable();
 
 	  // Enable SysTick Interrupts in the NVIC
-	  NVIC_EnableIRQ(SysTick_IRQn);
+	  //NVIC_EnableIRQ(SysTick_IRQn);
 
 		// SysTick is 24-bit and counts down (not up)
 
 	  // Stop the SysTick (just in case it is already running).
 	  // This clears the ENABLE bit in the SysTick Control and Status Register (SYST_CSR).
-	  // In Ambiq naming convention: the control register is SysTick->CTRL
+	  // In Ambiq naming convention, the control register is SysTick->CTRL
 	  am_hal_systick_stop();
 
 	  // Call SysTick_Config
 	  // This is defined in core_cm4.h
 	  // It loads the specified LOAD value into the SysTick Reload Value Register (SYST_RVR)
-	  // In Ambiq naming convention: the reload register is SysTick->LOAD
+	  // In Ambiq naming convention, the reload register is SysTick->LOAD
 	  // It sets the SysTick interrupt priority
 	  // It clears the SysTick Current Value Register (SYST_CVR)
-	  // In Ambiq naming convention: the current value register is SysTick->VAL
+	  // In Ambiq naming convention, the current value register is SysTick->VAL
 	  // Finally it sets these bits in the SysTick Control and Status Register (SYST_CSR):
 	  // CLKSOURCE: SysTick uses the processor clock
 	  // TICKINT: When the count reaches zero, the SysTick exception (interrupt) is changed to pending
@@ -70,31 +86,35 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 	template<int BITS> __attribute__ ((always_inline)) inline static void writeBits(register uint32_t & next_mark, register uint8_t & b)  {
 		// SysTick counts down (not up) and is 24-bit
 		for(register uint32_t i = BITS-1; i > 0; i--) { // We could speed this up by using Bit Banding
-      while(am_hal_systick_count() > next_mark) { ; } // Wait for the remainder of this cycle to complete
+      while(__am_hal_systick_count() > next_mark) { ; } // Wait for the remainder of this cycle to complete
 			// Calculate next_mark (the time of the next DATA_PIN transition) by subtracting T1+T2+T3
 			// SysTick counts down (not up) and is 24-bit
-			next_mark = (am_hal_systick_count() - (T1+T2+T3)) & 0xFFFFFFUL;
+			next_mark = (__am_hal_systick_count() - (T1+T2+T3)) & 0xFFFFFFUL;
 			FastPin<DATA_PIN>::hi();
 			if(b&0x80) {
-        while((am_hal_systick_count() - next_mark) > (T3)) { ; }
+				// "1 code" = longer pulse width
+	      while((__am_hal_systick_count() - next_mark) > (T3+(3*(F_CPU/24000000)))) { ; }
         FastPin<DATA_PIN>::lo();
 			} else {
-        while((am_hal_systick_count() - next_mark) > (T2+T3)) { ; }
+				// "0 code" = shorter pulse width
+	      while((__am_hal_systick_count() - next_mark) > (T2+T3+(4*(F_CPU/24000000)))) { ; }
         FastPin<DATA_PIN>::lo();
 			}
 			b <<= 1;
 		}
 
-    while(am_hal_systick_count() > next_mark) { ; }// Wait for the remainder of this cycle to complete
+    while(__am_hal_systick_count() > next_mark) { ; }// Wait for the remainder of this cycle to complete
 		// Calculate next_mark (the time of the next DATA_PIN transition) by subtracting T1+T2+T3
 		// SysTick counts down (not up) and is 24-bit
-		next_mark = (am_hal_systick_count() - (T1+T2+T3)) & 0xFFFFFFUL;
+		next_mark = (__am_hal_systick_count() - (T1+T2+T3)) & 0xFFFFFFUL;
 		FastPin<DATA_PIN>::hi();
     if(b&0x80) {
-      while((am_hal_systick_count() - next_mark) > (T3)) { ; }
+			// "1 code" = longer pulse width
+      while((__am_hal_systick_count() - next_mark) > (T3+(2*(F_CPU/24000000)))) { ; }
       FastPin<DATA_PIN>::lo();
     } else {
-      while((am_hal_systick_count() - next_mark) > (T2+T3)) { ; }
+			// "0 code" = shorter pulse width
+      while((__am_hal_systick_count() - next_mark) > (T2+T3+(4*(F_CPU/24000000)))) { ; }
       FastPin<DATA_PIN>::lo();
     }
 	}
@@ -112,7 +132,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 		// Calculate next_mark (the time of the next DATA_PIN transition) by subtracting T1+T2+T3
 		// SysTick counts down (not up) and is 24-bit
 		// The subtraction could underflow (wrap round) so let's mask the result to 24 bits
-		register uint32_t next_mark = (am_hal_systick_count() - (T1+T2+T3)) & 0xFFFFFFUL;
+		register uint32_t next_mark = (__am_hal_systick_count() - (T1+T2+T3)) & 0xFFFFFFUL;
 
 		while(pixels.has(1)) { // Keep going for as long as we have pixels
 			pixels.stepDithering();
@@ -121,9 +141,9 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 			cli();
 
 			// Have we already missed the next_mark?
-			if(am_hal_systick_count() < next_mark) {
+			if(__am_hal_systick_count() < next_mark) {
 				// If we have exceeded next_mark by an excessive amount, then bail (return 0)
-				if((next_mark - am_hal_systick_count()) > ((WAIT_TIME-INTERRUPT_THRESHOLD)*CLKS_PER_US)) { sei(); return 0; }
+				if((next_mark - __am_hal_systick_count()) > ((WAIT_TIME-INTERRUPT_THRESHOLD)*CLKS_PER_US)) { sei(); return 0; }
 			}
 			#endif
 
@@ -144,7 +164,8 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 			#endif
 		}; // end of while(pixels.has(1))
 
-		// Unfortunately SysTick relies on interrupts to reload it once it reaches zero
+		// Unfortunately SysTick relies on an interrupt to reload it once it reaches zero
+		// and having interrupts disabled for most of the above means the interrupt doesn't get serviced.
 		// So we had better reload it here instead...
 		am_hal_systick_load(0xFFFFFFUL);
 
diff --git a/platforms/apollo3/fastpin_apollo3.h b/platforms/apollo3/fastpin_apollo3.h
index a30ef0c598..30f8916e4f 100644
--- a/platforms/apollo3/fastpin_apollo3.h
+++ b/platforms/apollo3/fastpin_apollo3.h
@@ -82,10 +82,12 @@ _FL_DEFPIN(15); _FL_DEFPIN(16); _FL_DEFPIN(17); _FL_DEFPIN(18); _FL_DEFPIN(19);
 _FL_DEFPIN(20); _FL_DEFPIN(21); _FL_DEFPIN(22); _FL_DEFPIN(23); _FL_DEFPIN(24);
 _FL_DEFPIN(25); _FL_DEFPIN(26); _FL_DEFPIN(27); _FL_DEFPIN(28); _FL_DEFPIN(29);
 _FL_DEFPIN(30); _FL_DEFPIN(31);
-//These two lines are commented out as dedicates SPI support using fastShiftOut produces
-//glitchy results that is slower than bit banging. TO DO: implement 'proper' SPI functionality
-//#define SPI_DATA MOSI
-//#define SPI_CLOCK SCK
+
+//The Artemis RedBoard has 4 SPI ports defined by default
+//TO DO: implement multiple SPI functionality
+//#define AP3_FASTLED_SPI_IOM 0
+#define SPI_DATA MOSI
+#define SPI_CLOCK SCK
 
 #define HAS_HARDWARE_PIN_SUPPORT 1
 
@@ -98,10 +100,9 @@ _FL_DEFPIN(10); _FL_DEFPIN(11); _FL_DEFPIN(12); _FL_DEFPIN(13); _FL_DEFPIN(14);
 _FL_DEFPIN(15); _FL_DEFPIN(16); _FL_DEFPIN(17); _FL_DEFPIN(18); _FL_DEFPIN(19);
 _FL_DEFPIN(20); _FL_DEFPIN(21); _FL_DEFPIN(22); _FL_DEFPIN(23);
 
-//These two lines are commented out as dedicates SPI support using fastShiftOut produces
-//glitchy results that is slower than bit banging. TO DO: implement 'proper' SPI functionality
-//#define SPI_DATA MOSI
-//#define SPI_CLOCK SCK
+//#define AP3_FASTLED_SPI_IOM 0
+#define SPI_DATA MOSI
+#define SPI_CLOCK SCK
 
 #define HAS_HARDWARE_PIN_SUPPORT 1
 
@@ -115,14 +116,33 @@ _FL_DEFPIN(15); _FL_DEFPIN(16); _FL_DEFPIN(17); _FL_DEFPIN(18); _FL_DEFPIN(19);
 _FL_DEFPIN(20); _FL_DEFPIN(21); _FL_DEFPIN(22); _FL_DEFPIN(23); _FL_DEFPIN(24);
 _FL_DEFPIN(25); _FL_DEFPIN(26); _FL_DEFPIN(27); _FL_DEFPIN(28);
 
-//These two lines are commented out as dedicates SPI support using fastShiftOut produces
-//glitchy results that is slower than bit banging. TO DO: implement 'proper' SPI functionality
-//#define SPI_DATA MOSI
-//#define SPI_CLOCK SCK
+//#define AP3_FASTLED_SPI_IOM 0
+#define SPI_DATA MOSI
+#define SPI_CLOCK SCK
+
+#define HAS_HARDWARE_PIN_SUPPORT 1
+
+#elif defined(ARDUINO_AM_AP3_SFE_BB_ARTEMIS_ATP)
+
+#define MAX_PIN 50 // AP3_VARIANT_NUM_PINS
+_FL_DEFPIN(0); _FL_DEFPIN(1); _FL_DEFPIN(2); _FL_DEFPIN(3); _FL_DEFPIN(4);
+_FL_DEFPIN(5); _FL_DEFPIN(6); _FL_DEFPIN(7); _FL_DEFPIN(8); _FL_DEFPIN(9);
+_FL_DEFPIN(10); _FL_DEFPIN(11); _FL_DEFPIN(12); _FL_DEFPIN(13); _FL_DEFPIN(14);
+_FL_DEFPIN(15); _FL_DEFPIN(16); _FL_DEFPIN(17); _FL_DEFPIN(18); _FL_DEFPIN(19);
+_FL_DEFPIN(20); _FL_DEFPIN(21); _FL_DEFPIN(22); _FL_DEFPIN(23); _FL_DEFPIN(24);
+_FL_DEFPIN(25); _FL_DEFPIN(26); _FL_DEFPIN(27); _FL_DEFPIN(28); _FL_DEFPIN(29);
+_FL_DEFPIN(31); _FL_DEFPIN(32); _FL_DEFPIN(33); _FL_DEFPIN(34);
+_FL_DEFPIN(35); _FL_DEFPIN(36); _FL_DEFPIN(37); _FL_DEFPIN(38); _FL_DEFPIN(39);
+_FL_DEFPIN(40); _FL_DEFPIN(41); _FL_DEFPIN(42); _FL_DEFPIN(43); _FL_DEFPIN(44);
+_FL_DEFPIN(45); _FL_DEFPIN(47); _FL_DEFPIN(48); _FL_DEFPIN(49);
+
+//#define AP3_FASTLED_SPI_IOM 0
+#define SPI_DATA MOSI
+#define SPI_CLOCK SCK
 
 #define HAS_HARDWARE_PIN_SUPPORT 1
 
-#elif defined(ARDUINO_AM_AP3_SFE_BB_ARTEMIS_ATP) || defined(ARDUINO_SFE_ARTEMIS)
+#elif defined(ARDUINO_SFE_ARTEMIS)
 
 #define MAX_PIN 50 // AP3_VARIANT_NUM_PINS
 // Pin definitions taken from (e.g.) C:\Users\...\AppData\Local\Arduino15\packages\SparkFun\hardware\apollo3\1.0.30\variants\redboard_artemis_atp\config\variant.cpp
@@ -138,10 +158,9 @@ _FL_DEFPIN(35); _FL_DEFPIN(36); _FL_DEFPIN(37); _FL_DEFPIN(38); _FL_DEFPIN(39);
 _FL_DEFPIN(40); _FL_DEFPIN(41); _FL_DEFPIN(42); _FL_DEFPIN(43); _FL_DEFPIN(44);
 _FL_DEFPIN(45); _FL_DEFPIN(47); _FL_DEFPIN(48); _FL_DEFPIN(49);
 
-//These two lines are commented out as dedicates SPI support using fastShiftOut produces
-//glitchy results that is slower than bit banging. TO DO: implement 'proper' SPI functionality
-//#define SPI_DATA MOSI
-//#define SPI_CLOCK SCK
+//The Artemis module has all six SPI ports defined by default _but_
+//does not assign pins for them.
+//TO DO: implement multiple SPI functionality with correct pin mapping
 
 #define HAS_HARDWARE_PIN_SUPPORT 1
 
diff --git a/platforms/apollo3/fastspi_apollo3.h b/platforms/apollo3/fastspi_apollo3.h
index a17a165d2e..0a9b061637 100644
--- a/platforms/apollo3/fastspi_apollo3.h
+++ b/platforms/apollo3/fastspi_apollo3.h
@@ -5,7 +5,11 @@
 
 FASTLED_NAMESPACE_BEGIN
 
-template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint32_t SPI_CLOCK_DIVIDER>
+#if defined(FASTLED_APOLLO3)
+
+#include <SPI.h>
+
+template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint32_t SPI_CLOCK_SPEED>
 class APOLLO3HardwareSPIOutput {
 	Selectable *m_pSelect;
 
@@ -19,21 +23,34 @@ class APOLLO3HardwareSPIOutput {
 	// initialize the SPI subssytem
 	void init() {
 		//enableBurstMode(); //Optional. Go to 96MHz. Roughly doubles the speed of shiftOut and fastShiftOut
-		enableFastShift(_DATA_PIN, _CLOCK_PIN);
+		pinMode(_DATA_PIN, OUTPUT);
+		pinMode(_CLOCK_PIN, OUTPUT);
+		am_hal_gpio_fastgpio_enable(_DATA_PIN);
+		am_hal_gpio_fastgpio_enable(_CLOCK_PIN);
+		SPI.begin();
 	}
 
 	// latch the CS select
-	void inline select() { /* TODO */ }
+	void inline select() __attribute__((always_inline)) {
+		// Begin the SPI transaction
+		// We want CPOL/CKP to be 0 and CPHA to be 0 so we need SPI Mode 0
+		SPI.beginTransaction(SPISettings((F_CPU/SPI_CLOCK_SPEED), MSBFIRST, AM_HAL_IOM_SPI_MODE_0));
+		if(m_pSelect != NULL) { m_pSelect->select(); }
+	}
 
 	// release the CS select
-	void inline release() { /* TODO */ }
+	void inline release() {
+		if(m_pSelect != NULL) { m_pSelect->release(); }
+		SPI.endTransaction();
+	}
 
 	// wait until all queued up data has been written
 	static void waitFully() { /* TODO */ }
 
 	// write a byte out via SPI (returns immediately on writing register)
 	static void writeByte(uint8_t b) {
-		fastShiftOut(_DATA_PIN, _CLOCK_PIN, MSBFIRST, b);
+		//fastShiftOut(_DATA_PIN, _CLOCK_PIN, MSBFIRST, b);
+		SPI.transferOut(&b,1);
 	}
 
 	// write a word out via SPI (returns immediately on writing register)
@@ -49,22 +66,22 @@ class APOLLO3HardwareSPIOutput {
 
 	// A full cycle of writing a value for len bytes, including select, release, and waiting
 	void writeBytesValue(uint8_t value, int len) {
-		//select();
+		select();
 		writeBytesValueRaw(value, len);
-		//release();
+		release();
 	}
 
 	// A full cycle of writing a value for len bytes, including select, release, and waiting
 	template <class D> void writeBytes(register uint8_t *data, int len) {
 		uint8_t *end = data + len;
-		//select();
+		select();
 		// could be optimized to write 16bit words out instead of 8bit bytes
 		while(data != end) {
 			writeByte(D::adjust(*data++));
 		}
 		D::postBlock(len);
-		//waitFully();
-		//release();
+		waitFully();
+		release();
 	}
 
 	// A full cycle of writing a value for len bytes, including select, release, and waiting
@@ -74,19 +91,24 @@ class APOLLO3HardwareSPIOutput {
 	template <uint8_t BIT> inline static void writeBit(uint8_t b) {
 		//waitFully();
 		if(b & (1 << BIT)) {
-			FastPin<_DATA_PIN>::hi();
+			//digitalWrite(_DATA_PIN, HIGH); //FastPin<_DATA_PIN>::hi();
+			am_hal_gpio_fastgpio_set(_DATA_PIN);
 		} else {
-			FastPin<_DATA_PIN>::lo();
+			//digitalWrite(_DATA_PIN, LOW); //FastPin<_DATA_PIN>::lo();
+			am_hal_gpio_fastgpio_clr(_DATA_PIN);
 		}
 
-		FastPin<_CLOCK_PIN>::hi();
-		FastPin<_CLOCK_PIN>::lo();
+		//digitalWrite(_CLOCK_PIN, HIGH);
+		//digitalWrite(_CLOCK_PIN, LOW);
+		am_hal_gpio_fastgpio_set(_CLOCK_PIN);
+		__NOP();
+		am_hal_gpio_fastgpio_clr(_CLOCK_PIN);
 	}
 
 	// write a block of uint8_ts out in groups of three.  len is the total number of uint8_ts to write out.  The template
 	// parameters indicate how many uint8_ts to skip at the beginning and/or end of each grouping
 	template <uint8_t FLAGS, class D, EOrder RGB_ORDER> void writePixels(PixelController<RGB_ORDER> pixels) {
-		//select();
+		select();
 
 		int len = pixels.mLen;
 
@@ -108,11 +130,13 @@ class APOLLO3HardwareSPIOutput {
 		}
 		D::postBlock(len);
 		//waitFully();
-		//release();
+		release();
 	}
 
 };
 
+#endif
+
 FASTLED_NAMESPACE_END
 
 #endif

From 83380112db861b996f6ae76494526c7ffdbc3d48 Mon Sep 17 00:00:00 2001
From: Paul <5690545+PaulZC@users.noreply.github.com>
Date: Fri, 10 Apr 2020 18:27:47 +0100
Subject: [PATCH 144/204] Reverted to fastgpio for the SPI functions

---
 fastspi.h                             | 12 +++---
 platforms/apollo3/clockless_apollo3.h |  2 +-
 platforms/apollo3/fastpin_apollo3.h   | 42 +--------------------
 platforms/apollo3/fastspi_apollo3.h   | 54 ++++++++++++---------------
 4 files changed, 31 insertions(+), 79 deletions(-)

diff --git a/fastspi.h b/fastspi.h
index 2bf5d6d0ff..2245ffe924 100644
--- a/fastspi.h
+++ b/fastspi.h
@@ -49,14 +49,14 @@ template<uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint32_t _SPI_CLOCK_DIVIDER>
 class SPIOutput : public NRF52SPIOutput<_DATA_PIN, _CLOCK_PIN, _SPI_CLOCK_DIVIDER> {};
 #endif
 
-#if defined(SPI_DATA) && defined(SPI_CLOCK)
-
-#if defined(FASTLED_APOLLO3)
+#if defined(FASTLED_APOLLO3) && defined(FASTLED_ALL_PINS_HARDWARE_SPI)
+template<uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint32_t _SPI_CLOCK_DIVIDER>
+class SPIOutput : public APOLLO3HardwareSPIOutput<_DATA_PIN, _CLOCK_PIN, _SPI_CLOCK_DIVIDER> {};
+#endif
 
-template<uint32_t SPI_SPEED>
-class SPIOutput<SPI_DATA, SPI_CLOCK, SPI_SPEED> : public APOLLO3HardwareSPIOutput<SPI_DATA, SPI_CLOCK, SPI_SPEED> {};
+#if defined(SPI_DATA) && defined(SPI_CLOCK)
 
-#elif defined(FASTLED_TEENSY3) && defined(ARM_HARDWARE_SPI)
+#if defined(FASTLED_TEENSY3) && defined(ARM_HARDWARE_SPI)
 
 template<uint32_t SPI_SPEED>
 class SPIOutput<SPI_DATA, SPI_CLOCK, SPI_SPEED> : public ARMHardwareSPIOutput<SPI_DATA, SPI_CLOCK, SPI_SPEED, 0x4002C000> {};
diff --git a/platforms/apollo3/clockless_apollo3.h b/platforms/apollo3/clockless_apollo3.h
index 4f9aaac7d9..34dd06c78f 100644
--- a/platforms/apollo3/clockless_apollo3.h
+++ b/platforms/apollo3/clockless_apollo3.h
@@ -70,7 +70,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 	  SysTick_Config(0xFFFFFFUL); // The LOAD value needs to be 24-bit
 	}
 
-	virtual uint16_t getMaxRefreshRate() const { return 400; } // This can probably be increased?
+	virtual uint16_t getMaxRefreshRate() const { return 400; }
 
 protected:
 
diff --git a/platforms/apollo3/fastpin_apollo3.h b/platforms/apollo3/fastpin_apollo3.h
index 30f8916e4f..ddf1b6f9dc 100644
--- a/platforms/apollo3/fastpin_apollo3.h
+++ b/platforms/apollo3/fastpin_apollo3.h
@@ -83,12 +83,6 @@ _FL_DEFPIN(20); _FL_DEFPIN(21); _FL_DEFPIN(22); _FL_DEFPIN(23); _FL_DEFPIN(24);
 _FL_DEFPIN(25); _FL_DEFPIN(26); _FL_DEFPIN(27); _FL_DEFPIN(28); _FL_DEFPIN(29);
 _FL_DEFPIN(30); _FL_DEFPIN(31);
 
-//The Artemis RedBoard has 4 SPI ports defined by default
-//TO DO: implement multiple SPI functionality
-//#define AP3_FASTLED_SPI_IOM 0
-#define SPI_DATA MOSI
-#define SPI_CLOCK SCK
-
 #define HAS_HARDWARE_PIN_SUPPORT 1
 
 #elif defined(ARDUINO_AM_AP3_SFE_BB_ARTEMIS_NANO)
@@ -100,10 +94,6 @@ _FL_DEFPIN(10); _FL_DEFPIN(11); _FL_DEFPIN(12); _FL_DEFPIN(13); _FL_DEFPIN(14);
 _FL_DEFPIN(15); _FL_DEFPIN(16); _FL_DEFPIN(17); _FL_DEFPIN(18); _FL_DEFPIN(19);
 _FL_DEFPIN(20); _FL_DEFPIN(21); _FL_DEFPIN(22); _FL_DEFPIN(23);
 
-//#define AP3_FASTLED_SPI_IOM 0
-#define SPI_DATA MOSI
-#define SPI_CLOCK SCK
-
 #define HAS_HARDWARE_PIN_SUPPORT 1
 
 #elif defined(ARDUINO_AM_AP3_SFE_THING_PLUS)
@@ -116,37 +106,11 @@ _FL_DEFPIN(15); _FL_DEFPIN(16); _FL_DEFPIN(17); _FL_DEFPIN(18); _FL_DEFPIN(19);
 _FL_DEFPIN(20); _FL_DEFPIN(21); _FL_DEFPIN(22); _FL_DEFPIN(23); _FL_DEFPIN(24);
 _FL_DEFPIN(25); _FL_DEFPIN(26); _FL_DEFPIN(27); _FL_DEFPIN(28);
 
-//#define AP3_FASTLED_SPI_IOM 0
-#define SPI_DATA MOSI
-#define SPI_CLOCK SCK
-
-#define HAS_HARDWARE_PIN_SUPPORT 1
-
-#elif defined(ARDUINO_AM_AP3_SFE_BB_ARTEMIS_ATP)
-
-#define MAX_PIN 50 // AP3_VARIANT_NUM_PINS
-_FL_DEFPIN(0); _FL_DEFPIN(1); _FL_DEFPIN(2); _FL_DEFPIN(3); _FL_DEFPIN(4);
-_FL_DEFPIN(5); _FL_DEFPIN(6); _FL_DEFPIN(7); _FL_DEFPIN(8); _FL_DEFPIN(9);
-_FL_DEFPIN(10); _FL_DEFPIN(11); _FL_DEFPIN(12); _FL_DEFPIN(13); _FL_DEFPIN(14);
-_FL_DEFPIN(15); _FL_DEFPIN(16); _FL_DEFPIN(17); _FL_DEFPIN(18); _FL_DEFPIN(19);
-_FL_DEFPIN(20); _FL_DEFPIN(21); _FL_DEFPIN(22); _FL_DEFPIN(23); _FL_DEFPIN(24);
-_FL_DEFPIN(25); _FL_DEFPIN(26); _FL_DEFPIN(27); _FL_DEFPIN(28); _FL_DEFPIN(29);
-_FL_DEFPIN(31); _FL_DEFPIN(32); _FL_DEFPIN(33); _FL_DEFPIN(34);
-_FL_DEFPIN(35); _FL_DEFPIN(36); _FL_DEFPIN(37); _FL_DEFPIN(38); _FL_DEFPIN(39);
-_FL_DEFPIN(40); _FL_DEFPIN(41); _FL_DEFPIN(42); _FL_DEFPIN(43); _FL_DEFPIN(44);
-_FL_DEFPIN(45); _FL_DEFPIN(47); _FL_DEFPIN(48); _FL_DEFPIN(49);
-
-//#define AP3_FASTLED_SPI_IOM 0
-#define SPI_DATA MOSI
-#define SPI_CLOCK SCK
-
 #define HAS_HARDWARE_PIN_SUPPORT 1
 
-#elif defined(ARDUINO_SFE_ARTEMIS)
+#elif defined(ARDUINO_AM_AP3_SFE_BB_ARTEMIS_ATP) || defined(ARDUINO_SFE_ARTEMIS)
 
 #define MAX_PIN 50 // AP3_VARIANT_NUM_PINS
-// Pin definitions taken from (e.g.) C:\Users\...\AppData\Local\Arduino15\packages\SparkFun\hardware\apollo3\1.0.30\variants\redboard_artemis_atp\config\variant.cpp
-// ap3_variant_pinmap maps pins to pads. FastLED expects pin numbers so we'll use those.
 _FL_DEFPIN(0); _FL_DEFPIN(1); _FL_DEFPIN(2); _FL_DEFPIN(3); _FL_DEFPIN(4);
 _FL_DEFPIN(5); _FL_DEFPIN(6); _FL_DEFPIN(7); _FL_DEFPIN(8); _FL_DEFPIN(9);
 _FL_DEFPIN(10); _FL_DEFPIN(11); _FL_DEFPIN(12); _FL_DEFPIN(13); _FL_DEFPIN(14);
@@ -158,10 +122,6 @@ _FL_DEFPIN(35); _FL_DEFPIN(36); _FL_DEFPIN(37); _FL_DEFPIN(38); _FL_DEFPIN(39);
 _FL_DEFPIN(40); _FL_DEFPIN(41); _FL_DEFPIN(42); _FL_DEFPIN(43); _FL_DEFPIN(44);
 _FL_DEFPIN(45); _FL_DEFPIN(47); _FL_DEFPIN(48); _FL_DEFPIN(49);
 
-//The Artemis module has all six SPI ports defined by default _but_
-//does not assign pins for them.
-//TO DO: implement multiple SPI functionality with correct pin mapping
-
 #define HAS_HARDWARE_PIN_SUPPORT 1
 
 #else
diff --git a/platforms/apollo3/fastspi_apollo3.h b/platforms/apollo3/fastspi_apollo3.h
index 0a9b061637..9f7af38bec 100644
--- a/platforms/apollo3/fastspi_apollo3.h
+++ b/platforms/apollo3/fastspi_apollo3.h
@@ -1,15 +1,19 @@
 #ifndef __INC_FASTSPI_APOLLO3_H
 #define __INC_FASTSPI_APOLLO3_H
 
+// This is the implementation of fastspi for the Apollo3.
+// It uses fastgpio instead of actual SPI, which means you can use it on all pins.
+// It can run slightly faster than the default fastpin (bit banging).
+
 #include "FastLED.h"
 
 FASTLED_NAMESPACE_BEGIN
 
 #if defined(FASTLED_APOLLO3)
 
-#include <SPI.h>
+#define FASTLED_ALL_PINS_HARDWARE_SPI
 
-template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint32_t SPI_CLOCK_SPEED>
+template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint32_t _SPI_CLOCK_DIVIDER>
 class APOLLO3HardwareSPIOutput {
 	Selectable *m_pSelect;
 
@@ -20,37 +24,33 @@ class APOLLO3HardwareSPIOutput {
 	// set the object representing the selectable
 	void setSelect(Selectable *pSelect) { m_pSelect = pSelect; }
 
-	// initialize the SPI subssytem
+	// initialize the pins for fastgpio
 	void init() {
-		//enableBurstMode(); //Optional. Go to 96MHz. Roughly doubles the speed of shiftOut and fastShiftOut
 		pinMode(_DATA_PIN, OUTPUT);
 		pinMode(_CLOCK_PIN, OUTPUT);
 		am_hal_gpio_fastgpio_enable(_DATA_PIN);
 		am_hal_gpio_fastgpio_enable(_CLOCK_PIN);
-		SPI.begin();
 	}
 
 	// latch the CS select
-	void inline select() __attribute__((always_inline)) {
-		// Begin the SPI transaction
-		// We want CPOL/CKP to be 0 and CPHA to be 0 so we need SPI Mode 0
-		SPI.beginTransaction(SPISettings((F_CPU/SPI_CLOCK_SPEED), MSBFIRST, AM_HAL_IOM_SPI_MODE_0));
-		if(m_pSelect != NULL) { m_pSelect->select(); }
-	}
+	void inline select() { /* TODO */ }
 
 	// release the CS select
-	void inline release() {
-		if(m_pSelect != NULL) { m_pSelect->release(); }
-		SPI.endTransaction();
-	}
+	void inline release() { /* TODO */ }
 
 	// wait until all queued up data has been written
 	static void waitFully() { /* TODO */ }
 
-	// write a byte out via SPI (returns immediately on writing register)
+	// write a byte as bits
 	static void writeByte(uint8_t b) {
-		//fastShiftOut(_DATA_PIN, _CLOCK_PIN, MSBFIRST, b);
-		SPI.transferOut(&b,1);
+		writeBit<7>(b);
+		writeBit<6>(b);
+		writeBit<5>(b);
+		writeBit<4>(b);
+		writeBit<3>(b);
+		writeBit<2>(b);
+		writeBit<1>(b);
+		writeBit<0>(b);
 	}
 
 	// write a word out via SPI (returns immediately on writing register)
@@ -91,18 +91,15 @@ class APOLLO3HardwareSPIOutput {
 	template <uint8_t BIT> inline static void writeBit(uint8_t b) {
 		//waitFully();
 		if(b & (1 << BIT)) {
-			//digitalWrite(_DATA_PIN, HIGH); //FastPin<_DATA_PIN>::hi();
 			am_hal_gpio_fastgpio_set(_DATA_PIN);
 		} else {
-			//digitalWrite(_DATA_PIN, LOW); //FastPin<_DATA_PIN>::lo();
 			am_hal_gpio_fastgpio_clr(_DATA_PIN);
 		}
 
-		//digitalWrite(_CLOCK_PIN, HIGH);
-		//digitalWrite(_CLOCK_PIN, LOW);
 		am_hal_gpio_fastgpio_set(_CLOCK_PIN);
-		__NOP();
+		for (int16_t d = (_SPI_CLOCK_DIVIDER >> 1); d > 0; d--) { __NOP(); }
 		am_hal_gpio_fastgpio_clr(_CLOCK_PIN);
+		for (int16_t d = ((_SPI_CLOCK_DIVIDER >> 1) - 1); d > 0; d--) { __NOP(); }
 	}
 
 	// write a block of uint8_ts out in groups of three.  len is the total number of uint8_ts to write out.  The template
@@ -112,18 +109,13 @@ class APOLLO3HardwareSPIOutput {
 
 		int len = pixels.mLen;
 
-		//select();
 		while(pixels.has(1)) {
 			if(FLAGS & FLAG_START_BIT) {
 				writeBit<0>(1);
-				writeByte(D::adjust(pixels.loadAndScale0()));
-				writeByte(D::adjust(pixels.loadAndScale1()));
-				writeByte(D::adjust(pixels.loadAndScale2()));
-			} else {
-				writeByte(D::adjust(pixels.loadAndScale0()));
-				writeByte(D::adjust(pixels.loadAndScale1()));
-				writeByte(D::adjust(pixels.loadAndScale2()));
 			}
+			writeByte(D::adjust(pixels.loadAndScale0()));
+			writeByte(D::adjust(pixels.loadAndScale1()));
+			writeByte(D::adjust(pixels.loadAndScale2()));
 
 			pixels.advanceData();
 			pixels.stepDithering();

From b48e7030993b8deb08b3d52db0e25da6bcfcf621 Mon Sep 17 00:00:00 2001
From: Paul <5690545+PaulZC@users.noreply.github.com>
Date: Sat, 11 Apr 2020 09:11:30 +0100
Subject: [PATCH 145/204] Reverted accidental whitespace changes

---
 platforms.cpp                         | 4 +++-
 platforms/apollo3/clockless_apollo3.h | 4 +++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/platforms.cpp b/platforms.cpp
index 511cdf7913..47a0088314 100644
--- a/platforms.cpp
+++ b/platforms.cpp
@@ -15,7 +15,7 @@
     #ifdef __cplusplus
         extern "C" {
     #endif
-            // NOTE: Update platforms.cpp in root of FastLED library if this changes
+            // NOTE: Update platforms.cpp in root of FastLED library if this changes        
             #if defined(FASTLED_NRF52_ENABLE_PWM_INSTANCE0)
                 void PWM0_IRQHandler(void) { isrCount++; PWM_Arbiter<0>::isr_handler(); }
             #endif
@@ -34,5 +34,7 @@
 
 #endif // defined(NRF52_SERIES)
 
+
+
 // FASTLED_NAMESPACE_BEGIN
 // FASTLED_NAMESPACE_END
diff --git a/platforms/apollo3/clockless_apollo3.h b/platforms/apollo3/clockless_apollo3.h
index 34dd06c78f..1a5299b893 100644
--- a/platforms/apollo3/clockless_apollo3.h
+++ b/platforms/apollo3/clockless_apollo3.h
@@ -5,6 +5,9 @@ FASTLED_NAMESPACE_BEGIN
 
 #if defined(FASTLED_APOLLO3)
 
+// Clockless support for the SparkFun Artemis / Ambiq Micro Apollo3 Blue
+// Uses SysTick to govern the pulse timing
+
 //*****************************************************************************
 //
 // Code taken from Ambiq Micro's am_hal_systick.c
@@ -33,7 +36,6 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 public:
 	virtual void init() {
 		// Initialize everything
-		// This is _very_ SparkFun Artemis / Ambiq Micro Apollo3 Blue specific!
 
 		// Configure DATA_PIN for FastGPIO (settings are in fastpin_apollo3.h)
 		FastPin<DATA_PIN>::setOutput();

From 1c40a85e9f974808e721c51a8193846a70cfd030 Mon Sep 17 00:00:00 2001
From: Paul <5690545+PaulZC@users.noreply.github.com>
Date: Sun, 12 Apr 2020 12:50:40 +0100
Subject: [PATCH 146/204] Fixed the _spi_clock_divider

---
 platforms/apollo3/fastspi_apollo3.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/platforms/apollo3/fastspi_apollo3.h b/platforms/apollo3/fastspi_apollo3.h
index 9f7af38bec..1259ee8111 100644
--- a/platforms/apollo3/fastspi_apollo3.h
+++ b/platforms/apollo3/fastspi_apollo3.h
@@ -97,9 +97,9 @@ class APOLLO3HardwareSPIOutput {
 		}
 
 		am_hal_gpio_fastgpio_set(_CLOCK_PIN);
-		for (int16_t d = (_SPI_CLOCK_DIVIDER >> 1); d > 0; d--) { __NOP(); }
+		for (uint32_t d = (_SPI_CLOCK_DIVIDER >> 1); d > 0; d--) { __NOP(); }
 		am_hal_gpio_fastgpio_clr(_CLOCK_PIN);
-		for (int16_t d = ((_SPI_CLOCK_DIVIDER >> 1) - 1); d > 0; d--) { __NOP(); }
+		for (uint32_t d = (_SPI_CLOCK_DIVIDER >> 1); d > 0; d--) { __NOP(); }
 	}
 
 	// write a block of uint8_ts out in groups of three.  len is the total number of uint8_ts to write out.  The template

From b07e106a8d44a418bddc076ac79e31cad564f31a Mon Sep 17 00:00:00 2001
From: Paul <5690545+PaulZC@users.noreply.github.com>
Date: Mon, 13 Apr 2020 10:34:55 +0100
Subject: [PATCH 147/204] Corrected the pin,pad allocations

---
 platforms/apollo3/clockless_apollo3.h |   4 +-
 platforms/apollo3/fastpin_apollo3.h   | 128 +++++++++++++-------------
 platforms/apollo3/fastspi_apollo3.h   |  16 ++--
 3 files changed, 75 insertions(+), 73 deletions(-)

diff --git a/platforms/apollo3/clockless_apollo3.h b/platforms/apollo3/clockless_apollo3.h
index 1a5299b893..d881eee4ae 100644
--- a/platforms/apollo3/clockless_apollo3.h
+++ b/platforms/apollo3/clockless_apollo3.h
@@ -26,7 +26,7 @@ __attribute__ ((always_inline)) inline static uint32_t __am_hal_systick_count()
 
 #define FASTLED_HAS_CLOCKLESS 1
 
-template <int DATA_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = RGB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 50>
+template <uint8_t DATA_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = RGB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 50>
 class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 	typedef typename FastPin<DATA_PIN>::port_ptr_t data_ptr_t;
 	typedef typename FastPin<DATA_PIN>::port_t data_t;
@@ -44,7 +44,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 		// Make sure the system clock is running at the full 48MHz
 	  am_hal_clkgen_control(AM_HAL_CLKGEN_CONTROL_SYSCLK_MAX, 0);
 
-	  // Make sure interrupts are enabled
+		// Make sure interrupts are enabled
 	  //am_hal_interrupt_master_enable();
 
 	  // Enable SysTick Interrupts in the NVIC
diff --git a/platforms/apollo3/fastpin_apollo3.h b/platforms/apollo3/fastpin_apollo3.h
index ddf1b6f9dc..eb9e453744 100644
--- a/platforms/apollo3/fastpin_apollo3.h
+++ b/platforms/apollo3/fastpin_apollo3.h
@@ -10,22 +10,22 @@ FASTLED_NAMESPACE_BEGIN
 
 #else
 
-template<uint8_t PIN> class _APOLLO3PIN {
+template<uint8_t PIN, uint8_t PAD> class _APOLLO3PIN {
 
 public:
   typedef volatile uint32_t * port_ptr_t;
   typedef uint32_t port_t;
 
-  inline static void setOutput() { pinMode(PIN, OUTPUT); am_hal_gpio_fastgpio_enable(PIN); }
-  inline static void setInput() { am_hal_gpio_fastgpio_disable(PIN); pinMode(PIN, INPUT); }
+  inline static void setOutput() { pinMode(PIN, OUTPUT); am_hal_gpio_fastgpio_enable(PAD); }
+  inline static void setInput() { am_hal_gpio_fastgpio_disable(PAD); pinMode(PIN, INPUT); }
 
-  inline static void hi() __attribute__ ((always_inline)) { am_hal_gpio_fastgpio_set(PIN); }
-  inline static void lo() __attribute__ ((always_inline)) { am_hal_gpio_fastgpio_clr(PIN); }
-  inline static void set(register port_t val) __attribute__ ((always_inline)) { if(val) { am_hal_gpio_fastgpio_set(PIN); } else { am_hal_gpio_fastgpio_clr(PIN); } }
+  inline static void hi() __attribute__ ((always_inline)) { am_hal_gpio_fastgpio_set(PAD); }
+  inline static void lo() __attribute__ ((always_inline)) { am_hal_gpio_fastgpio_clr(PAD); }
+  inline static void set(register port_t val) __attribute__ ((always_inline)) { if(val) { am_hal_gpio_fastgpio_set(PAD); } else { am_hal_gpio_fastgpio_clr(PAD); } }
 
   inline static void strobe() __attribute__ ((always_inline)) { toggle(); toggle(); }
 
-  inline static void toggle() __attribute__ ((always_inline)) { if( am_hal_gpio_fastgpio_read(PIN)) { lo(); } else { hi(); } }
+  inline static void toggle() __attribute__ ((always_inline)) { if( am_hal_gpio_fastgpio_read(PAD)) { lo(); } else { hi(); } }
 
   inline static void hi(register port_ptr_t port) __attribute__ ((always_inline)) { hi(); }
   inline static void lo(register port_ptr_t port) __attribute__ ((always_inline)) { lo(); }
@@ -37,90 +37,92 @@ template<uint8_t PIN> class _APOLLO3PIN {
   inline static port_t mask() __attribute__ ((always_inline)) { return 0; }
 };
 
-#define _FL_DEFPIN(PIN) template<> class FastPin<PIN> : public _APOLLO3PIN<PIN> {};
+// For the Apollo3 we need to define both the pin number and the associated pad
+// to avoid having to use ap3_gpio_pin2pad for fastgpio (which would slow things down)
+#define _FL_DEFPIN(PIN, PAD) template<> class FastPin<PIN> : public _APOLLO3PIN<PIN, PAD> {};
 
-// Actual pin definitions
+// Actual (pin, pad) definitions
 #if defined(ARDUINO_SFE_EDGE)
 
-#define MAX_PIN 50
-_FL_DEFPIN(0); _FL_DEFPIN(1); _FL_DEFPIN(3); _FL_DEFPIN(4);
-_FL_DEFPIN(5); _FL_DEFPIN(6); _FL_DEFPIN(7); _FL_DEFPIN(8); _FL_DEFPIN(9);
-_FL_DEFPIN(10); _FL_DEFPIN(11); _FL_DEFPIN(12); _FL_DEFPIN(13); _FL_DEFPIN(14);
-_FL_DEFPIN(15); _FL_DEFPIN(17);
-_FL_DEFPIN(20); _FL_DEFPIN(21); _FL_DEFPIN(22); _FL_DEFPIN(23); _FL_DEFPIN(24);
-_FL_DEFPIN(25); _FL_DEFPIN(26); _FL_DEFPIN(27); _FL_DEFPIN(28); _FL_DEFPIN(29);
-_FL_DEFPIN(33);
-_FL_DEFPIN(36); _FL_DEFPIN(37); _FL_DEFPIN(38); _FL_DEFPIN(39);
-_FL_DEFPIN(40); _FL_DEFPIN(42); _FL_DEFPIN(43); _FL_DEFPIN(44);
-_FL_DEFPIN(46); _FL_DEFPIN(47); _FL_DEFPIN(48); _FL_DEFPIN(49);
+#define MAX_PIN 49
+_FL_DEFPIN(0, 0); _FL_DEFPIN(1, 1); _FL_DEFPIN(3, 3); _FL_DEFPIN(4, 4);
+_FL_DEFPIN(5, 5); _FL_DEFPIN(6, 6); _FL_DEFPIN(7, 7); _FL_DEFPIN(8, 8); _FL_DEFPIN(9, 9);
+_FL_DEFPIN(10, 10); _FL_DEFPIN(11, 11); _FL_DEFPIN(12, 12); _FL_DEFPIN(13, 13); _FL_DEFPIN(14, 14);
+_FL_DEFPIN(15, 15); _FL_DEFPIN(17, 17);
+_FL_DEFPIN(20, 20); _FL_DEFPIN(21, 21); _FL_DEFPIN(22, 22); _FL_DEFPIN(23, 23); _FL_DEFPIN(24, 24);
+_FL_DEFPIN(25, 25); _FL_DEFPIN(26, 26); _FL_DEFPIN(27, 27); _FL_DEFPIN(28, 28); _FL_DEFPIN(29, 29);
+_FL_DEFPIN(33, 33);
+_FL_DEFPIN(36, 36); _FL_DEFPIN(37, 37); _FL_DEFPIN(38, 38); _FL_DEFPIN(39, 39);
+_FL_DEFPIN(40, 40); _FL_DEFPIN(42, 42); _FL_DEFPIN(43, 43); _FL_DEFPIN(44, 44);
+_FL_DEFPIN(46, 46); _FL_DEFPIN(47, 47); _FL_DEFPIN(48, 48); _FL_DEFPIN(49, 49);
 
 #define HAS_HARDWARE_PIN_SUPPORT 1
 
 #elif defined(ARDUINO_SFE_EDGE2)
 
-#define MAX_PIN 50
-_FL_DEFPIN(0);
-_FL_DEFPIN(5); _FL_DEFPIN(6); _FL_DEFPIN(7); _FL_DEFPIN(8); _FL_DEFPIN(9);
-_FL_DEFPIN(11); _FL_DEFPIN(12); _FL_DEFPIN(13); _FL_DEFPIN(14);
-_FL_DEFPIN(15); _FL_DEFPIN(16); _FL_DEFPIN(17); _FL_DEFPIN(18); _FL_DEFPIN(19);
-_FL_DEFPIN(20); _FL_DEFPIN(21); _FL_DEFPIN(23);
-_FL_DEFPIN(25); _FL_DEFPIN(26); _FL_DEFPIN(27); _FL_DEFPIN(28); _FL_DEFPIN(29);
-_FL_DEFPIN(31); _FL_DEFPIN(32); _FL_DEFPIN(33); _FL_DEFPIN(34);
-_FL_DEFPIN(35); _FL_DEFPIN(37); _FL_DEFPIN(39);
-_FL_DEFPIN(40); _FL_DEFPIN(41); _FL_DEFPIN(42); _FL_DEFPIN(43); _FL_DEFPIN(44);
-_FL_DEFPIN(45); _FL_DEFPIN(48); _FL_DEFPIN(49);
+#define MAX_PIN 49
+_FL_DEFPIN(0, 0);
+_FL_DEFPIN(5, 5); _FL_DEFPIN(6, 6); _FL_DEFPIN(7, 7); _FL_DEFPIN(8, 8); _FL_DEFPIN(9, 9);
+_FL_DEFPIN(11, 11); _FL_DEFPIN(12, 12); _FL_DEFPIN(13, 13); _FL_DEFPIN(14, 14);
+_FL_DEFPIN(15, 15); _FL_DEFPIN(16, 16); _FL_DEFPIN(17, 17); _FL_DEFPIN(18, 18); _FL_DEFPIN(19, 19);
+_FL_DEFPIN(20, 20); _FL_DEFPIN(21, 21); _FL_DEFPIN(23, 23);
+_FL_DEFPIN(25, 25); _FL_DEFPIN(26, 26); _FL_DEFPIN(27, 27); _FL_DEFPIN(28, 28); _FL_DEFPIN(29, 29);
+_FL_DEFPIN(31, 31); _FL_DEFPIN(32, 32); _FL_DEFPIN(33, 33); _FL_DEFPIN(34, 34);
+_FL_DEFPIN(35, 35); _FL_DEFPIN(37, 37); _FL_DEFPIN(39, 39);
+_FL_DEFPIN(40, 40); _FL_DEFPIN(41, 41); _FL_DEFPIN(42, 42); _FL_DEFPIN(43, 43); _FL_DEFPIN(44, 44);
+_FL_DEFPIN(45, 45); _FL_DEFPIN(48, 48); _FL_DEFPIN(49, 49);
 
 #define HAS_HARDWARE_PIN_SUPPORT 1
 
 #elif defined(ARDUINO_AM_AP3_SFE_BB_ARTEMIS)
 
-#define MAX_PIN 32
-_FL_DEFPIN(0); _FL_DEFPIN(1); _FL_DEFPIN(2); _FL_DEFPIN(3); _FL_DEFPIN(4);
-_FL_DEFPIN(5); _FL_DEFPIN(6); _FL_DEFPIN(7); _FL_DEFPIN(8); _FL_DEFPIN(9);
-_FL_DEFPIN(10); _FL_DEFPIN(11); _FL_DEFPIN(12); _FL_DEFPIN(13); _FL_DEFPIN(14);
-_FL_DEFPIN(15); _FL_DEFPIN(16); _FL_DEFPIN(17); _FL_DEFPIN(18); _FL_DEFPIN(19);
-_FL_DEFPIN(20); _FL_DEFPIN(21); _FL_DEFPIN(22); _FL_DEFPIN(23); _FL_DEFPIN(24);
-_FL_DEFPIN(25); _FL_DEFPIN(26); _FL_DEFPIN(27); _FL_DEFPIN(28); _FL_DEFPIN(29);
-_FL_DEFPIN(30); _FL_DEFPIN(31);
+#define MAX_PIN 31
+_FL_DEFPIN(0, 25); _FL_DEFPIN(1, 24); _FL_DEFPIN(2, 35); _FL_DEFPIN(3, 4); _FL_DEFPIN(4, 22);
+_FL_DEFPIN(5, 23); _FL_DEFPIN(6, 27); _FL_DEFPIN(7, 28); _FL_DEFPIN(8, 32); _FL_DEFPIN(9, 12);
+_FL_DEFPIN(10, 13); _FL_DEFPIN(11, 7); _FL_DEFPIN(12, 6); _FL_DEFPIN(13, 5); _FL_DEFPIN(14, 40);
+_FL_DEFPIN(15, 39); _FL_DEFPIN(16, 29); _FL_DEFPIN(17, 11); _FL_DEFPIN(18, 34); _FL_DEFPIN(19, 33);
+_FL_DEFPIN(20, 16); _FL_DEFPIN(21, 31); _FL_DEFPIN(22, 48); _FL_DEFPIN(23, 49); _FL_DEFPIN(24, 8);
+_FL_DEFPIN(25, 9); _FL_DEFPIN(26, 10); _FL_DEFPIN(27, 38); _FL_DEFPIN(28, 42); _FL_DEFPIN(29, 43);
+_FL_DEFPIN(30, 36); _FL_DEFPIN(31, 37);
 
 #define HAS_HARDWARE_PIN_SUPPORT 1
 
 #elif defined(ARDUINO_AM_AP3_SFE_BB_ARTEMIS_NANO)
 
-#define MAX_PIN 24
-_FL_DEFPIN(0); _FL_DEFPIN(1); _FL_DEFPIN(2); _FL_DEFPIN(3); _FL_DEFPIN(4);
-_FL_DEFPIN(5); _FL_DEFPIN(6); _FL_DEFPIN(7); _FL_DEFPIN(8); _FL_DEFPIN(9);
-_FL_DEFPIN(10); _FL_DEFPIN(11); _FL_DEFPIN(12); _FL_DEFPIN(13); _FL_DEFPIN(14);
-_FL_DEFPIN(15); _FL_DEFPIN(16); _FL_DEFPIN(17); _FL_DEFPIN(18); _FL_DEFPIN(19);
-_FL_DEFPIN(20); _FL_DEFPIN(21); _FL_DEFPIN(22); _FL_DEFPIN(23);
+#define MAX_PIN 23
+_FL_DEFPIN(0, 13); _FL_DEFPIN(1, 33); _FL_DEFPIN(2, 11); _FL_DEFPIN(3, 29); _FL_DEFPIN(4, 18);
+_FL_DEFPIN(5, 31); _FL_DEFPIN(6, 43); _FL_DEFPIN(7, 42); _FL_DEFPIN(8, 38); _FL_DEFPIN(9, 39);
+_FL_DEFPIN(10, 40); _FL_DEFPIN(11, 5); _FL_DEFPIN(12, 7); _FL_DEFPIN(13, 6); _FL_DEFPIN(14, 35);
+_FL_DEFPIN(15, 32); _FL_DEFPIN(16, 12); _FL_DEFPIN(17, 32); _FL_DEFPIN(18, 12); _FL_DEFPIN(19, 19);
+_FL_DEFPIN(20, 48); _FL_DEFPIN(21, 49); _FL_DEFPIN(22, 36); _FL_DEFPIN(23, 37);
 
 #define HAS_HARDWARE_PIN_SUPPORT 1
 
 #elif defined(ARDUINO_AM_AP3_SFE_THING_PLUS)
 
-#define MAX_PIN 29
-_FL_DEFPIN(0); _FL_DEFPIN(1); _FL_DEFPIN(2); _FL_DEFPIN(3); _FL_DEFPIN(4);
-_FL_DEFPIN(5); _FL_DEFPIN(6); _FL_DEFPIN(7); _FL_DEFPIN(8); _FL_DEFPIN(9);
-_FL_DEFPIN(10); _FL_DEFPIN(11); _FL_DEFPIN(12); _FL_DEFPIN(13); _FL_DEFPIN(14);
-_FL_DEFPIN(15); _FL_DEFPIN(16); _FL_DEFPIN(17); _FL_DEFPIN(18); _FL_DEFPIN(19);
-_FL_DEFPIN(20); _FL_DEFPIN(21); _FL_DEFPIN(22); _FL_DEFPIN(23); _FL_DEFPIN(24);
-_FL_DEFPIN(25); _FL_DEFPIN(26); _FL_DEFPIN(27); _FL_DEFPIN(28);
+#define MAX_PIN 28
+_FL_DEFPIN(0, 25); _FL_DEFPIN(1, 24); _FL_DEFPIN(2, 44); _FL_DEFPIN(3, 35); _FL_DEFPIN(4, 4);
+_FL_DEFPIN(5, 22); _FL_DEFPIN(6, 23); _FL_DEFPIN(7, 27); _FL_DEFPIN(8, 28); _FL_DEFPIN(9, 32);
+_FL_DEFPIN(10, 14); _FL_DEFPIN(11, 7); _FL_DEFPIN(12, 6); _FL_DEFPIN(13, 5); _FL_DEFPIN(14, 40);
+_FL_DEFPIN(15, 39); _FL_DEFPIN(16, 43); _FL_DEFPIN(17, 42); _FL_DEFPIN(18, 26); _FL_DEFPIN(19, 33);
+_FL_DEFPIN(20, 13); _FL_DEFPIN(21, 11); _FL_DEFPIN(22, 29); _FL_DEFPIN(23, 12); _FL_DEFPIN(24, 31);
+_FL_DEFPIN(25, 48); _FL_DEFPIN(26, 49); _FL_DEFPIN(27, 36); _FL_DEFPIN(28, 37);
 
 #define HAS_HARDWARE_PIN_SUPPORT 1
 
 #elif defined(ARDUINO_AM_AP3_SFE_BB_ARTEMIS_ATP) || defined(ARDUINO_SFE_ARTEMIS)
 
-#define MAX_PIN 50 // AP3_VARIANT_NUM_PINS
-_FL_DEFPIN(0); _FL_DEFPIN(1); _FL_DEFPIN(2); _FL_DEFPIN(3); _FL_DEFPIN(4);
-_FL_DEFPIN(5); _FL_DEFPIN(6); _FL_DEFPIN(7); _FL_DEFPIN(8); _FL_DEFPIN(9);
-_FL_DEFPIN(10); _FL_DEFPIN(11); _FL_DEFPIN(12); _FL_DEFPIN(13); _FL_DEFPIN(14);
-_FL_DEFPIN(15); _FL_DEFPIN(16); _FL_DEFPIN(17); _FL_DEFPIN(18); _FL_DEFPIN(19);
-_FL_DEFPIN(20); _FL_DEFPIN(21); _FL_DEFPIN(22); _FL_DEFPIN(23); _FL_DEFPIN(24);
-_FL_DEFPIN(25); _FL_DEFPIN(26); _FL_DEFPIN(27); _FL_DEFPIN(28); _FL_DEFPIN(29);
-_FL_DEFPIN(31); _FL_DEFPIN(32); _FL_DEFPIN(33); _FL_DEFPIN(34);
-_FL_DEFPIN(35); _FL_DEFPIN(36); _FL_DEFPIN(37); _FL_DEFPIN(38); _FL_DEFPIN(39);
-_FL_DEFPIN(40); _FL_DEFPIN(41); _FL_DEFPIN(42); _FL_DEFPIN(43); _FL_DEFPIN(44);
-_FL_DEFPIN(45); _FL_DEFPIN(47); _FL_DEFPIN(48); _FL_DEFPIN(49);
+#define MAX_PIN 49
+_FL_DEFPIN(0, 0); _FL_DEFPIN(1, 1); _FL_DEFPIN(2, 2); _FL_DEFPIN(3, 3); _FL_DEFPIN(4, 4);
+_FL_DEFPIN(5, 5); _FL_DEFPIN(6, 6); _FL_DEFPIN(7, 7); _FL_DEFPIN(8, 8); _FL_DEFPIN(9, 9);
+_FL_DEFPIN(10, 10); _FL_DEFPIN(11, 11); _FL_DEFPIN(12, 12); _FL_DEFPIN(13, 13); _FL_DEFPIN(14, 14);
+_FL_DEFPIN(15, 15); _FL_DEFPIN(16, 16); _FL_DEFPIN(17, 17); _FL_DEFPIN(18, 18); _FL_DEFPIN(19, 19);
+_FL_DEFPIN(20, 20); _FL_DEFPIN(21, 21); _FL_DEFPIN(22, 22); _FL_DEFPIN(23, 23); _FL_DEFPIN(24, 24);
+_FL_DEFPIN(25, 25); _FL_DEFPIN(26, 26); _FL_DEFPIN(27, 27); _FL_DEFPIN(28, 28); _FL_DEFPIN(29, 29);
+_FL_DEFPIN(31, 31); _FL_DEFPIN(32, 32); _FL_DEFPIN(33, 33); _FL_DEFPIN(34, 34);
+_FL_DEFPIN(35, 35); _FL_DEFPIN(36, 36); _FL_DEFPIN(37, 37); _FL_DEFPIN(38, 38); _FL_DEFPIN(39, 39);
+_FL_DEFPIN(40, 40); _FL_DEFPIN(41, 41); _FL_DEFPIN(42, 42); _FL_DEFPIN(43, 43); _FL_DEFPIN(44, 44);
+_FL_DEFPIN(45, 45); _FL_DEFPIN(47, 47); _FL_DEFPIN(48, 48); _FL_DEFPIN(49, 49);
 
 #define HAS_HARDWARE_PIN_SUPPORT 1
 
diff --git a/platforms/apollo3/fastspi_apollo3.h b/platforms/apollo3/fastspi_apollo3.h
index 1259ee8111..0c77d31978 100644
--- a/platforms/apollo3/fastspi_apollo3.h
+++ b/platforms/apollo3/fastspi_apollo3.h
@@ -26,10 +26,10 @@ class APOLLO3HardwareSPIOutput {
 
 	// initialize the pins for fastgpio
 	void init() {
-		pinMode(_DATA_PIN, OUTPUT);
-		pinMode(_CLOCK_PIN, OUTPUT);
-		am_hal_gpio_fastgpio_enable(_DATA_PIN);
-		am_hal_gpio_fastgpio_enable(_CLOCK_PIN);
+		FastPin<_CLOCK_PIN>::setOutput();
+		FastPin<_CLOCK_PIN>::lo();
+		FastPin<_DATA_PIN>::setOutput();
+		FastPin<_DATA_PIN>::lo();
 	}
 
 	// latch the CS select
@@ -91,14 +91,14 @@ class APOLLO3HardwareSPIOutput {
 	template <uint8_t BIT> inline static void writeBit(uint8_t b) {
 		//waitFully();
 		if(b & (1 << BIT)) {
-			am_hal_gpio_fastgpio_set(_DATA_PIN);
+			FastPin<_DATA_PIN>::hi();
 		} else {
-			am_hal_gpio_fastgpio_clr(_DATA_PIN);
+			FastPin<_DATA_PIN>::lo();
 		}
 
-		am_hal_gpio_fastgpio_set(_CLOCK_PIN);
+		FastPin<_CLOCK_PIN>::hi();
 		for (uint32_t d = (_SPI_CLOCK_DIVIDER >> 1); d > 0; d--) { __NOP(); }
-		am_hal_gpio_fastgpio_clr(_CLOCK_PIN);
+		FastPin<_CLOCK_PIN>::lo();
 		for (uint32_t d = (_SPI_CLOCK_DIVIDER >> 1); d > 0; d--) { __NOP(); }
 	}
 

From 13fc0654620f686c4146d3bd7869bcb858d6745c Mon Sep 17 00:00:00 2001
From: 7FM <41307817+7FM@users.noreply.github.com>
Date: Sat, 18 Apr 2020 18:31:20 +0200
Subject: [PATCH 148/204] Fix namespace for esp8266 fastpin

---
 platforms/esp/8266/fastpin_esp8266.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/platforms/esp/8266/fastpin_esp8266.h b/platforms/esp/8266/fastpin_esp8266.h
index 69085bf9a2..1ce7934b04 100644
--- a/platforms/esp/8266/fastpin_esp8266.h
+++ b/platforms/esp/8266/fastpin_esp8266.h
@@ -98,4 +98,4 @@ _FL_DEFPIN(8,15); _FL_DEFPIN(9,3); _FL_DEFPIN(10,1);
 
 #define HAS_HARDWARE_PIN_SUPPORT
 
-#define FASTLED_NAMESPACE_END
+FASTLED_NAMESPACE_END

From 8dfc43a8f0423db8800f3638dde6a858857435e7 Mon Sep 17 00:00:00 2001
From: Henry Gabryjelski <henrygab@users.noreply.github.com>
Date: Sun, 19 Apr 2020 15:52:12 -0700
Subject: [PATCH 149/204] Fix arduino header mappings

---
 .../arm/nrf52/fastpin_arm_nrf52_variants.h    | 111 ++++++++++++------
 1 file changed, 74 insertions(+), 37 deletions(-)

diff --git a/platforms/arm/nrf52/fastpin_arm_nrf52_variants.h b/platforms/arm/nrf52/fastpin_arm_nrf52_variants.h
index 9bb07721fd..ec9b09b072 100644
--- a/platforms/arm/nrf52/fastpin_arm_nrf52_variants.h
+++ b/platforms/arm/nrf52/fastpin_arm_nrf52_variants.h
@@ -56,8 +56,6 @@
         #define __FASTPIN_ARM_NRF52_VARIANT_FOUND
     #endif
 
-    #define MAX_PIN (33u) // 34 if wanting to use NFC1 test point
-
     // Arduino pins 0..7
     _FL_DEFPIN( 0, 25, 0); // D0  is P0.25 -- UART TX
     //_FL_DEFPIN( 1, 24, 0); // D1  is P0.24 -- UART RX
@@ -163,11 +161,12 @@
     #else
         #define __FASTPIN_ARM_NRF52_VARIANT_FOUND
     #endif
-    #warning "Adafruit Bluefruit on nRF52840DK PCA10056 is an untested board -- test and let use know your results via https://github.com/FastLED/FastLED/issues"
     
     #if defined(USE_ARDUINO_PIN_NUMBERING)
-        /* pca10056_schematic_and_pcb.pdf
-           Page 3 shows the Arduino Pin to GPIO Px.xx mapping
+        #error "Define of `USE_ARDUINO_PIN_NUMBERING` has known errors in pin mapping -- select different mapping"
+    #elif defined(USE_ARDUINO_UNO_R3_HEADER_PIN_NUMBERING)
+        /* The following allows defining and using the FastPin<> templates,
+           using the Arduino UNO R3 connector pin definitions.
         */
         _FL_DEFPIN( 0,  1, 1); // D0  is P1.01 
         _FL_DEFPIN( 1,  2, 1); // D1  is P1.02 
@@ -183,40 +182,78 @@
         _FL_DEFPIN(11, 13, 1); // D11 is P1.13 
         _FL_DEFPIN(12, 14, 1); // D12 is P1.14
         _FL_DEFPIN(13, 15, 1); // D13 is P1.15 
-        _FL_DEFPIN(14,  0, 0); // D14 is P0.00 (if SB4 bridged)
-        _FL_DEFPIN(15,  1, 0); // D15 is P0.01 (if SB3 bridged)
-        _FL_DEFPIN(16,  5, 0); // D16 is P0.05 (aka AIN3, aka UART RTS)
-        _FL_DEFPIN(17,  6, 0); // D17 is P0.06 (UART TxD)
-        _FL_DEFPIN(18,  7, 0); // D18 is P0.07 (UART CTS default)
-        _FL_DEFPIN(19,  8, 0); // D19 is P0.08 (UART RxD)
-        _FL_DEFPIN(20,  9, 0); // D20 is P0.09 (NFC1)
-        _FL_DEFPIN(21, 10, 0); // D21 is P0.10 (NFC2)
-        _FL_DEFPIN(22, 11, 0); // D22 is P0.11 (TRACEDATA2 / BUTTON1 default)
-        _FL_DEFPIN(23, 12, 0); // D23 is P0.12 (TRACEDATA1 / BUTTON2 default)
-        _FL_DEFPIN(24, 13, 0); // D24 is P0.13 (LED1)
-        _FL_DEFPIN(25, 14, 0); // D25 is P0.14 (LED2)
-        _FL_DEFPIN(26, 15, 0); // D26 is P0.15 (LED3)
-        _FL_DEFPIN(27, 16, 0); // D27 is P0.16 (LED4)
-        _FL_DEFPIN(28, 17, 0); // D28 is P0.17 (QSPI !CS , unless SB13 cut)
+        // Arduino UNO uses pins D14..D19 to map to header pins A0..A5
+        // AREF has no equivalent digital pin map on Arduino, would be P0.02
+        _FL_DEFPIN(14,  3, 0); // D14 / A0 is P0.03
+        _FL_DEFPIN(15,  4, 0); // D15 / A1 is P0.04
+        _FL_DEFPIN(16, 28, 0); // D16 / A2 is P0.28
+        _FL_DEFPIN(17, 29, 0); // D17 / A3 is P0.29
+        // Cannot determine which pin on PCA10056 would be intended solely from UNO R3 digital pin number
+        //_FL_DEFPIN(18, 30, 0); // D18 could be one of two pins: A4 would be P0.30, SDA would be P0.26
+        //_FL_DEFPIN(19, 31, 0); // D19 could be one of two pins: A5 would be P0.31, SCL would be P0.27
+    #elif defined(USE_ARDUINO_MEGA_2560_REV3_HEADER_PIN_NUMBERING)
+        /* The following allows defining and using the FastPin<> templates,
+           using the Arduino MEGA 2560 Rev3 connector pin definitions.
+        */
+        _FL_DEFPIN( 0,  1, 1); // D0  is P1.01
+        _FL_DEFPIN( 1,  2, 1); // D1  is P1.02
+        _FL_DEFPIN( 2,  3, 1); // D2  is P1.03
+        _FL_DEFPIN( 3,  4, 1); // D3  is P1.04
+        _FL_DEFPIN( 4,  5, 1); // D4  is P1.05
+        _FL_DEFPIN( 5,  6, 1); // D5  is P1.06
+        _FL_DEFPIN( 6,  7, 1); // D6  is P1.07 (BUTTON1 option)
+        _FL_DEFPIN( 7,  8, 1); // D7  is P1.08 (BUTTON2 option)
+        _FL_DEFPIN( 8, 10, 1); // D8  is P1.10
+        _FL_DEFPIN( 9, 11, 1); // D9  is P1.11
+        _FL_DEFPIN(10, 12, 1); // D10 is P1.12
+        _FL_DEFPIN(11, 13, 1); // D11 is P1.13
+        _FL_DEFPIN(12, 14, 1); // D12 is P1.14
+        _FL_DEFPIN(13, 15, 1); // D13 is P1.15
+
+        // Arduino MEGA 2560 has additional digital pins on lower digital header
+        _FL_DEFPIN(14, 10, 0); // D14 is P0.10
+        _FL_DEFPIN(15,  9, 0); // D15 is P0.09
+        _FL_DEFPIN(16,  8, 0); // D16 is P0.08
+        _FL_DEFPIN(17,  7, 0); // D17 is P0.07
+        _FL_DEFPIN(18,  6, 0); // D18 is P0.06
+        _FL_DEFPIN(19,  5, 0); // D19 is P0.05
+        // Cannot determine which pin on PCA10056 would be intended solely from Arduino MEGA 2560 digital pin number
+        //_FL_DEFPIN(20,  1, 0); // D20 could be one of two pins: D20 on lower header would be P0.01, SDA would be P0.26
+        //_FL_DEFPIN(21,  0, 0); // D21 could be one of two pins: D21 on lower header would be P0.00, SCL would be P0.27
+
+        // Arduino MEGA 2560 has D22-D53 exposed on perpendicular two-row header
+        // PCA10056 has support for D22-D38 via a 2x19 header at that location (D39 is GND on PCA10056)
+        _FL_DEFPIN(22, 11, 0); // D22 is P0.11
+        _FL_DEFPIN(23, 12, 0); // D23 is P0.12
+        _FL_DEFPIN(24, 13, 0); // D24 is P0.13
+        _FL_DEFPIN(25, 14, 0); // D25 is P0.14
+        _FL_DEFPIN(26, 15, 0); // D26 is P0.15
+        _FL_DEFPIN(27, 16, 0); // D27 is P0.16
+        // _FL_DEFPIN(28, 17, 0); // D28 is P0.17 (QSPI !CS )
         // _FL_DEFPIN(29, 18, 0); // D29 is P0.18 (RESET)
-        _FL_DEFPIN(30, 19, 0); // D30 is P0.19 (QSPI CLK , unless SB11 cut)
-        _FL_DEFPIN(31, 20, 0); // D31 is P0.20 (QSPI DIO0, unless SB12 cut)
-        _FL_DEFPIN(32, 21, 0); // D32 is P0.21 (QSPI DIO1, unless SB14 cut)
-        _FL_DEFPIN(33, 22, 0); // D33 is P0.22 (QSPI DIO2, unless SB15 cut)
-        _FL_DEFPIN(34, 23, 0); // D34 is P0.23 (QSPI DIO3, unless SB10 cut)
-        _FL_DEFPIN(35, 24, 0); // D35 is P0.24 (BUTTON3)
-        _FL_DEFPIN(36, 25, 0); // D36 is P0.25 (BUTTON4)
-        _FL_DEFPIN(37, 00, 1); // D37 is P1.00 (TRACEDATA0 / SWO)
-        _FL_DEFPIN(38, 09, 1); // D38 is P1.09 (TRACEDATA3)
-        //_FL_DEFPIN(??,  2, 0); // D?? is P0.02 (AREF, aka AIN0)
-        //_FL_DEFPIN(??,  3, 0); // D?? is P0.03 (A0,   aka AIN1)
-        //_FL_DEFPIN(??,  4, 0); // D?? is P0.04 (A1,   aka AIN2, aka UART CTS option)
-        //_FL_DEFPIN(??, 28, 0); // D?? is P0.28 (A2,   aka AIN4)
-        //_FL_DEFPIN(??, 29, 0); // D?? is P0.29 (A3,   aka AIN5)
-        //_FL_DEFPIN(??, 30, 0); // D?? is P0.30 (A4,   aka AIN6)
-        //_FL_DEFPIN(??, 31, 0); // D?? is P0.31 (A5,   aka AIN7)
+        // _FL_DEFPIN(30, 19, 0); // D30 is P0.19 (QSPI CLK)
+        // _FL_DEFPIN(31, 20, 0); // D31 is P0.20 (QSPI DIO0)
+        // _FL_DEFPIN(32, 21, 0); // D32 is P0.21 (QSPI DIO1)
+        // _FL_DEFPIN(33, 22, 0); // D33 is P0.22 (QSPI DIO2)
+        // _FL_DEFPIN(34, 23, 0); // D34 is P0.23 (QSPI DIO3)
+        _FL_DEFPIN(35, 24, 0); // D35 is P0.24
+        _FL_DEFPIN(36, 25, 0); // D36 is P0.25
+        _FL_DEFPIN(37,  0, 1); // D37 is P1.00
+        _FL_DEFPIN(38,  9, 1); // D38 is P1.09
+        // _FL_DEFPIN(39, , 0); // D39 is not a GPIO (it is GND on PCA10056)
 
-    #else
+
+        // Arduino MEGA 2560 uses pins D54..D59 to map to header pins A0..A5
+        // (it also has D60..D69 for A6..A15, which have no corresponding header on PCA10056)
+        // AREF has no equivalent digital pin map on Arduino, would be P0.02
+        _FL_DEFPIN(54,  3, 0); // D54 / A0 is P0.03
+        _FL_DEFPIN(55,  4, 0); // D55 / A1 is P0.04
+        _FL_DEFPIN(56, 28, 0); // D56 / A2 is P0.28
+        _FL_DEFPIN(57, 29, 0); // D57 / A3 is P0.29
+        _FL_DEFPIN(58, 30, 0); // D58 / A4 is P0.30
+        _FL_DEFPIN(59, 31, 0); // D59 / A5 is P0.31
+
+    #else // identity mapping of arduino pin to port/pin
         /* 48 pins, defined using natural mapping in Adafruit's variant.cpp (!) */
         _DEFPIN_ARM_IDENTITY_P0( 0); // P0.00 (XL1 .. ensure SB4 bridged, SB2 cut)
         _DEFPIN_ARM_IDENTITY_P0( 1); // P0.01 (XL2 .. ensure SB3 bridged, SB1 cut)

From e9278484e8b59eb588bddc3d8cad01ab46bda1c0 Mon Sep 17 00:00:00 2001
From: Henry Gabryjelski <henrygab@users.noreply.github.com>
Date: Sun, 19 Apr 2020 15:52:55 -0700
Subject: [PATCH 150/204] Cannot determine high speed capability in header

---
 platforms/arm/nrf52/fastpin_arm_nrf52.h | 147 +-----------------------
 1 file changed, 3 insertions(+), 144 deletions(-)

diff --git a/platforms/arm/nrf52/fastpin_arm_nrf52.h b/platforms/arm/nrf52/fastpin_arm_nrf52.h
index e3da3898b4..7a780876a6 100644
--- a/platforms/arm/nrf52/fastpin_arm_nrf52.h
+++ b/platforms/arm/nrf52/fastpin_arm_nrf52.h
@@ -131,150 +131,9 @@ template <uint32_t _MASK, typename _PORT, uint8_t _PORT_NUMBER, uint8_t _PIN_NUM
   FASTLED_NRF52_INLINE_ATTRIBUTE static void fastset(register port_ptr_t port, register port_t val) { *port = val; }
   constexpr                      static uint32_t   nrf_pin2() { return NRF_GPIO_PIN_MAP(_PORT_NUMBER, _PIN_NUMBER); }
   constexpr                      static bool       LowSpeedOnlyRecommended() {
-    // only allow one function body.
-    #undef _FASTLED_NRF52_LOW_SPEED_ONLY_BOARD_DETECT
-
-    // unique cases for each board / processor package / module?
-    #if defined(NRF52810_XXAA) && defined(NRF52810_PACKAGE_QFN48)
-        #if defined(_FASTLED_NRF52_LOW_SPEED_ONLY_BOARD_DETECT)
-            #error "Multiple board match"
-        #endif
-        #define _FASTLED_NRF52_LOW_SPEED_ONLY_BOARD_DETECT 1
-        static_assert(_PORT_NUMBER == 0, "nRF52810 only has one port");
-        return (
-            (_PIN_NUMBER == 25) ||
-            (_PIN_NUMBER == 26) ||
-            (_PIN_NUMBER == 27) ||
-            (_PIN_NUMBER == 28) ||
-            (_PIN_NUMBER == 29)
-            );
-    #endif
-    #if defined(NRF52810_XXAA) && defined(NRF52810_PACKAGE_QFN32)
-        #if defined(_FASTLED_NRF52_LOW_SPEED_ONLY_BOARD_DETECT)
-            #error "Multiple board match"
-        #endif
-        #define _FASTLED_NRF52_LOW_SPEED_ONLY_BOARD_DETECT 1
-        static_assert(_PORT_NUMBER == 0, "nRF52810 only has one port");
-        if (_PORT_NUMBER == 0) {
-            if (
-                (_PIN_NUMBER == 26) ||
-                (_PIN_NUMBER == 27)
-                ) {
-                return true;
-            }
-        }
-        return false;
-    #endif
-    #if defined(NRF52832_XXAA) || defined(NRF52832_XXAB)
-        #if defined(_FASTLED_NRF52_LOW_SPEED_ONLY_BOARD_DETECT)
-            #error "Multiple board match"
-        #endif
-        #define _FASTLED_NRF52_LOW_SPEED_ONLY_BOARD_DETECT 1
-        static_assert(_PORT_NUMBER == 0, "nRF52832 only has one port");
-        // data sheets shows the same pins in both QFN48 and WLCSP package
-        // are recommended as low-speed only:
-        return (
-            (_PIN_NUMBER == 22) ||
-            (_PIN_NUMBER == 23) ||
-            (_PIN_NUMBER == 24) ||
-            (_PIN_NUMBER == 25) ||
-            (_PIN_NUMBER == 26) ||
-            (_PIN_NUMBER == 27) ||
-            (_PIN_NUMBER == 28) ||
-            (_PIN_NUMBER == 29) ||
-            (_PIN_NUMBER == 30) ||
-            (_PIN_NUMBER == 31)
-            );
-    #endif
-    #if defined(NRF52840_XXAA) && defined(NRF52840_PACKAGE_aQFN73)
-        #if defined(_FASTLED_NRF52_LOW_SPEED_ONLY_BOARD_DETECT)
-            #error "Multiple board match"
-        #endif
-        #define _FASTLED_NRF52_LOW_SPEED_ONLY_BOARD_DETECT 1
-        static_assert(_PORT_NUMBER == 0 || _PORT_NUMBER == 1, "nRF52840 only has two ports");
-        return
-            (
-                (
-                    (_PORT_NUMBER == 0) &&
-                    (
-                        (_PIN_NUMBER ==  2) ||
-                        (_PIN_NUMBER ==  3) ||
-                        (_PIN_NUMBER ==  9) ||
-                        (_PIN_NUMBER == 10) ||
-                        (_PIN_NUMBER == 11) ||
-                        (_PIN_NUMBER == 12) ||
-                        (_PIN_NUMBER == 14) ||
-                        (_PIN_NUMBER == 28) ||
-                        (_PIN_NUMBER == 29) ||
-                        (_PIN_NUMBER == 30) ||
-                        (_PIN_NUMBER == 31)
-                    )
-                )
-                ||
-                (
-                    (_PORT_NUMBER == 1) &&
-                    (
-                        (_PIN_NUMBER ==  2) ||
-                        (_PIN_NUMBER ==  3) ||
-                        (_PIN_NUMBER ==  4) ||
-                        (_PIN_NUMBER ==  5) ||
-                        (_PIN_NUMBER ==  6) ||
-                        (_PIN_NUMBER ==  7) ||
-                        (_PIN_NUMBER == 10) ||
-                        (_PIN_NUMBER == 13) ||
-                        (_PIN_NUMBER == 15)
-                    )
-                )
-            );
-    #endif
-    #if false && defined(NRF52840_XXAA) && (defined(NRF52840_PACKAGE_aQFN73) || defined(ARDUINO_NRF52840_FEATHER))
-        // Adafruit nRF52840 feather uses RAYTAC MDBT50Q module, which is aQFN73
-        // See https://cdn-learn.adafruit.com/assets/assets/000/068/544/original/Raytac_MDBT50Q.pdf
-        #if defined(_FASTLED_NRF52_LOW_SPEED_ONLY_BOARD_DETECT)
-            #error "Multiple board match"
-        #endif
-        #define _FASTLED_NRF52_LOW_SPEED_ONLY_BOARD_DETECT 1
-        static_assert(_PORT_NUMBER == 0 || _PORT_NUMBER == 1, "nRF52840 only has two ports");
-        return
-            (
-                (
-                    (_PORT_NUMBER == 0) &&
-                    (
-                        (_PIN_NUMBER ==  2) ||
-                        (_PIN_NUMBER ==  3) ||
-                        (_PIN_NUMBER ==  9) ||
-                        (_PIN_NUMBER == 10) ||
-                        (_PIN_NUMBER == 28) ||
-                        (_PIN_NUMBER == 29) ||
-                        (_PIN_NUMBER == 30) ||
-                        (_PIN_NUMBER == 31)
-                    )
-                )
-                ||
-                (
-                    (_PORT_NUMBER == 1) &&
-                    (
-                        (_PIN_NUMBER ==  1) ||
-                        (_PIN_NUMBER ==  2) ||
-                        (_PIN_NUMBER ==  3) ||
-                        (_PIN_NUMBER ==  4) ||
-                        (_PIN_NUMBER ==  5) ||
-                        (_PIN_NUMBER ==  6) ||
-                        (_PIN_NUMBER ==  7) ||
-                        (_PIN_NUMBER == 10) ||
-                        (_PIN_NUMBER == 11) ||
-                        (_PIN_NUMBER == 12) ||
-                        (_PIN_NUMBER == 13) ||
-                        (_PIN_NUMBER == 14) ||
-                        (_PIN_NUMBER == 15)
-                    )
-                )
-            );
-    #endif
-    #if !defined(_FASTLED_NRF52_LOW_SPEED_ONLY_BOARD_DETECT)
-        #warning "Unknown board / package, ... caller must determine pins that support high-speed"
-        return false; // choosing default to be FALSE, to allow users to ATTEMPT to use high-speed on pins where support is not known
-    #endif
+    // Caller must always determine if high speed use if allowed on a given pin,
+    // because it depends on more than just the chip packaging ... it depends on entire board (and even system) design.
+    return false; // choosing default to be FALSE, to allow users to ATTEMPT to use high-speed on pins where support is not known
   }
   // Expose the nrf pin (port/pin combined), port, and pin as properties (e.g., for setting up SPI)
 

From 5af3423bc0b5a773ac0d71bbf4d230d35492e42f Mon Sep 17 00:00:00 2001
From: Henry Gabryjelski <henrygab@users.noreply.github.com>
Date: Sun, 19 Apr 2020 16:02:42 -0700
Subject: [PATCH 151/204] Allow quieter compilation

---
 .../arm/nrf52/fastpin_arm_nrf52_variants.h    | 50 ++++++++++++-------
 1 file changed, 33 insertions(+), 17 deletions(-)

diff --git a/platforms/arm/nrf52/fastpin_arm_nrf52_variants.h b/platforms/arm/nrf52/fastpin_arm_nrf52_variants.h
index ec9b09b072..9e7f72be19 100644
--- a/platforms/arm/nrf52/fastpin_arm_nrf52_variants.h
+++ b/platforms/arm/nrf52/fastpin_arm_nrf52_variants.h
@@ -12,7 +12,9 @@
     #else
         #define __FASTPIN_ARM_NRF52_VARIANT_FOUND
     #endif
-    #warning "Adafruit Bluefruit nRF52832 Feather is an untested board -- test and let use know your results via https://github.com/FastLED/FastLED/issues"
+    #if !defined(FASTLED_NRF52_SUPPRESS_UNTESTED_BOARD_WARNING)
+        #warning "Adafruit Bluefruit nRF52832 Feather is an untested board -- test and let use know your results via https://github.com/FastLED/FastLED/issues"
+    #endif
     _DEFPIN_ARM_IDENTITY_P0( 0); // xtal 1
     _DEFPIN_ARM_IDENTITY_P0( 1); // xtal 2
     _DEFPIN_ARM_IDENTITY_P0( 2); // a0
@@ -109,8 +111,9 @@
     #else
         #define __FASTPIN_ARM_NRF52_VARIANT_FOUND
     #endif
-    #warning "Adafruit Bluefruit nRF52840 Metro Express is an untested board -- test and let use know your results via https://github.com/FastLED/FastLED/issues"
-
+    #if !defined(FASTLED_NRF52_SUPPRESS_UNTESTED_BOARD_WARNING)
+        #warning "Adafruit Bluefruit nRF52840 Metro Express is an untested board -- test and let use know your results via https://github.com/FastLED/FastLED/issues"
+    #endif
     _FL_DEFPIN( 0, 25, 0); // D0  is P0.25 (UART TX)
     _FL_DEFPIN( 1, 24, 0); // D1  is P0.24 (UART RX)
     _FL_DEFPIN( 2, 10, 1); // D2  is P1.10 
@@ -164,7 +167,7 @@
     
     #if defined(USE_ARDUINO_PIN_NUMBERING)
         #error "Define of `USE_ARDUINO_PIN_NUMBERING` has known errors in pin mapping -- select different mapping"
-    #elif defined(USE_ARDUINO_UNO_R3_HEADER_PIN_NUMBERING)
+    #elif defined(FASTLED_NRF52_USE_ARDUINO_UNO_R3_HEADER_PIN_NUMBERING)
         /* The following allows defining and using the FastPin<> templates,
            using the Arduino UNO R3 connector pin definitions.
         */
@@ -191,7 +194,7 @@
         // Cannot determine which pin on PCA10056 would be intended solely from UNO R3 digital pin number
         //_FL_DEFPIN(18, 30, 0); // D18 could be one of two pins: A4 would be P0.30, SDA would be P0.26
         //_FL_DEFPIN(19, 31, 0); // D19 could be one of two pins: A5 would be P0.31, SCL would be P0.27
-    #elif defined(USE_ARDUINO_MEGA_2560_REV3_HEADER_PIN_NUMBERING)
+    #elif defined(FASTLED_NRF52_USE_ARDUINO_MEGA_2560_REV3_HEADER_PIN_NUMBERING)
         /* The following allows defining and using the FastPin<> templates,
            using the Arduino UNO R3 connector pin definitions.
         */
@@ -314,8 +317,9 @@
     #else
         #define __FASTPIN_ARM_NRF52_VARIANT_FOUND
     #endif
-    #warning "Electronut labs bluey is an untested board -- test and let use know your results via https://github.com/FastLED/FastLED/issues"
-
+    #if !defined(FASTLED_NRF52_SUPPRESS_UNTESTED_BOARD_WARNING)
+        #warning "Electronut labs bluey is an untested board -- test and let use know your results via https://github.com/FastLED/FastLED/issues"
+    #endif
     _FL_DEFPIN( 0, 26, 0); // D0  is P0.26
     _FL_DEFPIN( 1, 27, 0); // D1  is P0.27
     _FL_DEFPIN( 2, 22, 0); // D2  is P0.22 (SPI SS  )
@@ -353,7 +357,9 @@
     #else
         #define __FASTPIN_ARM_NRF52_VARIANT_FOUND
     #endif
-    #warning "Electronut labs hackaBLE is an untested board -- test and let use know your results via https://github.com/FastLED/FastLED/issues"
+    #if !defined(FASTLED_NRF52_SUPPRESS_UNTESTED_BOARD_WARNING)
+        #warning "Electronut labs hackaBLE is an untested board -- test and let use know your results via https://github.com/FastLED/FastLED/issues"
+    #endif
     _FL_DEFPIN( 0, 14, 0); // D0  is P0.14 (RX)
     _FL_DEFPIN( 1, 13, 0); // D1  is P0.13 (TX)
     _FL_DEFPIN( 2, 12, 0); // D2  is P0.12
@@ -392,7 +398,9 @@
     #else
         #define __FASTPIN_ARM_NRF52_VARIANT_FOUND
     #endif
-    #warning "Electronut labs hackaBLE_v2 is an untested board -- test and let use know your results via https://github.com/FastLED/FastLED/issues"
+    #if !defined(FASTLED_NRF52_SUPPRESS_UNTESTED_BOARD_WARNING)
+        #warning "Electronut labs hackaBLE_v2 is an untested board -- test and let use know your results via https://github.com/FastLED/FastLED/issues"
+    #endif
     _DEFPIN_ARM_IDENTITY_P0( 0); // P0.00
     _DEFPIN_ARM_IDENTITY_P0( 1); // P0.01
     _DEFPIN_ARM_IDENTITY_P0( 2); // P0.02 (A0 / SDA / AREF)
@@ -435,7 +443,9 @@
     #else
         #define __FASTPIN_ARM_NRF52_VARIANT_FOUND
     #endif
-    #warning "RedBear Blend 2 is an untested board -- test and let use know your results via https://github.com/FastLED/FastLED/issues"
+    #if !defined(FASTLED_NRF52_SUPPRESS_UNTESTED_BOARD_WARNING)
+        #warning "RedBear Blend 2 is an untested board -- test and let use know your results via https://github.com/FastLED/FastLED/issues"
+    #endif
     _FL_DEFPIN( 0, 11, 0); // D0  is P0.11
     _FL_DEFPIN( 1, 12, 0); // D1  is P0.12
     _FL_DEFPIN( 2, 13, 0); // D2  is P0.13
@@ -471,7 +481,9 @@
     #else
         #define __FASTPIN_ARM_NRF52_VARIANT_FOUND
     #endif
-    #warning "RedBear BLE Nano 2 is an untested board -- test and let use know your results via https://github.com/FastLED/FastLED/issues"
+    #if !defined(FASTLED_NRF52_SUPPRESS_UNTESTED_BOARD_WARNING)
+        #warning "RedBear BLE Nano 2 is an untested board -- test and let use know your results via https://github.com/FastLED/FastLED/issues"
+    #endif
     _FL_DEFPIN( 0, 30, 0); // D0  is P0.30 (A0 / RX)
     _FL_DEFPIN( 1, 29, 0); // D1  is P0.29 (A1 / TX)
     _FL_DEFPIN( 2, 28, 0); // D2  is P0.28 (A2 / SDA)
@@ -494,7 +506,9 @@
     #else
         #define __FASTPIN_ARM_NRF52_VARIANT_FOUND
     #endif
-    #warning "Nordic Semiconductor nRF52 DK is an untested board -- test and let use know your results via https://github.com/FastLED/FastLED/issues"
+    #if !defined(FASTLED_NRF52_SUPPRESS_UNTESTED_BOARD_WARNING)
+        #warning "Nordic Semiconductor nRF52 DK is an untested board -- test and let use know your results via https://github.com/FastLED/FastLED/issues"
+    #endif
     _FL_DEFPIN( 0, 11, 0); // D0  is P0.11
     _FL_DEFPIN( 1, 12, 0); // D1  is P0.12
     _FL_DEFPIN( 2, 13, 0); // D2  is P0.13 (BUTTON1)
@@ -531,7 +545,9 @@
     #else
         #define __FASTPIN_ARM_NRF52_VARIANT_FOUND
     #endif
-    #warning "Taida Century nRF52 mini board is an untested board -- test and let use know your results via https://github.com/FastLED/FastLED/issues"
+    #if !defined(FASTLED_NRF52_SUPPRESS_UNTESTED_BOARD_WARNING)
+        #warning "Taida Century nRF52 mini board is an untested board -- test and let use know your results via https://github.com/FastLED/FastLED/issues"
+    #endif
     //_FL_DEFPIN( 0, 25, 0); // D0  is P0.xx (near radio!)
     //_FL_DEFPIN( 1, 26, 0); // D1  is P0.xx (near radio!)
     //_FL_DEFPIN( 2, 27, 0); // D2  is P0.xx (near radio!)
@@ -568,15 +584,15 @@
 
 // Generic nRF52832
 // See https://github.com/sandeepmistry/arduino-nRF5/blob/master/boards.txt
-#if defined(ARDUINO_GENERIC) && (\
-    defined(NRF52832_XXAA) || defined(NRF52832_XXAB)\
-    )
+#if defined(ARDUINO_GENERIC) && (  defined(NRF52832_XXAA) || defined(NRF52832_XXAB)  )
     #if defined(__FASTPIN_ARM_NRF52_VARIANT_FOUND)
         #error "Cannot define more than one board at a time"
     #else
         #define __FASTPIN_ARM_NRF52_VARIANT_FOUND
     #endif
-    #warning "Using `generic` NRF52832 board is an untested configuration -- test and let use know your results via https://github.com/FastLED/FastLED/issues"
+    #if !defined(FASTLED_NRF52_SUPPRESS_UNTESTED_BOARD_WARNING)
+        #warning "Using `generic` NRF52832 board is an untested configuration -- test and let use know your results via https://github.com/FastLED/FastLED/issues"
+    #endif
 
     _DEFPIN_ARM_IDENTITY_P0( 0); // P0.00 (    UART RX
     _DEFPIN_ARM_IDENTITY_P0( 1); // P0.01 (A0, UART TX)

From fbacc9875235e7c5170ceffc52a219ee16f65ba8 Mon Sep 17 00:00:00 2001
From: Henry Gabryjelski <henrygab@users.noreply.github.com>
Date: Sun, 19 Apr 2020 16:04:23 -0700
Subject: [PATCH 152/204] Fix warning about multi-line `//` comments

---
 platforms/arm/nrf52/led_sysdefs_arm_nrf52.h | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/platforms/arm/nrf52/led_sysdefs_arm_nrf52.h b/platforms/arm/nrf52/led_sysdefs_arm_nrf52.h
index 440aed9e4c..d4e26ce925 100644
--- a/platforms/arm/nrf52/led_sysdefs_arm_nrf52.h
+++ b/platforms/arm/nrf52/led_sysdefs_arm_nrf52.h
@@ -47,11 +47,12 @@ typedef __IO uint32_t RwReg;
 #define sei()  __enable_irq()
 
 #define FASTLED_NRF52_DEBUGPRINT(format, ...)
-//#define FASTLED_NRF52_DEBUGPRINT(format, ...)\
-//    do {\
-//        FastLED_NRF52_DebugPrint(format, ##__VA_ARGS__);\
-//    } while(0);
-
+/*
+#define FASTLED_NRF52_DEBUGPRINT(format, ...)\
+    do {\
+        FastLED_NRF52_DebugPrint(format, ##__VA_ARGS__);\
+    } while(0);
+*/
 
 
 

From 9d1df116512574eef3e0a5b7b2e91e5f7a8d83a2 Mon Sep 17 00:00:00 2001
From: Drzony <drzony@gmail.com>
Date: Sat, 25 Apr 2020 22:02:14 +0200
Subject: [PATCH 153/204] ESP32: Fixed SemaphoreGive when using builtin driver

---
 platforms/esp/32/clockless_i2s_esp32.h | 4 ++--
 platforms/esp/32/clockless_rmt_esp32.h | 8 ++++++--
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/platforms/esp/32/clockless_i2s_esp32.h b/platforms/esp/32/clockless_i2s_esp32.h
index a4d15ba750..6570b7e6d0 100644
--- a/platforms/esp/32/clockless_i2s_esp32.h
+++ b/platforms/esp/32/clockless_i2s_esp32.h
@@ -510,8 +510,8 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
        
         // -- Allocate i2s interrupt
         SET_PERI_REG_BITS(I2S_INT_ENA_REG(I2S_DEVICE), I2S_OUT_EOF_INT_ENA_V, 1, I2S_OUT_EOF_INT_ENA_S);
-        esp_err_t e = esp_intr_alloc(interruptSource, 0, // ESP_INTR_FLAG_INTRDISABLED | ESP_INTR_FLAG_LEVEL3,
-                                     &interruptHandler, 0, &gI2S_intr_handle);
+        esp_intr_alloc(interruptSource, 0, // ESP_INTR_FLAG_INTRDISABLED | ESP_INTR_FLAG_LEVEL3,
+                       &interruptHandler, 0, &gI2S_intr_handle);
         
         // -- Create a semaphore to block execution until all the controllers are done
         if (gTX_sem == NULL) {
diff --git a/platforms/esp/32/clockless_rmt_esp32.h b/platforms/esp/32/clockless_rmt_esp32.h
index b8cd1f42dd..e41f443e1b 100644
--- a/platforms/esp/32/clockless_rmt_esp32.h
+++ b/platforms/esp/32/clockless_rmt_esp32.h
@@ -481,8 +481,12 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
 
         if (gNumDone == gNumControllers) {
             // -- If this is the last controller, signal that we are all done
-            xSemaphoreGiveFromISR(gTX_sem, &HPTaskAwoken);
-            if(HPTaskAwoken == pdTRUE) portYIELD_FROM_ISR();
+            if (FASTLED_RMT_BUILTIN_DRIVER) {
+                xSemaphoreGive(gTX_sem);
+            } else {
+                xSemaphoreGiveFromISR(gTX_sem, &HPTaskAwoken);
+                if (HPTaskAwoken == pdTRUE) portYIELD_FROM_ISR();
+            }
         } else {
             // -- Otherwise, if there are still controllers waiting, then
             //    start the next one on this channel

From 423588ff54933c016787dd3c605985b42733de74 Mon Sep 17 00:00:00 2001
From: Drzony <drzony@gmail.com>
Date: Sat, 25 Apr 2020 23:09:02 +0200
Subject: [PATCH 154/204] ESP32: I2S comment out unused variables

---
 platforms/esp/32/clockless_i2s_esp32.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/platforms/esp/32/clockless_i2s_esp32.h b/platforms/esp/32/clockless_i2s_esp32.h
index 6570b7e6d0..d65b9d87cd 100644
--- a/platforms/esp/32/clockless_i2s_esp32.h
+++ b/platforms/esp/32/clockless_i2s_esp32.h
@@ -363,7 +363,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         freq=1/(CLOCK_DIVIDER_N+(double)CLOCK_DIVIDER_B/CLOCK_DIVIDER_A);
         freq=freq*I2S_BASE_CLK;
         // Serial.printf("calculted for i2s frequency:%f Mhz N:%d B:%d A:%d\n",freq/1000000,CLOCK_DIVIDER_N,CLOCK_DIVIDER_B,CLOCK_DIVIDER_A);
-        double pulseduration=1000000000/freq;
+        // double pulseduration=1000000000/freq;
         // Serial.printf("Pulse duration: %f ns\n",pulseduration);
         // gPulsesPerBit = (T1ns + T2ns + T3ns)/FASTLED_I2S_NS_PER_PULSE;
         
@@ -645,7 +645,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         }
         
         // -- Transpose and encode the pixel data for the DMA buffer
-        int buf_index = 0;
+        // int buf_index = 0;
         for (int channel = 0; channel < NUM_COLOR_CHANNELS; channel++) {
             
             // -- Tranpose each array: all the bit 7's, then all the bit 6's, ...

From 12ec2bab311301907db21a94ebfa47edf9cf36b2 Mon Sep 17 00:00:00 2001
From: ardnew <andrew@ardnew.com>
Date: Mon, 18 May 2020 23:38:56 -0500
Subject: [PATCH 155/204] add support for Adafruit ItsyBitsy nRF52840

---
 .../arm/nrf52/fastpin_arm_nrf52_variants.h    | 56 ++++++++++++++++++-
 1 file changed, 55 insertions(+), 1 deletion(-)

diff --git a/platforms/arm/nrf52/fastpin_arm_nrf52_variants.h b/platforms/arm/nrf52/fastpin_arm_nrf52_variants.h
index 9e7f72be19..375a719609 100644
--- a/platforms/arm/nrf52/fastpin_arm_nrf52_variants.h
+++ b/platforms/arm/nrf52/fastpin_arm_nrf52_variants.h
@@ -309,6 +309,60 @@
     #endif
 #endif // defined (ARDUINO_NRF52840_PCA10056)
 
+// Adafruit ItsyBitsy nRF52840 Express
+// From https://www.adafruit.com/package_adafruit_index.json
+#if defined (ARDUINO_NRF52_ITSYBITSY)
+    #if defined(__FASTPIN_ARM_NRF52_VARIANT_FOUND)
+        #error "Cannot define more than one board at a time"
+    #else
+        #define __FASTPIN_ARM_NRF52_VARIANT_FOUND
+    #endif
+    #warning "Adafruit ItsyBitsy nRF52840 Express is an untested board -- test and let use know your results via https://github.com/FastLED/FastLED/issues"
+
+    //  [D0 .. D13] (digital)
+    _FL_DEFPIN( 0, 25, 0); // D0  is P0.25 (UART RX)
+    _FL_DEFPIN( 1, 24, 0); // D1  is P0.24 (UART TX)
+    _FL_DEFPIN( 2,  2, 1); // D2  is P1.02
+    _FL_DEFPIN( 3,  6, 0); // D3  is P0.06 LED
+    _FL_DEFPIN( 4, 29, 0); // D4  is P0.29 Button
+    _FL_DEFPIN( 5, 27, 0); // D5  is P0.27
+    _FL_DEFPIN( 6,  9, 1); // D6  is P1.09 (DotStar Clock)
+    _FL_DEFPIN( 7,  8, 1); // D7  is P1.08
+    _FL_DEFPIN( 8,  8, 0); // D8  is P0.08 (DotStar Data)
+    _FL_DEFPIN( 9,  7, 0); // D9  is P0.07
+    _FL_DEFPIN(10,  5, 0); // D10 is P0.05
+    _FL_DEFPIN(11, 26, 0); // D11 is P0.26
+    _FL_DEFPIN(12, 11, 0); // D12 is P0.11
+    _FL_DEFPIN(13, 12, 0); // D13 is P0.12
+
+    //  [D14 .. D20] (analog [A0 .. A6])
+    _FL_DEFPIN(14,  4, 0); // D14 is P0.04 (A0)
+    _FL_DEFPIN(15, 30, 0); // D15 is P0.30 (A1)
+    _FL_DEFPIN(16, 28, 0); // D16 is P0.28 (A2)
+    _FL_DEFPIN(17, 31, 0); // D17 is P0.31 (A3)
+    _FL_DEFPIN(18,  2, 0); // D18 is P0.02 (A4)
+    _FL_DEFPIN(19,  3, 0); // D19 is P0.03 (A5)
+    _FL_DEFPIN(20,  5, 0); // D20 is P0.05 (A6/D10)
+
+    //  [D21 .. D22] (I2C)
+    _FL_DEFPIN(21, 16, 0); // D21 is P0.16 (SDA)
+    _FL_DEFPIN(22, 14, 0); // D22 is P0.14 (SCL)
+
+    //  [D23 .. D25] (SPI)
+    _FL_DEFPIN(23, 20, 0); // D23 is P0.20 (SPI MISO)
+    _FL_DEFPIN(24, 15, 0); // D24 is P0.15 (SPI MOSI)
+    _FL_DEFPIN(25, 13, 0); // D25 is P0.13 (SPI SCK )
+
+    //  [D26 .. D31] (QSPI)
+    _FL_DEFPIN(26, 19, 0); // D26 is P0.19 (QSPI CLK)
+    _FL_DEFPIN(27, 23, 0); // D27 is P0.23 (QSPI CS)
+    _FL_DEFPIN(28, 21, 0); // D28 is P0.21 (QSPI Data 0)
+    _FL_DEFPIN(29, 22, 0); // D29 is P0.22 (QSPI Data 1)
+    _FL_DEFPIN(30,  0, 1); // D30 is P1.00 (QSPI Data 2)
+    _FL_DEFPIN(31, 17, 0); // D31 is P0.17 (QSPI Data 3)
+
+#endif // defined (ARDUINO_NRF52_ITSYBITSY)
+
 // Electronut labs bluey
 // See https://github.com/sandeepmistry/arduino-nRF5/blob/master/variants/bluey/variant.cpp
 #if defined(ARDUINO_ELECTRONUT_BLUEY)
@@ -629,4 +683,4 @@
 #endif // defined(ARDUINO_GENERIC)
 
 
-#endif // __FASTPIN_ARM_NRF52_VARIANTS_H
\ No newline at end of file
+#endif // __FASTPIN_ARM_NRF52_VARIANTS_H

From a30150a754309123eabdeb6a2fd320bde70cafc8 Mon Sep 17 00:00:00 2001
From: ardnew <andrew@ardnew.com>
Date: Tue, 19 May 2020 00:35:39 -0500
Subject: [PATCH 156/204] protect untested itsybitsy nRF52 warning with
 suppression macro

---
 platforms/arm/nrf52/fastpin_arm_nrf52_variants.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/platforms/arm/nrf52/fastpin_arm_nrf52_variants.h b/platforms/arm/nrf52/fastpin_arm_nrf52_variants.h
index 375a719609..c8aa2e8913 100644
--- a/platforms/arm/nrf52/fastpin_arm_nrf52_variants.h
+++ b/platforms/arm/nrf52/fastpin_arm_nrf52_variants.h
@@ -317,7 +317,9 @@
     #else
         #define __FASTPIN_ARM_NRF52_VARIANT_FOUND
     #endif
-    #warning "Adafruit ItsyBitsy nRF52840 Express is an untested board -- test and let use know your results via https://github.com/FastLED/FastLED/issues"
+    #if !defined(FASTLED_NRF52_SUPPRESS_UNTESTED_BOARD_WARNING)
+        #warning "Adafruit ItsyBitsy nRF52840 Express is an untested board -- test and let use know your results via https://github.com/FastLED/FastLED/issues"
+    #endif
 
     //  [D0 .. D13] (digital)
     _FL_DEFPIN( 0, 25, 0); // D0  is P0.25 (UART RX)

From 60c05c3a9ad86870ec7b39ddd941d5d6b3492e97 Mon Sep 17 00:00:00 2001
From: Martin Falatic <martin@falatic.com>
Date: Sun, 24 May 2020 19:04:34 -0700
Subject: [PATCH 157/204] Ensure getMaxRefreshRate() returns a non-zero default
 for Teensy 4.x

Fixes #912
---
 platforms/arm/mxrt1062/block_clockless_arm_mxrt1062.h | 1 +
 platforms/arm/mxrt1062/clockless_arm_mxrt1062.h       | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/platforms/arm/mxrt1062/block_clockless_arm_mxrt1062.h b/platforms/arm/mxrt1062/block_clockless_arm_mxrt1062.h
index 694297e26f..73f73de876 100644
--- a/platforms/arm/mxrt1062/block_clockless_arm_mxrt1062.h
+++ b/platforms/arm/mxrt1062/block_clockless_arm_mxrt1062.h
@@ -92,6 +92,7 @@ class FlexibleInlineBlockClocklessController : public CPixelLEDController<RGB_OR
 
   }
 
+  virtual uint16_t getMaxRefreshRate() const { return 400; }
 
   virtual void showPixels(PixelController<RGB_ORDER, LANES, __FL_T4_MASK> & pixels) {
 		mWait.wait();
diff --git a/platforms/arm/mxrt1062/clockless_arm_mxrt1062.h b/platforms/arm/mxrt1062/clockless_arm_mxrt1062.h
index 468c15ddfb..ed72713aef 100644
--- a/platforms/arm/mxrt1062/clockless_arm_mxrt1062.h
+++ b/platforms/arm/mxrt1062/clockless_arm_mxrt1062.h
@@ -38,6 +38,8 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
     FastPin<DATA_PIN>::lo();
 	}
 
+  virtual uint16_t getMaxRefreshRate() const { return 400; }
+
 protected:
 
 	virtual void showPixels(PixelController<RGB_ORDER> & pixels) {

From 2c444d5139a684a6af9ecc34007a74e5d555368d Mon Sep 17 00:00:00 2001
From: Henry Gabryjelski <henrygab@users.noreply.github.com>
Date: Mon, 9 Mar 2020 04:37:56 -0700
Subject: [PATCH 158/204] Enable Circuit Playground Bluefruit

---
 .../arm/nrf52/fastpin_arm_nrf52_variants.h    | 49 +++++++++++++++++++
 platforms/arm/nrf52/led_sysdefs_arm_nrf52.h   |  9 +---
 2 files changed, 50 insertions(+), 8 deletions(-)

diff --git a/platforms/arm/nrf52/fastpin_arm_nrf52_variants.h b/platforms/arm/nrf52/fastpin_arm_nrf52_variants.h
index c8aa2e8913..2ff1bdc9d7 100644
--- a/platforms/arm/nrf52/fastpin_arm_nrf52_variants.h
+++ b/platforms/arm/nrf52/fastpin_arm_nrf52_variants.h
@@ -156,6 +156,55 @@
     _FL_DEFPIN(39, 10, 0); // D39 is P0.10 NFC2
 #endif // defined (ARDUINO_NRF52840_METRO)
 
+// Adafruit Circuit Playground Bluefruit
+// From https://www.adafruit.com/package_adafruit_index.json
+#if defined (ARDUINO_NRF52840_CIRCUITPLAY)
+    #if defined(__FASTPIN_ARM_NRF52_VARIANT_FOUND)
+        #error "Cannot define more than one board at a time"
+    #else
+        #define __FASTPIN_ARM_NRF52_VARIANT_FOUND
+    #endif
+    #warning "Adafruit Circuit Playground Bluefruit is an untested board -- test and let use know your results via https://github.com/FastLED/FastLED/issues"
+
+    _FL_DEFPIN( 0, 30, 0); // D0  is P0.30 (GPIO D0  / A6 / UART RX)
+    _FL_DEFPIN( 1, 24, 0); // D1  is P0.14 (GPIO D1  / A7 / UART TX)
+    _FL_DEFPIN( 2,  5, 0); // D2  is P0.05 (GPIO D2  / A5 / SDA)
+    _FL_DEFPIN( 3,  4, 0); // D3  is P0.04 (GPIO D3  / A4 / SCL)
+    _FL_DEFPIN( 4,  2, 1); // D4  is P1.02 (GPIO D4  / Left Button)
+    _FL_DEFPIN( 5, 15, 1); // D5  is P1.15 (GPIO D5  / Right button)
+    _FL_DEFPIN( 6,  2, 0); // D6  is P0.02 (GPIO D6  / A1)
+    _FL_DEFPIN( 7,  6, 1); // D7  is P1.06 (GPIO D7  / Slide Switch)
+    _FL_DEFPIN( 8, 13, 0); // D8  is P0.13 (GPIO D8  / NeoPixels)
+    _FL_DEFPIN( 9, 29, 0); // D9  is P0.29 (GPIO D9  / A2)
+    _FL_DEFPIN(10,  3, 0); // D10 is P0.03 (GPIO D10 / A3)
+    _FL_DEFPIN(11,  4, 1); // D11 is P1.04 (GPIO D11 / Speaker Shutdown)
+    _FL_DEFPIN(12, 26, 0); // D12 is P0.26 (GPIO D12 / A0 / Audio Out)
+    _FL_DEFPIN(13, 14, 1); // D13 is P1.14 (GPIO D13 / Red LED)
+
+    // Yes, these are the same pins, listed a second time
+    // because they are defined as distinct pins in the pin mapping
+    // No, I don't know why this was thought to be a good idea by Hathach.
+    _FL_DEFPIN(14, 26, 0); // A0  is P0.26 (GPIO D12 / A0 / Audio Out)
+    _FL_DEFPIN(15,  2, 0); // A1  is P0.02 (GPIO D6  / A1)
+    _FL_DEFPIN(16, 29, 0); // A2  is P0.29 (GPIO D9  / A2)
+    _FL_DEFPIN(17,  3, 0); // A3  is P0.03 (GPIO D10 / A3)
+    _FL_DEFPIN(18,  4, 0); // A4  is P0.04 (GPIO D3  / A4 / SCL)
+    _FL_DEFPIN(19,  5, 0); // A5  is P0.05 (GPIO D2  / A5 / SDA)
+    _FL_DEFPIN(20, 30, 0); // A6  is P0.30 (GPIO D0  / A6 / UART RX)
+    _FL_DEFPIN(21, 24, 0); // A7  is P0.14 (GPIO D1  / A7 / UART TX)
+
+    // Other pins
+    _FL_DEFPIN(22, 28, 0); // A8  is P0.28 (Light Sensor)
+    _FL_DEFPIN(23, 31, 0); // A9  is P0.31 (Thermistor Sensor)
+    _FL_DEFPIN(24, 16, 0); //     is P0.16 (PDM DAT)
+    _FL_DEFPIN(25, 17, 0); //     is P0.17 (PDM CLK)
+    _FL_DEFPIN(26, 12, 1); //     is P1.12 (LIS SCL)
+    _FL_DEFPIN(27, 13, 1); //     is P1.13 (LIS IRQ)
+    _FL_DEFPIN(28, 10, 1); //     is P1.10 (LIS SDA)
+    _FL_DEFPIN(36,  6, 0); // D36 is P0.06 (?? Neopixel  / Sensor switch ??)
+#endif // defined (ARDUINO_NRF52840_CIRCUITPLAY)
+
+
 // Adafruit Bluefruit on nRF52840DK PCA10056
 // From https://www.adafruit.com/package_adafruit_index.json
 #if defined (ARDUINO_NRF52840_PCA10056)
diff --git a/platforms/arm/nrf52/led_sysdefs_arm_nrf52.h b/platforms/arm/nrf52/led_sysdefs_arm_nrf52.h
index d4e26ce925..3a7ea5820a 100644
--- a/platforms/arm/nrf52/led_sysdefs_arm_nrf52.h
+++ b/platforms/arm/nrf52/led_sysdefs_arm_nrf52.h
@@ -46,14 +46,7 @@ typedef __IO uint32_t RwReg;
 #define cli()  __disable_irq()
 #define sei()  __enable_irq()
 
-#define FASTLED_NRF52_DEBUGPRINT(format, ...)
-/*
 #define FASTLED_NRF52_DEBUGPRINT(format, ...)\
-    do {\
-        FastLED_NRF52_DebugPrint(format, ##__VA_ARGS__);\
-    } while(0);
-*/
-
-
+//    do { FastLED_NRF52_DebugPrint(format, ##__VA_ARGS__); } while(0);
 
 #endif // __LED_SYSDEFS_ARM_NRF52

From 1befc5acd2b835553369b5acdc8e14d1f0f89f43 Mon Sep 17 00:00:00 2001
From: Henry Gabryjelski <henrygab@users.noreply.github.com>
Date: Sun, 24 May 2020 14:46:34 -0700
Subject: [PATCH 159/204] Fix Circuit Playground nRF52 Express

---
 .../arm/nrf52/fastpin_arm_nrf52_variants.h    | 77 +++++++------------
 1 file changed, 28 insertions(+), 49 deletions(-)

diff --git a/platforms/arm/nrf52/fastpin_arm_nrf52_variants.h b/platforms/arm/nrf52/fastpin_arm_nrf52_variants.h
index 2ff1bdc9d7..a59592e9a2 100644
--- a/platforms/arm/nrf52/fastpin_arm_nrf52_variants.h
+++ b/platforms/arm/nrf52/fastpin_arm_nrf52_variants.h
@@ -49,6 +49,34 @@
     _DEFPIN_ARM_IDENTITY_P0(31); // A7
 #endif // defined (ARDUINO_NRF52832_FEATHER) 
 
+// Adafruit Circuit Playground Bluefruit
+// From https://www.adafruit.com/package_adafruit_index.json
+#if defined (ARDUINO_NRF52840_CIRCUITPLAY)
+    #if defined(__FASTPIN_ARM_NRF52_VARIANT_FOUND)
+        #error "Cannot define more than one board at a time"
+    #else
+        #define __FASTPIN_ARM_NRF52_VARIANT_FOUND
+    #endif
+
+    // This board is a bit of a mess ... as it defines
+    // multiple arduino pins to map to a single Port/Pin
+    // combination.
+
+    // Use PIN_NEOPIXEL (D8) for the ten built-in neopixels
+    _FL_DEFPIN( 8, 13, 0); // P0.13 -- D8 / Neopixels
+
+    // Use PIN_A0 .. PIN_A7 for alligator-clip connectors
+    _FL_DEFPIN(14, 26, 0); // P0.26 -- A0 / Audio Out
+    _FL_DEFPIN(15,  2, 0); // P0.02 -- A1
+    _FL_DEFPIN(16, 29, 0); // P0.29 -- A2
+    _FL_DEFPIN(17,  3, 0); // P0.03 -- A3
+    _FL_DEFPIN(18,  4, 0); // P0.04 -- A4   / SCL
+    _FL_DEFPIN(19,  5, 0); // P0.05 -- A5   / SDA
+    _FL_DEFPIN(20, 30, 0); // P0.30 -- A6   / UART RX
+    _FL_DEFPIN(21, 14, 0); // P0.14 -- AREF / UART TX
+
+#endif
+
 // Adafruit Bluefruit nRF52840 Feather Express
 // From https://www.adafruit.com/package_adafruit_index.json
 #if defined (ARDUINO_NRF52840_FEATHER)
@@ -156,55 +184,6 @@
     _FL_DEFPIN(39, 10, 0); // D39 is P0.10 NFC2
 #endif // defined (ARDUINO_NRF52840_METRO)
 
-// Adafruit Circuit Playground Bluefruit
-// From https://www.adafruit.com/package_adafruit_index.json
-#if defined (ARDUINO_NRF52840_CIRCUITPLAY)
-    #if defined(__FASTPIN_ARM_NRF52_VARIANT_FOUND)
-        #error "Cannot define more than one board at a time"
-    #else
-        #define __FASTPIN_ARM_NRF52_VARIANT_FOUND
-    #endif
-    #warning "Adafruit Circuit Playground Bluefruit is an untested board -- test and let use know your results via https://github.com/FastLED/FastLED/issues"
-
-    _FL_DEFPIN( 0, 30, 0); // D0  is P0.30 (GPIO D0  / A6 / UART RX)
-    _FL_DEFPIN( 1, 24, 0); // D1  is P0.14 (GPIO D1  / A7 / UART TX)
-    _FL_DEFPIN( 2,  5, 0); // D2  is P0.05 (GPIO D2  / A5 / SDA)
-    _FL_DEFPIN( 3,  4, 0); // D3  is P0.04 (GPIO D3  / A4 / SCL)
-    _FL_DEFPIN( 4,  2, 1); // D4  is P1.02 (GPIO D4  / Left Button)
-    _FL_DEFPIN( 5, 15, 1); // D5  is P1.15 (GPIO D5  / Right button)
-    _FL_DEFPIN( 6,  2, 0); // D6  is P0.02 (GPIO D6  / A1)
-    _FL_DEFPIN( 7,  6, 1); // D7  is P1.06 (GPIO D7  / Slide Switch)
-    _FL_DEFPIN( 8, 13, 0); // D8  is P0.13 (GPIO D8  / NeoPixels)
-    _FL_DEFPIN( 9, 29, 0); // D9  is P0.29 (GPIO D9  / A2)
-    _FL_DEFPIN(10,  3, 0); // D10 is P0.03 (GPIO D10 / A3)
-    _FL_DEFPIN(11,  4, 1); // D11 is P1.04 (GPIO D11 / Speaker Shutdown)
-    _FL_DEFPIN(12, 26, 0); // D12 is P0.26 (GPIO D12 / A0 / Audio Out)
-    _FL_DEFPIN(13, 14, 1); // D13 is P1.14 (GPIO D13 / Red LED)
-
-    // Yes, these are the same pins, listed a second time
-    // because they are defined as distinct pins in the pin mapping
-    // No, I don't know why this was thought to be a good idea by Hathach.
-    _FL_DEFPIN(14, 26, 0); // A0  is P0.26 (GPIO D12 / A0 / Audio Out)
-    _FL_DEFPIN(15,  2, 0); // A1  is P0.02 (GPIO D6  / A1)
-    _FL_DEFPIN(16, 29, 0); // A2  is P0.29 (GPIO D9  / A2)
-    _FL_DEFPIN(17,  3, 0); // A3  is P0.03 (GPIO D10 / A3)
-    _FL_DEFPIN(18,  4, 0); // A4  is P0.04 (GPIO D3  / A4 / SCL)
-    _FL_DEFPIN(19,  5, 0); // A5  is P0.05 (GPIO D2  / A5 / SDA)
-    _FL_DEFPIN(20, 30, 0); // A6  is P0.30 (GPIO D0  / A6 / UART RX)
-    _FL_DEFPIN(21, 24, 0); // A7  is P0.14 (GPIO D1  / A7 / UART TX)
-
-    // Other pins
-    _FL_DEFPIN(22, 28, 0); // A8  is P0.28 (Light Sensor)
-    _FL_DEFPIN(23, 31, 0); // A9  is P0.31 (Thermistor Sensor)
-    _FL_DEFPIN(24, 16, 0); //     is P0.16 (PDM DAT)
-    _FL_DEFPIN(25, 17, 0); //     is P0.17 (PDM CLK)
-    _FL_DEFPIN(26, 12, 1); //     is P1.12 (LIS SCL)
-    _FL_DEFPIN(27, 13, 1); //     is P1.13 (LIS IRQ)
-    _FL_DEFPIN(28, 10, 1); //     is P1.10 (LIS SDA)
-    _FL_DEFPIN(36,  6, 0); // D36 is P0.06 (?? Neopixel  / Sensor switch ??)
-#endif // defined (ARDUINO_NRF52840_CIRCUITPLAY)
-
-
 // Adafruit Bluefruit on nRF52840DK PCA10056
 // From https://www.adafruit.com/package_adafruit_index.json
 #if defined (ARDUINO_NRF52840_PCA10056)

From abef10848893981de1f7988001aa9410ab914758 Mon Sep 17 00:00:00 2001
From: Henry Gabryjelski <henrygab@users.noreply.github.com>
Date: Wed, 27 May 2020 17:46:31 -0700
Subject: [PATCH 160/204] Tested all basic pin functionality

---
 .../arm/nrf52/fastpin_arm_nrf52_variants.h    | 25 ++++++++++++-------
 1 file changed, 16 insertions(+), 9 deletions(-)

diff --git a/platforms/arm/nrf52/fastpin_arm_nrf52_variants.h b/platforms/arm/nrf52/fastpin_arm_nrf52_variants.h
index a59592e9a2..9020655c8a 100644
--- a/platforms/arm/nrf52/fastpin_arm_nrf52_variants.h
+++ b/platforms/arm/nrf52/fastpin_arm_nrf52_variants.h
@@ -65,15 +65,22 @@
     // Use PIN_NEOPIXEL (D8) for the ten built-in neopixels
     _FL_DEFPIN( 8, 13, 0); // P0.13 -- D8 / Neopixels
 
-    // Use PIN_A0 .. PIN_A7 for alligator-clip connectors
-    _FL_DEFPIN(14, 26, 0); // P0.26 -- A0 / Audio Out
-    _FL_DEFPIN(15,  2, 0); // P0.02 -- A1
-    _FL_DEFPIN(16, 29, 0); // P0.29 -- A2
-    _FL_DEFPIN(17,  3, 0); // P0.03 -- A3
-    _FL_DEFPIN(18,  4, 0); // P0.04 -- A4   / SCL
-    _FL_DEFPIN(19,  5, 0); // P0.05 -- A5   / SDA
-    _FL_DEFPIN(20, 30, 0); // P0.30 -- A6   / UART RX
-    _FL_DEFPIN(21, 14, 0); // P0.14 -- AREF / UART TX
+    // PIN_A0 is connected to an amplifier, and thus *might*
+    // not be suitable for use with FastLED.
+    // Do not enable this pin until signal integrity
+    // from this pin has been confirmed.
+    //
+    // NOTE: it might also be usable if the amp is first
+    //       disabled using D11 ("speaker shutdown" pin)
+    //
+    // _FL_DEFPIN(14, 26, 0); // P0.26 -- A0   / D12  / Audio Out
+    _FL_DEFPIN(15,  2, 0);    // P0.02 -- A1   /  D6
+    _FL_DEFPIN(16, 29, 0);    // P0.29 -- A2   /  D9
+    _FL_DEFPIN(17,  3, 0);    // P0.03 -- A3   / D10
+    _FL_DEFPIN(18,  4, 0);    // P0.04 -- A4   /  D3  / SCL
+    _FL_DEFPIN(19,  5, 0);    // P0.05 -- A5   /  D2  / SDA
+    _FL_DEFPIN(20, 30, 0);    // P0.30 -- A6   /  D0  / UART RX
+    _FL_DEFPIN(21, 14, 0);    // P0.14 -- AREF /  D1  / UART TX
 
 #endif
 

From e3eb2e25e223089ea18550bf53b9f8bebac85b4e Mon Sep 17 00:00:00 2001
From: Martin Falatic <martin@falatic.com>
Date: Sat, 30 May 2020 01:14:00 -0700
Subject: [PATCH 161/204] Update to work with Doxygen 1.8.18; update .gitignore

---
 .gitignore    |   4 +-
 docs/Doxyfile | 591 +++++++++++++++++++++++++++++++++-----------------
 2 files changed, 395 insertions(+), 200 deletions(-)

diff --git a/.gitignore b/.gitignore
index 60b7a717bd..54cd17df0f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,4 @@
-html/
 *.gch
+/.vscode
+/docs/html
+/docs/latex
diff --git a/docs/Doxyfile b/docs/Doxyfile
index eb300236d6..25e4f92e10 100644
--- a/docs/Doxyfile
+++ b/docs/Doxyfile
@@ -1,4 +1,4 @@
-# Doxyfile 1.8.8
+# Doxyfile 1.8.18
 
 # This file describes the settings to be used by the documentation system
 # doxygen (www.doxygen.org) for a project.
@@ -17,11 +17,11 @@
 # Project related configuration options
 #---------------------------------------------------------------------------
 
-# This tag specifies the encoding used for all characters in the config file
-# that follow. The default is UTF-8 which is also the encoding used for all text
-# before the first occurrence of this tag. Doxygen uses libiconv (or the iconv
-# built into libc) for the transcoding. See http://www.gnu.org/software/libiconv
-# for the list of possible encodings.
+# This tag specifies the encoding used for all characters in the configuration
+# file that follow. The default is UTF-8 which is also the encoding used for all
+# text before the first occurrence of this tag. Doxygen uses libiconv (or the
+# iconv built into libc) for the transcoding. See
+# https://www.gnu.org/software/libiconv/ for the list of possible encodings.
 # The default value is: UTF-8.
 
 DOXYFILE_ENCODING      = UTF-8
@@ -32,13 +32,13 @@ DOXYFILE_ENCODING      = UTF-8
 # title of most generated pages and in a few other places.
 # The default value is: My Project.
 
-PROJECT_NAME           = "FastLED"
+PROJECT_NAME           = FastLED
 
 # The PROJECT_NUMBER tag can be used to enter a project or revision number. This
 # could be handy for archiving the generated documentation or if some version
 # control system is used.
 
-PROJECT_NUMBER         = 3.1
+PROJECT_NUMBER         = 3.3.3
 
 # Using the PROJECT_BRIEF tag one can provide an optional one line description
 # for a project that appears at the top of each page and should give viewer a
@@ -46,10 +46,10 @@ PROJECT_NUMBER         = 3.1
 
 PROJECT_BRIEF          =
 
-# With the PROJECT_LOGO tag one can specify an logo or icon that is included in
-# the documentation. The maximum height of the logo should not exceed 55 pixels
-# and the maximum width should not exceed 200 pixels. Doxygen will copy the logo
-# to the output directory.
+# With the PROJECT_LOGO tag one can specify a logo or an icon that is included
+# in the documentation. The maximum height of the logo should not exceed 55
+# pixels and the maximum width should not exceed 200 pixels. Doxygen will copy
+# the logo to the output directory.
 
 PROJECT_LOGO           =
 
@@ -58,9 +58,9 @@ PROJECT_LOGO           =
 # entered, it will be relative to the location where doxygen was started. If
 # left blank the current directory will be used.
 
-OUTPUT_DIRECTORY       =
+OUTPUT_DIRECTORY       = ../docs
 
-# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create 4096 sub-
+# If the CREATE_SUBDIRS tag is set to YES then doxygen will create 4096 sub-
 # directories (in 2 levels) under the output directory of each output format and
 # will distribute the generated files over these directories. Enabling this
 # option can be useful when feeding doxygen a huge amount of source files, where
@@ -93,14 +93,22 @@ ALLOW_UNICODE_NAMES    = NO
 
 OUTPUT_LANGUAGE        = English
 
-# If the BRIEF_MEMBER_DESC tag is set to YES doxygen will include brief member
+# The OUTPUT_TEXT_DIRECTION tag is used to specify the direction in which all
+# documentation generated by doxygen is written. Doxygen will use this
+# information to generate all generated output in the proper direction.
+# Possible values are: None, LTR, RTL and Context.
+# The default value is: None.
+
+OUTPUT_TEXT_DIRECTION  = None
+
+# If the BRIEF_MEMBER_DESC tag is set to YES, doxygen will include brief member
 # descriptions after the members that are listed in the file and class
 # documentation (similar to Javadoc). Set to NO to disable this.
 # The default value is: YES.
 
 BRIEF_MEMBER_DESC      = YES
 
-# If the REPEAT_BRIEF tag is set to YES doxygen will prepend the brief
+# If the REPEAT_BRIEF tag is set to YES, doxygen will prepend the brief
 # description of a member or function before the detailed description
 #
 # Note: If both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the
@@ -119,7 +127,6 @@ REPEAT_BRIEF           = YES
 # specifies, contains, represents, a, an and the.
 
 ABBREVIATE_BRIEF       =
-
 # If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then
 # doxygen will generate a detailed section even if there is only a brief
 # description.
@@ -135,7 +142,7 @@ ALWAYS_DETAILED_SEC    = NO
 
 INLINE_INHERITED_MEMB  = NO
 
-# If the FULL_PATH_NAMES tag is set to YES doxygen will prepend the full path
+# If the FULL_PATH_NAMES tag is set to YES, doxygen will prepend the full path
 # before files name in the file list and in the header files. If set to NO the
 # shortest path that makes the file name unique will be used
 # The default value is: YES.
@@ -179,6 +186,16 @@ SHORT_NAMES            = NO
 
 JAVADOC_AUTOBRIEF      = YES
 
+# If the JAVADOC_BANNER tag is set to YES then doxygen will interpret a line
+# such as
+# /***************
+# as being the beginning of a Javadoc-style comment "banner". If set to NO, the
+# Javadoc-style will behave just like regular comments and it will not be
+# interpreted by doxygen.
+# The default value is: NO.
+
+JAVADOC_BANNER         = NO
+
 # If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first
 # line (until the first dot) of a Qt-style comment as the brief description. If
 # set to NO, the Qt-style will behave just like regular Qt-style comments (thus
@@ -205,9 +222,9 @@ MULTILINE_CPP_IS_BRIEF = NO
 
 INHERIT_DOCS           = YES
 
-# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce a
-# new page for each member. If set to NO, the documentation of a member will be
-# part of the file/class/namespace that contains it.
+# If the SEPARATE_MEMBER_PAGES tag is set to YES then doxygen will produce a new
+# page for each member. If set to NO, the documentation of a member will be part
+# of the file/class/namespace that contains it.
 # The default value is: NO.
 
 SEPARATE_MEMBER_PAGES  = NO
@@ -226,16 +243,15 @@ TAB_SIZE               = 4
 # will allow you to put the command \sideeffect (or @sideeffect) in the
 # documentation, which will result in a user-defined paragraph with heading
 # "Side Effects:". You can put \n's in the value part of an alias to insert
-# newlines.
+# newlines (in the resulting output). You can put ^^ in the value part of an
+# alias to insert a newline as if a physical newline was in the original file.
+# When you need a literal { or } or , in the value part of an alias you have to
+# escape them by means of a backslash (\). This can lead to conflicts with the
+# commands \{ and \}; for these it is advised to use the version @{ and @} or
+# use a double escape (\\{ and \\}).
 
 ALIASES                =
 
-# This tag can be used to specify a number of word-keyword mappings (TCL only).
-# A mapping has the form "name=value". For example adding "class=itcl::class"
-# will allow you to use the command class in the itcl::class meaning.
-
-TCL_SUBST              =
-
 # Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources
 # only. Doxygen will then generate output that is more tailored for C. For
 # instance, some of the names that are used will be different. The list of all
@@ -264,19 +280,28 @@ OPTIMIZE_FOR_FORTRAN   = NO
 
 OPTIMIZE_OUTPUT_VHDL   = NO
 
+# Set the OPTIMIZE_OUTPUT_SLICE tag to YES if your project consists of Slice
+# sources only. Doxygen will then generate output that is more tailored for that
+# language. For instance, namespaces will be presented as modules, types will be
+# separated into more groups, etc.
+# The default value is: NO.
+
+OPTIMIZE_OUTPUT_SLICE  = NO
+
 # Doxygen selects the parser to use depending on the extension of the files it
 # parses. With this tag you can assign which parser to use for a given
 # extension. Doxygen has a built-in mapping, but you can override or extend it
 # using this tag. The format is ext=language, where ext is a file extension, and
-# language is one of the parsers supported by doxygen: IDL, Java, Javascript,
-# C#, C, C++, D, PHP, Objective-C, Python, Fortran (fixed format Fortran:
-# FortranFixed, free formatted Fortran: FortranFree, unknown formatted Fortran:
-# Fortran. In the later case the parser tries to guess whether the code is fixed
-# or free formatted code, this is the default for Fortran type files), VHDL. For
-# instance to make doxygen treat .inc files as Fortran files (default is PHP),
-# and .f files as C (default is Fortran), use: inc=Fortran f=C.
+# language is one of the parsers supported by doxygen: IDL, Java, JavaScript,
+# Csharp (C#), C, C++, D, PHP, md (Markdown), Objective-C, Python, Slice, VHDL,
+# Fortran (fixed format Fortran: FortranFixed, free formatted Fortran:
+# FortranFree, unknown formatted Fortran: Fortran. In the latter case the parser
+# tries to guess whether the code is fixed or free formatted code, this is the
+# default for Fortran type files). For instance to make doxygen treat .inc files
+# as Fortran files (default is PHP), and .f files as C (default is Fortran),
+# use: inc=Fortran f=C.
 #
-# Note For files without extension you can use no_extension as a placeholder.
+# Note: For files without extension you can use no_extension as a placeholder.
 #
 # Note that for custom extensions you also need to set FILE_PATTERNS otherwise
 # the files are not read by doxygen.
@@ -285,7 +310,7 @@ EXTENSION_MAPPING      =
 
 # If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments
 # according to the Markdown format, which allows for more readable
-# documentation. See http://daringfireball.net/projects/markdown/ for details.
+# documentation. See https://daringfireball.net/projects/markdown/ for details.
 # The output of markdown processing is further processed by doxygen, so you can
 # mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in
 # case of backward compatibilities issues.
@@ -293,10 +318,19 @@ EXTENSION_MAPPING      =
 
 MARKDOWN_SUPPORT       = YES
 
+# When the TOC_INCLUDE_HEADINGS tag is set to a non-zero value, all headings up
+# to that level are automatically included in the table of contents, even if
+# they do not have an id attribute.
+# Note: This feature currently applies only to Markdown headings.
+# Minimum value: 0, maximum value: 99, default value: 5.
+# This tag requires that the tag MARKDOWN_SUPPORT is set to YES.
+
+TOC_INCLUDE_HEADINGS   = 5
+
 # When enabled doxygen tries to link words that correspond to documented
 # classes, or namespaces to their corresponding documentation. Such a link can
-# be prevented in individual cases by by putting a % sign in front of the word
-# or globally by setting AUTOLINK_SUPPORT to NO.
+# be prevented in individual cases by putting a % sign in front of the word or
+# globally by setting AUTOLINK_SUPPORT to NO.
 # The default value is: YES.
 
 AUTOLINK_SUPPORT       = YES
@@ -318,7 +352,7 @@ BUILTIN_STL_SUPPORT    = NO
 CPP_CLI_SUPPORT        = NO
 
 # Set the SIP_SUPPORT tag to YES if your project consists of sip (see:
-# http://www.riverbankcomputing.co.uk/software/sip/intro) sources only. Doxygen
+# https://www.riverbankcomputing.com/software/sip/intro) sources only. Doxygen
 # will parse them like normal C++ but will assume all classes use public instead
 # of private inheritance when no explicit protection keyword is present.
 # The default value is: NO.
@@ -336,13 +370,20 @@ SIP_SUPPORT            = NO
 IDL_PROPERTY_SUPPORT   = YES
 
 # If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC
-# tag is set to YES, then doxygen will reuse the documentation of the first
+# tag is set to YES then doxygen will reuse the documentation of the first
 # member in the group (if any) for the other members of the group. By default
 # all members of a group must be documented explicitly.
 # The default value is: NO.
 
 DISTRIBUTE_GROUP_DOC   = NO
 
+# If one adds a struct or class to a group and this option is enabled, then also
+# any nested class or struct is added to the same group. By default this option
+# is disabled and one has to add nested compounds explicitly via \ingroup.
+# The default value is: NO.
+
+GROUP_NESTED_COMPOUNDS = NO
+
 # Set the SUBGROUPING tag to YES to allow class member groups of the same type
 # (for instance a group of public functions) to be put as a subgroup of that
 # type (e.g. under the Public Functions section). Set it to NO to prevent
@@ -401,7 +442,7 @@ LOOKUP_CACHE_SIZE      = 0
 # Build related configuration options
 #---------------------------------------------------------------------------
 
-# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in
+# If the EXTRACT_ALL tag is set to YES, doxygen will assume all entities in
 # documentation are documented, even if no documentation was available. Private
 # class members and static file members will be hidden unless the
 # EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES.
@@ -411,35 +452,41 @@ LOOKUP_CACHE_SIZE      = 0
 
 EXTRACT_ALL            = NO
 
-# If the EXTRACT_PRIVATE tag is set to YES all private members of a class will
+# If the EXTRACT_PRIVATE tag is set to YES, all private members of a class will
 # be included in the documentation.
 # The default value is: NO.
 
 EXTRACT_PRIVATE        = NO
 
-# If the EXTRACT_PACKAGE tag is set to YES all members with package or internal
+# If the EXTRACT_PRIV_VIRTUAL tag is set to YES, documented private virtual
+# methods of a class will be included in the documentation.
+# The default value is: NO.
+
+EXTRACT_PRIV_VIRTUAL   = NO
+
+# If the EXTRACT_PACKAGE tag is set to YES, all members with package or internal
 # scope will be included in the documentation.
 # The default value is: NO.
 
 EXTRACT_PACKAGE        = NO
 
-# If the EXTRACT_STATIC tag is set to YES all static members of a file will be
+# If the EXTRACT_STATIC tag is set to YES, all static members of a file will be
 # included in the documentation.
 # The default value is: NO.
 
 EXTRACT_STATIC         = NO
 
-# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) defined
-# locally in source files will be included in the documentation. If set to NO
+# If the EXTRACT_LOCAL_CLASSES tag is set to YES, classes (and structs) defined
+# locally in source files will be included in the documentation. If set to NO,
 # only classes defined in header files are included. Does not have any effect
 # for Java sources.
 # The default value is: YES.
 
 EXTRACT_LOCAL_CLASSES  = YES
 
-# This flag is only useful for Objective-C code. When set to YES local methods,
+# This flag is only useful for Objective-C code. If set to YES, local methods,
 # which are defined in the implementation section but not in the interface are
-# included in the documentation. If set to NO only methods in the interface are
+# included in the documentation. If set to NO, only methods in the interface are
 # included.
 # The default value is: NO.
 
@@ -464,21 +511,21 @@ HIDE_UNDOC_MEMBERS     = NO
 
 # If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all
 # undocumented classes that are normally visible in the class hierarchy. If set
-# to NO these classes will be included in the various overviews. This option has
-# no effect if EXTRACT_ALL is enabled.
+# to NO, these classes will be included in the various overviews. This option
+# has no effect if EXTRACT_ALL is enabled.
 # The default value is: NO.
 
 HIDE_UNDOC_CLASSES     = NO
 
 # If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend
-# (class|struct|union) declarations. If set to NO these declarations will be
-# included in the documentation.
+# declarations. If set to NO, these declarations will be included in the
+# documentation.
 # The default value is: NO.
 
 HIDE_FRIEND_COMPOUNDS  = NO
 
 # If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any
-# documentation blocks found inside the body of a function. If set to NO these
+# documentation blocks found inside the body of a function. If set to NO, these
 # blocks will be appended to the function's detailed documentation block.
 # The default value is: NO.
 
@@ -492,21 +539,28 @@ HIDE_IN_BODY_DOCS      = NO
 INTERNAL_DOCS          = NO
 
 # If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file
-# names in lower-case letters. If set to YES upper-case letters are also
+# names in lower-case letters. If set to YES, upper-case letters are also
 # allowed. This is useful if you have classes or files whose names only differ
 # in case and if your file system supports case sensitive file names. Windows
-# and Mac users are advised to set this option to NO.
+# (including Cygwin) and Mac users are advised to set this option to NO.
 # The default value is: system dependent.
 
 CASE_SENSE_NAMES       = NO
 
 # If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with
-# their full class and namespace scopes in the documentation. If set to YES the
+# their full class and namespace scopes in the documentation. If set to YES, the
 # scope will be hidden.
 # The default value is: NO.
 
 HIDE_SCOPE_NAMES       = NO
 
+# If the HIDE_COMPOUND_REFERENCE tag is set to NO (default) then doxygen will
+# append additional text to a page's title, such as Class Reference. If set to
+# YES the compound reference will be hidden.
+# The default value is: NO.
+
+HIDE_COMPOUND_REFERENCE= NO
+
 # If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of
 # the files that are included by a file in the documentation of that file.
 # The default value is: YES.
@@ -534,14 +588,14 @@ INLINE_INFO            = YES
 
 # If the SORT_MEMBER_DOCS tag is set to YES then doxygen will sort the
 # (detailed) documentation of file and class members alphabetically by member
-# name. If set to NO the members will appear in declaration order.
+# name. If set to NO, the members will appear in declaration order.
 # The default value is: YES.
 
 SORT_MEMBER_DOCS       = YES
 
 # If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief
 # descriptions of file, namespace and class members alphabetically by member
-# name. If set to NO the members will appear in declaration order. Note that
+# name. If set to NO, the members will appear in declaration order. Note that
 # this will also influence the order of the classes in the class list.
 # The default value is: NO.
 
@@ -586,27 +640,25 @@ SORT_BY_SCOPE_NAME     = NO
 
 STRICT_PROTO_MATCHING  = NO
 
-# The GENERATE_TODOLIST tag can be used to enable ( YES) or disable ( NO) the
-# todo list. This list is created by putting \todo commands in the
-# documentation.
+# The GENERATE_TODOLIST tag can be used to enable (YES) or disable (NO) the todo
+# list. This list is created by putting \todo commands in the documentation.
 # The default value is: YES.
 
 GENERATE_TODOLIST      = YES
 
-# The GENERATE_TESTLIST tag can be used to enable ( YES) or disable ( NO) the
-# test list. This list is created by putting \test commands in the
-# documentation.
+# The GENERATE_TESTLIST tag can be used to enable (YES) or disable (NO) the test
+# list. This list is created by putting \test commands in the documentation.
 # The default value is: YES.
 
 GENERATE_TESTLIST      = YES
 
-# The GENERATE_BUGLIST tag can be used to enable ( YES) or disable ( NO) the bug
+# The GENERATE_BUGLIST tag can be used to enable (YES) or disable (NO) the bug
 # list. This list is created by putting \bug commands in the documentation.
 # The default value is: YES.
 
 GENERATE_BUGLIST       = YES
 
-# The GENERATE_DEPRECATEDLIST tag can be used to enable ( YES) or disable ( NO)
+# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or disable (NO)
 # the deprecated list. This list is created by putting \deprecated commands in
 # the documentation.
 # The default value is: YES.
@@ -631,8 +683,8 @@ ENABLED_SECTIONS       =
 MAX_INITIALIZER_LINES  = 30
 
 # Set the SHOW_USED_FILES tag to NO to disable the list of files generated at
-# the bottom of the documentation of classes and structs. If set to YES the list
-# will mention the files that were used to generate the documentation.
+# the bottom of the documentation of classes and structs. If set to YES, the
+# list will mention the files that were used to generate the documentation.
 # The default value is: YES.
 
 SHOW_USED_FILES        = YES
@@ -677,7 +729,7 @@ LAYOUT_FILE            =
 # The CITE_BIB_FILES tag can be used to specify one or more bib files containing
 # the reference definitions. This must be a list of .bib files. The .bib
 # extension is automatically appended if omitted. This requires the bibtex tool
-# to be installed. See also http://en.wikipedia.org/wiki/BibTeX for more info.
+# to be installed. See also https://en.wikipedia.org/wiki/BibTeX for more info.
 # For LaTeX the style of the bibliography can be controlled using
 # LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the
 # search path. See also \cite for info how to create references.
@@ -696,7 +748,7 @@ CITE_BIB_FILES         =
 QUIET                  = NO
 
 # The WARNINGS tag can be used to turn on/off the warning messages that are
-# generated to standard error ( stderr) by doxygen. If WARNINGS is set to YES
+# generated to standard error (stderr) by doxygen. If WARNINGS is set to YES
 # this implies that the warnings are on.
 #
 # Tip: Turn warnings on while writing the documentation.
@@ -704,7 +756,7 @@ QUIET                  = NO
 
 WARNINGS               = YES
 
-# If the WARN_IF_UNDOCUMENTED tag is set to YES, then doxygen will generate
+# If the WARN_IF_UNDOCUMENTED tag is set to YES then doxygen will generate
 # warnings for undocumented members. If EXTRACT_ALL is set to YES then this flag
 # will automatically be disabled.
 # The default value is: YES.
@@ -721,12 +773,19 @@ WARN_IF_DOC_ERROR      = YES
 
 # This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that
 # are documented, but have no documentation for their parameters or return
-# value. If set to NO doxygen will only warn about wrong or incomplete parameter
-# documentation, but not about the absence of documentation.
+# value. If set to NO, doxygen will only warn about wrong or incomplete
+# parameter documentation, but not about the absence of documentation. If
+# EXTRACT_ALL is set to YES then this flag will automatically be disabled.
 # The default value is: NO.
 
 WARN_NO_PARAMDOC       = NO
 
+# If the WARN_AS_ERROR tag is set to YES then doxygen will immediately stop when
+# a warning is encountered.
+# The default value is: NO.
+
+WARN_AS_ERROR          = NO
+
 # The WARN_FORMAT tag determines the format of the warning messages that doxygen
 # can produce. The string should contain the $file, $line, and $text tags, which
 # will be replaced by the file and line number from which the warning originated
@@ -750,15 +809,15 @@ WARN_LOGFILE           =
 # The INPUT tag is used to specify the files and/or directories that contain
 # documented source files. You may enter file names like myfile.cpp or
 # directories like /usr/src/myproject. Separate the files or directories with
-# spaces.
+# spaces. See also FILE_PATTERNS and EXTENSION_MAPPING.
 # Note: If this tag is empty the current directory is searched.
 
-INPUT                  = . lib8tion 
+INPUT                  = ../ ../lib8tion
 
 # This tag can be used to specify the character encoding of the source files
 # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses
 # libiconv (or the iconv built into libc) for the transcoding. See the libiconv
-# documentation (see: http://www.gnu.org/software/libiconv) for the list of
+# documentation (see: https://www.gnu.org/software/libiconv/) for the list of
 # possible encodings.
 # The default value is: UTF-8.
 
@@ -766,15 +825,21 @@ INPUT_ENCODING         = UTF-8
 
 # If the value of the INPUT tag contains directories, you can use the
 # FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and
-# *.h) to filter out the source-files in the directories. If left blank the
-# following patterns are tested:*.c, *.cc, *.cxx, *.cpp, *.c++, *.java, *.ii,
-# *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h, *.hh, *.hxx, *.hpp,
-# *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc, *.m, *.markdown,
-# *.md, *.mm, *.dox, *.py, *.f90, *.f, *.for, *.tcl, *.vhd, *.vhdl, *.ucf,
-# *.qsf, *.as and *.js.
+# *.h) to filter out the source-files in the directories.
+#
+# Note that for custom extensions or not directly supported extensions you also
+# need to set EXTENSION_MAPPING for the extension otherwise the files are not
+# read by doxygen.
+#
+# If left blank the following patterns are tested: *.c, *.cc, *.cxx, *.cpp,
+# *.c++, *.java, *.ii, *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h,
+# *.hh, *.hxx, *.hpp, *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc,
+# *.m, *.markdown, *.md, *.mm, *.dox (to be provided as doxygen C comment),
+# *.doc (to be provided as doxygen C comment), *.txt (to be provided as doxygen
+# C comment), *.py, *.pyw, *.f90, *.f95, *.f03, *.f08, *.f18, *.f, *.for, *.vhd,
+# *.vhdl, *.ucf, *.qsf and *.ice.
 
 FILE_PATTERNS          =
-
 # The RECURSIVE tag can be used to specify whether or not subdirectories should
 # be searched for input files as well.
 # The default value is: NO.
@@ -788,7 +853,7 @@ RECURSIVE              = NO
 # Note that relative paths are relative to the directory from which doxygen is
 # run.
 
-EXCLUDE                = M0-clocklessnotes.md TODO.md 
+EXCLUDE                =
 
 # The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
 # directories that are symbolic links (a Unix file system feature) are excluded
@@ -857,6 +922,10 @@ IMAGE_PATH             =
 # Note that the filter must not add or remove lines; it is applied before the
 # code is scanned, but not when the output code is generated. If lines are added
 # or removed, the anchors will not be placed correctly.
+#
+# Note that for custom extensions or not directly supported extensions you also
+# need to set EXTENSION_MAPPING for the extension otherwise the files are not
+# properly processed by doxygen.
 
 INPUT_FILTER           =
 
@@ -866,11 +935,15 @@ INPUT_FILTER           =
 # (like *.cpp=my_cpp_filter). See INPUT_FILTER for further information on how
 # filters are used. If the FILTER_PATTERNS tag is empty or if none of the
 # patterns match the file name, INPUT_FILTER is applied.
+#
+# Note that for custom extensions or not directly supported extensions you also
+# need to set EXTENSION_MAPPING for the extension otherwise the files are not
+# properly processed by doxygen.
 
 FILTER_PATTERNS        =
 
 # If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using
-# INPUT_FILTER ) will also be used to filter the input files that are used for
+# INPUT_FILTER) will also be used to filter the input files that are used for
 # producing the source files to browse (i.e. when SOURCE_BROWSER is set to YES).
 # The default value is: NO.
 
@@ -918,7 +991,7 @@ INLINE_SOURCES         = NO
 STRIP_CODE_COMMENTS    = YES
 
 # If the REFERENCED_BY_RELATION tag is set to YES then for each documented
-# function all documented functions referencing it will be listed.
+# entity all documented functions referencing it will be listed.
 # The default value is: NO.
 
 REFERENCED_BY_RELATION = NO
@@ -930,7 +1003,7 @@ REFERENCED_BY_RELATION = NO
 REFERENCES_RELATION    = NO
 
 # If the REFERENCES_LINK_SOURCE tag is set to YES and SOURCE_BROWSER tag is set
-# to YES, then the hyperlinks from functions in REFERENCES_RELATION and
+# to YES then the hyperlinks from functions in REFERENCES_RELATION and
 # REFERENCED_BY_RELATION lists will link to the source code. Otherwise they will
 # link to the documentation.
 # The default value is: YES.
@@ -950,12 +1023,12 @@ SOURCE_TOOLTIPS        = YES
 # If the USE_HTAGS tag is set to YES then the references to source code will
 # point to the HTML generated by the htags(1) tool instead of doxygen built-in
 # source browser. The htags tool is part of GNU's global source tagging system
-# (see http://www.gnu.org/software/global/global.html). You will need version
+# (see https://www.gnu.org/software/global/global.html). You will need version
 # 4.8.6 or higher.
 #
 # To use it do the following:
 # - Install the latest version of global
-# - Enable SOURCE_BROWSER and USE_HTAGS in the config file
+# - Enable SOURCE_BROWSER and USE_HTAGS in the configuration file
 # - Make sure the INPUT points to the root of the source tree
 # - Run doxygen as normal
 #
@@ -977,6 +1050,35 @@ USE_HTAGS              = NO
 
 VERBATIM_HEADERS       = YES
 
+# If the CLANG_ASSISTED_PARSING tag is set to YES then doxygen will use the
+# clang parser (see: http://clang.llvm.org/) for more accurate parsing at the
+# cost of reduced performance. This can be particularly helpful with template
+# rich C++ code for which doxygen's built-in parser lacks the necessary type
+# information.
+# Note: The availability of this option depends on whether or not doxygen was
+# generated with the -Duse_libclang=ON option for CMake.
+# The default value is: NO.
+
+CLANG_ASSISTED_PARSING = NO
+
+# If clang assisted parsing is enabled you can provide the compiler with command
+# line options that you would normally use when invoking the compiler. Note that
+# the include paths will already be set by doxygen for the files and directories
+# specified with INPUT and INCLUDE_PATH.
+# This tag requires that the tag CLANG_ASSISTED_PARSING is set to YES.
+
+CLANG_OPTIONS          =
+
+# If clang assisted parsing is enabled you can provide the clang parser with the
+# path to the compilation database (see:
+# http://clang.llvm.org/docs/HowToSetupToolingForLLVM.html) used when the files
+# were built. This is equivalent to specifying the "-p" option to a clang tool,
+# such as clang-check. These options will then be passed to the parser.
+# Note: The availability of this option depends on whether or not doxygen was
+# generated with the -Duse_libclang=ON option for CMake.
+
+CLANG_DATABASE_PATH    =
+
 #---------------------------------------------------------------------------
 # Configuration options related to the alphabetical class index
 #---------------------------------------------------------------------------
@@ -1007,7 +1109,7 @@ IGNORE_PREFIX          =
 # Configuration options related to the HTML output
 #---------------------------------------------------------------------------
 
-# If the GENERATE_HTML tag is set to YES doxygen will generate HTML output
+# If the GENERATE_HTML tag is set to YES, doxygen will generate HTML output
 # The default value is: YES.
 
 GENERATE_HTML          = YES
@@ -1018,7 +1120,7 @@ GENERATE_HTML          = YES
 # The default directory is: html.
 # This tag requires that the tag GENERATE_HTML is set to YES.
 
-HTML_OUTPUT            = html/docs/3.1
+HTML_OUTPUT            = html
 
 # The HTML_FILE_EXTENSION tag can be used to specify the file extension for each
 # generated HTML page (for example: .htm, .php, .asp).
@@ -1073,10 +1175,10 @@ HTML_STYLESHEET        =
 # cascading style sheets that are included after the standard style sheets
 # created by doxygen. Using this option one can overrule certain style aspects.
 # This is preferred over using HTML_STYLESHEET since it does not replace the
-# standard style sheet and is therefor more robust against future updates.
+# standard style sheet and is therefore more robust against future updates.
 # Doxygen will copy the style sheet files to the output directory.
-# Note: The order of the extra stylesheet files is of importance (e.g. the last
-# stylesheet in the list overrules the setting of the previous ones in the
+# Note: The order of the extra style sheet files is of importance (e.g. the last
+# style sheet in the list overrules the setting of the previous ones in the
 # list). For an example see the documentation.
 # This tag requires that the tag GENERATE_HTML is set to YES.
 
@@ -1093,9 +1195,9 @@ HTML_EXTRA_STYLESHEET  =
 HTML_EXTRA_FILES       =
 
 # The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen
-# will adjust the colors in the stylesheet and background images according to
+# will adjust the colors in the style sheet and background images according to
 # this color. Hue is specified as an angle on a colorwheel, see
-# http://en.wikipedia.org/wiki/Hue for more information. For instance the value
+# https://en.wikipedia.org/wiki/Hue for more information. For instance the value
 # 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300
 # purple, and 360 is red again.
 # Minimum value: 0, maximum value: 359, default value: 220.
@@ -1124,12 +1226,24 @@ HTML_COLORSTYLE_GAMMA  = 80
 
 # If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML
 # page will contain the date and time when the page was generated. Setting this
-# to NO can help when comparing the output of multiple runs.
-# The default value is: YES.
+# to YES can help to show when doxygen was last run and thus if the
+# documentation is up to date.
+# The default value is: NO.
 # This tag requires that the tag GENERATE_HTML is set to YES.
 
 HTML_TIMESTAMP         = YES
 
+# If the HTML_DYNAMIC_MENUS tag is set to YES then the generated HTML
+# documentation will contain a main index with vertical navigation menus that
+# are dynamically created via JavaScript. If disabled, the navigation index will
+# consist of multiple levels of tabs that are statically embedded in every HTML
+# page. Disable this option to support browsers that do not have JavaScript,
+# like the Qt help browser.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_DYNAMIC_MENUS     = YES
+
 # If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML
 # documentation will contain sections that can be hidden and shown after the
 # page has loaded.
@@ -1153,13 +1267,13 @@ HTML_INDEX_NUM_ENTRIES = 100
 
 # If the GENERATE_DOCSET tag is set to YES, additional index files will be
 # generated that can be used as input for Apple's Xcode 3 integrated development
-# environment (see: http://developer.apple.com/tools/xcode/), introduced with
-# OSX 10.5 (Leopard). To create a documentation set, doxygen will generate a
+# environment (see: https://developer.apple.com/xcode/), introduced with OSX
+# 10.5 (Leopard). To create a documentation set, doxygen will generate a
 # Makefile in the HTML output directory. Running make will produce the docset in
 # that directory and running make install will install the docset in
 # ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at
-# startup. See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html
-# for more information.
+# startup. For more information see:
+# https://developer.apple.com/library/archive/featuredarticles/DoxygenXcode/_index.html
 # The default value is: NO.
 # This tag requires that the tag GENERATE_HTML is set to YES.
 
@@ -1198,7 +1312,7 @@ DOCSET_PUBLISHER_NAME  = Publisher
 # If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three
 # additional HTML index files: index.hhp, index.hhc, and index.hhk. The
 # index.hhp is a project file that can be read by Microsoft's HTML Help Workshop
-# (see: http://www.microsoft.com/en-us/download/details.aspx?id=21138) on
+# (see: https://www.microsoft.com/en-us/download/details.aspx?id=21138) on
 # Windows.
 #
 # The HTML Help Workshop contains a compiler that can convert all HTML output
@@ -1221,28 +1335,28 @@ GENERATE_HTMLHELP      = NO
 CHM_FILE               =
 
 # The HHC_LOCATION tag can be used to specify the location (absolute path
-# including file name) of the HTML help compiler ( hhc.exe). If non-empty
+# including file name) of the HTML help compiler (hhc.exe). If non-empty,
 # doxygen will try to run the HTML help compiler on the generated index.hhp.
 # The file has to be specified with full path.
 # This tag requires that the tag GENERATE_HTMLHELP is set to YES.
 
 HHC_LOCATION           =
 
-# The GENERATE_CHI flag controls if a separate .chi index file is generated (
-# YES) or that it should be included in the master .chm file ( NO).
+# The GENERATE_CHI flag controls if a separate .chi index file is generated
+# (YES) or that it should be included in the master .chm file (NO).
 # The default value is: NO.
 # This tag requires that the tag GENERATE_HTMLHELP is set to YES.
 
 GENERATE_CHI           = NO
 
-# The CHM_INDEX_ENCODING is used to encode HtmlHelp index ( hhk), content ( hhc)
+# The CHM_INDEX_ENCODING is used to encode HtmlHelp index (hhk), content (hhc)
 # and project file content.
 # This tag requires that the tag GENERATE_HTMLHELP is set to YES.
 
 CHM_INDEX_ENCODING     =
 
-# The BINARY_TOC flag controls whether a binary table of contents is generated (
-# YES) or a normal table of contents ( NO) in the .chm file. Furthermore it
+# The BINARY_TOC flag controls whether a binary table of contents is generated
+# (YES) or a normal table of contents (NO) in the .chm file. Furthermore it
 # enables the Previous and Next buttons.
 # The default value is: NO.
 # This tag requires that the tag GENERATE_HTMLHELP is set to YES.
@@ -1274,7 +1388,7 @@ QCH_FILE               =
 
 # The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help
 # Project output. For more information please see Qt Help Project / Namespace
-# (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#namespace).
+# (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#namespace).
 # The default value is: org.doxygen.Project.
 # This tag requires that the tag GENERATE_QHP is set to YES.
 
@@ -1282,7 +1396,7 @@ QHP_NAMESPACE          = org.doxygen.Project
 
 # The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt
 # Help Project output. For more information please see Qt Help Project / Virtual
-# Folders (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#virtual-
+# Folders (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#virtual-
 # folders).
 # The default value is: doc.
 # This tag requires that the tag GENERATE_QHP is set to YES.
@@ -1291,7 +1405,7 @@ QHP_VIRTUAL_FOLDER     = doc
 
 # If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom
 # filter to add. For more information please see Qt Help Project / Custom
-# Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom-
+# Filters (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-
 # filters).
 # This tag requires that the tag GENERATE_QHP is set to YES.
 
@@ -1299,7 +1413,7 @@ QHP_CUST_FILTER_NAME   =
 
 # The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the
 # custom filter to add. For more information please see Qt Help Project / Custom
-# Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom-
+# Filters (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-
 # filters).
 # This tag requires that the tag GENERATE_QHP is set to YES.
 
@@ -1307,7 +1421,7 @@ QHP_CUST_FILTER_ATTRS  =
 
 # The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this
 # project's filter section matches. Qt Help Project / Filter Attributes (see:
-# http://qt-project.org/doc/qt-4.8/qthelpproject.html#filter-attributes).
+# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#filter-attributes).
 # This tag requires that the tag GENERATE_QHP is set to YES.
 
 QHP_SECT_FILTER_ATTRS  =
@@ -1356,13 +1470,13 @@ DISABLE_INDEX          = NO
 # index structure (just like the one that is generated for HTML Help). For this
 # to work a browser that supports JavaScript, DHTML, CSS and frames is required
 # (i.e. any modern browser). Windows users are probably better off using the
-# HTML help feature. Via custom stylesheets (see HTML_EXTRA_STYLESHEET) one can
+# HTML help feature. Via custom style sheets (see HTML_EXTRA_STYLESHEET) one can
 # further fine-tune the look of the index. As an example, the default style
 # sheet generated by doxygen has an example that shows how to put an image at
 # the root of the tree instead of the PROJECT_NAME. Since the tree basically has
 # the same information as the tab index, you could consider setting
 # DISABLE_INDEX to YES when enabling this option.
-# The default value is: .
+# The default value is: NO.
 # This tag requires that the tag GENERATE_HTML is set to YES.
 
 GENERATE_TREEVIEW      = YES
@@ -1384,13 +1498,24 @@ ENUM_VALUES_PER_LINE   = 4
 
 TREEVIEW_WIDTH         = 250
 
-# When the EXT_LINKS_IN_WINDOW option is set to YES doxygen will open links to
+# If the EXT_LINKS_IN_WINDOW option is set to YES, doxygen will open links to
 # external symbols imported via tag files in a separate window.
 # The default value is: NO.
 # This tag requires that the tag GENERATE_HTML is set to YES.
 
 EXT_LINKS_IN_WINDOW    = NO
 
+# If the HTML_FORMULA_FORMAT option is set to svg, doxygen will use the pdf2svg
+# tool (see https://github.com/dawbarton/pdf2svg) or inkscape (see
+# https://inkscape.org) to generate formulas as SVG images instead of PNGs for
+# the HTML output. These images will generally look nicer at scaled resolutions.
+# Possible values are: png (the default) and svg (looks nicer but requires the
+# pdf2svg tool).
+# The default value is: png.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_FORMULA_FORMAT    = png
+
 # Use this tag to change the font size of LaTeX formulas included as images in
 # the HTML documentation. When you change the font size after a successful
 # doxygen run you need to manually remove any form_*.png images from the HTML
@@ -1400,7 +1525,7 @@ EXT_LINKS_IN_WINDOW    = NO
 
 FORMULA_FONTSIZE       = 10
 
-# Use the FORMULA_TRANPARENT tag to determine whether or not the images
+# Use the FORMULA_TRANSPARENT tag to determine whether or not the images
 # generated for formulas are transparent PNGs. Transparent PNGs are not
 # supported properly for IE 6.0, but are supported on all modern browsers.
 #
@@ -1411,9 +1536,15 @@ FORMULA_FONTSIZE       = 10
 
 FORMULA_TRANSPARENT    = YES
 
+# The FORMULA_MACROFILE can contain LaTeX \newcommand and \renewcommand commands
+# to create new LaTeX commands to be used in formulas as building blocks. See
+# the section "Including formulas" for details.
+
+FORMULA_MACROFILE      =
+
 # Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see
-# http://www.mathjax.org) which uses client side Javascript for the rendering
-# instead of using prerendered bitmaps. Use this if you do not have LaTeX
+# https://www.mathjax.org) which uses client side JavaScript for the rendering
+# instead of using pre-rendered bitmaps. Use this if you do not have LaTeX
 # installed or if you want to formulas look prettier in the HTML output. When
 # enabled you may also need to install MathJax separately and configure the path
 # to it using the MATHJAX_RELPATH option.
@@ -1439,11 +1570,11 @@ MATHJAX_FORMAT         = HTML-CSS
 # MATHJAX_RELPATH should be ../mathjax. The default value points to the MathJax
 # Content Delivery Network so you can quickly see the result without installing
 # MathJax. However, it is strongly recommended to install a local copy of
-# MathJax from http://www.mathjax.org before deployment.
-# The default value is: http://cdn.mathjax.org/mathjax/latest.
+# MathJax from https://www.mathjax.org before deployment.
+# The default value is: https://cdn.jsdelivr.net/npm/mathjax@2.
 # This tag requires that the tag USE_MATHJAX is set to YES.
 
-MATHJAX_RELPATH        = http://cdn.mathjax.org/mathjax/latest
+MATHJAX_RELPATH        = https://cdn.jsdelivr.net/npm/mathjax@2
 
 # The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax
 # extension names that should be enabled during MathJax rendering. For example
@@ -1482,7 +1613,7 @@ MATHJAX_CODEFILE       =
 SEARCHENGINE           = YES
 
 # When the SERVER_BASED_SEARCH tag is enabled the search engine will be
-# implemented using a web server instead of a web client using Javascript. There
+# implemented using a web server instead of a web client using JavaScript. There
 # are two flavors of web server based searching depending on the EXTERNAL_SEARCH
 # setting. When disabled, doxygen will generate a PHP script for searching and
 # an index file used by the script. When EXTERNAL_SEARCH is enabled the indexing
@@ -1499,9 +1630,9 @@ SERVER_BASED_SEARCH    = NO
 # external search engine pointed to by the SEARCHENGINE_URL option to obtain the
 # search results.
 #
-# Doxygen ships with an example indexer ( doxyindexer) and search engine
+# Doxygen ships with an example indexer (doxyindexer) and search engine
 # (doxysearch.cgi) which are based on the open source search engine library
-# Xapian (see: http://xapian.org/).
+# Xapian (see: https://xapian.org/).
 #
 # See the section "External Indexing and Searching" for details.
 # The default value is: NO.
@@ -1512,9 +1643,9 @@ EXTERNAL_SEARCH        = NO
 # The SEARCHENGINE_URL should point to a search engine hosted by a web server
 # which will return the search results when EXTERNAL_SEARCH is enabled.
 #
-# Doxygen ships with an example indexer ( doxyindexer) and search engine
+# Doxygen ships with an example indexer (doxyindexer) and search engine
 # (doxysearch.cgi) which are based on the open source search engine library
-# Xapian (see: http://xapian.org/). See the section "External Indexing and
+# Xapian (see: https://xapian.org/). See the section "External Indexing and
 # Searching" for details.
 # This tag requires that the tag SEARCHENGINE is set to YES.
 
@@ -1550,7 +1681,7 @@ EXTRA_SEARCH_MAPPINGS  =
 # Configuration options related to the LaTeX output
 #---------------------------------------------------------------------------
 
-# If the GENERATE_LATEX tag is set to YES doxygen will generate LaTeX output.
+# If the GENERATE_LATEX tag is set to YES, doxygen will generate LaTeX output.
 # The default value is: YES.
 
 GENERATE_LATEX         = YES
@@ -1566,22 +1697,36 @@ LATEX_OUTPUT           = latex
 # The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be
 # invoked.
 #
-# Note that when enabling USE_PDFLATEX this option is only used for generating
-# bitmaps for formulas in the HTML output, but not in the Makefile that is
-# written to the output directory.
-# The default file is: latex.
+# Note that when USE_PDFLATEX is not enabled the default is latex; when
+# USE_PDFLATEX is enabled the default is pdflatex, and if latex is chosen in
+# the latter case it is overwritten by pdflatex. For specific output languages
+# the default may have been set differently; this depends on the implementation
+# of the output language.
 # This tag requires that the tag GENERATE_LATEX is set to YES.
 
 LATEX_CMD_NAME         = latex
 
 # The MAKEINDEX_CMD_NAME tag can be used to specify the command name to generate
 # index for LaTeX.
+# Note: This tag is used in the Makefile / make.bat.
+# See also: LATEX_MAKEINDEX_CMD for the part in the generated output file
+# (.tex).
 # The default file is: makeindex.
 # This tag requires that the tag GENERATE_LATEX is set to YES.
 
 MAKEINDEX_CMD_NAME     = makeindex
 
-# If the COMPACT_LATEX tag is set to YES doxygen generates more compact LaTeX
+# The LATEX_MAKEINDEX_CMD tag can be used to specify the command name to
+# generate index for LaTeX. In case there is no backslash (\) as first character
+# it will be automatically added in the LaTeX code.
+# Note: This tag is used in the generated output file (.tex).
+# See also: MAKEINDEX_CMD_NAME for the part in the Makefile / make.bat.
+# The default value is: makeindex.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_MAKEINDEX_CMD    = makeindex
+
+# If the COMPACT_LATEX tag is set to YES, doxygen generates more compact LaTeX
 # documents. This may be useful for small projects and may help to save some
 # trees in general.
 # The default value is: NO.
@@ -1599,9 +1744,12 @@ COMPACT_LATEX          = NO
 PAPER_TYPE             = a4
 
 # The EXTRA_PACKAGES tag can be used to specify one or more LaTeX package names
-# that should be included in the LaTeX output. To get the times font for
-# instance you can specify
-# EXTRA_PACKAGES=times
+# that should be included in the LaTeX output. The package can be specified just
+# by its name or with the correct syntax as to be used with the LaTeX
+# \usepackage command. To get the times font for instance you can specify:
+# EXTRA_PACKAGES=times or EXTRA_PACKAGES={times}
+# To use the option intlimits with the amsmath package you can specify:
+# EXTRA_PACKAGES=[intlimits]{amsmath}
 # If left blank no extra packages will be included.
 # This tag requires that the tag GENERATE_LATEX is set to YES.
 
@@ -1616,9 +1764,9 @@ EXTRA_PACKAGES         =
 # Note: Only use a user-defined header if you know what you are doing! The
 # following commands have a special meaning inside the header: $title,
 # $datetime, $date, $doxygenversion, $projectname, $projectnumber,
-# $projectbrief, $projectlogo. Doxygen will replace $title with the empy string,
-# for the replacement values of the other commands the user is refered to
-# HTML_HEADER.
+# $projectbrief, $projectlogo. Doxygen will replace $title with the empty
+# string; for the replacement values of the other commands the user is referred
+# to HTML_HEADER.
 # This tag requires that the tag GENERATE_LATEX is set to YES.
 
 LATEX_HEADER           =
@@ -1634,6 +1782,17 @@ LATEX_HEADER           =
 
 LATEX_FOOTER           =
 
+# The LATEX_EXTRA_STYLESHEET tag can be used to specify additional user-defined
+# LaTeX style sheets that are included after the standard style sheets created
+# by doxygen. Using this option one can overrule certain style aspects. Doxygen
+# will copy the style sheet files to the output directory.
+# Note: The order of the extra style sheet files is of importance (e.g. the last
+# style sheet in the list overrules the setting of the previous ones in the
+# list).
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_EXTRA_STYLESHEET =
+
 # The LATEX_EXTRA_FILES tag can be used to specify one or more extra images or
 # other source files which should be copied to the LATEX_OUTPUT output
 # directory. Note that the files will be copied as-is; there are no commands or
@@ -1652,7 +1811,7 @@ LATEX_EXTRA_FILES      =
 PDF_HYPERLINKS         = YES
 
 # If the USE_PDFLATEX tag is set to YES, doxygen will use pdflatex to generate
-# the PDF file directly from the LaTeX files. Set this option to YES to get a
+# the PDF file directly from the LaTeX files. Set this option to YES, to get a
 # higher quality PDF documentation.
 # The default value is: YES.
 # This tag requires that the tag GENERATE_LATEX is set to YES.
@@ -1687,17 +1846,33 @@ LATEX_SOURCE_CODE      = NO
 
 # The LATEX_BIB_STYLE tag can be used to specify the style to use for the
 # bibliography, e.g. plainnat, or ieeetr. See
-# http://en.wikipedia.org/wiki/BibTeX and \cite for more info.
+# https://en.wikipedia.org/wiki/BibTeX and \cite for more info.
 # The default value is: plain.
 # This tag requires that the tag GENERATE_LATEX is set to YES.
 
 LATEX_BIB_STYLE        = plain
 
+# If the LATEX_TIMESTAMP tag is set to YES then the footer of each generated
+# page will contain the date and time when the page was generated. Setting this
+# to NO can help when comparing the output of multiple runs.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_TIMESTAMP        = NO
+
+# The LATEX_EMOJI_DIRECTORY tag is used to specify the (relative or absolute)
+# path from which the emoji images will be read. If a relative path is entered,
+# it will be relative to the LATEX_OUTPUT directory. If left blank the
+# LATEX_OUTPUT directory will be used.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_EMOJI_DIRECTORY  =
+
 #---------------------------------------------------------------------------
 # Configuration options related to the RTF output
 #---------------------------------------------------------------------------
 
-# If the GENERATE_RTF tag is set to YES doxygen will generate RTF output. The
+# If the GENERATE_RTF tag is set to YES, doxygen will generate RTF output. The
 # RTF output is optimized for Word 97 and may not look too pretty with other RTF
 # readers/editors.
 # The default value is: NO.
@@ -1712,7 +1887,7 @@ GENERATE_RTF           = NO
 
 RTF_OUTPUT             = rtf
 
-# If the COMPACT_RTF tag is set to YES doxygen generates more compact RTF
+# If the COMPACT_RTF tag is set to YES, doxygen generates more compact RTF
 # documents. This may be useful for small projects and may help to save some
 # trees in general.
 # The default value is: NO.
@@ -1732,9 +1907,9 @@ COMPACT_RTF            = NO
 
 RTF_HYPERLINKS         = NO
 
-# Load stylesheet definitions from file. Syntax is similar to doxygen's config
-# file, i.e. a series of assignments. You only have to provide replacements,
-# missing definitions are set to their default value.
+# Load stylesheet definitions from file. Syntax is similar to doxygen's
+# configuration file, i.e. a series of assignments. You only have to provide
+# replacements, missing definitions are set to their default value.
 #
 # See also section "Doxygen usage" for information on how to generate the
 # default style sheet that doxygen normally uses.
@@ -1743,17 +1918,27 @@ RTF_HYPERLINKS         = NO
 RTF_STYLESHEET_FILE    =
 
 # Set optional variables used in the generation of an RTF document. Syntax is
-# similar to doxygen's config file. A template extensions file can be generated
-# using doxygen -e rtf extensionFile.
+# similar to doxygen's configuration file. A template extensions file can be
+# generated using doxygen -e rtf extensionFile.
 # This tag requires that the tag GENERATE_RTF is set to YES.
 
 RTF_EXTENSIONS_FILE    =
 
+# If the RTF_SOURCE_CODE tag is set to YES then doxygen will include source code
+# with syntax highlighting in the RTF output.
+#
+# Note that which sources are shown also depends on other settings such as
+# SOURCE_BROWSER.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_RTF is set to YES.
+
+RTF_SOURCE_CODE        = NO
+
 #---------------------------------------------------------------------------
 # Configuration options related to the man page output
 #---------------------------------------------------------------------------
 
-# If the GENERATE_MAN tag is set to YES doxygen will generate man pages for
+# If the GENERATE_MAN tag is set to YES, doxygen will generate man pages for
 # classes and files.
 # The default value is: NO.
 
@@ -1797,7 +1982,7 @@ MAN_LINKS              = NO
 # Configuration options related to the XML output
 #---------------------------------------------------------------------------
 
-# If the GENERATE_XML tag is set to YES doxygen will generate an XML file that
+# If the GENERATE_XML tag is set to YES, doxygen will generate an XML file that
 # captures the structure of the code including all documentation.
 # The default value is: NO.
 
@@ -1811,7 +1996,7 @@ GENERATE_XML           = NO
 
 XML_OUTPUT             = xml
 
-# If the XML_PROGRAMLISTING tag is set to YES doxygen will dump the program
+# If the XML_PROGRAMLISTING tag is set to YES, doxygen will dump the program
 # listings (including syntax highlighting and cross-referencing information) to
 # the XML output. Note that enabling this will significantly increase the size
 # of the XML output.
@@ -1820,11 +2005,18 @@ XML_OUTPUT             = xml
 
 XML_PROGRAMLISTING     = YES
 
+# If the XML_NS_MEMB_FILE_SCOPE tag is set to YES, doxygen will include
+# namespace members in file scope as well, matching the HTML output.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_XML is set to YES.
+
+XML_NS_MEMB_FILE_SCOPE = NO
+
 #---------------------------------------------------------------------------
 # Configuration options related to the DOCBOOK output
 #---------------------------------------------------------------------------
 
-# If the GENERATE_DOCBOOK tag is set to YES doxygen will generate Docbook files
+# If the GENERATE_DOCBOOK tag is set to YES, doxygen will generate Docbook files
 # that can be used to generate PDF.
 # The default value is: NO.
 
@@ -1838,7 +2030,7 @@ GENERATE_DOCBOOK       = NO
 
 DOCBOOK_OUTPUT         = docbook
 
-# If the DOCBOOK_PROGRAMLISTING tag is set to YES doxygen will include the
+# If the DOCBOOK_PROGRAMLISTING tag is set to YES, doxygen will include the
 # program listings (including syntax highlighting and cross-referencing
 # information) to the DOCBOOK output. Note that enabling this will significantly
 # increase the size of the DOCBOOK output.
@@ -1851,10 +2043,10 @@ DOCBOOK_PROGRAMLISTING = NO
 # Configuration options for the AutoGen Definitions output
 #---------------------------------------------------------------------------
 
-# If the GENERATE_AUTOGEN_DEF tag is set to YES doxygen will generate an AutoGen
-# Definitions (see http://autogen.sf.net) file that captures the structure of
-# the code including all documentation. Note that this feature is still
-# experimental and incomplete at the moment.
+# If the GENERATE_AUTOGEN_DEF tag is set to YES, doxygen will generate an
+# AutoGen Definitions (see http://autogen.sourceforge.net/) file that captures
+# the structure of the code including all documentation. Note that this feature
+# is still experimental and incomplete at the moment.
 # The default value is: NO.
 
 GENERATE_AUTOGEN_DEF   = NO
@@ -1863,7 +2055,7 @@ GENERATE_AUTOGEN_DEF   = NO
 # Configuration options related to the Perl module output
 #---------------------------------------------------------------------------
 
-# If the GENERATE_PERLMOD tag is set to YES doxygen will generate a Perl module
+# If the GENERATE_PERLMOD tag is set to YES, doxygen will generate a Perl module
 # file that captures the structure of the code including all documentation.
 #
 # Note that this feature is still experimental and incomplete at the moment.
@@ -1871,7 +2063,7 @@ GENERATE_AUTOGEN_DEF   = NO
 
 GENERATE_PERLMOD       = NO
 
-# If the PERLMOD_LATEX tag is set to YES doxygen will generate the necessary
+# If the PERLMOD_LATEX tag is set to YES, doxygen will generate the necessary
 # Makefile rules, Perl scripts and LaTeX code to be able to generate PDF and DVI
 # output from the Perl module output.
 # The default value is: NO.
@@ -1879,9 +2071,9 @@ GENERATE_PERLMOD       = NO
 
 PERLMOD_LATEX          = NO
 
-# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be nicely
+# If the PERLMOD_PRETTY tag is set to YES, the Perl module output will be nicely
 # formatted so it can be parsed by a human reader. This is useful if you want to
-# understand what is going on. On the other hand, if this tag is set to NO the
+# understand what is going on. On the other hand, if this tag is set to NO, the
 # size of the Perl module output will be much smaller and Perl will parse it
 # just the same.
 # The default value is: YES.
@@ -1901,14 +2093,14 @@ PERLMOD_MAKEVAR_PREFIX =
 # Configuration options related to the preprocessor
 #---------------------------------------------------------------------------
 
-# If the ENABLE_PREPROCESSING tag is set to YES doxygen will evaluate all
+# If the ENABLE_PREPROCESSING tag is set to YES, doxygen will evaluate all
 # C-preprocessor directives found in the sources and include files.
 # The default value is: YES.
 
-ENABLE_PREPROCESSING   = NO
+ENABLE_PREPROCESSING   = YES
 
-# If the MACRO_EXPANSION tag is set to YES doxygen will expand all macro names
-# in the source code. If set to NO only conditional compilation will be
+# If the MACRO_EXPANSION tag is set to YES, doxygen will expand all macro names
+# in the source code. If set to NO, only conditional compilation will be
 # performed. Macro expansion can be done in a controlled way by setting
 # EXPAND_ONLY_PREDEF to YES.
 # The default value is: NO.
@@ -1924,7 +2116,7 @@ MACRO_EXPANSION        = NO
 
 EXPAND_ONLY_PREDEF     = NO
 
-# If the SEARCH_INCLUDES tag is set to YES the includes files in the
+# If the SEARCH_INCLUDES tag is set to YES, the include files in the
 # INCLUDE_PATH will be searched if a #include is found.
 # The default value is: YES.
 # This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
@@ -2000,37 +2192,32 @@ TAGFILES               =
 
 GENERATE_TAGFILE       =
 
-# If the ALLEXTERNALS tag is set to YES all external class will be listed in the
-# class index. If set to NO only the inherited external classes will be listed.
+# If the ALLEXTERNALS tag is set to YES, all external classes will be listed in
+# the class index. If set to NO, only the inherited external classes will be
+# listed.
 # The default value is: NO.
 
 ALLEXTERNALS           = NO
 
-# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed in
-# the modules index. If set to NO, only the current project's groups will be
+# If the EXTERNAL_GROUPS tag is set to YES, all external groups will be listed
+# in the modules index. If set to NO, only the current project's groups will be
 # listed.
 # The default value is: YES.
 
 EXTERNAL_GROUPS        = YES
 
-# If the EXTERNAL_PAGES tag is set to YES all external pages will be listed in
+# If the EXTERNAL_PAGES tag is set to YES, all external pages will be listed in
 # the related pages index. If set to NO, only the current project's pages will
 # be listed.
 # The default value is: YES.
 
 EXTERNAL_PAGES         = YES
 
-# The PERL_PATH should be the absolute path and name of the perl script
-# interpreter (i.e. the result of 'which perl').
-# The default file (with absolute path) is: /usr/bin/perl.
-
-PERL_PATH              = /usr/bin/perl
-
 #---------------------------------------------------------------------------
 # Configuration options related to the dot tool
 #---------------------------------------------------------------------------
 
-# If the CLASS_DIAGRAMS tag is set to YES doxygen will generate a class diagram
+# If the CLASS_DIAGRAMS tag is set to YES, doxygen will generate a class diagram
 # (in HTML and LaTeX) for classes with base or super classes. Setting the tag to
 # NO turns the diagrams off. Note that this option also works with HAVE_DOT
 # disabled, but it is recommended to install and use dot, since it yields more
@@ -2039,15 +2226,6 @@ PERL_PATH              = /usr/bin/perl
 
 CLASS_DIAGRAMS         = YES
 
-# You can define message sequence charts within doxygen comments using the \msc
-# command. Doxygen will then run the mscgen tool (see:
-# http://www.mcternan.me.uk/mscgen/)) to produce the chart and insert it in the
-# documentation. The MSCGEN_PATH tag allows you to specify the directory where
-# the mscgen tool resides. If left empty the tool is assumed to be found in the
-# default search path.
-
-MSCGEN_PATH            =
-
 # You can include diagrams made with dia in doxygen documentation. Doxygen will
 # then run dia to produce the diagram and insert it in the documentation. The
 # DIA_PATH tag allows you to specify the directory where the dia binary resides.
@@ -2055,7 +2233,7 @@ MSCGEN_PATH            =
 
 DIA_PATH               =
 
-# If set to YES, the inheritance and collaboration graphs will hide inheritance
+# If set to YES the inheritance and collaboration graphs will hide inheritance
 # and usage relations if the target is undocumented or is not a class.
 # The default value is: YES.
 
@@ -2068,7 +2246,7 @@ HIDE_UNDOC_RELATIONS   = YES
 # set to NO
 # The default value is: NO.
 
-HAVE_DOT               = YES
+HAVE_DOT               = NO
 
 # The DOT_NUM_THREADS specifies the number of dot invocations doxygen is allowed
 # to run in parallel. When set to 0 doxygen will base this on the number of
@@ -2128,7 +2306,7 @@ COLLABORATION_GRAPH    = YES
 
 GROUP_GRAPHS           = YES
 
-# If the UML_LOOK tag is set to YES doxygen will generate inheritance and
+# If the UML_LOOK tag is set to YES, doxygen will generate inheritance and
 # collaboration diagrams in a style similar to the OMG's Unified Modeling
 # Language.
 # The default value is: NO.
@@ -2180,7 +2358,8 @@ INCLUDED_BY_GRAPH      = YES
 #
 # Note that enabling this option will significantly increase the time of a run.
 # So in most cases it will be better to enable call graphs for selected
-# functions only using the \callgraph command.
+# functions only using the \callgraph command. Disabling a call graph can be
+# accomplished by means of the command \hidecallgraph.
 # The default value is: NO.
 # This tag requires that the tag HAVE_DOT is set to YES.
 
@@ -2191,7 +2370,8 @@ CALL_GRAPH             = NO
 #
 # Note that enabling this option will significantly increase the time of a run.
 # So in most cases it will be better to enable caller graphs for selected
-# functions only using the \callergraph command.
+# functions only using the \callergraph command. Disabling a caller graph can be
+# accomplished by means of the command \hidecallergraph.
 # The default value is: NO.
 # This tag requires that the tag HAVE_DOT is set to YES.
 
@@ -2214,11 +2394,15 @@ GRAPHICAL_HIERARCHY    = YES
 DIRECTORY_GRAPH        = YES
 
 # The DOT_IMAGE_FORMAT tag can be used to set the image format of the images
-# generated by dot.
+# generated by dot. For an explanation of the image formats see the section
+# output formats in the documentation of the dot tool (Graphviz (see:
+# http://www.graphviz.org/)).
 # Note: If you choose svg you need to set HTML_FILE_EXTENSION to xhtml in order
 # to make the SVG files visible in IE 9+ (other browsers do not have this
 # requirement).
-# Possible values are: png, jpg, gif and svg.
+# Possible values are: png, jpg, gif, svg, png:gd, png:gd:gd, png:cairo,
+# png:cairo:gd, png:cairo:cairo, png:cairo:gdiplus, png:gdiplus and
+# png:gdiplus:gdiplus.
 # The default value is: png.
 # This tag requires that the tag HAVE_DOT is set to YES.
 
@@ -2266,10 +2450,19 @@ DIAFILE_DIRS           =
 # PlantUML is not used or called during a preprocessing step. Doxygen will
 # generate a warning when it encounters a \startuml command in this case and
 # will not generate output for the diagram.
-# This tag requires that the tag HAVE_DOT is set to YES.
 
 PLANTUML_JAR_PATH      =
 
+# When using plantuml, the PLANTUML_CFG_FILE tag can be used to specify a
+# configuration file for plantuml.
+
+PLANTUML_CFG_FILE      =
+
+# When using plantuml, the specified paths are searched for files specified by
+# the !include statement in a plantuml block.
+
+PLANTUML_INCLUDE_PATH  =
+
 # The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of nodes
 # that will be shown in the graph. If the number of nodes in a graph becomes
 # larger than this value, doxygen will truncate the graph, which is visualized
@@ -2306,7 +2499,7 @@ MAX_DOT_GRAPH_DEPTH    = 0
 
 DOT_TRANSPARENT        = NO
 
-# Set the DOT_MULTI_TARGETS tag to YES allow dot to generate multiple output
+# Set the DOT_MULTI_TARGETS tag to YES to allow dot to generate multiple output
 # files in one run (i.e. multiple -o and -T options on the command line). This
 # makes dot run faster, but since only newer versions of dot (>1.8.10) support
 # this, this feature is disabled by default.
@@ -2323,7 +2516,7 @@ DOT_MULTI_TARGETS      = NO
 
 GENERATE_LEGEND        = YES
 
-# If the DOT_CLEANUP tag is set to YES doxygen will remove the intermediate dot
+# If the DOT_CLEANUP tag is set to YES, doxygen will remove the intermediate dot
 # files that are used to generate the various graphs.
 # The default value is: YES.
 # This tag requires that the tag HAVE_DOT is set to YES.

From 26e818b9642d5bdf8071839b8589d494708bcbe6 Mon Sep 17 00:00:00 2001
From: Martin Falatic <martin@falatic.com>
Date: Thu, 28 May 2020 00:54:19 -0700
Subject: [PATCH 162/204] Code formatting cleanups; markdown corrections

Improve formatting consistency
Fix comment/docstring/markdown typos
Remove unused file
---
 FastLED.cpp                                   |  52 +-
 FastLED.h                                     |  18 +-
 PORTING.md                                    |  13 +-
 bitswap.cpp                                   |  32 +-
 chipsets.h                                    |  10 +-
 color.h                                       | 106 +--
 colorpalettes.cpp                             |  18 +-
 controller.h                                  |  42 +-
 fastled_delay.h                               |   5 +-
 fastpin.h                                     |  27 +-
 fastspi_bitbang.h                             |   1 +
 fastspi_ref.h                                 |   1 +
 fastspi_types.h                               |   6 +-
 lib8tion.h                                    |  42 +-
 lib8tion/math8.h                              | 265 +++---
 lib8tion/scale8.h                             | 425 +++++-----
 lib8tion/trig8.h                              |  32 +-
 noise.cpp                                     | 569 ++++++-------
 pixelset.h                                    | 509 ++++++------
 pixeltypes.h                                  |  38 +-
 platforms/apollo3/clockless_apollo3.h         | 107 ++-
 platforms/apollo3/fastpin_apollo3.h           |  33 +-
 platforms/arm/common/m0clockless.h            | 608 +++++++-------
 platforms/arm/d21/clockless_arm_d21.h         |  82 +-
 platforms/arm/d21/fastpin_arm_d21.h           |  56 +-
 platforms/arm/d51/clockless_arm_d51.h         |  14 +-
 platforms/arm/d51/fastpin_arm_d51.h           |  57 +-
 platforms/arm/k20/clockless_arm_k20.h         |  14 +-
 platforms/arm/k20/clockless_block_arm_k20.h   |   2 +
 platforms/arm/k20/fastspi_arm_k20.h           |  10 +-
 platforms/arm/k20/octows2811_controller.h     |  82 +-
 platforms/arm/k20/smartmatrix_t3.h            |  63 +-
 platforms/arm/k20/ws2812serial_controller.h   |  47 +-
 platforms/arm/k66/clockless_arm_k66.h         |  14 +-
 platforms/arm/k66/clockless_block_arm_k66.h   |  23 +-
 platforms/arm/k66/fastspi_arm_k66.h           |  10 +-
 .../mxrt1062/block_clockless_arm_mxrt1062.h   | 338 ++++----
 .../arm/mxrt1062/clockless_arm_mxrt1062.h     |  27 +-
 platforms/arm/mxrt1062/fastpin_arm_mxrt1062.h |   2 +-
 platforms/arm/mxrt1062/fastspi_arm_mxrt1062.h |  90 +-
 platforms/arm/nrf51/clockless_arm_nrf51.h     | 101 +--
 platforms/arm/nrf51/fastpin_arm_nrf51.h       |  88 +-
 platforms/arm/nrf51/fastspi_arm_nrf51.h       | 247 +++---
 platforms/arm/nrf52/arbiter_nrf52.h           |   1 -
 platforms/arm/nrf52/clockless_arm_nrf52.h     |   1 -
 platforms/arm/nrf52/fastpin_arm_nrf52.h       |  94 +--
 platforms/arm/nrf52/fastspi_arm_nrf52.h       |   1 -
 platforms/arm/sam/clockless_arm_sam.h         |  20 +-
 platforms/arm/sam/clockless_block_arm_sam.h   | 237 +++---
 platforms/arm/sam/fastpin_arm_sam.h           |   1 -
 platforms/arm/stm32/clockless_arm_stm32.h     | 198 ++---
 platforms/arm/stm32/cm3_regs.h                |  62 +-
 platforms/arm/stm32/fastpin_arm_stm32.h       |  76 +-
 platforms/arm/stm32/led_sysdefs_arm_stm32.h   |  24 +-
 platforms/avr/clockless_trinket.h             |   2 +-
 platforms/avr/fastspi_avr.h                   |  13 +-
 platforms/esp/32/clockless_block_esp32.h      |   1 +
 platforms/esp/32/clockless_esp32.h.orig       | 786 ------------------
 platforms/esp/32/clockless_i2s_esp32.h        |  22 +-
 platforms/esp/32/clockless_rmt_esp32.h        |   2 -
 platforms/esp/32/fastpin_esp32.h              |   1 -
 platforms/esp/8266/clockless_block_esp8266.h  |  43 +-
 platforms/esp/8266/clockless_esp8266.h        |  62 +-
 platforms/esp/8266/fastpin_esp8266.h          |  45 +-
 power_mgt.cpp                                 |  12 +-
 wiring.cpp                                    | 220 ++---
 66 files changed, 2733 insertions(+), 3517 deletions(-)
 delete mode 100644 platforms/esp/32/clockless_esp32.h.orig

diff --git a/FastLED.cpp b/FastLED.cpp
index bfcb73c4b4..b070e80847 100644
--- a/FastLED.cpp
+++ b/FastLED.cpp
@@ -31,8 +31,8 @@ CFastLED::CFastLED() {
 }
 
 CLEDController &CFastLED::addLeds(CLEDController *pLed,
-									   struct CRGB *data,
-									   int nLedsOrOffset, int nLedsIfOffset) {
+								  struct CRGB *data,
+								  int nLedsOrOffset, int nLedsIfOffset) {
 	int nOffset = (nLedsIfOffset > 0) ? nLedsOrOffset : 0;
 	int nLeds = (nLedsIfOffset > 0) ? nLedsIfOffset : nLedsOrOffset;
 
@@ -204,33 +204,33 @@ extern int noise_min;
 extern int noise_max;
 
 void CFastLED::countFPS(int nFrames) {
-  static int br = 0;
-  static uint32_t lastframe = 0; // millis();
-
-  if(br++ >= nFrames) {
-      uint32_t now = millis();
-      now -= lastframe;
-      if( now == 0 ) {
-          now = 1; // prevent division by zero below
-      }
-      m_nFPS = (br * 1000) / now;
-    br = 0;
-    lastframe = millis();
-  }
+	static int br = 0;
+	static uint32_t lastframe = 0; // millis();
+
+	if(br++ >= nFrames) {
+		uint32_t now = millis();
+		now -= lastframe;
+		if(now == 0) {
+			now = 1; // prevent division by zero below
+		}
+		m_nFPS = (br * 1000) / now;
+		br = 0;
+		lastframe = millis();
+	}
 }
 
 void CFastLED::setMaxRefreshRate(uint16_t refresh, bool constrain) {
-  if(constrain) {
-    // if we're constraining, the new value of m_nMinMicros _must_ be higher than previously (because we're only
-    // allowed to slow things down if constraining)
-    if(refresh > 0) {
-      m_nMinMicros = ( (1000000/refresh) >  m_nMinMicros) ? (1000000/refresh) : m_nMinMicros;
-    }
-  } else if(refresh > 0) {
-    m_nMinMicros = 1000000 / refresh;
-  } else {
-    m_nMinMicros = 0;
-  }
+	if(constrain) {
+		// if we're constraining, the new value of m_nMinMicros _must_ be higher than previously (because we're only
+		// allowed to slow things down if constraining)
+		if(refresh > 0) {
+			m_nMinMicros = ((1000000 / refresh) > m_nMinMicros) ? (1000000 / refresh) : m_nMinMicros;
+		}
+	} else if(refresh > 0) {
+		m_nMinMicros = 1000000 / refresh;
+	} else {
+		m_nMinMicros = 0;
+	}
 }
 
 extern "C" int atexit(void (* /*func*/ )()) { return 0; }
diff --git a/FastLED.h b/FastLED.h
index d7b6375a86..0cc1acc495 100644
--- a/FastLED.h
+++ b/FastLED.h
@@ -316,11 +316,11 @@ class CFastLED {
 	}
 
 #if defined(__FASTLED_HAS_FIBCC) && (__FASTLED_HAS_FIBCC == 1)
-  template<uint8_t NUM_LANES, template<uint8_t DATA_PIN, EOrder RGB_ORDER> class CHIPSET, uint8_t DATA_PIN, EOrder RGB_ORDER=RGB>
-  static CLEDController &addLeds(struct CRGB *data, int nLeds) {
-    static __FIBCC<CHIPSET, DATA_PIN, NUM_LANES, RGB_ORDER> c;
-    return addLeds(&c, data, nLeds);
-  }
+	template<uint8_t NUM_LANES, template<uint8_t DATA_PIN, EOrder RGB_ORDER> class CHIPSET, uint8_t DATA_PIN, EOrder RGB_ORDER=RGB>
+	static CLEDController &addLeds(struct CRGB *data, int nLeds) {
+		static __FIBCC<CHIPSET, DATA_PIN, NUM_LANES, RGB_ORDER> c;
+		return addLeds(&c, data, nLeds);
+	}
 #endif
 
 	#ifdef FASTSPI_USE_DMX_SIMPLE
@@ -556,19 +556,19 @@ class CFastLED {
 	uint16_t getFPS() { return m_nFPS; }
 
 	/// Get how many controllers have been registered
-  /// @returns the number of controllers (strips) that have been added with addLeds
+	/// @returns the number of controllers (strips) that have been added with addLeds
 	int count();
 
 	/// Get a reference to a registered controller
-  /// @returns a reference to the Nth controller
+	/// @returns a reference to the Nth controller
 	CLEDController & operator[](int x);
 
 	/// Get the number of leds in the first controller
-  /// @returns the number of LEDs in the first controller
+	/// @returns the number of LEDs in the first controller
 	int size() { return (*this)[0].size(); }
 
 	/// Get a pointer to led data for the first controller
-  /// @returns pointer to the CRGB buffer for the first controller
+	/// @returns pointer to the CRGB buffer for the first controller
 	CRGB *leds() { return (*this)[0].leds(); }
 };
 
diff --git a/PORTING.md b/PORTING.md
index 2f925ab2d8..beb4e6c65d 100644
--- a/PORTING.md
+++ b/PORTING.md
@@ -1,6 +1,7 @@
-=New platform porting guide=
+New platform porting guide
+==========================
 
-== Fast porting for a new board on existing hardware ==
+## Fast porting for a new board on existing hardware
 
 Sometimes "porting" FastLED simply consists of supplying new pin definitions for the given platform.  For example, platforms/avr/fastpin_avr.h contains various pin definitions for all the AVR variant chipsets/boards that FastLED supports.  Defining a set of pins involves setting up a set of definitions - for example here's one full set from the avr fastpin file:
 
@@ -26,7 +27,7 @@ The ```_FL_IO``` macro is used to define the port registers for the platform whi
 
 The ```HAS_HARDWARE_PIN_SUPPORT``` define tells the rest of the FastLED library that there is hardware pin support available.  There may be other platform specific defines for things like hardware SPI ports and such.
 
-== Setting up the basic files/folders ==
+## Setting up the basic files/folders
 
 * Create platform directory (e.g. platforms/arm/kl26)
 * Create configuration header led_sysdefs_arm_kl26.h:
@@ -38,7 +39,7 @@ The ```HAS_HARDWARE_PIN_SUPPORT``` define tells the rest of the FastLED library
 * Modify led_sysdefs.h to conditionally include platform sysdefs header file
 * Modify platforms.h to conditionally include platform fastled header
 
-== Porting fastpin.h ==
+## Porting fastpin.h
 
 The heart of the FastLED library is the fast pin accesss.  This is a templated class that provides 1-2 cycle pin access, bypassing digital write and other such things.  As such, this will usually be the first bit of the library that you will want to port when moving to a new platform.  Once you have FastPIN up and running then you can do some basic work like testing toggles or running bit-bang'd SPI output.
 
@@ -46,10 +47,10 @@ There's two low level FastPin classes.  There's the base FastPIN template class,
 
 Explaining how the macros work and should be used is currently beyond the scope of this document.
 
-== Porting fastspi.h ==
+## Porting fastspi.h
 
 This is where you define the low level interface to the hardware SPI system (including a writePixels method that does a bunch of housekeeping for writing led data).  Use the fastspi_nop.h file as a reference for the methods that need to be implemented.  There are ofteh other useful methods that can help with the internals of the SPI code, I recommend taking a look at how the various platforms implement their SPI classes.
 
-== Porting clockless.h ==
+## Porting clockless.h
 
 This is where you define the code for the clockless controllers.  Across ARM platforms this will usually be fairly similar - though different arm platforms will have different clock sources that you can/should use.
diff --git a/bitswap.cpp b/bitswap.cpp
index 67530c72ac..5be71f0277 100644
--- a/bitswap.cpp
+++ b/bitswap.cpp
@@ -4,25 +4,25 @@
 /// Simplified form of bits rotating function.  Based on code found here - http://www.hackersdelight.org/hdcodetxt/transpose8.c.txt - rotating
 /// data into LSB for a faster write (the code using this data can happily walk the array backwards)
 void transpose8x1_noinline(unsigned char *A, unsigned char *B) {
-  uint32_t x, y, t;
+    uint32_t x, y, t;
 
-  // Load the array and pack it into x and y.
-  y = *(unsigned int*)(A);
-  x = *(unsigned int*)(A+4);
+    // Load the array and pack it into x and y.
+    y = *(unsigned int*)(A);
+    x = *(unsigned int*)(A+4);
 
-  // pre-transform x
-  t = (x ^ (x >> 7)) & 0x00AA00AA;  x = x ^ t ^ (t << 7);
-  t = (x ^ (x >>14)) & 0x0000CCCC;  x = x ^ t ^ (t <<14);
+    // pre-transform x
+    t = (x ^ (x >> 7)) & 0x00AA00AA;  x = x ^ t ^ (t << 7);
+    t = (x ^ (x >>14)) & 0x0000CCCC;  x = x ^ t ^ (t <<14);
 
-  // pre-transform y
-  t = (y ^ (y >> 7)) & 0x00AA00AA;  y = y ^ t ^ (t << 7);
-  t = (y ^ (y >>14)) & 0x0000CCCC;  y = y ^ t ^ (t <<14);
+    // pre-transform y
+    t = (y ^ (y >> 7)) & 0x00AA00AA;  y = y ^ t ^ (t << 7);
+    t = (y ^ (y >>14)) & 0x0000CCCC;  y = y ^ t ^ (t <<14);
 
-  // final transform
-  t = (x & 0xF0F0F0F0) | ((y >> 4) & 0x0F0F0F0F);
-  y = ((x << 4) & 0xF0F0F0F0) | (y & 0x0F0F0F0F);
-  x = t;
+    // final transform
+    t = (x & 0xF0F0F0F0) | ((y >> 4) & 0x0F0F0F0F);
+    y = ((x << 4) & 0xF0F0F0F0) | (y & 0x0F0F0F0F);
+    x = t;
 
-  *((uint32_t*)B) = y;
-  *((uint32_t*)(B+4)) = x;
+    *((uint32_t*)B) = y;
+    *((uint32_t*)(B+4)) = x;
 }
diff --git a/chipsets.h b/chipsets.h
index 8e9051d5cf..ddaf3857a6 100644
--- a/chipsets.h
+++ b/chipsets.h
@@ -28,6 +28,7 @@ template<uint8_t DATA_PIN, EOrder RGB_ORDER = RGB>
 class PixieController : public CPixelLEDController<RGB_ORDER> {
 	SoftwareSerial Serial;
 	CMinWait<2000> mWait;
+
 public:
 	PixieController() : Serial(-1, DATA_PIN) {}
 
@@ -92,8 +93,8 @@ class LPD8806Controller : public CPixelLEDController<RGB_ORDER> {
 	};
 
 	SPI mSPI;
-public:
 
+public:
 	LPD8806Controller()  {}
 	virtual void init() {
 		mSPI.init();
@@ -123,6 +124,7 @@ class WS2801Controller : public CPixelLEDController<RGB_ORDER> {
 	typedef SPIOutput<DATA_PIN, CLOCK_PIN, SPI_SPEED> SPI;
 	SPI mSPI;
 	CMinWait<1000>  mWaitDelay;
+
 public:
 	WS2801Controller() {}
 
@@ -132,7 +134,6 @@ class WS2801Controller : public CPixelLEDController<RGB_ORDER> {
 	}
 
 protected:
-
 	virtual void showPixels(PixelController<RGB_ORDER> & pixels) {
 		mWaitDelay.wait();
 		mSPI.template writePixels<0, DATA_NOP, RGB_ORDER>(pixels);
@@ -166,7 +167,6 @@ class LPD6803Controller : public CPixelLEDController<RGB_ORDER> {
 	}
 
 protected:
-
 	virtual void showPixels(PixelController<RGB_ORDER> & pixels) {
 		mSPI.select();
 
@@ -232,7 +232,6 @@ class APA102Controller : public CPixelLEDController<RGB_ORDER> {
 	}
 
 protected:
-
 	virtual void showPixels(PixelController<RGB_ORDER> & pixels) {
 		mSPI.select();
 
@@ -297,7 +296,6 @@ class SK9822Controller : public CPixelLEDController<RGB_ORDER> {
 	}
 
 protected:
-
 	virtual void showPixels(PixelController<RGB_ORDER> & pixels) {
 		mSPI.select();
 
@@ -360,7 +358,6 @@ class P9813Controller : public CPixelLEDController<RGB_ORDER> {
 	}
 
 protected:
-
 	virtual void showPixels(PixelController<RGB_ORDER> & pixels) {
 		mSPI.select();
 
@@ -418,7 +415,6 @@ class SM16716Controller : public CPixelLEDController<RGB_ORDER> {
 	}
 
 protected:
-
 	virtual void showPixels(PixelController<RGB_ORDER> & pixels) {
 		// Make sure the FLAG_START_BIT flag is set to ensure that an extra 1 bit is sent at the start
 		// of each triplet of bytes for rgb data
diff --git a/color.h b/color.h
index 1ed60b4d65..63687cb55f 100644
--- a/color.h
+++ b/color.h
@@ -11,71 +11,71 @@ FASTLED_NAMESPACE_BEGIN
 /// definitions for color correction and light temperatures
 ///@{
 typedef enum {
-   // Color correction starting points
+    // Color correction starting points
 
-   /// typical values for SMD5050 LEDs
-   ///@{
+    /// typical values for SMD5050 LEDs
+    ///@{
     TypicalSMD5050=0xFFB0F0 /* 255, 176, 240 */,
     TypicalLEDStrip=0xFFB0F0 /* 255, 176, 240 */,
-  ///@}
+    ///@}
 
-   /// typical values for 8mm "pixels on a string"
-   /// also for many through-hole 'T' package LEDs
-   ///@{
-   Typical8mmPixel=0xFFE08C /* 255, 224, 140 */,
-   TypicalPixelString=0xFFE08C /* 255, 224, 140 */,
-   ///@}
+    /// typical values for 8mm "pixels on a string"
+    /// also for many through-hole 'T' package LEDs
+    ///@{
+    Typical8mmPixel=0xFFE08C /* 255, 224, 140 */,
+    TypicalPixelString=0xFFE08C /* 255, 224, 140 */,
+    ///@}
 
-   /// uncorrected color
-   UncorrectedColor=0xFFFFFF
+    /// uncorrected color
+    UncorrectedColor=0xFFFFFF
 
 } LEDColorCorrection;
 
 
 typedef enum {
-   /// @name Black-body radiation light sources
-   /// Black-body radiation light sources emit a (relatively) continuous
-   /// spectrum, and can be described as having a Kelvin 'temperature'
-   ///@{
-   /// 1900 Kelvin
-   Candle=0xFF9329 /* 1900 K, 255, 147, 41 */,
-   /// 2600 Kelvin
-   Tungsten40W=0xFFC58F /* 2600 K, 255, 197, 143 */,
-   /// 2850 Kelvin
-   Tungsten100W=0xFFD6AA /* 2850 K, 255, 214, 170 */,
-   /// 3200 Kelvin
-   Halogen=0xFFF1E0 /* 3200 K, 255, 241, 224 */,
-   /// 5200 Kelvin
-   CarbonArc=0xFFFAF4 /* 5200 K, 255, 250, 244 */,
-   /// 5400 Kelvin
-   HighNoonSun=0xFFFFFB /* 5400 K, 255, 255, 251 */,
-   /// 6000 Kelvin
-   DirectSunlight=0xFFFFFF /* 6000 K, 255, 255, 255 */,
-   /// 7000 Kelvin
-   OvercastSky=0xC9E2FF /* 7000 K, 201, 226, 255 */,
-   /// 20000 Kelvin
-   ClearBlueSky=0x409CFF /* 20000 K, 64, 156, 255 */,
-   ///@}
+    /// @name Black-body radiation light sources
+    /// Black-body radiation light sources emit a (relatively) continuous
+    /// spectrum, and can be described as having a Kelvin 'temperature'
+    ///@{
+    /// 1900 Kelvin
+    Candle=0xFF9329 /* 1900 K, 255, 147, 41 */,
+    /// 2600 Kelvin
+    Tungsten40W=0xFFC58F /* 2600 K, 255, 197, 143 */,
+    /// 2850 Kelvin
+    Tungsten100W=0xFFD6AA /* 2850 K, 255, 214, 170 */,
+    /// 3200 Kelvin
+    Halogen=0xFFF1E0 /* 3200 K, 255, 241, 224 */,
+    /// 5200 Kelvin
+    CarbonArc=0xFFFAF4 /* 5200 K, 255, 250, 244 */,
+    /// 5400 Kelvin
+    HighNoonSun=0xFFFFFB /* 5400 K, 255, 255, 251 */,
+    /// 6000 Kelvin
+    DirectSunlight=0xFFFFFF /* 6000 K, 255, 255, 255 */,
+    /// 7000 Kelvin
+    OvercastSky=0xC9E2FF /* 7000 K, 201, 226, 255 */,
+    /// 20000 Kelvin
+    ClearBlueSky=0x409CFF /* 20000 K, 64, 156, 255 */,
+    ///@}
 
-   /// @name Gaseous light sources
-   /// Gaseous light sources emit discrete spectral bands, and while we can
-   /// approximate their aggregate hue with RGB values, they don't actually
-   /// have a proper Kelvin temperature.
-   ///@{
-   WarmFluorescent=0xFFF4E5 /* 0 K, 255, 244, 229 */,
-   StandardFluorescent=0xF4FFFA /* 0 K, 244, 255, 250 */,
-   CoolWhiteFluorescent=0xD4EBFF /* 0 K, 212, 235, 255 */,
-   FullSpectrumFluorescent=0xFFF4F2 /* 0 K, 255, 244, 242 */,
-   GrowLightFluorescent=0xFFEFF7 /* 0 K, 255, 239, 247 */,
-   BlackLightFluorescent=0xA700FF /* 0 K, 167, 0, 255 */,
-   MercuryVapor=0xD8F7FF /* 0 K, 216, 247, 255 */,
-   SodiumVapor=0xFFD1B2 /* 0 K, 255, 209, 178 */,
-   MetalHalide=0xF2FCFF /* 0 K, 242, 252, 255 */,
-   HighPressureSodium=0xFFB74C /* 0 K, 255, 183, 76 */,
-   ///@}
+    /// @name Gaseous light sources
+    /// Gaseous light sources emit discrete spectral bands, and while we can
+    /// approximate their aggregate hue with RGB values, they don't actually
+    /// have a proper Kelvin temperature.
+    ///@{
+    WarmFluorescent=0xFFF4E5 /* 0 K, 255, 244, 229 */,
+    StandardFluorescent=0xF4FFFA /* 0 K, 244, 255, 250 */,
+    CoolWhiteFluorescent=0xD4EBFF /* 0 K, 212, 235, 255 */,
+    FullSpectrumFluorescent=0xFFF4F2 /* 0 K, 255, 244, 242 */,
+    GrowLightFluorescent=0xFFEFF7 /* 0 K, 255, 239, 247 */,
+    BlackLightFluorescent=0xA700FF /* 0 K, 167, 0, 255 */,
+    MercuryVapor=0xD8F7FF /* 0 K, 216, 247, 255 */,
+    SodiumVapor=0xFFD1B2 /* 0 K, 255, 209, 178 */,
+    MetalHalide=0xF2FCFF /* 0 K, 242, 252, 255 */,
+    HighPressureSodium=0xFFB74C /* 0 K, 255, 183, 76 */,
+    ///@}
 
-   /// Uncorrected temperature 0xFFFFFF
-   UncorrectedTemperature=0xFFFFFF
+    /// Uncorrected temperature 0xFFFFFF
+    UncorrectedTemperature=0xFFFFFF
 } ColorTemperature;
 
 FASTLED_NAMESPACE_END
diff --git a/colorpalettes.cpp b/colorpalettes.cpp
index 3c3a1f519b..68e42f0353 100644
--- a/colorpalettes.cpp
+++ b/colorpalettes.cpp
@@ -161,14 +161,14 @@ extern const TProgmemRGBPalette16 HeatColors_p FL_PROGMEM =
 // you want a 'standard' FastLED rainbow as well.
 
 DEFINE_GRADIENT_PALETTE( Rainbow_gp ) {
-      0,  255,  0,  0, // Red
-     32,  171, 85,  0, // Orange
-     64,  171,171,  0, // Yellow
-     96,    0,255,  0, // Green
-    128,    0,171, 85, // Aqua
-    160,    0,  0,255, // Blue
-    192,   85,  0,171, // Purple
-    224,  171,  0, 85, // Pink
-    255,  255,  0,  0};// and back to Red
+      0,  255,    0,    0, // Red
+     32,  171,   85,    0, // Orange
+     64,  171,  171,    0, // Yellow
+     96,    0,  255,    0, // Green
+    128,    0,  171,   85, // Aqua
+    160,    0,    0,  255, // Blue
+    192,   85,    0,  171, // Purple
+    224,  171,    0,   85, // Pink
+    255,  255,    0,    0};// and back to Red
 
 #endif
diff --git a/controller.h b/controller.h
index 30e4c111ec..951a8a0dfc 100644
--- a/controller.h
+++ b/controller.h
@@ -50,7 +50,7 @@ class CLEDController {
 
     /// set all the leds on the controller to a given color
     ///@param data the crgb color to set the leds to
-    ///@param nLeds the numner of leds to set to this color
+    ///@param nLeds the number of leds to set to this color
     ///@param scale the rgb scaling value for outputting color
     virtual void showColor(const struct CRGB & data, int nLeds, CRGB scale) = 0;
 
@@ -388,28 +388,28 @@ struct PixelController {
 
 template<EOrder RGB_ORDER, int LANES=1, uint32_t MASK=0xFFFFFFFF> class CPixelLEDController : public CLEDController {
 protected:
-  virtual void showPixels(PixelController<RGB_ORDER,LANES,MASK> & pixels) = 0;
-
-  /// set all the leds on the controller to a given color
-  ///@param data the crgb color to set the leds to
-  ///@param nLeds the numner of leds to set to this color
-  ///@param scale the rgb scaling value for outputting color
-  virtual void showColor(const struct CRGB & data, int nLeds, CRGB scale) {
-    PixelController<RGB_ORDER, LANES, MASK> pixels(data, nLeds, scale, getDither());
-    showPixels(pixels);
-  }
-
-/// write the passed in rgb data out to the leds managed by this controller
-///@param data the rgb data to write out to the strip
-///@param nLeds the number of leds being written out
-///@param scale the rgb scaling to apply to each led before writing it out
-  virtual void show(const struct CRGB *data, int nLeds, CRGB scale) {
-    PixelController<RGB_ORDER, LANES, MASK> pixels(data, nLeds, scale, getDither());
-    showPixels(pixels);
-  }
+    virtual void showPixels(PixelController<RGB_ORDER,LANES,MASK> & pixels) = 0;
+
+    /// set all the leds on the controller to a given color
+    ///@param data the crgb color to set the leds to
+    ///@param nLeds the number of leds to set to this color
+    ///@param scale the rgb scaling value for outputting color
+    virtual void showColor(const struct CRGB & data, int nLeds, CRGB scale) {
+        PixelController<RGB_ORDER, LANES, MASK> pixels(data, nLeds, scale, getDither());
+        showPixels(pixels);
+    }
+
+    /// write the passed in rgb data out to the leds managed by this controller
+    ///@param data the rgb data to write out to the strip
+    ///@param nLeds the number of leds being written out
+    ///@param scale the rgb scaling to apply to each led before writing it out
+    virtual void show(const struct CRGB *data, int nLeds, CRGB scale) {
+        PixelController<RGB_ORDER, LANES, MASK> pixels(data, nLeds, scale, getDither());
+        showPixels(pixels);
+    }
 
 public:
-  CPixelLEDController() : CLEDController() {}
+    CPixelLEDController() : CLEDController() {}
 };
 
 
diff --git a/fastled_delay.h b/fastled_delay.h
index 4649f7d003..a14e8a2941 100644
--- a/fastled_delay.h
+++ b/fastled_delay.h
@@ -12,6 +12,7 @@ FASTLED_NAMESPACE_BEGIN
 /// this should make sure that chipsets that have
 template<int WAIT> class CMinWait {
 	uint16_t mLastMicros;
+
 public:
 	CMinWait() { mLastMicros = 0; }
 
@@ -51,8 +52,8 @@ template<int WAIT> class CMinWait {
 // predeclaration to not upset the compiler
 template<int CYCLES> inline void delaycycles();
 template<int CYCLES> inline void delaycycles_min1() {
-  delaycycles<1>();
-  delaycycles<CYCLES-1>();
+	delaycycles<1>();
+	delaycycles<CYCLES-1>();
 }
 
 
diff --git a/fastpin.h b/fastpin.h
index ed2b8e7ebf..085a7d1b53 100644
--- a/fastpin.h
+++ b/fastpin.h
@@ -42,6 +42,7 @@ class Pin : public Selectable {
 		mPort = (volatile RwReg*)portOutputRegister(digitalPinToPort(mPin));
 		mInPort = (volatile RoReg*)portInputRegister(digitalPinToPort(mPin));
 	}
+
 public:
 	Pin(int pin) : mPin(pin) { _init(); }
 
@@ -98,6 +99,7 @@ class Pin : public Selectable {
 		mPort = NULL;
 		mInPort = NULL;
 	}
+
 public:
 	Pin(int pin) : mPin(pin) { _init(); }
 
@@ -169,6 +171,7 @@ template<uint8_t PIN> class FastPin {
 		sInPort = portInputRegister(digitalPinToPort(PIN));
 #endif
 	}
+
 public:
 	typedef volatile RwReg * port_ptr_t;
 	typedef RwReg port_t;
@@ -206,8 +209,8 @@ template<uint8_t PIN> class FastPin {
 
 	static_assert(validpin(), "Invalid pin specified");
 
-	static void _init() {
-	}
+	static void _init() { }
+
 public:
 	typedef volatile RwReg * port_ptr_t;
 	typedef RwReg port_t;
@@ -253,15 +256,17 @@ template<uint8_t port> struct __FL_PORT_INFO {
 // are numeric in nature, e.g. GPIO0, GPIO1.  Use _FL_DEFINE_PORT3 for ports that are letters.
 // The first parameter will be the letter, the second parameter will be an integer/counter of smoe kind
 // (this is because attempts to turn macro parameters into character constants break in some compilers)
-#define _FL_DEFINE_PORT(L, BASE) template<> struct __FL_PORT_INFO<L> { static bool hasPort() { return 1; } \
-										static const char *portName() { return #L; } \
-										typedef BASE __t_baseType;  \
-										static const void *portAddr() { return (void*)&__t_baseType::r(); } };
-
-#define _FL_DEFINE_PORT3(L, LC, BASE) template<> struct __FL_PORT_INFO<LC> { static bool hasPort() { return 1; } \
-										static const char *portName() { return #L; } \
-										typedef BASE __t_baseType;  \
-										static const void *portAddr() { return (void*)&__t_baseType::r(); } };
+#define _FL_DEFINE_PORT(L, BASE) template<> struct __FL_PORT_INFO<L> { \
+	static bool hasPort() { return 1; } \
+	static const char *portName() { return #L; } \
+	typedef BASE __t_baseType;  \
+	static const void *portAddr() { return (void*)&__t_baseType::r(); } };
+
+#define _FL_DEFINE_PORT3(L, LC, BASE) template<> struct __FL_PORT_INFO<LC> { \
+	static bool hasPort() { return 1; } \
+	static const char *portName() { return #L; } \
+	typedef BASE __t_baseType;  \
+	static const void *portAddr() { return (void*)&__t_baseType::r(); } };
 
 FASTLED_NAMESPACE_END
 
diff --git a/fastspi_bitbang.h b/fastspi_bitbang.h
index 019b6dc0ec..86663f17bd 100644
--- a/fastspi_bitbang.h
+++ b/fastspi_bitbang.h
@@ -203,6 +203,7 @@ class AVRSoftwareSPIOutput {
 		}
 #endif
 	}
+
 public:
 
 	// select the SPI output (TODO: research whether this really means hi or lo.  Alt TODO: move select responsibility out of the SPI classes
diff --git a/fastspi_ref.h b/fastspi_ref.h
index 00c41d345d..a12a962ae8 100644
--- a/fastspi_ref.h
+++ b/fastspi_ref.h
@@ -11,6 +11,7 @@ FASTLED_NAMESPACE_BEGIN
 template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint32_t _SPI_CLOCK_DIVIDER>
 class REFHardwareSPIOutput {
 	Selectable *m_pSelect;
+
 public:
 	SAMHardwareSPIOutput() { m_pSelect = NULL; }
 	SAMHArdwareSPIOutput(Selectable *pSelect) { m_pSelect = pSelect; }
diff --git a/fastspi_types.h b/fastspi_types.h
index 5510bba80b..ea7d46ce39 100644
--- a/fastspi_types.h
+++ b/fastspi_types.h
@@ -19,9 +19,9 @@ FASTLED_NAMESPACE_BEGIN
 /// TODO: Convinience macro for building these
 class DATA_NOP {
 public:
-  static __attribute__((always_inline)) inline uint8_t adjust(register uint8_t data) { return data; }
-  static __attribute__((always_inline)) inline uint8_t adjust(register uint8_t data, register uint8_t scale) { return scale8(data, scale); }
-  static __attribute__((always_inline)) inline void postBlock(int /* len */) { }
+    static __attribute__((always_inline)) inline uint8_t adjust(register uint8_t data) { return data; }
+    static __attribute__((always_inline)) inline uint8_t adjust(register uint8_t data, register uint8_t scale) { return scale8(data, scale); }
+    static __attribute__((always_inline)) inline void postBlock(int /* len */) { }
 };
 
 #define FLAG_START_BIT 0x80
diff --git a/lib8tion.h b/lib8tion.h
index 62db2b1d3e..0cc3baa4fc 100644
--- a/lib8tion.h
+++ b/lib8tion.h
@@ -825,20 +825,20 @@ LIB8STATIC uint8_t squarewave8( uint8_t in, uint8_t pulsewidth=128)
 
 /// Template class for represneting fractional ints.
 template<class T, int F, int I> class q {
-  T i:I;
-  T f:F;
+    T i:I;
+    T f:F;
 public:
-  q(float fx) { i = fx; f = (fx-i) * (1<<F); }
-  q(uint8_t _i, uint8_t _f) {i=_i; f=_f; }
-  uint32_t operator*(uint32_t v) { return (v*i) + ((v*f)>>F); }
-  uint16_t operator*(uint16_t v) { return (v*i) + ((v*f)>>F); }
-  int32_t operator*(int32_t v) { return (v*i) + ((v*f)>>F); }
-  int16_t operator*(int16_t v) { return (v*i) + ((v*f)>>F); }
+    q(float fx) { i = fx; f = (fx-i) * (1<<F); }
+    q(uint8_t _i, uint8_t _f) {i=_i; f=_f; }
+    uint32_t operator*(uint32_t v) { return (v*i) + ((v*f)>>F); }
+    uint16_t operator*(uint16_t v) { return (v*i) + ((v*f)>>F); }
+    int32_t operator*(int32_t v) { return (v*i) + ((v*f)>>F); }
+    int16_t operator*(int16_t v) { return (v*i) + ((v*f)>>F); }
 #ifdef FASTLED_ARM
-  int operator*(int v) { return (v*i) + ((v*f)>>F); }
+    int operator*(int v) { return (v*i) + ((v*f)>>F); }
 #endif
 #ifdef FASTLED_APOLLO3
-  int operator*(int v) { return (v*i) + ((v*f)>>F); }
+    int operator*(int v) { return (v*i) + ((v*f)>>F); }
 #endif
 };
 
@@ -1048,17 +1048,17 @@ LIB8STATIC uint16_t div1024_32_16( uint32_t in32)
     uint16_t out16;
 #if defined(__AVR__)
     asm volatile (
-                  "  lsr %D[in]  \n\t"
-                  "  ror %C[in]  \n\t"
-                  "  ror %B[in]  \n\t"
-                  "  lsr %D[in]  \n\t"
-                  "  ror %C[in]  \n\t"
-                  "  ror %B[in]  \n\t"
-                  "  mov %B[out],%C[in] \n\t"
-                  "  mov %A[out],%B[in] \n\t"
-                  : [in] "+r" (in32),
-                  [out] "=r" (out16)
-                  );
+        "  lsr %D[in]  \n\t"
+        "  ror %C[in]  \n\t"
+        "  ror %B[in]  \n\t"
+        "  lsr %D[in]  \n\t"
+        "  ror %C[in]  \n\t"
+        "  ror %B[in]  \n\t"
+        "  mov %B[out],%C[in] \n\t"
+        "  mov %A[out],%B[in] \n\t"
+        : [in] "+r" (in32),
+        [out] "=r" (out16)
+    );
 #else
     out16 = (in32 >> 10) & 0xFFFF;
 #endif
diff --git a/lib8tion/math8.h b/lib8tion/math8.h
index 4dab820ffc..a83b1ad253 100644
--- a/lib8tion/math8.h
+++ b/lib8tion/math8.h
@@ -28,18 +28,19 @@ LIB8STATIC_ALWAYS_INLINE uint8_t qadd8( uint8_t i, uint8_t j)
     return t;
 #elif QADD8_AVRASM == 1
     asm volatile(
-         /* First, add j to i, conditioning the C flag */
-         "add %0, %1    \n\t"
-
-         /* Now test the C flag.
-           If C is clear, we branch around a load of 0xFF into i.
-           If C is set, we go ahead and load 0xFF into i.
-         */
-         "brcc L_%=     \n\t"
-         "ldi %0, 0xFF  \n\t"
-         "L_%=: "
-         : "+a" (i)
-         : "a"  (j) );
+        /* First, add j to i, conditioning the C flag */
+        "add %0, %1    \n\t"
+
+        /* Now test the C flag.
+        If C is clear, we branch around a load of 0xFF into i.
+        If C is set, we go ahead and load 0xFF into i.
+        */
+        "brcc L_%=     \n\t"
+        "ldi %0, 0xFF  \n\t"
+        "L_%=: "
+        : "+a" (i)
+        : "a"  (j)
+    );
     return i;
 #elif QADD8_ARM_DSP_ASM == 1
     asm volatile( "uqadd8 %0, %0, %1" : "+r" (i) : "r" (j));
@@ -61,19 +62,19 @@ LIB8STATIC_ALWAYS_INLINE int8_t qadd7( int8_t i, int8_t j)
     return t;
 #elif QADD7_AVRASM == 1
     asm volatile(
-         /* First, add j to i, conditioning the V flag */
-         "add %0, %1    \n\t"
-
-         /* Now test the V flag.
-          If V is clear, we branch around a load of 0x7F into i.
-          If V is set, we go ahead and load 0x7F into i.
-          */
-         "brvc L_%=     \n\t"
-         "ldi %0, 0x7F  \n\t"
-         "L_%=: "
-         : "+a" (i)
-         : "a"  (j) );
-
+        /* First, add j to i, conditioning the V flag */
+        "add %0, %1    \n\t"
+
+        /* Now test the V flag.
+        If V is clear, we branch around a load of 0x7F into i.
+        If V is set, we go ahead and load 0x7F into i.
+        */
+        "brvc L_%=     \n\t"
+        "ldi %0, 0x7F  \n\t"
+        "L_%=: "
+        : "+a" (i)
+        : "a"  (j)
+    );
     return i;
 #elif QADD7_ARM_DSP_ASM == 1
     asm volatile( "qadd8 %0, %0, %1" : "+r" (i) : "r" (j));
@@ -94,19 +95,19 @@ LIB8STATIC_ALWAYS_INLINE uint8_t qsub8( uint8_t i, uint8_t j)
 #elif QSUB8_AVRASM == 1
 
     asm volatile(
-         /* First, subtract j from i, conditioning the C flag */
-         "sub %0, %1    \n\t"
-
-         /* Now test the C flag.
-          If C is clear, we branch around a load of 0x00 into i.
-          If C is set, we go ahead and load 0x00 into i.
-          */
-         "brcc L_%=     \n\t"
-         "ldi %0, 0x00  \n\t"
-         "L_%=: "
-         : "+a" (i)
-         : "a"  (j) );
-
+        /* First, subtract j from i, conditioning the C flag */
+        "sub %0, %1    \n\t"
+
+        /* Now test the C flag.
+        If C is clear, we branch around a load of 0x00 into i.
+        If C is set, we go ahead and load 0x00 into i.
+        */
+        "brcc L_%=     \n\t"
+        "ldi %0, 0x00  \n\t"
+        "L_%=: "
+        : "+a" (i)
+        : "a"  (j)
+    );
     return i;
 #else
 #error "No implementation for qsub8 available."
@@ -136,11 +137,12 @@ LIB8STATIC_ALWAYS_INLINE uint16_t add8to16( uint8_t i, uint16_t j)
     return t;
 #elif ADD8_AVRASM == 1
     // Add i(one byte) to j(two bytes)
-    asm volatile( "add %A[j], %[i]              \n\t"
-                  "adc %B[j], __zero_reg__      \n\t"
-                 : [j] "+a" (j)
-                 : [i] "a"  (i)
-                 );
+    asm volatile(
+        "add %A[j], %[i]              \n\t"
+        "adc %B[j], __zero_reg__      \n\t"
+        : [j] "+a" (j)
+        : [i] "a"  (i)
+    );
     return i;
 #else
 #error "No implementation for add8to16 available."
@@ -172,12 +174,13 @@ LIB8STATIC_ALWAYS_INLINE uint8_t avg8( uint8_t i, uint8_t j)
     return (i + j) >> 1;
 #elif AVG8_AVRASM == 1
     asm volatile(
-         /* First, add j to i, 9th bit overflows into C flag */
-         "add %0, %1    \n\t"
-         /* Divide by two, moving C flag into high 8th bit */
-         "ror %0        \n\t"
-         : "+a" (i)
-         : "a"  (j) );
+        /* First, add j to i, 9th bit overflows into C flag */
+        "add %0, %1    \n\t"
+        /* Divide by two, moving C flag into high 8th bit */
+        "ror %0        \n\t"
+        : "+a" (i)
+        : "a"  (j)
+    );
     return i;
 #else
 #error "No implementation for avg8 available."
@@ -193,16 +196,17 @@ LIB8STATIC_ALWAYS_INLINE uint16_t avg16( uint16_t i, uint16_t j)
     return (uint32_t)((uint32_t)(i) + (uint32_t)(j)) >> 1;
 #elif AVG16_AVRASM == 1
     asm volatile(
-                 /* First, add jLo (heh) to iLo, 9th bit overflows into C flag */
-                 "add %A[i], %A[j]    \n\t"
-                 /* Now, add C + jHi to iHi, 17th bit overflows into C flag */
-                 "adc %B[i], %B[j]    \n\t"
-                 /* Divide iHi by two, moving C flag into high 16th bit, old 9th bit now in C */
-                 "ror %B[i]        \n\t"
-                 /* Divide iLo by two, moving C flag into high 8th bit */
-                 "ror %A[i]        \n\t"
-                 : [i] "+a" (i)
-                 : [j] "a"  (j) );
+        /* First, add jLo (heh) to iLo, 9th bit overflows into C flag */
+        "add %A[i], %A[j]    \n\t"
+        /* Now, add C + jHi to iHi, 17th bit overflows into C flag */
+        "adc %B[i], %B[j]    \n\t"
+        /* Divide iHi by two, moving C flag into high 16th bit, old 9th bit now in C */
+        "ror %B[i]        \n\t"
+        /* Divide iLo by two, moving C flag into high 8th bit */
+        "ror %A[i]        \n\t"
+        : [i] "+a" (i)
+        : [j] "a"  (j)
+    );
     return i;
 #else
 #error "No implementation for avg16 available."
@@ -220,11 +224,12 @@ LIB8STATIC_ALWAYS_INLINE int8_t avg7( int8_t i, int8_t j)
     return ((i + j) >> 1) + (i & 0x1);
 #elif AVG7_AVRASM == 1
     asm volatile(
-                 "asr %1        \n\t"
-                 "asr %0        \n\t"
-                 "adc %0, %1    \n\t"
-                 : "+a" (i)
-                 : "a"  (j) );
+        "asr %1        \n\t"
+        "asr %0        \n\t"
+        "adc %0, %1    \n\t"
+        : "+a" (i)
+        : "a"  (j)
+    );
     return i;
 #else
 #error "No implementation for avg7 available."
@@ -241,17 +246,18 @@ LIB8STATIC_ALWAYS_INLINE int16_t avg15( int16_t i, int16_t j)
     return ((int32_t)((int32_t)(i) + (int32_t)(j)) >> 1) + (i & 0x1);
 #elif AVG15_AVRASM == 1
     asm volatile(
-                 /* first divide j by 2, throwing away lowest bit */
-                 "asr %B[j]          \n\t"
-                 "ror %A[j]          \n\t"
-                 /* now divide i by 2, with lowest bit going into C */
-                 "asr %B[i]          \n\t"
-                 "ror %A[i]          \n\t"
-                 /* add j + C to i */
-                 "adc %A[i], %A[j]   \n\t"
-                 "adc %B[i], %B[j]   \n\t"
-                 : [i] "+a" (i)
-                 : [j] "a"  (j) );
+        /* first divide j by 2, throwing away lowest bit */
+        "asr %B[j]          \n\t"
+        "ror %A[j]          \n\t"
+        /* now divide i by 2, with lowest bit going into C */
+        "asr %B[i]          \n\t"
+        "ror %A[i]          \n\t"
+        /* add j + C to i */
+        "adc %A[i], %A[j]   \n\t"
+        "adc %B[i], %B[j]   \n\t"
+        : [i] "+a" (i)
+        : [j] "a"  (j)
+    );
     return i;
 #else
 #error "No implementation for avg15 available."
@@ -271,12 +277,12 @@ LIB8STATIC_ALWAYS_INLINE uint8_t mod8( uint8_t a, uint8_t m)
 {
 #if defined(__AVR__)
     asm volatile (
-                  "L_%=:  sub %[a],%[m]    \n\t"
-                  "       brcc L_%=        \n\t"
-                  "       add %[a],%[m]    \n\t"
-                  : [a] "+r" (a)
-                  : [m] "r"  (m)
-                  );
+        "L_%=:  sub %[a],%[m]    \n\t"
+        "       brcc L_%=        \n\t"
+        "       add %[a],%[m]    \n\t"
+        : [a] "+r" (a)
+        : [m] "r"  (m)
+    );
 #else
     while( a >= m) a -= m;
 #endif
@@ -298,13 +304,13 @@ LIB8STATIC uint8_t addmod8( uint8_t a, uint8_t b, uint8_t m)
 {
 #if defined(__AVR__)
     asm volatile (
-                  "       add %[a],%[b]    \n\t"
-                  "L_%=:  sub %[a],%[m]    \n\t"
-                  "       brcc L_%=        \n\t"
-                  "       add %[a],%[m]    \n\t"
-                  : [a] "+r" (a)
-                  : [b] "r"  (b), [m] "r" (m)
-                  );
+        "       add %[a],%[b]    \n\t"
+        "L_%=:  sub %[a],%[m]    \n\t"
+        "       brcc L_%=        \n\t"
+        "       add %[a],%[m]    \n\t"
+        : [a] "+r" (a)
+        : [b] "r"  (b), [m] "r" (m)
+    );
 #else
     a += b;
     while( a >= m) a -= m;
@@ -327,13 +333,13 @@ LIB8STATIC uint8_t submod8( uint8_t a, uint8_t b, uint8_t m)
 {
 #if defined(__AVR__)
     asm volatile (
-                  "       sub %[a],%[b]    \n\t"
-                  "L_%=:  sub %[a],%[m]    \n\t"
-                  "       brcc L_%=        \n\t"
-                  "       add %[a],%[m]    \n\t"
-                  : [a] "+r" (a)
-                  : [b] "r"  (b), [m] "r" (m)
-                  );
+        "       sub %[a],%[b]    \n\t"
+        "L_%=:  sub %[a],%[m]    \n\t"
+        "       brcc L_%=        \n\t"
+        "       add %[a],%[m]    \n\t"
+        : [a] "+r" (a)
+        : [b] "r"  (b), [m] "r" (m)
+    );
 #else
     a -= b;
     while( a >= m) a -= m;
@@ -348,16 +354,16 @@ LIB8STATIC_ALWAYS_INLINE uint8_t mul8( uint8_t i, uint8_t j)
     return ((int)i * (int)(j) ) & 0xFF;
 #elif MUL8_AVRASM == 1
     asm volatile(
-         /* Multiply 8-bit i * 8-bit j, giving 16-bit r1,r0 */
-         "mul %0, %1          \n\t"
-         /* Extract the LOW 8-bits (r0) */
-         "mov %0, r0          \n\t"
-         /* Restore r1 to "0"; it's expected to always be that */
-         "clr __zero_reg__    \n\t"
-         : "+a" (i)
-         : "a"  (j)
-         : "r0", "r1");
-
+        /* Multiply 8-bit i * 8-bit j, giving 16-bit r1,r0 */
+        "mul %0, %1          \n\t"
+        /* Extract the LOW 8-bits (r0) */
+        "mov %0, r0          \n\t"
+        /* Restore r1 to "0"; it's expected to always be that */
+        "clr __zero_reg__    \n\t"
+        : "+a" (i)
+        : "a"  (j)
+        : "r0", "r1"
+    );
     return i;
 #else
 #error "No implementation for mul8 available."
@@ -375,24 +381,24 @@ LIB8STATIC_ALWAYS_INLINE uint8_t qmul8( uint8_t i, uint8_t j)
     return p;
 #elif QMUL8_AVRASM == 1
     asm volatile(
-                 /* Multiply 8-bit i * 8-bit j, giving 16-bit r1,r0 */
-                 "  mul %0, %1          \n\t"
-                 /* If high byte of result is zero, all is well. */
-                 "  tst r1              \n\t"
-                 "  breq Lnospill_%=    \n\t"
-                 /* If high byte of result > 0, saturate low byte to 0xFF */
-                 "  ldi %0,0xFF         \n\t"
-                 "  rjmp Ldone_%=       \n\t"
-                 "Lnospill_%=:          \n\t"
-                 /* Extract the LOW 8-bits (r0) */
-                 "  mov %0, r0          \n\t"
-                 "Ldone_%=:             \n\t"
-                 /* Restore r1 to "0"; it's expected to always be that */
-                 "  clr __zero_reg__    \n\t"
-                 : "+a" (i)
-                 : "a"  (j)
-                 : "r0", "r1");
-
+        /* Multiply 8-bit i * 8-bit j, giving 16-bit r1,r0 */
+        "  mul %0, %1          \n\t"
+        /* If high byte of result is zero, all is well. */
+        "  tst r1              \n\t"
+        "  breq Lnospill_%=    \n\t"
+        /* If high byte of result > 0, saturate low byte to 0xFF */
+        "  ldi %0,0xFF         \n\t"
+        "  rjmp Ldone_%=       \n\t"
+        "Lnospill_%=:          \n\t"
+        /* Extract the LOW 8-bits (r0) */
+        "  mov %0, r0          \n\t"
+        "Ldone_%=:             \n\t"
+        /* Restore r1 to "0"; it's expected to always be that */
+        "  clr __zero_reg__    \n\t"
+        : "+a" (i)
+        : "a"  (j)
+        : "r0", "r1"
+    );
     return i;
 #else
 #error "No implementation for qmul8 available."
@@ -407,16 +413,15 @@ LIB8STATIC_ALWAYS_INLINE int8_t abs8( int8_t i)
     if( i < 0) i = -i;
     return i;
 #elif ABS8_AVRASM == 1
-
-
     asm volatile(
-         /* First, check the high bit, and prepare to skip if it's clear */
-         "sbrc %0, 7 \n"
+        /* First, check the high bit, and prepare to skip if it's clear */
+        "sbrc %0, 7 \n"
 
-         /* Negate the value */
-         "neg %0     \n"
+        /* Negate the value */
+        "neg %0     \n"
 
-         : "+r" (i) : "r" (i) );
+        : "+r" (i) : "r" (i)
+    );
     return i;
 #else
 #error "No implementation for abs8 available."
diff --git a/lib8tion/scale8.h b/lib8tion/scale8.h
index 56392258d5..6324475b66 100644
--- a/lib8tion/scale8.h
+++ b/lib8tion/scale8.h
@@ -55,7 +55,7 @@ LIB8STATIC_ALWAYS_INLINE uint8_t scale8( uint8_t i, fract8 scale)
         : [work] "+r" (work), [cnt] "+r" (cnt)
         : [scale] "r" (scale), [i] "r" (i)
         :
-      );
+    );
     return work;
 #else
     asm volatile(
@@ -69,18 +69,18 @@ LIB8STATIC_ALWAYS_INLINE uint8_t scale8( uint8_t i, fract8 scale)
         // walk and chew gum at the same time
         "adc %0, r1          \n\t"
 #else
-         /* Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0 */
-         "mul %0, %1          \n\t"
-         /* Move the high 8-bits of the product (r1) back to i */
-         "mov %0, r1          \n\t"
-         /* Restore r1 to "0"; it's expected to always be that */
+        /* Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0 */
+        "mul %0, %1          \n\t"
+        /* Move the high 8-bits of the product (r1) back to i */
+        "mov %0, r1          \n\t"
+        /* Restore r1 to "0"; it's expected to always be that */
 #endif
-         "clr __zero_reg__    \n\t"
-
-         : "+a" (i)      /* writes to i */
-         : "a"  (scale)  /* uses scale */
-         : "r0", "r1"    /* clobbers r0, r1 */ );
+        "clr __zero_reg__    \n\t"
 
+        : "+a" (i)      /* writes to i */
+        : "a"  (scale)  /* uses scale */
+        : "r0", "r1"    /* clobbers r0, r1 */
+    );
     /* Return the result */
     return i;
 #endif
@@ -115,8 +115,8 @@ LIB8STATIC_ALWAYS_INLINE uint8_t scale8_video( uint8_t i, fract8 scale)
         "L_%=: \n\t"
         : [j] "+a" (j)
         : [i] "a" (i), [scale] "a" (scale)
-        : "r0", "r1");
-
+        : "r0", "r1"
+    );
     return j;
     // uint8_t nonzeroscale = (scale != 0) ? 1 : 0;
     // asm volatile(
@@ -127,11 +127,9 @@ LIB8STATIC_ALWAYS_INLINE uint8_t scale8_video( uint8_t i, fract8 scale)
     //      "      add %0, %2       \n"
     //      "      clr __zero_reg__ \n"
     //      "L_%=:                  \n"
-
     //      : "+a" (i)
     //      : "a" (scale), "a" (nonzeroscale)
     //      : "r0", "r1");
-
     // // Return the result
     // return i;
 #else
@@ -153,28 +151,27 @@ LIB8STATIC_ALWAYS_INLINE uint8_t scale8_LEAVING_R1_DIRTY( uint8_t i, fract8 scal
 #endif
 #elif SCALE8_AVRASM == 1
     asm volatile(
-      #if (FASTLED_SCALE8_FIXED==1)
-              // Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0
-              "mul %0, %1          \n\t"
-              // Add i to r0, possibly setting the carry flag
-              "add r0, %0         \n\t"
-              // load the immediate 0 into i (note, this does _not_ touch any flags)
-              "ldi %0, 0x00       \n\t"
-              // walk and chew gum at the same time
-              "adc %0, r1          \n\t"
-      #else
-         /* Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0 */
-         "mul %0, %1    \n\t"
-         /* Move the high 8-bits of the product (r1) back to i */
-         "mov %0, r1    \n\t"
-      #endif
-         /* R1 IS LEFT DIRTY HERE; YOU MUST ZERO IT OUT YOURSELF  */
-         /* "clr __zero_reg__    \n\t" */
-
-         : "+a" (i)      /* writes to i */
-         : "a"  (scale)  /* uses scale */
-         : "r0", "r1"    /* clobbers r0, r1 */ );
-
+#if (FASTLED_SCALE8_FIXED==1)
+        // Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0
+        "mul %0, %1          \n\t"
+        // Add i to r0, possibly setting the carry flag
+        "add r0, %0         \n\t"
+        // load the immediate 0 into i (note, this does _not_ touch any flags)
+        "ldi %0, 0x00       \n\t"
+        // walk and chew gum at the same time
+        "adc %0, r1          \n\t"
+#else
+        /* Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0 */
+        "mul %0, %1    \n\t"
+        /* Move the high 8-bits of the product (r1) back to i */
+        "mov %0, r1    \n\t"
+#endif
+        /* R1 IS LEFT DIRTY HERE; YOU MUST ZERO IT OUT YOURSELF  */
+        /* "clr __zero_reg__    \n\t" */
+        : "+a" (i)      /* writes to i */
+        : "a"  (scale)  /* uses scale */
+        : "r0", "r1"    /* clobbers r0, r1 */
+    );
     // Return the result
     return i;
 #else
@@ -197,27 +194,28 @@ LIB8STATIC_ALWAYS_INLINE void nscale8_LEAVING_R1_DIRTY( uint8_t& i, fract8 scale
 #endif
 #elif SCALE8_AVRASM == 1
     asm volatile(
-      #if (FASTLED_SCALE8_FIXED==1)
-              // Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0
-              "mul %0, %1          \n\t"
-              // Add i to r0, possibly setting the carry flag
-              "add r0, %0         \n\t"
-              // load the immediate 0 into i (note, this does _not_ touch any flags)
-              "ldi %0, 0x00       \n\t"
-              // walk and chew gum at the same time
-              "adc %0, r1          \n\t"
-      #else
-         /* Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0 */
-         "mul %0, %1    \n\t"
-         /* Move the high 8-bits of the product (r1) back to i */
-         "mov %0, r1    \n\t"
-      #endif
-         /* R1 IS LEFT DIRTY HERE; YOU MUST ZERO IT OUT YOURSELF */
-         /* "clr __zero_reg__    \n\t" */
-
-         : "+a" (i)      /* writes to i */
-         : "a"  (scale)  /* uses scale */
-         : "r0", "r1"    /* clobbers r0, r1 */ );
+#if (FASTLED_SCALE8_FIXED==1)
+        // Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0
+        "mul %0, %1          \n\t"
+        // Add i to r0, possibly setting the carry flag
+        "add r0, %0         \n\t"
+        // load the immediate 0 into i (note, this does _not_ touch any flags)
+        "ldi %0, 0x00       \n\t"
+        // walk and chew gum at the same time
+        "adc %0, r1          \n\t"
+#else
+        /* Multiply 8-bit i * 8-bit scale, giving 16-bit r1,r0 */
+        "mul %0, %1    \n\t"
+        /* Move the high 8-bits of the product (r1) back to i */
+        "mov %0, r1    \n\t"
+#endif
+        /* R1 IS LEFT DIRTY HERE; YOU MUST ZERO IT OUT YOURSELF */
+        /* "clr __zero_reg__    \n\t" */
+
+        : "+a" (i)      /* writes to i */
+        : "a"  (scale)  /* uses scale */
+        : "r0", "r1"    /* clobbers r0, r1 */
+    );
 #else
 #error "No implementation for nscale8_LEAVING_R1_DIRTY available."
 #endif
@@ -246,8 +244,8 @@ LIB8STATIC_ALWAYS_INLINE uint8_t scale8_video_LEAVING_R1_DIRTY( uint8_t i, fract
         "L_%=: \n\t"
         : [j] "+a" (j)
         : [i] "a" (i), [scale] "a" (scale)
-        : "r0", "r1");
-
+        : "r0", "r1"
+    );
     return j;
     // uint8_t nonzeroscale = (scale != 0) ? 1 : 0;
     // asm volatile(
@@ -258,11 +256,9 @@ LIB8STATIC_ALWAYS_INLINE uint8_t scale8_video_LEAVING_R1_DIRTY( uint8_t i, fract
     //      "      add %0, %2       \n"
     //      "      clr __zero_reg__ \n"
     //      "L_%=:                  \n"
-
     //      : "+a" (i)
     //      : "a" (scale), "a" (nonzeroscale)
     //      : "r0", "r1");
-
     // // Return the result
     // return i;
 #else
@@ -289,7 +285,8 @@ LIB8STATIC_ALWAYS_INLINE void nscale8_video_LEAVING_R1_DIRTY( uint8_t & i, fract
         "L_%=: \n\t"
         : [i] "+a" (i)
         : [scale] "a" (scale)
-        : "r0", "r1");
+        : "r0", "r1"
+    );
 #else
 #error "No implementation for scale8_video_LEAVING_R1_DIRTY available."
 #endif
@@ -427,50 +424,50 @@ LIB8STATIC_ALWAYS_INLINE uint16_t scale16by8( uint16_t i, fract8 scale )
 #if FASTLED_SCALE8_FIXED == 1
     uint16_t result = 0;
     asm volatile(
-                 // result.A = HighByte( (i.A x scale) + i.A )
-                 "  mul %A[i], %[scale]                 \n\t"
-                 "  add r0, %A[i]                       \n\t"
-            //   "  adc r1, [zero]                      \n\t"
-            //   "  mov %A[result], r1                  \n\t"
-                 "  adc %A[result], r1                  \n\t"
-                 
-                 // result.A-B += i.B x scale
-                 "  mul %B[i], %[scale]                 \n\t"
-                 "  add %A[result], r0                  \n\t"
-                 "  adc %B[result], r1                  \n\t"
-
-                 // cleanup r1
-                 "  clr __zero_reg__                    \n\t"
-                 
-                 // result.A-B += i.B
-                 "  add %A[result], %B[i]               \n\t"
-                 "  adc %B[result], __zero_reg__        \n\t"
-
-                 : [result] "+r" (result)
-                 : [i] "r" (i), [scale] "r" (scale)
-                 : "r0", "r1"
-                 );
+        // result.A = HighByte( (i.A x scale) + i.A )
+        "  mul %A[i], %[scale]                 \n\t"
+        "  add r0, %A[i]                       \n\t"
+        //   "  adc r1, [zero]                      \n\t"
+        //   "  mov %A[result], r1                  \n\t"
+        "  adc %A[result], r1                  \n\t"
+        
+        // result.A-B += i.B x scale
+        "  mul %B[i], %[scale]                 \n\t"
+        "  add %A[result], r0                  \n\t"
+        "  adc %B[result], r1                  \n\t"
+
+        // cleanup r1
+        "  clr __zero_reg__                    \n\t"
+        
+        // result.A-B += i.B
+        "  add %A[result], %B[i]               \n\t"
+        "  adc %B[result], __zero_reg__        \n\t"
+
+        : [result] "+r" (result)
+        : [i] "r" (i), [scale] "r" (scale)
+        : "r0", "r1"
+    );
     return result;
 #else
     uint16_t result = 0;
     asm volatile(
-         // result.A = HighByte(i.A x j )
-         "  mul %A[i], %[scale]                 \n\t"
-         "  mov %A[result], r1                  \n\t"
-         //"  clr %B[result]                      \n\t"
-
-         // result.A-B += i.B x j
-         "  mul %B[i], %[scale]                 \n\t"
-         "  add %A[result], r0                  \n\t"
-         "  adc %B[result], r1                  \n\t"
-
-         // cleanup r1
-         "  clr __zero_reg__                    \n\t"
-
-         : [result] "+r" (result)
-         : [i] "r" (i), [scale] "r" (scale)
-         : "r0", "r1"
-         );
+        // result.A = HighByte(i.A x j )
+        "  mul %A[i], %[scale]                 \n\t"
+        "  mov %A[result], r1                  \n\t"
+        //"  clr %B[result]                      \n\t"
+
+        // result.A-B += i.B x j
+        "  mul %B[i], %[scale]                 \n\t"
+        "  add %A[result], r0                  \n\t"
+        "  adc %B[result], r1                  \n\t"
+
+        // cleanup r1
+        "  clr __zero_reg__                    \n\t"
+
+        : [result] "+r" (result)
+        : [i] "r" (i), [scale] "r" (scale)
+        : "r0", "r1"
+    );
     return result;
 #endif
 #else
@@ -503,137 +500,137 @@ LIB8STATIC uint16_t scale16( uint16_t i, fract16 scale )
     // will be zero, which is not what we want.
     uint32_t result;
     asm volatile(
-                 // result.A-B  = i.A x scale.A
-                 "  mul %A[i], %A[scale]                 \n\t"
-                 //  save results...
-                 // basic idea:
-                 //"  mov %A[result], r0                 \n\t"
-                 //"  mov %B[result], r1                 \n\t"
-                 // which can be written as...
-                 "  movw %A[result], r0                   \n\t"
-                 // Because we're going to add i.A-B to
-                 // result.A-D, we DO need to keep both
-                 // the r0 and r1 portions of the product
-                 // UNlike in the 'unfixed scale8' version.
-                 // So the movw here is needed.
-                 : [result] "=r" (result)
-                 : [i] "r" (i),
-                 [scale] "r" (scale)
-                 : "r0", "r1"
-                 );
-    
-    asm volatile(
-                 // result.C-D  = i.B x scale.B
-                 "  mul %B[i], %B[scale]                 \n\t"
-                 //"  mov %C[result], r0                 \n\t"
-                 //"  mov %D[result], r1                 \n\t"
-                 "  movw %C[result], r0                   \n\t"
-                 : [result] "+r" (result)
-                 : [i] "r" (i),
-                 [scale] "r" (scale)
-                 : "r0", "r1"
-                 );
+        // result.A-B  = i.A x scale.A
+        "  mul %A[i], %A[scale]                 \n\t"
+        //  save results...
+        // basic idea:
+        //"  mov %A[result], r0                 \n\t"
+        //"  mov %B[result], r1                 \n\t"
+        // which can be written as...
+        "  movw %A[result], r0                   \n\t"
+        // Because we're going to add i.A-B to
+        // result.A-D, we DO need to keep both
+        // the r0 and r1 portions of the product
+        // UNlike in the 'unfixed scale8' version.
+        // So the movw here is needed.
+        : [result] "=r" (result)
+        : [i] "r" (i),
+        [scale] "r" (scale)
+        : "r0", "r1"
+    );
     
-    const uint8_t  zero = 0;
     asm volatile(
-                 // result.B-D += i.B x scale.A
-                 "  mul %B[i], %A[scale]                 \n\t"
-                 
-                 "  add %B[result], r0                   \n\t"
-                 "  adc %C[result], r1                   \n\t"
-                 "  adc %D[result], %[zero]              \n\t"
-                 
-                 // result.B-D += i.A x scale.B
-                 "  mul %A[i], %B[scale]                 \n\t"
-                 
-                 "  add %B[result], r0                   \n\t"
-                 "  adc %C[result], r1                   \n\t"
-                 "  adc %D[result], %[zero]              \n\t"
-                 
-                 // cleanup r1
-                 "  clr r1                               \n\t"
-                 
-                 : [result] "+r" (result)
-                 : [i] "r" (i),
-                 [scale] "r" (scale),
-                 [zero] "r" (zero)
-                 : "r0", "r1"
-                 );
+        // result.C-D  = i.B x scale.B
+        "  mul %B[i], %B[scale]                 \n\t"
+        //"  mov %C[result], r0                 \n\t"
+        //"  mov %D[result], r1                 \n\t"
+        "  movw %C[result], r0                   \n\t"
+        : [result] "+r" (result)
+        : [i] "r" (i),
+        [scale] "r" (scale)
+        : "r0", "r1"
+);
+
+const uint8_t  zero = 0;
+asm volatile(
+        // result.B-D += i.B x scale.A
+        "  mul %B[i], %A[scale]                 \n\t"
+        
+        "  add %B[result], r0                   \n\t"
+        "  adc %C[result], r1                   \n\t"
+        "  adc %D[result], %[zero]              \n\t"
+        
+        // result.B-D += i.A x scale.B
+        "  mul %A[i], %B[scale]                 \n\t"
+        
+        "  add %B[result], r0                   \n\t"
+        "  adc %C[result], r1                   \n\t"
+        "  adc %D[result], %[zero]              \n\t"
+        
+        // cleanup r1
+        "  clr r1                               \n\t"
+        
+        : [result] "+r" (result)
+        : [i] "r" (i),
+        [scale] "r" (scale),
+        [zero] "r" (zero)
+        : "r0", "r1"
+    );
 
     asm volatile(
-                 // result.A-D += i.A-B
-                 "  add %A[result], %A[i]                \n\t"
-                 "  adc %B[result], %B[i]                \n\t"
-                 "  adc %C[result], %[zero]              \n\t"
-                 "  adc %D[result], %[zero]              \n\t"
-                 : [result] "+r" (result)
-                 : [i] "r" (i),
-                 [zero] "r" (zero)
-                 );
+        // result.A-D += i.A-B
+        "  add %A[result], %A[i]                \n\t"
+        "  adc %B[result], %B[i]                \n\t"
+        "  adc %C[result], %[zero]              \n\t"
+        "  adc %D[result], %[zero]              \n\t"
+        : [result] "+r" (result)
+        : [i] "r" (i),
+        [zero] "r" (zero)
+    );
     
     result = result >> 16;
     return result;
 #else
     uint32_t result;
     asm volatile(
-                 // result.A-B  = i.A x scale.A
-                 "  mul %A[i], %A[scale]                 \n\t"
-                 //  save results...
-                 // basic idea:
-                 //"  mov %A[result], r0                 \n\t"
-                 //"  mov %B[result], r1                 \n\t"
-                 // which can be written as...
-                 "  movw %A[result], r0                   \n\t"
-                 // We actually don't need to do anything with r0,
-                 // as result.A is never used again here, so we
-                 // could just move the high byte, but movw is
-                 // one clock cycle, just like mov, so might as
-                 // well, in case we want to use this code for
-                 // a generic 16x16 multiply somewhere.
-
-                 : [result] "=r" (result)
-                 : [i] "r" (i),
-                   [scale] "r" (scale)
-                 : "r0", "r1"
-                 );
+        // result.A-B  = i.A x scale.A
+        "  mul %A[i], %A[scale]                 \n\t"
+        //  save results...
+        // basic idea:
+        //"  mov %A[result], r0                 \n\t"
+        //"  mov %B[result], r1                 \n\t"
+        // which can be written as...
+        "  movw %A[result], r0                   \n\t"
+        // We actually don't need to do anything with r0,
+        // as result.A is never used again here, so we
+        // could just move the high byte, but movw is
+        // one clock cycle, just like mov, so might as
+        // well, in case we want to use this code for
+        // a generic 16x16 multiply somewhere.
+
+        : [result] "=r" (result)
+        : [i] "r" (i),
+        [scale] "r" (scale)
+        : "r0", "r1"
+    );
 
     asm volatile(
-                 // result.C-D  = i.B x scale.B
-                 "  mul %B[i], %B[scale]                 \n\t"
-                 //"  mov %C[result], r0                 \n\t"
-                 //"  mov %D[result], r1                 \n\t"
-                 "  movw %C[result], r0                   \n\t"
-                 : [result] "+r" (result)
-                 : [i] "r" (i),
-                   [scale] "r" (scale)
-                 : "r0", "r1"
-                 );
+        // result.C-D  = i.B x scale.B
+        "  mul %B[i], %B[scale]                 \n\t"
+        //"  mov %C[result], r0                 \n\t"
+        //"  mov %D[result], r1                 \n\t"
+        "  movw %C[result], r0                   \n\t"
+        : [result] "+r" (result)
+        : [i] "r" (i),
+        [scale] "r" (scale)
+        : "r0", "r1"
+    );
 
     const uint8_t  zero = 0;
     asm volatile(
-                 // result.B-D += i.B x scale.A
-                 "  mul %B[i], %A[scale]                 \n\t"
+        // result.B-D += i.B x scale.A
+        "  mul %B[i], %A[scale]                 \n\t"
 
-                 "  add %B[result], r0                   \n\t"
-                 "  adc %C[result], r1                   \n\t"
-                 "  adc %D[result], %[zero]              \n\t"
+        "  add %B[result], r0                   \n\t"
+        "  adc %C[result], r1                   \n\t"
+        "  adc %D[result], %[zero]              \n\t"
 
-                 // result.B-D += i.A x scale.B
-                 "  mul %A[i], %B[scale]                 \n\t"
+        // result.B-D += i.A x scale.B
+        "  mul %A[i], %B[scale]                 \n\t"
 
-                 "  add %B[result], r0                   \n\t"
-                 "  adc %C[result], r1                   \n\t"
-                 "  adc %D[result], %[zero]              \n\t"
+        "  add %B[result], r0                   \n\t"
+        "  adc %C[result], r1                   \n\t"
+        "  adc %D[result], %[zero]              \n\t"
 
-                 // cleanup r1
-                 "  clr r1                               \n\t"
+        // cleanup r1
+        "  clr r1                               \n\t"
 
-                 : [result] "+r" (result)
-                 : [i] "r" (i),
-                   [scale] "r" (scale),
-                   [zero] "r" (zero)
-                 : "r0", "r1"
-                 );
+        : [result] "+r" (result)
+        : [i] "r" (i),
+        [scale] "r" (scale),
+        [zero] "r" (zero)
+        : "r0", "r1"
+    );
 
     result = result >> 16;
     return result;
diff --git a/lib8tion/trig8.h b/lib8tion/trig8.h
index 4907c6ff30..6e08407549 100644
--- a/lib8tion/trig8.h
+++ b/lib8tion/trig8.h
@@ -161,10 +161,10 @@ LIB8STATIC uint8_t  sin8_avr( uint8_t theta)
     uint8_t offset = theta;
 
     asm volatile(
-                 "sbrc %[theta],6         \n\t"
-                 "com  %[offset]           \n\t"
-                 : [theta] "+r" (theta), [offset] "+r" (offset)
-                 );
+        "sbrc %[theta],6         \n\t"
+        "com  %[offset]           \n\t"
+        : [theta] "+r" (theta), [offset] "+r" (offset)
+    );
 
     offset &= 0x3F; // 0..63
 
@@ -185,18 +185,18 @@ LIB8STATIC uint8_t  sin8_avr( uint8_t theta)
     uint8_t mx;
     uint8_t xr1;
     asm volatile(
-                 "mul %[m16],%[secoffset]   \n\t"
-                 "mov %[mx],r0              \n\t"
-                 "mov %[xr1],r1             \n\t"
-                 "eor  r1, r1               \n\t"
-                 "swap %[mx]                \n\t"
-                 "andi %[mx],0x0F           \n\t"
-                 "swap %[xr1]               \n\t"
-                 "andi %[xr1], 0xF0         \n\t"
-                 "or   %[mx], %[xr1]        \n\t"
-                 : [mx] "=d" (mx), [xr1] "=d" (xr1)
-                 : [m16] "d" (m16), [secoffset] "d" (secoffset)
-                 );
+        "mul %[m16],%[secoffset]   \n\t"
+        "mov %[mx],r0              \n\t"
+        "mov %[xr1],r1             \n\t"
+        "eor  r1, r1               \n\t"
+        "swap %[mx]                \n\t"
+        "andi %[mx],0x0F           \n\t"
+        "swap %[xr1]               \n\t"
+        "andi %[xr1], 0xF0         \n\t"
+        "or   %[mx], %[xr1]        \n\t"
+        : [mx] "=d" (mx), [xr1] "=d" (xr1)
+        : [m16] "d" (m16), [secoffset] "d" (secoffset)
+    );
 
     int8_t y = mx + b;
     if( theta & 0x80 ) y = -y;
diff --git a/noise.cpp b/noise.cpp
index 7d42d64dce..2963c4a413 100644
--- a/noise.cpp
+++ b/noise.cpp
@@ -6,21 +6,24 @@ FASTLED_NAMESPACE_BEGIN
 
 #define P(x) FL_PGM_READ_BYTE_NEAR(p + x)
 
-FL_PROGMEM static uint8_t const p[] = { 151,160,137,91,90,15,
-   131,13,201,95,96,53,194,233,7,225,140,36,103,30,69,142,8,99,37,240,21,10,23,
-   190, 6,148,247,120,234,75,0,26,197,62,94,252,219,203,117,35,11,32,57,177,33,
-   88,237,149,56,87,174,20,125,136,171,168, 68,175,74,165,71,134,139,48,27,166,
-   77,146,158,231,83,111,229,122,60,211,133,230,220,105,92,41,55,46,245,40,244,
-   102,143,54, 65,25,63,161, 1,216,80,73,209,76,132,187,208, 89,18,169,200,196,
-   135,130,116,188,159,86,164,100,109,198,173,186, 3,64,52,217,226,250,124,123,
-   5,202,38,147,118,126,255,82,85,212,207,206,59,227,47,16,58,17,182,189,28,42,
-   223,183,170,213,119,248,152, 2,44,154,163, 70,221,153,101,155,167, 43,172,9,
-   129,22,39,253, 19,98,108,110,79,113,224,232,178,185, 112,104,218,246,97,228,
-   251,34,242,193,238,210,144,12,191,179,162,241, 81,51,145,235,249,14,239,107,
-   49,192,214, 31,181,199,106,157,184, 84,204,176,115,121,50,45,127, 4,150,254,
-   138,236,205,93,222,114,67,29,24,72,243,141,128,195,78,66,215,61,156,180,151
-   };
-
+FL_PROGMEM static uint8_t const p[] = {
+    151, 160, 137,  91,  90,  15, 131,  13, 201,  95,  96,  53, 194, 233,   7, 225,
+    140,  36, 103,  30,  69, 142,   8,  99,  37, 240,  21,  10,  23, 190,   6, 148,
+    247, 120, 234,  75,   0,  26, 197,  62,  94, 252, 219, 203, 117,  35,  11,  32,
+     57, 177,  33,  88, 237, 149,  56,  87, 174,  20, 125, 136, 171, 168,  68, 175,
+     74, 165,  71, 134, 139,  48,  27, 166,  77, 146, 158, 231,  83, 111, 229, 122,
+     60, 211, 133, 230, 220, 105,  92,  41,  55,  46, 245,  40, 244, 102, 143,  54,
+     65,  25,  63, 161,   1, 216,  80,  73, 209,  76, 132, 187, 208,  89,  18, 169,
+    200, 196, 135, 130, 116, 188, 159,  86, 164, 100, 109, 198, 173, 186,   3,  64,
+     52, 217, 226, 250, 124, 123,   5, 202,  38, 147, 118, 126, 255,  82,  85, 212,
+    207, 206,  59, 227,  47,  16,  58,  17, 182, 189,  28,  42, 223, 183, 170, 213,
+    119, 248, 152,   2,  44, 154, 163,  70, 221, 153, 101, 155, 167,  43, 172,   9,
+    129,  22,  39, 253,  19,  98, 108, 110,  79, 113, 224, 232, 178, 185, 112, 104,
+    218, 246,  97, 228, 251,  34, 242, 193, 238, 210, 144,  12, 191, 179, 162, 241,
+     81,  51, 145, 235, 249,  14, 239, 107,  49, 192, 214,  31, 181, 199, 106, 157,
+    184,  84, 204, 176, 115, 121,  50,  45, 127,   4, 150, 254, 138, 236, 205,  93,
+    222, 114,  67,  29,  24,  72, 243, 141, 128, 195,  78,  66, 215,  61, 156, 180,
+    151};
 
 #if FASTLED_NOISE_ALLOW_AVERAGE_TO_OVERFLOW == 1
 #define AVG15(U,V) (((U)+(V)) >> 1)
@@ -74,55 +77,55 @@ static int16_t inline __attribute__((always_inline))  avg15_inline_avr_mul( int1
 #endif
 static int16_t inline __attribute__((always_inline))  grad16(uint8_t hash, int16_t x, int16_t y, int16_t z) {
 #if 0
-  switch(hash & 0xF) {
-    case  0: return (( x) + ( y))>>1;
-    case  1: return ((-x) + ( y))>>1;
-    case  2: return (( x) + (-y))>>1;
-    case  3: return ((-x) + (-y))>>1;
-    case  4: return (( x) + ( z))>>1;
-    case  5: return ((-x) + ( z))>>1;
-    case  6: return (( x) + (-z))>>1;
-    case  7: return ((-x) + (-z))>>1;
-    case  8: return (( y) + ( z))>>1;
-    case  9: return ((-y) + ( z))>>1;
-    case 10: return (( y) + (-z))>>1;
-    case 11: return ((-y) + (-z))>>1;
-    case 12: return (( y) + ( x))>>1;
-    case 13: return ((-y) + ( z))>>1;
-    case 14: return (( y) + (-x))>>1;
-    case 15: return ((-y) + (-z))>>1;
-  }
+    switch(hash & 0xF) {
+        case  0: return (( x) + ( y))>>1;
+        case  1: return ((-x) + ( y))>>1;
+        case  2: return (( x) + (-y))>>1;
+        case  3: return ((-x) + (-y))>>1;
+        case  4: return (( x) + ( z))>>1;
+        case  5: return ((-x) + ( z))>>1;
+        case  6: return (( x) + (-z))>>1;
+        case  7: return ((-x) + (-z))>>1;
+        case  8: return (( y) + ( z))>>1;
+        case  9: return ((-y) + ( z))>>1;
+        case 10: return (( y) + (-z))>>1;
+        case 11: return ((-y) + (-z))>>1;
+        case 12: return (( y) + ( x))>>1;
+        case 13: return ((-y) + ( z))>>1;
+        case 14: return (( y) + (-x))>>1;
+        case 15: return ((-y) + (-z))>>1;
+    }
 #else
-  hash = hash&15;
-  int16_t u = hash<8?x:y;
-  int16_t v = hash<4?y:hash==12||hash==14?x:z;
-  if(hash&1) { u = -u; }
-  if(hash&2) { v = -v; }
+    hash = hash&15;
+    int16_t u = hash<8?x:y;
+    int16_t v = hash<4?y:hash==12||hash==14?x:z;
+    if(hash&1) { u = -u; }
+    if(hash&2) { v = -v; }
 
-  return AVG15(u,v);
+    return AVG15(u,v);
 #endif
 }
 
 static int16_t inline __attribute__((always_inline)) grad16(uint8_t hash, int16_t x, int16_t y) {
-  hash = hash & 7;
-  int16_t u,v;
-  if(hash < 4) { u = x; v = y; } else { u = y; v = x; }
-  if(hash&1) { u = -u; }
-  if(hash&2) { v = -v; }
+    hash = hash & 7;
+    int16_t u,v;
+    if(hash < 4) { u = x; v = y; } else { u = y; v = x; }
+    if(hash&1) { u = -u; }
+    if(hash&2) { v = -v; }
 
-  return AVG15(u,v);
+    return AVG15(u,v);
 }
 
 static int16_t inline __attribute__((always_inline)) grad16(uint8_t hash, int16_t x) {
-  hash = hash & 15;
-  int16_t u,v;
-  if(hash > 8) { u=x;v=x; }
-  else if(hash < 4) { u=x;v=1; }
-  else { u=1;v=x; }
-  if(hash&1) { u = -u; }
-  if(hash&2) { v = -v; }
+    hash = hash & 15;
+    int16_t u,v;
+    if(hash > 8) { u=x;v=x; }
+    else if(hash < 4) { u=x;v=1; }
+    else { u=1;v=x; }
+    if(hash&1) { u = -u; }
+    if(hash&2) { v = -v; }
 
-  return AVG15(u,v);
+    return AVG15(u,v);
 }
 
 // selectBasedOnHashBit performs this:
@@ -150,115 +153,115 @@ static int8_t inline __attribute__((always_inline)) selectBasedOnHashBit(uint8_t
 
 static int8_t  inline __attribute__((always_inline)) grad8(uint8_t hash, int8_t x, int8_t y, int8_t z) {
 #if 0
-  switch(hash & 0xF) {
-    case  0: return (( x) + ( y))>>1;
-    case  1: return ((-x) + ( y))>>1;
-    case  2: return (( x) + (-y))>>1;
-    case  3: return ((-x) + (-y))>>1;
-    case  4: return (( x) + ( z))>>1;
-    case  5: return ((-x) + ( z))>>1;
-    case  6: return (( x) + (-z))>>1;
-    case  7: return ((-x) + (-z))>>1;
-    case  8: return (( y) + ( z))>>1;
-    case  9: return ((-y) + ( z))>>1;
-    case 10: return (( y) + (-z))>>1;
-    case 11: return ((-y) + (-z))>>1;
-    case 12: return (( y) + ( x))>>1;
-    case 13: return ((-y) + ( z))>>1;
-    case 14: return (( y) + (-x))>>1;
-    case 15: return ((-y) + (-z))>>1;
-  }
+    switch(hash & 0xF) {
+        case  0: return (( x) + ( y))>>1;
+        case  1: return ((-x) + ( y))>>1;
+        case  2: return (( x) + (-y))>>1;
+        case  3: return ((-x) + (-y))>>1;
+        case  4: return (( x) + ( z))>>1;
+        case  5: return ((-x) + ( z))>>1;
+        case  6: return (( x) + (-z))>>1;
+        case  7: return ((-x) + (-z))>>1;
+        case  8: return (( y) + ( z))>>1;
+        case  9: return ((-y) + ( z))>>1;
+        case 10: return (( y) + (-z))>>1;
+        case 11: return ((-y) + (-z))>>1;
+        case 12: return (( y) + ( x))>>1;
+        case 13: return ((-y) + ( z))>>1;
+        case 14: return (( y) + (-x))>>1;
+        case 15: return ((-y) + (-z))>>1;
+    }
 #else
 
-  hash &= 0xF;
+    hash &= 0xF;
 
-  int8_t u, v;
-  //u = (hash&8)?y:x;
-  u = selectBasedOnHashBit( hash, 3, y, x);
+    int8_t u, v;
+    //u = (hash&8)?y:x;
+    u = selectBasedOnHashBit( hash, 3, y, x);
 
 #if 1
-  v = hash<4?y:hash==12||hash==14?x:z;
+    v = hash<4?y:hash==12||hash==14?x:z;
 #else
-  // Verbose version for analysis; generates idenitical code.
-  if( hash < 4) { // 00 01 02 03
-	  v = y;
-  } else {
-      if( hash==12 || hash==14) { // 0C 0E
-		  v = x;
-	  } else {
-		  v = z; // 04 05 06 07   08 09 0A 0B   0D  0F
-	  }
-  }
+    // Verbose version for analysis; generates idenitical code.
+    if( hash < 4) { // 00 01 02 03
+        v = y;
+    } else {
+        if( hash==12 || hash==14) { // 0C 0E
+            v = x;
+        } else {
+            v = z; // 04 05 06 07   08 09 0A 0B   0D  0F
+        }
+    }
 #endif
 
-  if(hash&1) { u = -u; }
-  if(hash&2) { v = -v; }
+    if(hash&1) { u = -u; }
+    if(hash&2) { v = -v; }
 
-  return avg7(u,v);
+    return avg7(u,v);
 #endif
 }
 
 static int8_t inline __attribute__((always_inline)) grad8(uint8_t hash, int8_t x, int8_t y)
 {
-  // since the tests below can be done bit-wise on the bottom
-  // three bits, there's no need to mask off the higher bits
-  //  hash = hash & 7;
+    // since the tests below can be done bit-wise on the bottom
+    // three bits, there's no need to mask off the higher bits
+    //  hash = hash & 7;
 
-  int8_t u,v;
-  if( hash & 4) {
-	  u = y; v = x;
-  } else {
-	  u = x; v = y;
-  }
+    int8_t u,v;
+    if( hash & 4) {
+        u = y; v = x;
+    } else {
+        u = x; v = y;
+    }
 
-  if(hash&1) { u = -u; }
-  if(hash&2) { v = -v; }
+    if(hash&1) { u = -u; }
+    if(hash&2) { v = -v; }
 
-  return avg7(u,v);
+    return avg7(u,v);
 }
 
 static int8_t inline __attribute__((always_inline)) grad8(uint8_t hash, int8_t x)
 {
-  // since the tests below can be done bit-wise on the bottom
-  // four bits, there's no need to mask off the higher bits
-  //	hash = hash & 15;
+    // since the tests below can be done bit-wise on the bottom
+    // four bits, there's no need to mask off the higher bits
+    //	hash = hash & 15;
 
-  int8_t u,v;
-  if(hash & 8) {
-	  u=x; v=x;
-  } else {
-	if(hash & 4) {
-		u=1; v=x;
-	} else {
-		u=x; v=1;
-	}
-  }
+    int8_t u,v;
+    if(hash & 8) {
+        u=x; v=x;
+    } else {
+    if(hash & 4) {
+        u=1; v=x;
+    } else {
+        u=x; v=1;
+    }
+    }
 
-  if(hash&1) { u = -u; }
-  if(hash&2) { v = -v; }
+    if(hash&1) { u = -u; }
+    if(hash&2) { v = -v; }
 
-  return avg7(u,v);
+    return avg7(u,v);
 }
 
 
 #ifdef FADE_12
 uint16_t logfade12(uint16_t val) {
-  return scale16(val,val)>>4;
+    return scale16(val,val)>>4;
 }
 
 static int16_t inline __attribute__((always_inline)) lerp15by12( int16_t a, int16_t b, fract16 frac)
 {
-   //if(1) return (lerp(frac,a,b));
+    //if(1) return (lerp(frac,a,b));
     int16_t result;
     if( b > a) {
         uint16_t delta = b - a;
         uint16_t scaled = scale16(delta,frac<<4);
         result = a + scaled;
-     } else {
+    } else {
         uint16_t delta = a - b;
         uint16_t scaled = scale16(delta,frac<<4);
-      result = a - scaled;
-     }
+        result = a - scaled;
+    }
     return result;
 }
 #endif
@@ -285,183 +288,183 @@ static int8_t inline __attribute__((always_inline)) lerp7by8( int8_t a, int8_t b
 
 int16_t inoise16_raw(uint32_t x, uint32_t y, uint32_t z)
 {
-  // Find the unit cube containing the point
-  uint8_t X = (x>>16)&0xFF;
-  uint8_t Y = (y>>16)&0xFF;
-  uint8_t Z = (z>>16)&0xFF;
-
-  // Hash cube corner coordinates
-  uint8_t A = P(X)+Y;
-  uint8_t AA = P(A)+Z;
-  uint8_t AB = P(A+1)+Z;
-  uint8_t B = P(X+1)+Y;
-  uint8_t BA = P(B) + Z;
-  uint8_t BB = P(B+1)+Z;
-
-  // Get the relative position of the point in the cube
-  uint16_t u = x & 0xFFFF;
-  uint16_t v = y & 0xFFFF;
-  uint16_t w = z & 0xFFFF;
-
-  // Get a signed version of the above for the grad function
-  int16_t xx = (u >> 1) & 0x7FFF;
-  int16_t yy = (v >> 1) & 0x7FFF;
-  int16_t zz = (w >> 1) & 0x7FFF;
-  uint16_t N = 0x8000L;
-
-  u = EASE16(u); v = EASE16(v); w = EASE16(w);
-
-  // skip the log fade adjustment for the moment, otherwise here we would
-  // adjust fade values for u,v,w
-  int16_t X1 = LERP(grad16(P(AA), xx, yy, zz), grad16(P(BA), xx - N, yy, zz), u);
-  int16_t X2 = LERP(grad16(P(AB), xx, yy-N, zz), grad16(P(BB), xx - N, yy - N, zz), u);
-  int16_t X3 = LERP(grad16(P(AA+1), xx, yy, zz-N), grad16(P(BA+1), xx - N, yy, zz-N), u);
-  int16_t X4 = LERP(grad16(P(AB+1), xx, yy-N, zz-N), grad16(P(BB+1), xx - N, yy - N, zz - N), u);
-
-  int16_t Y1 = LERP(X1,X2,v);
-  int16_t Y2 = LERP(X3,X4,v);
+    // Find the unit cube containing the point
+    uint8_t X = (x>>16)&0xFF;
+    uint8_t Y = (y>>16)&0xFF;
+    uint8_t Z = (z>>16)&0xFF;
+
+    // Hash cube corner coordinates
+    uint8_t A = P(X)+Y;
+    uint8_t AA = P(A)+Z;
+    uint8_t AB = P(A+1)+Z;
+    uint8_t B = P(X+1)+Y;
+    uint8_t BA = P(B) + Z;
+    uint8_t BB = P(B+1)+Z;
+
+    // Get the relative position of the point in the cube
+    uint16_t u = x & 0xFFFF;
+    uint16_t v = y & 0xFFFF;
+    uint16_t w = z & 0xFFFF;
+
+    // Get a signed version of the above for the grad function
+    int16_t xx = (u >> 1) & 0x7FFF;
+    int16_t yy = (v >> 1) & 0x7FFF;
+    int16_t zz = (w >> 1) & 0x7FFF;
+    uint16_t N = 0x8000L;
+
+    u = EASE16(u); v = EASE16(v); w = EASE16(w);
+
+    // skip the log fade adjustment for the moment, otherwise here we would
+    // adjust fade values for u,v,w
+    int16_t X1 = LERP(grad16(P(AA), xx, yy, zz), grad16(P(BA), xx - N, yy, zz), u);
+    int16_t X2 = LERP(grad16(P(AB), xx, yy-N, zz), grad16(P(BB), xx - N, yy - N, zz), u);
+    int16_t X3 = LERP(grad16(P(AA+1), xx, yy, zz-N), grad16(P(BA+1), xx - N, yy, zz-N), u);
+    int16_t X4 = LERP(grad16(P(AB+1), xx, yy-N, zz-N), grad16(P(BB+1), xx - N, yy - N, zz - N), u);
+
+    int16_t Y1 = LERP(X1,X2,v);
+    int16_t Y2 = LERP(X3,X4,v);
+
+    int16_t ans = LERP(Y1,Y2,w);
 
-  int16_t ans = LERP(Y1,Y2,w);
-
-  return ans;
+    return ans;
 }
 
 uint16_t inoise16(uint32_t x, uint32_t y, uint32_t z) {
-  int32_t ans = inoise16_raw(x,y,z);
-  ans = ans + 19052L;
-  uint32_t pan = ans;
-  // pan = (ans * 220L) >> 7.  That's the same as:
-  // pan = (ans * 440L) >> 8.  And this way avoids a 7X four-byte shift-loop on AVR.
-  // Identical math, except for the highest bit, which we don't care about anyway,
-  // since we're returning the 'middle' 16 out of a 32-bit value anyway.
-  pan *= 440L;
-  return (pan>>8);
+    int32_t ans = inoise16_raw(x,y,z);
+    ans = ans + 19052L;
+    uint32_t pan = ans;
+    // pan = (ans * 220L) >> 7.  That's the same as:
+    // pan = (ans * 440L) >> 8.  And this way avoids a 7X four-byte shift-loop on AVR.
+    // Identical math, except for the highest bit, which we don't care about anyway,
+    // since we're returning the 'middle' 16 out of a 32-bit value anyway.
+    pan *= 440L;
+    return (pan>>8);
 
-  // // return scale16by8(pan,220)<<1;
-  // return ((inoise16_raw(x,y,z)+19052)*220)>>7;
-  // return scale16by8(inoise16_raw(x,y,z)+19052,220)<<1;
+    // // return scale16by8(pan,220)<<1;
+    // return ((inoise16_raw(x,y,z)+19052)*220)>>7;
+    // return scale16by8(inoise16_raw(x,y,z)+19052,220)<<1;
 }
 
 int16_t inoise16_raw(uint32_t x, uint32_t y)
 {
-  // Find the unit cube containing the point
-  uint8_t X = x>>16;
-  uint8_t Y = y>>16;
+    // Find the unit cube containing the point
+    uint8_t X = x>>16;
+    uint8_t Y = y>>16;
 
-  // Hash cube corner coordinates
-  uint8_t A = P(X)+Y;
-  uint8_t AA = P(A);
-  uint8_t AB = P(A+1);
-  uint8_t B = P(X+1)+Y;
-  uint8_t BA = P(B);
-  uint8_t BB = P(B+1);
+    // Hash cube corner coordinates
+    uint8_t A = P(X)+Y;
+    uint8_t AA = P(A);
+    uint8_t AB = P(A+1);
+    uint8_t B = P(X+1)+Y;
+    uint8_t BA = P(B);
+    uint8_t BB = P(B+1);
 
-  // Get the relative position of the point in the cube
-  uint16_t u = x & 0xFFFF;
-  uint16_t v = y & 0xFFFF;
+    // Get the relative position of the point in the cube
+    uint16_t u = x & 0xFFFF;
+    uint16_t v = y & 0xFFFF;
 
-  // Get a signed version of the above for the grad function
-  int16_t xx = (u >> 1) & 0x7FFF;
-  int16_t yy = (v >> 1) & 0x7FFF;
-  uint16_t N = 0x8000L;
+    // Get a signed version of the above for the grad function
+    int16_t xx = (u >> 1) & 0x7FFF;
+    int16_t yy = (v >> 1) & 0x7FFF;
+    uint16_t N = 0x8000L;
 
-  u = EASE16(u); v = EASE16(v);
+    u = EASE16(u); v = EASE16(v);
 
-  int16_t X1 = LERP(grad16(P(AA), xx, yy), grad16(P(BA), xx - N, yy), u);
-  int16_t X2 = LERP(grad16(P(AB), xx, yy-N), grad16(P(BB), xx - N, yy - N), u);
+    int16_t X1 = LERP(grad16(P(AA), xx, yy), grad16(P(BA), xx - N, yy), u);
+    int16_t X2 = LERP(grad16(P(AB), xx, yy-N), grad16(P(BB), xx - N, yy - N), u);
 
-  int16_t ans = LERP(X1,X2,v);
+    int16_t ans = LERP(X1,X2,v);
 
-  return ans;
+    return ans;
 }
 
 uint16_t inoise16(uint32_t x, uint32_t y) {
-  int32_t ans = inoise16_raw(x,y);
-  ans = ans + 17308L;
-  uint32_t pan = ans;
-  // pan = (ans * 242L) >> 7.  That's the same as:
-  // pan = (ans * 484L) >> 8.  And this way avoids a 7X four-byte shift-loop on AVR.
-  // Identical math, except for the highest bit, which we don't care about anyway,
-  // since we're returning the 'middle' 16 out of a 32-bit value anyway.
-  pan *= 484L;
-  return (pan>>8);
-    
-  // return (uint32_t)(((int32_t)inoise16_raw(x,y)+(uint32_t)17308)*242)>>7;
-  // return scale16by8(inoise16_raw(x,y)+17308,242)<<1;
+    int32_t ans = inoise16_raw(x,y);
+    ans = ans + 17308L;
+    uint32_t pan = ans;
+    // pan = (ans * 242L) >> 7.  That's the same as:
+    // pan = (ans * 484L) >> 8.  And this way avoids a 7X four-byte shift-loop on AVR.
+    // Identical math, except for the highest bit, which we don't care about anyway,
+    // since we're returning the 'middle' 16 out of a 32-bit value anyway.
+    pan *= 484L;
+    return (pan>>8);
+
+    // return (uint32_t)(((int32_t)inoise16_raw(x,y)+(uint32_t)17308)*242)>>7;
+    // return scale16by8(inoise16_raw(x,y)+17308,242)<<1;
 }
 
 int16_t inoise16_raw(uint32_t x)
 {
-  // Find the unit cube containing the point
-  uint8_t X = x>>16;
+    // Find the unit cube containing the point
+    uint8_t X = x>>16;
 
-  // Hash cube corner coordinates
-  uint8_t A = P(X);
-  uint8_t AA = P(A);
-  uint8_t B = P(X+1);
-  uint8_t BA = P(B);
+    // Hash cube corner coordinates
+    uint8_t A = P(X);
+    uint8_t AA = P(A);
+    uint8_t B = P(X+1);
+    uint8_t BA = P(B);
 
-  // Get the relative position of the point in the cube
-  uint16_t u = x & 0xFFFF;
+    // Get the relative position of the point in the cube
+    uint16_t u = x & 0xFFFF;
 
-  // Get a signed version of the above for the grad function
-  int16_t xx = (u >> 1) & 0x7FFF;
-  uint16_t N = 0x8000L;
+    // Get a signed version of the above for the grad function
+    int16_t xx = (u >> 1) & 0x7FFF;
+    uint16_t N = 0x8000L;
 
-  u = EASE16(u);
+    u = EASE16(u);
 
-  int16_t ans = LERP(grad16(P(AA), xx), grad16(P(BA), xx - N), u);
+    int16_t ans = LERP(grad16(P(AA), xx), grad16(P(BA), xx - N), u);
 
-  return ans;
+    return ans;
 }
 
 uint16_t inoise16(uint32_t x) {
-  return ((uint32_t)((int32_t)inoise16_raw(x) + 17308L)) << 1;
+    return ((uint32_t)((int32_t)inoise16_raw(x) + 17308L)) << 1;
 }
 
 int8_t inoise8_raw(uint16_t x, uint16_t y, uint16_t z)
 {
-  // Find the unit cube containing the point
-  uint8_t X = x>>8;
-  uint8_t Y = y>>8;
-  uint8_t Z = z>>8;
-
-  // Hash cube corner coordinates
-  uint8_t A = P(X)+Y;
-  uint8_t AA = P(A)+Z;
-  uint8_t AB = P(A+1)+Z;
-  uint8_t B = P(X+1)+Y;
-  uint8_t BA = P(B) + Z;
-  uint8_t BB = P(B+1)+Z;
-
-  // Get the relative position of the point in the cube
-  uint8_t u = x;
-  uint8_t v = y;
-  uint8_t w = z;
-
-  // Get a signed version of the above for the grad function
-  int8_t xx = ((uint8_t)(x)>>1) & 0x7F;
-  int8_t yy = ((uint8_t)(y)>>1) & 0x7F;
-  int8_t zz = ((uint8_t)(z)>>1) & 0x7F;
-  uint8_t N = 0x80;
-
-  u = EASE8(u); v = EASE8(v); w = EASE8(w);
-    
-  int8_t X1 = lerp7by8(grad8(P(AA), xx, yy, zz), grad8(P(BA), xx - N, yy, zz), u);
-  int8_t X2 = lerp7by8(grad8(P(AB), xx, yy-N, zz), grad8(P(BB), xx - N, yy - N, zz), u);
-  int8_t X3 = lerp7by8(grad8(P(AA+1), xx, yy, zz-N), grad8(P(BA+1), xx - N, yy, zz-N), u);
-  int8_t X4 = lerp7by8(grad8(P(AB+1), xx, yy-N, zz-N), grad8(P(BB+1), xx - N, yy - N, zz - N), u);
+    // Find the unit cube containing the point
+    uint8_t X = x>>8;
+    uint8_t Y = y>>8;
+    uint8_t Z = z>>8;
+
+    // Hash cube corner coordinates
+    uint8_t A = P(X)+Y;
+    uint8_t AA = P(A)+Z;
+    uint8_t AB = P(A+1)+Z;
+    uint8_t B = P(X+1)+Y;
+    uint8_t BA = P(B) + Z;
+    uint8_t BB = P(B+1)+Z;
+
+    // Get the relative position of the point in the cube
+    uint8_t u = x;
+    uint8_t v = y;
+    uint8_t w = z;
+
+    // Get a signed version of the above for the grad function
+    int8_t xx = ((uint8_t)(x)>>1) & 0x7F;
+    int8_t yy = ((uint8_t)(y)>>1) & 0x7F;
+    int8_t zz = ((uint8_t)(z)>>1) & 0x7F;
+    uint8_t N = 0x80;
+
+    u = EASE8(u); v = EASE8(v); w = EASE8(w);
+
+    int8_t X1 = lerp7by8(grad8(P(AA), xx, yy, zz), grad8(P(BA), xx - N, yy, zz), u);
+    int8_t X2 = lerp7by8(grad8(P(AB), xx, yy-N, zz), grad8(P(BB), xx - N, yy - N, zz), u);
+    int8_t X3 = lerp7by8(grad8(P(AA+1), xx, yy, zz-N), grad8(P(BA+1), xx - N, yy, zz-N), u);
+    int8_t X4 = lerp7by8(grad8(P(AB+1), xx, yy-N, zz-N), grad8(P(BB+1), xx - N, yy - N, zz - N), u);
+
+    int8_t Y1 = lerp7by8(X1,X2,v);
+    int8_t Y2 = lerp7by8(X3,X4,v);
+
+    int8_t ans = lerp7by8(Y1,Y2,w);
 
-  int8_t Y1 = lerp7by8(X1,X2,v);
-  int8_t Y2 = lerp7by8(X3,X4,v);
-
-  int8_t ans = lerp7by8(Y1,Y2,w);
-
-  return ans;
+    return ans;
 }
 
 uint8_t inoise8(uint16_t x, uint16_t y, uint16_t z) {
-//  return scale8(76+(inoise8_raw(x,y,z)),215)<<1;
+    //return scale8(76+(inoise8_raw(x,y,z)),215)<<1;
     int8_t n = inoise8_raw( x, y, z);  // -64..+64
     n+= 64;                            //   0..128
     uint8_t ans = qadd8( n, n);        //   0..255
@@ -470,36 +473,36 @@ uint8_t inoise8(uint16_t x, uint16_t y, uint16_t z) {
 
 int8_t inoise8_raw(uint16_t x, uint16_t y)
 {
-  // Find the unit cube containing the point
-  uint8_t X = x>>8;
-  uint8_t Y = y>>8;
+    // Find the unit square (2D grid cell) containing the point
+    uint8_t X = x>>8;
+    uint8_t Y = y>>8;
 
-  // Hash cube corner coordinates
-  uint8_t A = P(X)+Y;
-  uint8_t AA = P(A);
-  uint8_t AB = P(A+1);
-  uint8_t B = P(X+1)+Y;
-  uint8_t BA = P(B);
-  uint8_t BB = P(B+1);
+    // Hash the coordinates of the square's four corners
+    uint8_t A = P(X)+Y;
+    uint8_t AA = P(A);
+    uint8_t AB = P(A+1);
+    uint8_t B = P(X+1)+Y;
+    uint8_t BA = P(B);
+    uint8_t BB = P(B+1);
 
-  // Get the relative position of the point in the cube
-  uint8_t u = x;
-  uint8_t v = y;
+    // Get the relative position of the point in the square
+    uint8_t u = x;
+    uint8_t v = y;
 
-  // Get a signed version of the above for the grad function
-  int8_t xx = ((uint8_t)(x)>>1) & 0x7F;
-  int8_t yy = ((uint8_t)(y)>>1) & 0x7F;
-  uint8_t N = 0x80;
+    // Get a signed version of the above for the grad function
+    int8_t xx = ((uint8_t)(x)>>1) & 0x7F;
+    int8_t yy = ((uint8_t)(y)>>1) & 0x7F;
+    uint8_t N = 0x80;
 
-  u = EASE8(u); v = EASE8(v);
-  
-  int8_t X1 = lerp7by8(grad8(P(AA), xx, yy), grad8(P(BA), xx - N, yy), u);
-  int8_t X2 = lerp7by8(grad8(P(AB), xx, yy-N), grad8(P(BB), xx - N, yy - N), u);
+    u = EASE8(u); v = EASE8(v);
 
-  int8_t ans = lerp7by8(X1,X2,v);
+    int8_t X1 = lerp7by8(grad8(P(AA), xx, yy), grad8(P(BA), xx - N, yy), u);
+    int8_t X2 = lerp7by8(grad8(P(AB), xx, yy-N), grad8(P(BB), xx - N, yy - N), u);
 
-  return ans;
-  // return scale8((70+(ans)),234)<<1;
+    int8_t ans = lerp7by8(X1,X2,v);
+
+    return ans;
+    // return scale8((70+(ans)),234)<<1;
 }
 
 
diff --git a/pixelset.h b/pixelset.h
index c9272ef2d9..b8488c2ced 100644
--- a/pixelset.h
+++ b/pixelset.h
@@ -13,279 +13,279 @@
 template<class PIXEL_TYPE>
 class CPixelView {
 public:
-  const int8_t  dir;
-  const int   len;
-  PIXEL_TYPE * const leds;
-  PIXEL_TYPE * const end_pos;
+    const int8_t  dir;
+    const int   len;
+    PIXEL_TYPE * const leds;
+    PIXEL_TYPE * const end_pos;
 
 public:
+    /// PixelSet copy constructor
+    inline CPixelView(const CPixelView & other) : dir(other.dir), len(other.len), leds(other.leds), end_pos(other.end_pos) {}
+
+    /// pixelset constructor for a pixel set starting at the given PIXEL_TYPE* and going for _len leds.  Note that the length
+    /// can be backwards, creating a PixelSet that walks backwards over the data
+    /// @param _leds pointer to the raw led data
+    /// @param _len how many leds in this set (negative for a set that walks backwards)
+    inline CPixelView(PIXEL_TYPE *_leds, int _len) : dir(_len < 0 ? -1 : 1), len(_len), leds(_leds), end_pos(_leds + _len) {}
+
+    /// PixelSet constructor for the given set of leds, with start and end boundaries.  Note that start can be after
+    /// end, resulting in a set that will iterate backwards
+    /// @param _leds pointer to the raw led data
+    /// @param _start the start index of the leds for this array
+    /// @param _end the end index of the leds for this array (inclusive)
+    inline CPixelView(PIXEL_TYPE *_leds, int _start, int _end) : dir(((_end-_start)<0) ? -1 : 1), len((_end - _start) + dir), leds(_leds + _start), end_pos(_leds + _start + len) {}
+
+    /// Get the size of this set
+    /// @return the size of the set
+    int size() const { return abs(len); } // const: only reads len, so it is usable on const views
+
+    /// Whether or not this set goes backwards
+    /// @return whether or not the set is backwards
+    bool reversed() const { return len < 0; } // const: only reads len, so it is usable on const views
+
+    /// do these sets point to the same thing (note, this is different from the contents of the set being the same)
+    bool operator==(const CPixelView & rhs) const { return leds == rhs.leds && len == rhs.len && dir == rhs.dir; }
+
+    /// do these sets point to different things (note, this compares what the sets refer to, not the contents of the sets)
+    bool operator!=(const CPixelView & rhs) const { return leds != rhs.leds || len != rhs.len || dir != rhs.dir; }
+
+    /// access a single element in this set, just like an array operator
+    inline PIXEL_TYPE & operator[](int x) const { if(dir & 0x80) { return leds[-x]; } else { return leds[x]; } }
+
+    /// Access an inclusive subset of the leds in this set.  Note that start can be greater than end, which will
+    /// result in a reverse ordering for many functions (useful for mirroring)
+    /// @param start the first element from this set for the new subset
+    /// @param end the last element for the new subset
+    inline CPixelView operator()(int start, int end) { return CPixelView(leds, start, end); }
+
+    /// Access an inclusive subset of the leds in this set, starting from the first.
+    /// @param end the last element for the new subset
+    /// Not sure i want this? inline CPixelView operator()(int end) { return CPixelView(leds, 0, end); }
+
+    /// Return the reverse ordering of this set
+    inline CPixelView operator-() { return CPixelView(leds, len - dir, 0); }
+
+    /// Return a pointer to the first element in this set
+    inline operator PIXEL_TYPE* () const { return leds; }
+
+    /// Assign the passed in color to all elements in this set
+    /// @param color the new color for the elements in the set
+    inline CPixelView & operator=(const PIXEL_TYPE & color) {
+        for(iterator pixel = begin(), _end = end(); pixel != _end; ++pixel) { (*pixel) = color; }
+        return *this;
+    }
+
+
+    void dump() const {
+        /**
+        Serial.print("len: "); Serial.print(len); Serial.print(", dir:"); Serial.print((int)dir);
+        Serial.print(", range:"); Serial.print((uint32_t)leds); Serial.print("-"); Serial.print((uint32_t)end_pos);
+        Serial.print(", diff:"); Serial.print((int32_t)(end_pos - leds));
+        Serial.println("");
+        **/
+    }
 
-  /// PixelSet copy constructor
-  inline CPixelView(const CPixelView & other) : dir(other.dir), len(other.len), leds(other.leds), end_pos(other.end_pos) {}
-
-  /// pixelset constructor for a pixel set starting at the given PIXEL_TYPE* and going for _len leds.  Note that the length
-  /// can be backwards, creating a PixelSet that walks backwards over the data
-  /// @param leds point to the raw led data
-  /// @param len how many leds in this set
-  inline CPixelView(PIXEL_TYPE *_leds, int _len) : dir(_len < 0 ? -1 : 1), len(_len), leds(_leds), end_pos(_leds + _len) {}
-
-  /// PixelSet constructor for the given set of leds, with start and end boundaries.  Note that start can be after
-  /// end, resulting in a set that will iterate backwards
-  /// @param leds point to the raw led data
-  /// @param start the start index of the leds for this array
-  /// @param end the end index of the leds for this array
-  inline CPixelView(PIXEL_TYPE *_leds, int _start, int _end) : dir(((_end-_start)<0) ? -1 : 1), len((_end - _start) + dir), leds(_leds + _start), end_pos(_leds + _start + len) {}
-
-  /// Get the size of this set
-  /// @return the size of the set
-  int size() { return abs(len); }
-
-  /// Whether or not this set goes backwards
-  /// @return whether or not the set is backwards
-  bool reversed() { return len < 0; }
-
-  /// do these sets point to the same thing (note, this is different from the contents of the set being the same)
-  bool operator==(const CPixelView & rhs) const { return leds == rhs.leds && len == rhs.len && dir == rhs.dir; }
-
-  /// do these sets point to the different things (note, this is different from the contents of the set being the same)
-  bool operator!=(const CPixelView & rhs) const { return leds != rhs.leds || len != rhs.len || dir != rhs.dir; }
-
-  /// access a single element in this set, just like an array operator
-  inline PIXEL_TYPE & operator[](int x) const { if(dir & 0x80) { return leds[-x]; } else { return leds[x]; } }
-
-  /// Access an inclusive subset of the leds in this set.  Note that start can be greater than end, which will
-  /// result in a reverse ordering for many functions (useful for mirroring)
-  /// @param start the first element from this set for the new subset
-  /// @param end the last element for the new subset
-  inline CPixelView operator()(int start, int end) { return CPixelView(leds, start, end); }
-
-  /// Access an inclusive subset of the leds in this set, starting from the first.
-  /// @param end the last element for the new subset
-  /// Not sure i want this? inline CPixelView operator()(int end) { return CPixelView(leds, 0, end); }
-
-  /// Return the reverse ordering of this set
-  inline CPixelView operator-() { return CPixelView(leds, len - dir, 0); }
-
-  /// Return a pointer to the first element in this set
-  inline operator PIXEL_TYPE* () const { return leds; }
-
-  /// Assign the passed in color to all elements in this set
-  /// @param color the new color for the elements in the set
-  inline CPixelView & operator=(const PIXEL_TYPE & color) {
-    for(iterator pixel = begin(), _end = end(); pixel != _end; ++pixel) { (*pixel) = color; }
-    return *this;
-  }
-
-
-  void dump() const {
-/**
-    Serial.print("len: "); Serial.print(len); Serial.print(", dir:"); Serial.print((int)dir);
-    Serial.print(", range:"); Serial.print((uint32_t)leds); Serial.print("-"); Serial.print((uint32_t)end_pos);
-    Serial.print(", diff:"); Serial.print((int32_t)(end_pos - leds));
-    Serial.println("");
- **/
- }
-
-  /// Copy the contents of the passed in set to our set.  Note if one set is smaller than the other, only the
-  /// smallest number of items will be copied over.
-  inline CPixelView & operator=(const CPixelView & rhs) {
-    for(iterator pixel = begin(), rhspixel = rhs.begin(), _end = end(), rhs_end = rhs.end(); (pixel != _end) && (rhspixel != rhs_end); ++pixel, ++rhspixel) {
-      (*pixel) = (*rhspixel);
+    /// Copy the contents of the passed in set to our set.  Note if one set is smaller than the other, only the
+    /// smallest number of items will be copied over.
+    inline CPixelView & operator=(const CPixelView & rhs) {
+        for(iterator pixel = begin(), rhspixel = rhs.begin(), _end = end(), rhs_end = rhs.end(); (pixel != _end) && (rhspixel != rhs_end); ++pixel, ++rhspixel) {
+            (*pixel) = (*rhspixel);
+        }
+        return *this;
     }
-    return *this;
-  }
-
-  /// @name modification/scaling operators
-  //@{
-  /// Add the passed in value to r,g, b for all the pixels in this set
-  inline CPixelView & addToRGB(uint8_t inc) { for(iterator pixel = begin(), _end = end(); pixel != _end; ++pixel) { (*pixel) += inc; } return *this; }
-  /// Add every pixel in the other set to this set
-  inline CPixelView & operator+=(CPixelView & rhs) { for(iterator pixel = begin(), rhspixel = rhs.begin(), _end = end(), rhs_end = rhs.end(); (pixel != _end) && (rhspixel != rhs_end); ++pixel, ++rhspixel) { (*pixel) += (*rhspixel); } return *this; }
-
-  /// Subtract the passed in value from r,g,b for all pixels in this set
-  inline CPixelView & subFromRGB(uint8_t inc) { for(iterator pixel = begin(), _end = end(); pixel != _end; ++pixel) { (*pixel) -= inc; } return *this; }
-  /// Subtract every pixel in the other set from this set
-  inline CPixelView & operator-=(CPixelView & rhs) { for(iterator pixel = begin(), rhspixel = rhs.begin(), _end = end(), rhs_end = rhs.end(); (pixel != _end) && (rhspixel != rhs_end); ++pixel, ++rhspixel) { (*pixel) -= (*rhspixel); } return *this; }
-
-  /// Increment every pixel value in this set
-  inline CPixelView & operator++() { for(iterator pixel = begin(), _end = end(); pixel != _end; ++pixel) { (*pixel)++; } return *this; }
-  /// Increment every pixel value in this set
-  inline CPixelView & operator++(int DUMMY_ARG) { for(iterator pixel = begin(), _end = end(); pixel != _end; ++pixel) { (*pixel)++; } return *this; }
-
-  /// Decrement every pixel value in this set
-  inline CPixelView & operator--() { for(iterator pixel = begin(), _end = end(); pixel != _end; ++pixel) { (*pixel)--; } return *this; }
-  /// Decrement every pixel value in this set
-  inline CPixelView & operator--(int DUMMY_ARG) { for(iterator pixel = begin(), _end = end(); pixel != _end; ++pixel) { (*pixel)--; } return *this; }
-
-  /// Divide every led by the given value
-  inline CPixelView & operator/=(uint8_t d) { for(iterator pixel = begin(), _end = end(); pixel != _end; ++pixel) { (*pixel) /= d; } return *this; }
-  /// Shift every led in this set right by the given number of bits
-  inline CPixelView & operator>>=(uint8_t d) { for(iterator pixel = begin(), _end = end(); pixel != _end; ++pixel) { (*pixel) >>= d; } return *this; }
-  /// Multiply every led in this set by the given value
-  inline CPixelView & operator*=(uint8_t d) { for(iterator pixel = begin(), _end = end(); pixel != _end; ++pixel) { (*pixel) *= d; } return *this; }
-
-  /// Scale every led by the given scale
-  inline CPixelView & nscale8_video(uint8_t scaledown) { for(iterator pixel = begin(), _end = end(); pixel != _end; ++pixel) { (*pixel).nscale8_video(scaledown); } return *this;}
-  /// Scale down every led by the given scale
-  inline CPixelView & operator%=(uint8_t scaledown) { for(iterator pixel = begin(), _end = end(); pixel != _end; ++pixel) { (*pixel).nscale8_video(scaledown); } return *this; }
-  /// Fade every led down by the given scale
-  inline CPixelView & fadeLightBy(uint8_t fadefactor) { return nscale8_video(255 - fadefactor); }
-
-  /// Scale every led by the given scale
-  inline CPixelView & nscale8(uint8_t scaledown) { for(iterator pixel = begin(), _end = end(); pixel != _end; ++pixel) { (*pixel).nscale8(scaledown); } return *this; }
-  /// Scale every led by the given scale
-  inline CPixelView & nscale8(PIXEL_TYPE & scaledown) { for(iterator pixel = begin(), _end = end(); pixel != _end; ++pixel) { (*pixel).nscale8(scaledown); } return *this; }
-  /// Scale every led in this set by every led in the other set
-  inline CPixelView & nscale8(CPixelView & rhs) { for(iterator pixel = begin(), rhspixel = rhs.begin(), _end = end(), rhs_end = rhs.end(); (pixel != _end) && (rhspixel != rhs_end); ++pixel, ++rhspixel) { (*pixel).nscale8((*rhspixel)); } return *this; }
-
-  /// Fade every led down by the given scale
-  inline CPixelView & fadeToBlackBy(uint8_t fade) { return nscale8(255 - fade); }
-
-  /// Apply the PIXEL_TYPE |= operator to every pixel in this set with the given PIXEL_TYPE value (bringing each channel to the higher of the two values)
-  inline CPixelView & operator|=(const PIXEL_TYPE & rhs) { for(iterator pixel = begin(), _end = end(); pixel != _end; ++pixel) { (*pixel) |= rhs; } return *this; }
-  /// Apply the PIXEL_TYPE |= operator to every pixel in this set with every pixel in the passed in set
-  inline CPixelView & operator|=(const CPixelView & rhs) { for(iterator pixel = begin(), rhspixel = rhs.begin(), _end = end(), rhs_end = rhs.end(); (pixel != _end) && (rhspixel != rhs_end); ++pixel, ++rhspixel) { (*pixel) |= (*rhspixel); } return *this; }
-  /// Apply the PIXEL_TYPE |= operator to every pixel in this set
-  inline CPixelView & operator|=(uint8_t d) { for(iterator pixel = begin(), _end = end(); pixel != _end; ++pixel) { (*pixel) |= d; } return *this; }
-
-  /// Apply the PIXEL_TYPE &= operator to every pixel in this set with the given PIXEL_TYPE value (bringing each channel down to the lower of the two values)
-  inline CPixelView & operator&=(const PIXEL_TYPE & rhs) { for(iterator pixel = begin(), _end = end(); pixel != _end; ++pixel) { (*pixel) &= rhs; } return *this; }
-  /// Apply the PIXEL_TYPE &= operator to every pixel in this set with every pixel in the passed in set
-  inline CPixelView & operator&=(const CPixelView & rhs) { for(iterator pixel = begin(), rhspixel = rhs.begin(), _end = end(), rhs_end = rhs.end(); (pixel != _end) && (rhspixel != rhs_end); ++pixel, ++rhspixel) { (*pixel) &= (*rhspixel); } return *this; }
-  /// APply the PIXEL_TYPE &= operator to every pixel in this set with the passed in value
-  inline CPixelView & operator&=(uint8_t d) { for(iterator pixel = begin(), _end = end(); pixel != _end; ++pixel) { (*pixel) &= d; } return *this; }
-  //@}
-
-  /// Returns whether or not any leds in this set are non-zero
-  inline operator bool() { for(iterator pixel = begin(), _end = end(); pixel != _end; ++pixel) { if((*pixel)) return true; } return false; }
-
-  // Color util functions
-  inline CPixelView & fill_solid(const PIXEL_TYPE & color) { *this = color; return *this; }
-  inline CPixelView & fill_solid(const CHSV & color) { if(dir>0) { *this = color; return *this; } }
-
-  inline CPixelView & fill_rainbow(uint8_t initialhue, uint8_t deltahue=5) {
-    if(dir >= 0) {
-      ::fill_rainbow(leds,len,initialhue,deltahue);
-    } else {
-      ::fill_rainbow(leds+len+1,-len,initialhue,deltahue);
+
+    /// @name modification/scaling operators
+    //@{
+    /// Add the passed in value to r,g, b for all the pixels in this set
+    inline CPixelView & addToRGB(uint8_t inc) { for(iterator pixel = begin(), _end = end(); pixel != _end; ++pixel) { (*pixel) += inc; } return *this; }
+    /// Add every pixel in the other set to this set
+    inline CPixelView & operator+=(CPixelView & rhs) { for(iterator pixel = begin(), rhspixel = rhs.begin(), _end = end(), rhs_end = rhs.end(); (pixel != _end) && (rhspixel != rhs_end); ++pixel, ++rhspixel) { (*pixel) += (*rhspixel); } return *this; }
+
+    /// Subtract the passed in value from r,g,b for all pixels in this set
+    inline CPixelView & subFromRGB(uint8_t inc) { for(iterator pixel = begin(), _end = end(); pixel != _end; ++pixel) { (*pixel) -= inc; } return *this; }
+    /// Subtract every pixel in the other set from this set
+    inline CPixelView & operator-=(CPixelView & rhs) { for(iterator pixel = begin(), rhspixel = rhs.begin(), _end = end(), rhs_end = rhs.end(); (pixel != _end) && (rhspixel != rhs_end); ++pixel, ++rhspixel) { (*pixel) -= (*rhspixel); } return *this; }
+
+    /// Increment every pixel value in this set
+    inline CPixelView & operator++() { for(iterator pixel = begin(), _end = end(); pixel != _end; ++pixel) { (*pixel)++; } return *this; }
+    /// Increment every pixel value in this set (postfix form; returns a reference to this set, not a copy of the old values)
+    inline CPixelView & operator++(int DUMMY_ARG) { for(iterator pixel = begin(), _end = end(); pixel != _end; ++pixel) { (*pixel)++; } return *this; }
+
+    /// Decrement every pixel value in this set
+    inline CPixelView & operator--() { for(iterator pixel = begin(), _end = end(); pixel != _end; ++pixel) { (*pixel)--; } return *this; }
+    /// Decrement every pixel value in this set (postfix form; returns a reference to this set, not a copy of the old values)
+    inline CPixelView & operator--(int DUMMY_ARG) { for(iterator pixel = begin(), _end = end(); pixel != _end; ++pixel) { (*pixel)--; } return *this; }
+
+    /// Divide every led by the given value
+    inline CPixelView & operator/=(uint8_t d) { for(iterator pixel = begin(), _end = end(); pixel != _end; ++pixel) { (*pixel) /= d; } return *this; }
+    /// Shift every led in this set right by the given number of bits
+    inline CPixelView & operator>>=(uint8_t d) { for(iterator pixel = begin(), _end = end(); pixel != _end; ++pixel) { (*pixel) >>= d; } return *this; }
+    /// Multiply every led in this set by the given value
+    inline CPixelView & operator*=(uint8_t d) { for(iterator pixel = begin(), _end = end(); pixel != _end; ++pixel) { (*pixel) *= d; } return *this; }
+
+    /// Scale every led by the given scale
+    inline CPixelView & nscale8_video(uint8_t scaledown) { for(iterator pixel = begin(), _end = end(); pixel != _end; ++pixel) { (*pixel).nscale8_video(scaledown); } return *this;}
+    /// Scale down every led by the given scale
+    inline CPixelView & operator%=(uint8_t scaledown) { for(iterator pixel = begin(), _end = end(); pixel != _end; ++pixel) { (*pixel).nscale8_video(scaledown); } return *this; }
+    /// Fade every led down by the given scale
+    inline CPixelView & fadeLightBy(uint8_t fadefactor) { return nscale8_video(255 - fadefactor); }
+
+    /// Scale every led by the given scale
+    inline CPixelView & nscale8(uint8_t scaledown) { for(iterator pixel = begin(), _end = end(); pixel != _end; ++pixel) { (*pixel).nscale8(scaledown); } return *this; }
+    /// Scale every led by the given scale
+    inline CPixelView & nscale8(PIXEL_TYPE & scaledown) { for(iterator pixel = begin(), _end = end(); pixel != _end; ++pixel) { (*pixel).nscale8(scaledown); } return *this; }
+    /// Scale every led in this set by every led in the other set
+    inline CPixelView & nscale8(CPixelView & rhs) { for(iterator pixel = begin(), rhspixel = rhs.begin(), _end = end(), rhs_end = rhs.end(); (pixel != _end) && (rhspixel != rhs_end); ++pixel, ++rhspixel) { (*pixel).nscale8((*rhspixel)); } return *this; }
+
+    /// Fade every led down by the given scale
+    inline CPixelView & fadeToBlackBy(uint8_t fade) { return nscale8(255 - fade); }
+
+    /// Apply the PIXEL_TYPE |= operator to every pixel in this set with the given PIXEL_TYPE value (bringing each channel to the higher of the two values)
+    inline CPixelView & operator|=(const PIXEL_TYPE & rhs) { for(iterator pixel = begin(), _end = end(); pixel != _end; ++pixel) { (*pixel) |= rhs; } return *this; }
+    /// Apply the PIXEL_TYPE |= operator to every pixel in this set with every pixel in the passed in set
+    inline CPixelView & operator|=(const CPixelView & rhs) { for(iterator pixel = begin(), rhspixel = rhs.begin(), _end = end(), rhs_end = rhs.end(); (pixel != _end) && (rhspixel != rhs_end); ++pixel, ++rhspixel) { (*pixel) |= (*rhspixel); } return *this; }
+    /// Apply the PIXEL_TYPE |= operator to every pixel in this set with the passed in value
+    inline CPixelView & operator|=(uint8_t d) { for(iterator pixel = begin(), _end = end(); pixel != _end; ++pixel) { (*pixel) |= d; } return *this; }
+
+    /// Apply the PIXEL_TYPE &= operator to every pixel in this set with the given PIXEL_TYPE value (bringing each channel down to the lower of the two values)
+    inline CPixelView & operator&=(const PIXEL_TYPE & rhs) { for(iterator pixel = begin(), _end = end(); pixel != _end; ++pixel) { (*pixel) &= rhs; } return *this; }
+    /// Apply the PIXEL_TYPE &= operator to every pixel in this set with every pixel in the passed in set
+    inline CPixelView & operator&=(const CPixelView & rhs) { for(iterator pixel = begin(), rhspixel = rhs.begin(), _end = end(), rhs_end = rhs.end(); (pixel != _end) && (rhspixel != rhs_end); ++pixel, ++rhspixel) { (*pixel) &= (*rhspixel); } return *this; }
+    /// Apply the PIXEL_TYPE &= operator to every pixel in this set with the passed in value
+    inline CPixelView & operator&=(uint8_t d) { for(iterator pixel = begin(), _end = end(); pixel != _end; ++pixel) { (*pixel) &= d; } return *this; }
+    //@}
+
+    /// Returns whether or not any leds in this set are non-zero
+    inline operator bool() { for(iterator pixel = begin(), _end = end(); pixel != _end; ++pixel) { if((*pixel)) return true; } return false; }
+
+    // Color util functions
+    inline CPixelView & fill_solid(const PIXEL_TYPE & color) { *this = color; return *this; }
+    inline CPixelView & fill_solid(const CHSV & color) { *this = color; return *this; } // fills every pixel (forward or reversed set) and always returns *this
+
+    inline CPixelView & fill_rainbow(uint8_t initialhue, uint8_t deltahue=5) {
+        if(dir >= 0) {
+            ::fill_rainbow(leds,len,initialhue,deltahue);
+        } else {
+            ::fill_rainbow(leds+len+1,-len,initialhue,deltahue);
+        }
+        return *this;
     }
-    return *this;
-  }
-
-  inline CPixelView & fill_gradient(const CHSV & startcolor, const CHSV & endcolor, TGradientDirectionCode directionCode  = SHORTEST_HUES) {
-    if(dir >= 0) {
-      ::fill_gradient(leds,len,startcolor, endcolor, directionCode);
-    } else {
-      ::fill_gradient(leds + len + 1, (-len), endcolor, startcolor, directionCode);
+
+    inline CPixelView & fill_gradient(const CHSV & startcolor, const CHSV & endcolor, TGradientDirectionCode directionCode  = SHORTEST_HUES) {
+        if(dir >= 0) {
+            ::fill_gradient(leds,len,startcolor, endcolor, directionCode);
+        } else {
+            ::fill_gradient(leds + len + 1, (-len), endcolor, startcolor, directionCode);
+        }
+        return *this;
     }
-    return *this;
-  }
-
-  inline CPixelView & fill_gradient(const CHSV & c1, const CHSV & c2, const CHSV &  c3, TGradientDirectionCode directionCode = SHORTEST_HUES) {
-    if(dir >= 0) {
-      ::fill_gradient(leds, len, c1, c2, c3, directionCode);
-    } else {
-      ::fill_gradient(leds + len + 1, -len, c3, c2, c1, directionCode);
+
+    inline CPixelView & fill_gradient(const CHSV & c1, const CHSV & c2, const CHSV &  c3, TGradientDirectionCode directionCode = SHORTEST_HUES) {
+        if(dir >= 0) {
+            ::fill_gradient(leds, len, c1, c2, c3, directionCode);
+        } else {
+            ::fill_gradient(leds + len + 1, -len, c3, c2, c1, directionCode);
+        }
+        return *this;
     }
-    return *this;
-  }
-
-  inline CPixelView & fill_gradient(const CHSV & c1, const CHSV & c2, const CHSV & c3, const CHSV & c4, TGradientDirectionCode directionCode = SHORTEST_HUES) {
-    if(dir >= 0) {
-      ::fill_gradient(leds, len, c1, c2, c3, c4, directionCode);
-    } else {
-      ::fill_gradient(leds + len + 1, -len, c4, c3, c2, c1, directionCode);
+
+    inline CPixelView & fill_gradient(const CHSV & c1, const CHSV & c2, const CHSV & c3, const CHSV & c4, TGradientDirectionCode directionCode = SHORTEST_HUES) {
+        if(dir >= 0) {
+            ::fill_gradient(leds, len, c1, c2, c3, c4, directionCode);
+        } else {
+            ::fill_gradient(leds + len + 1, -len, c4, c3, c2, c1, directionCode);
+        }
+        return *this;
     }
-    return *this;
-  }
-
-  inline CPixelView & fill_gradient_RGB(const PIXEL_TYPE & startcolor, const PIXEL_TYPE & endcolor, TGradientDirectionCode directionCode  = SHORTEST_HUES) {
-    if(dir >= 0) {
-      ::fill_gradient_RGB(leds,len,startcolor, endcolor);
-    } else {
-      ::fill_gradient_RGB(leds + len + 1, (-len), endcolor, startcolor);
+
+    inline CPixelView & fill_gradient_RGB(const PIXEL_TYPE & startcolor, const PIXEL_TYPE & endcolor, TGradientDirectionCode directionCode  = SHORTEST_HUES) {
+        if(dir >= 0) {
+            ::fill_gradient_RGB(leds,len,startcolor, endcolor);
+        } else {
+            ::fill_gradient_RGB(leds + len + 1, (-len), endcolor, startcolor);
+        }
+        return *this;
     }
-    return *this;
-  }
-
-  inline CPixelView & fill_gradient_RGB(const PIXEL_TYPE & c1, const PIXEL_TYPE & c2, const PIXEL_TYPE &  c3) {
-    if(dir >= 0) {
-      ::fill_gradient_RGB(leds, len, c1, c2, c3);
-    } else {
-      ::fill_gradient_RGB(leds + len + 1, -len, c3, c2, c1);
+
+    inline CPixelView & fill_gradient_RGB(const PIXEL_TYPE & c1, const PIXEL_TYPE & c2, const PIXEL_TYPE &  c3) {
+        if(dir >= 0) {
+            ::fill_gradient_RGB(leds, len, c1, c2, c3);
+        } else {
+            ::fill_gradient_RGB(leds + len + 1, -len, c3, c2, c1);
+        }
+        return *this;
     }
-    return *this;
-  }
-
-  inline CPixelView & fill_gradient_RGB(const PIXEL_TYPE & c1, const PIXEL_TYPE & c2, const PIXEL_TYPE & c3, const PIXEL_TYPE & c4) {
-    if(dir >= 0) {
-      ::fill_gradient_RGB(leds, len, c1, c2, c3, c4);
-    } else {
-      ::fill_gradient_RGB(leds + len + 1, -len, c4, c3, c2, c1);
+
+    inline CPixelView & fill_gradient_RGB(const PIXEL_TYPE & c1, const PIXEL_TYPE & c2, const PIXEL_TYPE & c3, const PIXEL_TYPE & c4) {
+        if(dir >= 0) {
+            ::fill_gradient_RGB(leds, len, c1, c2, c3, c4);
+        } else {
+            ::fill_gradient_RGB(leds + len + 1, -len, c4, c3, c2, c1);
+        }
+        return *this;
     }
-    return *this;
-  }
-
-  inline CPixelView & nblend(const PIXEL_TYPE & overlay, fract8 amountOfOverlay) { for(iterator pixel = begin(), _end = end(); pixel != _end; ++pixel) { ::nblend((*pixel), overlay, amountOfOverlay); } return *this; }
-  inline CPixelView & nblend(const CPixelView & rhs, fract8 amountOfOverlay) { for(iterator pixel = begin(), rhspixel = rhs.begin(), _end = end(), rhs_end = rhs.end(); (pixel != _end) && (rhspixel != rhs_end); ++pixel, ++rhspixel) { ::nblend((*pixel), (*rhspixel), amountOfOverlay); } return *this; }
-
-  // Note: only bringing in a 1d blur, not sure 2d blur makes sense when looking at sub arrays
-  inline CPixelView & blur1d(fract8 blur_amount) {
-    if(dir >= 0) {
-      ::blur1d(leds, len, blur_amount);
-    } else {
-      ::blur1d(leds + len + 1, -len, blur_amount);
+
+    inline CPixelView & nblend(const PIXEL_TYPE & overlay, fract8 amountOfOverlay) { for(iterator pixel = begin(), _end = end(); pixel != _end; ++pixel) { ::nblend((*pixel), overlay, amountOfOverlay); } return *this; }
+    inline CPixelView & nblend(const CPixelView & rhs, fract8 amountOfOverlay) { for(iterator pixel = begin(), rhspixel = rhs.begin(), _end = end(), rhs_end = rhs.end(); (pixel != _end) && (rhspixel != rhs_end); ++pixel, ++rhspixel) { ::nblend((*pixel), (*rhspixel), amountOfOverlay); } return *this; }
+
+    // Note: only bringing in a 1d blur, not sure 2d blur makes sense when looking at sub arrays
+    inline CPixelView & blur1d(fract8 blur_amount) {
+        if(dir >= 0) {
+            ::blur1d(leds, len, blur_amount);
+        } else {
+            ::blur1d(leds + len + 1, -len, blur_amount);
+        }
+        return *this;
     }
-    return *this;
-  }
-
-  inline CPixelView & napplyGamma_video(float gamma) {
-    if(dir >= 0) {
-      ::napplyGamma_video(leds, len, gamma);
-    } else {
-      ::napplyGamma_video(leds + len + 1, -len, gamma);
+
+    inline CPixelView & napplyGamma_video(float gamma) {
+        if(dir >= 0) {
+            ::napplyGamma_video(leds, len, gamma);
+        } else {
+            ::napplyGamma_video(leds + len + 1, -len, gamma);
+        }
+        return *this;
     }
-    return *this;
-  }
-
-  inline CPixelView & napplyGamma_video(float gammaR, float gammaG, float gammaB) {
-    if(dir >= 0) {
-      ::napplyGamma_video(leds, len, gammaR, gammaG, gammaB);
-    } else {
-      ::napplyGamma_video(leds + len + 1, -len, gammaR, gammaG, gammaB);
+
+    inline CPixelView & napplyGamma_video(float gammaR, float gammaG, float gammaB) {
+        if(dir >= 0) {
+            ::napplyGamma_video(leds, len, gammaR, gammaG, gammaB);
+        } else {
+            ::napplyGamma_video(leds + len + 1, -len, gammaR, gammaG, gammaB);
+        }
+        return *this;
     }
-    return *this;
-  }
 
-  // TODO: Make this a fully specified/proper iterator
-  template <class T>
-  class pixelset_iterator_base {
-    T * leds;
-    const int8_t dir;
-  public:
-    __attribute__((always_inline)) inline pixelset_iterator_base(const pixelset_iterator_base & rhs) : leds(rhs.leds), dir(rhs.dir) {}
-    __attribute__((always_inline)) inline pixelset_iterator_base(T * _leds, const char _dir) : leds(_leds), dir(_dir) {}
+    // TODO: Make this a fully specified/proper iterator
+    template <class T>
+    class pixelset_iterator_base {
+        T * leds;
+        const int8_t dir;
 
-    __attribute__((always_inline)) inline pixelset_iterator_base& operator++() { leds += dir; return *this; }
-    __attribute__((always_inline)) inline pixelset_iterator_base operator++(int) { pixelset_iterator_base tmp(*this); leds += dir; return tmp; }
+    public:
+        __attribute__((always_inline)) inline pixelset_iterator_base(const pixelset_iterator_base & rhs) : leds(rhs.leds), dir(rhs.dir) {}
+        __attribute__((always_inline)) inline pixelset_iterator_base(T * _leds, const char _dir) : leds(_leds), dir(_dir) {}
 
-    __attribute__((always_inline)) inline bool operator==(pixelset_iterator_base & other) const { return leds == other.leds; } // && set==other.set; }
-    __attribute__((always_inline)) inline bool operator!=(pixelset_iterator_base & other) const { return leds != other.leds; } // || set != other.set; }
+        __attribute__((always_inline)) inline pixelset_iterator_base& operator++() { leds += dir; return *this; }
+        __attribute__((always_inline)) inline pixelset_iterator_base operator++(int) { pixelset_iterator_base tmp(*this); leds += dir; return tmp; }
 
-    __attribute__((always_inline)) inline PIXEL_TYPE& operator*() const { return *leds; }
-  };
+        __attribute__((always_inline)) inline bool operator==(const pixelset_iterator_base & other) const { return leds == other.leds; } // compares position only; && set==other.set; }
+        __attribute__((always_inline)) inline bool operator!=(const pixelset_iterator_base & other) const { return leds != other.leds; } // compares position only; || set != other.set; }
 
-  typedef pixelset_iterator_base<PIXEL_TYPE> iterator;
-  typedef pixelset_iterator_base<const PIXEL_TYPE> const_iterator;
+        __attribute__((always_inline)) inline T& operator*() const { return *leds; } // T& (not PIXEL_TYPE&) so const_iterator yields a const reference
+    };
 
-  iterator begin() { return iterator(leds, dir); }
-  iterator end() { return iterator(end_pos, dir); }
+    typedef pixelset_iterator_base<PIXEL_TYPE> iterator;
+    typedef pixelset_iterator_base<const PIXEL_TYPE> const_iterator;
 
-  iterator begin() const { return iterator(leds, dir); }
-  iterator end() const { return iterator(end_pos, dir); }
+    iterator begin() { return iterator(leds, dir); }
+    iterator end() { return iterator(end_pos, dir); }
 
-  const_iterator cbegin() const { return const_iterator(leds, dir); }
-  const_iterator cend() const { return const_iterator(end_pos, dir); }
+    iterator begin() const { return iterator(leds, dir); }
+    iterator end() const { return iterator(end_pos, dir); }
+
+    const_iterator cbegin() const { return const_iterator(leds, dir); }
+    const_iterator cend() const { return const_iterator(end_pos, dir); }
 };
 
 typedef CPixelView<CRGB> CRGBSet;
@@ -296,10 +296,11 @@ inline CRGB *operator+(const CRGBSet & pixels, int offset) { return (CRGB*)pixel
 
 template<int SIZE>
 class CRGBArray : public CPixelView<CRGB> {
-  CRGB rawleds[SIZE];
+    CRGB rawleds[SIZE];
+
 public:
-  CRGBArray() : CPixelView<CRGB>(rawleds, SIZE) {}
-  using CPixelView::operator=;
+    CRGBArray() : CPixelView<CRGB>(rawleds, SIZE) {}
+    using CPixelView::operator=;
 };
 
 #endif
diff --git a/pixeltypes.h b/pixeltypes.h
index f4e57061e5..b3a3ff6959 100644
--- a/pixeltypes.h
+++ b/pixeltypes.h
@@ -317,10 +317,10 @@ struct CRGB {
     /// right shift each of the channels by a constant
     inline CRGB& operator>>= (uint8_t d)
     {
-      r >>= d;
-      g >>= d;
-      b >>= d;
-      return *this;
+        r >>= d;
+        g >>= d;
+        b >>= d;
+        return *this;
     }
 
     /// multiply each of the channels by a constant,
@@ -450,11 +450,11 @@ struct CRGB {
 
 #if (defined SmartMatrix_h || defined SmartMatrix3_h)
     operator rgb24() const {
-      rgb24 ret;
-      ret.red = r;
-      ret.green = g;
-      ret.blue = b;
-      return ret;
+        rgb24 ret;
+        ret.red = r;
+        ret.green = g;
+        ret.blue = b;
+        return ret;
     }
 #endif
 
@@ -503,25 +503,25 @@ struct CRGB {
     /// return a new CRGB object after performing a linear interpolation between this object and the passed in object
     inline CRGB lerp8( const CRGB& other, fract8 frac) const
     {
-      CRGB ret;
+        CRGB ret;
 
-      ret.r = lerp8by8(r,other.r,frac);
-      ret.g = lerp8by8(g,other.g,frac);
-      ret.b = lerp8by8(b,other.b,frac);
+        ret.r = lerp8by8(r,other.r,frac);
+        ret.g = lerp8by8(g,other.g,frac);
+        ret.b = lerp8by8(b,other.b,frac);
 
-      return ret;
+        return ret;
     }
 
     /// return a new CRGB object after performing a linear interpolation between this object and the passed in object
     inline CRGB lerp16( const CRGB& other, fract16 frac) const
     {
-      CRGB ret;
+        CRGB ret;
 
-      ret.r = lerp16by16(r<<8,other.r<<8,frac)>>8;
-      ret.g = lerp16by16(g<<8,other.g<<8,frac)>>8;
-      ret.b = lerp16by16(b<<8,other.b<<8,frac)>>8;
+        ret.r = lerp16by16(r<<8,other.r<<8,frac)>>8;
+        ret.g = lerp16by16(g<<8,other.g<<8,frac)>>8;
+        ret.b = lerp16by16(b<<8,other.b<<8,frac)>>8;
 
-      return ret;
+        return ret;
     }
 
     /// getParity returns 0 or 1, depending on the
diff --git a/platforms/apollo3/clockless_apollo3.h b/platforms/apollo3/clockless_apollo3.h
index d881eee4ae..fa487c2f76 100644
--- a/platforms/apollo3/clockless_apollo3.h
+++ b/platforms/apollo3/clockless_apollo3.h
@@ -31,7 +31,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 	typedef typename FastPin<DATA_PIN>::port_ptr_t data_ptr_t;
 	typedef typename FastPin<DATA_PIN>::port_t data_t;
 
-  CMinWait<WAIT_TIME> mWait;
+  	CMinWait<WAIT_TIME> mWait;
 
 public:
 	virtual void init() {
@@ -39,86 +39,85 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 
 		// Configure DATA_PIN for FastGPIO (settings are in fastpin_apollo3.h)
 		FastPin<DATA_PIN>::setOutput();
-    FastPin<DATA_PIN>::lo();
+		FastPin<DATA_PIN>::lo();
 
 		// Make sure the system clock is running at the full 48MHz
-	  am_hal_clkgen_control(AM_HAL_CLKGEN_CONTROL_SYSCLK_MAX, 0);
+		am_hal_clkgen_control(AM_HAL_CLKGEN_CONTROL_SYSCLK_MAX, 0);
 
 		// Make sure interrupts are enabled
-	  //am_hal_interrupt_master_enable();
+		//am_hal_interrupt_master_enable();
 
-	  // Enable SysTick Interrupts in the NVIC
-	  //NVIC_EnableIRQ(SysTick_IRQn);
+		// Enable SysTick Interrupts in the NVIC
+		//NVIC_EnableIRQ(SysTick_IRQn);
 
 		// SysTick is 24-bit and counts down (not up)
 
-	  // Stop the SysTick (just in case it is already running).
-	  // This clears the ENABLE bit in the SysTick Control and Status Register (SYST_CSR).
-	  // In Ambiq naming convention, the control register is SysTick->CTRL
-	  am_hal_systick_stop();
-
-	  // Call SysTick_Config
-	  // This is defined in core_cm4.h
-	  // It loads the specified LOAD value into the SysTick Reload Value Register (SYST_RVR)
-	  // In Ambiq naming convention, the reload register is SysTick->LOAD
-	  // It sets the SysTick interrupt priority
-	  // It clears the SysTick Current Value Register (SYST_CVR)
-	  // In Ambiq naming convention, the current value register is SysTick->VAL
-	  // Finally it sets these bits in the SysTick Control and Status Register (SYST_CSR):
-	  // CLKSOURCE: SysTick uses the processor clock
-	  // TICKINT: When the count reaches zero, the SysTick exception (interrupt) is changed to pending
-	  // ENABLE: Enables the counter
-	  // SysTick_Config returns 0 if successful. 1 indicates a failure (the LOAD value was invalid).
-	  SysTick_Config(0xFFFFFFUL); // The LOAD value needs to be 24-bit
+		// Stop the SysTick (just in case it is already running).
+		// This clears the ENABLE bit in the SysTick Control and Status Register (SYST_CSR).
+		// In Ambiq naming convention, the control register is SysTick->CTRL
+		am_hal_systick_stop();
+
+		// Call SysTick_Config
+		// This is defined in core_cm4.h
+		// It loads the specified LOAD value into the SysTick Reload Value Register (SYST_RVR)
+		// In Ambiq naming convention, the reload register is SysTick->LOAD
+		// It sets the SysTick interrupt priority
+		// It clears the SysTick Current Value Register (SYST_CVR)
+		// In Ambiq naming convention, the current value register is SysTick->VAL
+		// Finally it sets these bits in the SysTick Control and Status Register (SYST_CSR):
+		// CLKSOURCE: SysTick uses the processor clock
+		// TICKINT: When the count reaches zero, the SysTick exception (interrupt) is changed to pending
+		// ENABLE: Enables the counter
+		// SysTick_Config returns 0 if successful. 1 indicates a failure (the LOAD value was invalid).
+		SysTick_Config(0xFFFFFFUL); // The LOAD value needs to be 24-bit
 	}
 
 	virtual uint16_t getMaxRefreshRate() const { return 400; }
 
 protected:
-
 	virtual void showPixels(PixelController<RGB_ORDER> & pixels) {
-    mWait.wait();
+    	mWait.wait();
 		if(!showRGBInternal(pixels)) {
-      sei(); delayMicroseconds(WAIT_TIME); cli();
-      showRGBInternal(pixels);
-    }
-    mWait.mark();
-  }
+			sei(); delayMicroseconds(WAIT_TIME); cli();
+			showRGBInternal(pixels);
+   		}
+    	mWait.mark();
+ 	}
 
 	template<int BITS> __attribute__ ((always_inline)) inline static void writeBits(register uint32_t & next_mark, register uint8_t & b)  {
 		// SysTick counts down (not up) and is 24-bit
 		for(register uint32_t i = BITS-1; i > 0; i--) { // We could speed this up by using Bit Banding
-      while(__am_hal_systick_count() > next_mark) { ; } // Wait for the remainder of this cycle to complete
-			// Calculate next_mark (the time of the next DATA_PIN transition) by subtracting T1+T2+T3
-			// SysTick counts down (not up) and is 24-bit
-			next_mark = (__am_hal_systick_count() - (T1+T2+T3)) & 0xFFFFFFUL;
-			FastPin<DATA_PIN>::hi();
-			if(b&0x80) {
-				// "1 code" = longer pulse width
-	      while((__am_hal_systick_count() - next_mark) > (T3+(3*(F_CPU/24000000)))) { ; }
-        FastPin<DATA_PIN>::lo();
-			} else {
-				// "0 code" = shorter pulse width
-	      while((__am_hal_systick_count() - next_mark) > (T2+T3+(4*(F_CPU/24000000)))) { ; }
-        FastPin<DATA_PIN>::lo();
-			}
-			b <<= 1;
+			while(__am_hal_systick_count() > next_mark) { ; } // Wait for the remainder of this cycle to complete
+				// Calculate next_mark (the time of the next DATA_PIN transition) by subtracting T1+T2+T3
+				// SysTick counts down (not up) and is 24-bit
+				next_mark = (__am_hal_systick_count() - (T1+T2+T3)) & 0xFFFFFFUL;
+				FastPin<DATA_PIN>::hi();
+				if(b&0x80) {
+					// "1 code" = longer pulse width
+					while((__am_hal_systick_count() - next_mark) > (T3+(3*(F_CPU/24000000)))) { ; }
+					FastPin<DATA_PIN>::lo();
+				} else {
+					// "0 code" = shorter pulse width
+					while((__am_hal_systick_count() - next_mark) > (T2+T3+(4*(F_CPU/24000000)))) { ; }
+					FastPin<DATA_PIN>::lo();
+				}
+				b <<= 1;
 		}
 
-    while(__am_hal_systick_count() > next_mark) { ; }// Wait for the remainder of this cycle to complete
+		while(__am_hal_systick_count() > next_mark) { ; }// Wait for the remainder of this cycle to complete
 		// Calculate next_mark (the time of the next DATA_PIN transition) by subtracting T1+T2+T3
 		// SysTick counts down (not up) and is 24-bit
 		next_mark = (__am_hal_systick_count() - (T1+T2+T3)) & 0xFFFFFFUL;
 		FastPin<DATA_PIN>::hi();
-    if(b&0x80) {
+		if(b&0x80) {
 			// "1 code" = longer pulse width
-      while((__am_hal_systick_count() - next_mark) > (T3+(2*(F_CPU/24000000)))) { ; }
-      FastPin<DATA_PIN>::lo();
-    } else {
+			while((__am_hal_systick_count() - next_mark) > (T3+(2*(F_CPU/24000000)))) { ; }
+			FastPin<DATA_PIN>::lo();
+		} else {
 			// "0 code" = shorter pulse width
-      while((__am_hal_systick_count() - next_mark) > (T2+T3+(4*(F_CPU/24000000)))) { ; }
-      FastPin<DATA_PIN>::lo();
-    }
+			while((__am_hal_systick_count() - next_mark) > (T2+T3+(4*(F_CPU/24000000)))) { ; }
+			FastPin<DATA_PIN>::lo();
+		}
 	}
 
 	// This method is made static to force making register Y available to use for data on AVR - if the method is non-static, then
diff --git a/platforms/apollo3/fastpin_apollo3.h b/platforms/apollo3/fastpin_apollo3.h
index eb9e453744..28e5b9670d 100644
--- a/platforms/apollo3/fastpin_apollo3.h
+++ b/platforms/apollo3/fastpin_apollo3.h
@@ -11,30 +11,29 @@ FASTLED_NAMESPACE_BEGIN
 #else
 
 template<uint8_t PIN, uint8_t PAD> class _APOLLO3PIN {
-
 public:
-  typedef volatile uint32_t * port_ptr_t;
-  typedef uint32_t port_t;
+    typedef volatile uint32_t * port_ptr_t;
+    typedef uint32_t port_t;
 
-  inline static void setOutput() { pinMode(PIN, OUTPUT); am_hal_gpio_fastgpio_enable(PAD); }
-  inline static void setInput() { am_hal_gpio_fastgpio_disable(PAD); pinMode(PIN, INPUT); }
+    inline static void setOutput() { pinMode(PIN, OUTPUT); am_hal_gpio_fastgpio_enable(PAD); }
+    inline static void setInput() { am_hal_gpio_fastgpio_disable(PAD); pinMode(PIN, INPUT); }
 
-  inline static void hi() __attribute__ ((always_inline)) { am_hal_gpio_fastgpio_set(PAD); }
-  inline static void lo() __attribute__ ((always_inline)) { am_hal_gpio_fastgpio_clr(PAD); }
-  inline static void set(register port_t val) __attribute__ ((always_inline)) { if(val) { am_hal_gpio_fastgpio_set(PAD); } else { am_hal_gpio_fastgpio_clr(PAD); } }
+    inline static void hi() __attribute__ ((always_inline)) { am_hal_gpio_fastgpio_set(PAD); }
+    inline static void lo() __attribute__ ((always_inline)) { am_hal_gpio_fastgpio_clr(PAD); }
+    inline static void set(register port_t val) __attribute__ ((always_inline)) { if(val) { am_hal_gpio_fastgpio_set(PAD); } else { am_hal_gpio_fastgpio_clr(PAD); } }
 
-  inline static void strobe() __attribute__ ((always_inline)) { toggle(); toggle(); }
+    inline static void strobe() __attribute__ ((always_inline)) { toggle(); toggle(); }
 
-  inline static void toggle() __attribute__ ((always_inline)) { if( am_hal_gpio_fastgpio_read(PAD)) { lo(); } else { hi(); } }
+    inline static void toggle() __attribute__ ((always_inline)) { if( am_hal_gpio_fastgpio_read(PAD)) { lo(); } else { hi(); } }
 
-  inline static void hi(register port_ptr_t port) __attribute__ ((always_inline)) { hi(); }
-  inline static void lo(register port_ptr_t port) __attribute__ ((always_inline)) { lo(); }
-  inline static void fastset(register port_ptr_t port, register port_t val) __attribute__ ((always_inline)) { set(val); }
+    inline static void hi(register port_ptr_t port) __attribute__ ((always_inline)) { hi(); }
+    inline static void lo(register port_ptr_t port) __attribute__ ((always_inline)) { lo(); }
+    inline static void fastset(register port_ptr_t port, register port_t val) __attribute__ ((always_inline)) { set(val); }
 
-  inline static port_t hival() __attribute__ ((always_inline)) { return 0; }
-  inline static port_t loval() __attribute__ ((always_inline)) { return 0; }
-  inline static port_ptr_t port() __attribute__ ((always_inline)) { return NULL; }
-  inline static port_t mask() __attribute__ ((always_inline)) { return 0; }
+    inline static port_t hival() __attribute__ ((always_inline)) { return 0; }
+    inline static port_t loval() __attribute__ ((always_inline)) { return 0; }
+    inline static port_ptr_t port() __attribute__ ((always_inline)) { return NULL; }
+    inline static port_t mask() __attribute__ ((always_inline)) { return 0; }
 };
 
 // For the Apollo3 we need to define both the pin number and the associated pad
diff --git a/platforms/arm/common/m0clockless.h b/platforms/arm/common/m0clockless.h
index d5a0cf6f6b..b9ed2ba5ca 100644
--- a/platforms/arm/common/m0clockless.h
+++ b/platforms/arm/common/m0clockless.h
@@ -2,182 +2,182 @@
 #define __INC_M0_CLOCKLESS_H
 
 struct M0ClocklessData {
-  uint8_t d[3];
-  uint8_t e[3];
-  uint8_t adj;
-  uint8_t pad;
-  uint32_t s[3];
+    uint8_t d[3];
+    uint8_t e[3];
+    uint8_t adj;
+    uint8_t pad;
+    uint32_t s[3];
 };
 
 
 template<int HI_OFFSET, int LO_OFFSET, int T1, int T2, int T3, EOrder RGB_ORDER, int WAIT_TIME>int
 showLedData(volatile uint32_t *_port, uint32_t _bitmask, const uint8_t *_leds, uint32_t num_leds, struct M0ClocklessData *pData) {
-  // Lo register variables
-  register uint32_t scratch=0;
-  register struct M0ClocklessData *base = pData;
-  register volatile uint32_t *port = _port;
-  register uint32_t d=0;
-  register uint32_t counter=num_leds;
-  register uint32_t bn=0;
-  register uint32_t b=0;
-  register uint32_t bitmask = _bitmask;
-
-  // high register variable
-  register const uint8_t *leds = _leds;
+    // Lo register variables
+    register uint32_t scratch=0;
+    register struct M0ClocklessData *base = pData;
+    register volatile uint32_t *port = _port;
+    register uint32_t d=0;
+    register uint32_t counter=num_leds;
+    register uint32_t bn=0;
+    register uint32_t b=0;
+    register uint32_t bitmask = _bitmask;
+
+    // high register variable
+    register const uint8_t *leds = _leds;
 #if (FASTLED_SCALE8_FIXED == 1)
-  pData->s[0]++;
-  pData->s[1]++;
-  pData->s[2]++;
+    pData->s[0]++;
+    pData->s[1]++;
+    pData->s[2]++;
 #endif
-  asm __volatile__ (
-    ///////////////////////////////////////////////////////////////////////////
-    //
-    // asm macro definitions - used to assemble the clockless output
-    //
-    ".ifnotdef fl_delay_def;"
+    asm __volatile__ (
+        ///////////////////////////////////////////////////////////////////////////
+        //
+        // asm macro definitions - used to assemble the clockless output
+        //
+        ".ifnotdef fl_delay_def;"
 #ifdef FASTLED_ARM_M0_PLUS
-    "  .set fl_is_m0p, 1;"
-    "  .macro m0pad;"
-    "    nop;"
-    "  .endm;"
+        "  .set fl_is_m0p, 1;"
+        "  .macro m0pad;"
+        "    nop;"
+        "  .endm;"
 #else
-    "  .set fl_is_m0p, 0;"
-    "  .macro m0pad;"
-    "  .endm;"
+        "  .set fl_is_m0p, 0;"
+        "  .macro m0pad;"
+        "  .endm;"
 #endif
-    "  .set fl_delay_def, 1;"
-    "  .set fl_delay_mod, 4;"
-    "  .if fl_is_m0p == 1;"
-    "    .set fl_delay_mod, 3;"
-    "  .endif;"
-    "  .macro fl_delay dtime, reg=r0;"
-    "    .if (\\dtime > 0);"
-    "      .set dcycle, (\\dtime / fl_delay_mod);"
-    "      .set dwork, (dcycle * fl_delay_mod);"
-    "      .set drem, (\\dtime - dwork);"
-    "      .rept (drem);"
-    "        nop;"
-    "      .endr;"
-    "      .if dcycle > 0;"
-    "        mov \\reg, #dcycle;"
-    "        delayloop_\\@:;"
-    "        sub \\reg, #1;"
-    "        bne delayloop_\\@;"
-    "	     .if fl_is_m0p == 0;"
-    "          nop;"
-    "        .endif;"
-    "      .endif;"
-    "    .endif;"
-    "  .endm;"
-
-    "  .macro mod_delay dtime,b1,b2,reg;"
-    "    .set adj, (\\b1 + \\b2);"
-    "    .if adj < \\dtime;"
-    "      .set dtime2, (\\dtime - adj);"
-    "      fl_delay dtime2, \\reg;"
-    "    .endif;"
-    "  .endm;"
-
-    // check the bit and drop the line low if it isn't set
-    "  .macro qlo4 b,bitmask,port,loff	;"
-    "    lsl \\b, #1			;"
-    "    bcs skip_\\@			;"
-    "    str \\bitmask, [\\port, \\loff]	;"
-    "    skip_\\@:			;"
-    "    m0pad;"
-    "  .endm				;"
-
-    // set the pin hi or low (determined by the offset passed in )
-    "  .macro qset2 bitmask,port,loff;"
-    "    str \\bitmask, [\\port, \\loff];"
-    "    m0pad;"
-    "  .endm;"
-
-    // Load up the next led byte to work with, put it in bn
-    "  .macro loadleds3 leds, bn, rled, scratch;"
-    "    mov \\scratch, \\leds;"
-    "    ldrb \\bn, [\\scratch, \\rled];"
-    "  .endm;"
-
-    // check whether or not we should dither
-    "  .macro loaddither7 bn,d,base,rdither;"
-    "    ldrb \\d, [\\base, \\rdither];"
-    "    lsl \\d, #24;"  //; shift high for the qadd w/bn
-    "    lsl \\bn, #24;" //; shift high for the qadd w/d
-    "    bne chkskip_\\@;" //; if bn==0, clear d;"
-    "    eor \\d, \\d;" //; clear d;"
-    "    m0pad;"
-    "    chkskip_\\@:;"
-    "  .endm;"
-
-    // Do the qadd8 for dithering -- there's two versions of this.  The m0 version
-    // takes advantage of the 3 cycle branch to do two things after the branch,
-    // while keeping timing constant.  The m0+, however, branches in 2 cycles, so
-    // we have to work around that a bit more.  This is one of the few times
-    // where the m0 will actually be _more_ efficient than the m0+
-    "  .macro dither5 bn,d;"
-    "  .syntax unified;"
-    "    .if fl_is_m0p == 0;"
-    "      adds \\bn, \\d;"         // do the add
-    "      bcc dither5_1_\\@;"
-    "      mvns \\bn, \\bn;"        // set the low 24bits ot 1's
-    "      lsls \\bn, \\bn, #24;"   // move low 8 bits to the high bits
-    "      dither5_1_\\@:;"
-    "      nop;"                    // nop to keep timing in line
-    "    .else;"
-    "      adds \\bn, \\d;"         // do the add"
-    "      bcc dither5_2_\\@;"
-    "      mvns \\bn, \\bn;"        // set the low 24bits ot 1's
-    "      dither5_2_\\@:;"
-    "      bcc dither5_3_\\@;"
-    "      lsls \\bn, \\bn, #24;"   // move low 8 bits to the high bits
-    "      dither5_3_\\@:;"
-    "    .endif;"
-    "  .syntax divided;"
-    "  .endm;"
-
-    // Do our scaling
-    "  .macro scale4 bn, base, scale, scratch;"
-    "    ldr \\scratch, [\\base, \\scale];"
-    "    lsr \\bn, \\bn, #24;"                  // bring bn back down to its low 8 bits
-    "    mul \\bn, \\scratch;"                  // do the multiply
-    "  .endm;"
-
-    // swap bn into b
-    "  .macro swapbbn1 b,bn;"
-    "    lsl \\b, \\bn, #16;"  // put the 8 bits we want for output high
-    "  .endm;"
-
-    // adjust the dithering value for the next time around (load e from memory
-    // to do the math)
-    "  .macro adjdither7 base,d,rled,eoffset,scratch;"
-    "    ldrb \\d, [\\base, \\rled];"
-    "    ldrb \\scratch,[\\base,\\eoffset];"          // load e
-    "    .syntax unified;"
-    "    subs \\d, \\scratch, \\d;"                   // d=e-d
-    "    .syntax divided;"
-    "    strb \\d, [\\base, \\rled];"                 // save d
-    "  .endm;"
-
-    // increment the led pointer (base+6 has what we're incrementing by)
-    "  .macro incleds3   leds, base, scratch;"
-    "    ldrb \\scratch, [\\base, #6];"               // load incremen
-    "    add \\leds, \\leds, \\scratch;"              // update leds pointer
-    "  .endm;"
-
-    // compare and loop
-    "  .macro cmploop5 counter,label;"
-    "    .syntax unified;"
-    "    subs \\counter, #1;"
-    "    .syntax divided;"
-    "    beq done_\\@;"
-    "    m0pad;"
-    "    b \\label;"
-    "    done_\\@:;"
-    "  .endm;"
-
-    " .endif;"
-  );
+        "  .set fl_delay_def, 1;"
+        "  .set fl_delay_mod, 4;"
+        "  .if fl_is_m0p == 1;"
+        "    .set fl_delay_mod, 3;"
+        "  .endif;"
+        "  .macro fl_delay dtime, reg=r0;"
+        "    .if (\\dtime > 0);"
+        "      .set dcycle, (\\dtime / fl_delay_mod);"
+        "      .set dwork, (dcycle * fl_delay_mod);"
+        "      .set drem, (\\dtime - dwork);"
+        "      .rept (drem);"
+        "        nop;"
+        "      .endr;"
+        "      .if dcycle > 0;"
+        "        mov \\reg, #dcycle;"
+        "        delayloop_\\@:;"
+        "        sub \\reg, #1;"
+        "        bne delayloop_\\@;"
+        "	     .if fl_is_m0p == 0;"
+        "          nop;"
+        "        .endif;"
+        "      .endif;"
+        "    .endif;"
+        "  .endm;"
+
+        "  .macro mod_delay dtime,b1,b2,reg;"
+        "    .set adj, (\\b1 + \\b2);"
+        "    .if adj < \\dtime;"
+        "      .set dtime2, (\\dtime - adj);"
+        "      fl_delay dtime2, \\reg;"
+        "    .endif;"
+        "  .endm;"
+
+        // check the bit and drop the line low if it isn't set
+        "  .macro qlo4 b,bitmask,port,loff	;"
+        "    lsl \\b, #1			;"
+        "    bcs skip_\\@			;"
+        "    str \\bitmask, [\\port, \\loff]	;"
+        "    skip_\\@:			;"
+        "    m0pad;"
+        "  .endm				;"
+
+        // set the pin hi or low (determined by the offset passed in )
+        "  .macro qset2 bitmask,port,loff;"
+        "    str \\bitmask, [\\port, \\loff];"
+        "    m0pad;"
+        "  .endm;"
+
+        // Load up the next led byte to work with, put it in bn
+        "  .macro loadleds3 leds, bn, rled, scratch;"
+        "    mov \\scratch, \\leds;"
+        "    ldrb \\bn, [\\scratch, \\rled];"
+        "  .endm;"
+
+        // check whether or not we should dither
+        "  .macro loaddither7 bn,d,base,rdither;"
+        "    ldrb \\d, [\\base, \\rdither];"
+        "    lsl \\d, #24;"  //; shift high for the qadd w/bn
+        "    lsl \\bn, #24;" //; shift high for the qadd w/d
+        "    bne chkskip_\\@;" //; if bn==0, clear d;"
+        "    eor \\d, \\d;" //; clear d;"
+        "    m0pad;"
+        "    chkskip_\\@:;"
+        "  .endm;"
+
+        // Do the qadd8 for dithering -- there's two versions of this.  The m0 version
+        // takes advantage of the 3 cycle branch to do two things after the branch,
+        // while keeping timing constant.  The m0+, however, branches in 2 cycles, so
+        // we have to work around that a bit more.  This is one of the few times
+        // where the m0 will actually be _more_ efficient than the m0+
+        "  .macro dither5 bn,d;"
+        "  .syntax unified;"
+        "    .if fl_is_m0p == 0;"
+        "      adds \\bn, \\d;"         // do the add
+        "      bcc dither5_1_\\@;"
+        "      mvns \\bn, \\bn;"        // set the low 24bits ot 1's
+        "      lsls \\bn, \\bn, #24;"   // move low 8 bits to the high bits
+        "      dither5_1_\\@:;"
+        "      nop;"                    // nop to keep timing in line
+        "    .else;"
+        "      adds \\bn, \\d;"         // do the add"
+        "      bcc dither5_2_\\@;"
+        "      mvns \\bn, \\bn;"        // set the low 24bits ot 1's
+        "      dither5_2_\\@:;"
+        "      bcc dither5_3_\\@;"
+        "      lsls \\bn, \\bn, #24;"   // move low 8 bits to the high bits
+        "      dither5_3_\\@:;"
+        "    .endif;"
+        "  .syntax divided;"
+        "  .endm;"
+
+        // Do our scaling
+        "  .macro scale4 bn, base, scale, scratch;"
+        "    ldr \\scratch, [\\base, \\scale];"
+        "    lsr \\bn, \\bn, #24;"                  // bring bn back down to its low 8 bits
+        "    mul \\bn, \\scratch;"                  // do the multiply
+        "  .endm;"
+
+        // swap bn into b
+        "  .macro swapbbn1 b,bn;"
+        "    lsl \\b, \\bn, #16;"  // put the 8 bits we want for output high
+        "  .endm;"
+
+        // adjust the dithering value for the next time around (load e from memory
+        // to do the math)
+        "  .macro adjdither7 base,d,rled,eoffset,scratch;"
+        "    ldrb \\d, [\\base, \\rled];"
+        "    ldrb \\scratch,[\\base,\\eoffset];"          // load e
+        "    .syntax unified;"
+        "    subs \\d, \\scratch, \\d;"                   // d=e-d
+        "    .syntax divided;"
+        "    strb \\d, [\\base, \\rled];"                 // save d
+        "  .endm;"
+
+        // increment the led pointer (base+6 has what we're incrementing by)
+        "  .macro incleds3   leds, base, scratch;"
+        "    ldrb \\scratch, [\\base, #6];"               // load incremen
+        "    add \\leds, \\leds, \\scratch;"              // update leds pointer
+        "  .endm;"
+
+        // compare and loop
+        "  .macro cmploop5 counter,label;"
+        "    .syntax unified;"
+        "    subs \\counter, #1;"
+        "    .syntax divided;"
+        "    beq done_\\@;"
+        "    m0pad;"
+        "    b \\label;"
+        "    done_\\@:;"
+        "  .endm;"
+
+        " .endif;"
+    );
 
 #define M0_ASM_ARGS     :             \
       [leds] "+h" (leds),             \
@@ -198,9 +198,9 @@ showLedData(volatile uint32_t *_port, uint32_t _bitmask, const uint8_t *_leds, u
       [e0] "I" (3+RO(0)),             \
       [e1] "I" (3+RO(1)),             \
       [e2] "I" (3+RO(2)),             \
-      [scale0] "I" (4*(2+RO(0))),         \
-      [scale1] "I" (4*(2+RO(1))),         \
-      [scale2] "I" (4*(2+RO(2))),         \
+      [scale0] "I" (4*(2+RO(0))),     \
+      [scale1] "I" (4*(2+RO(1))),     \
+      [scale2] "I" (4*(2+RO(2))),     \
       [T1] "I" (T1),                  \
       [T2] "I" (T2),                  \
       [T3] "I" (T3)                   \
@@ -230,157 +230,157 @@ showLedData(volatile uint32_t *_port, uint32_t _bitmask, const uint8_t *_leds, u
     // track the loop outside the asm code, to allow inserting the interrupt
     // overrun checks.
     asm __volatile__ (
-      // pre-load byte 0
-      LOADLEDS3(0) LOADDITHER7(0) DITHER5 SCALE4(0) ADJDITHER7(0) SWAPBBN1
-      M0_ASM_ARGS);
+        // pre-load byte 0
+        LOADLEDS3(0) LOADDITHER7(0) DITHER5 SCALE4(0) ADJDITHER7(0) SWAPBBN1
+        M0_ASM_ARGS);
 
     do {
-      asm __volatile__ (
-      // Write out byte 0, prepping byte 1
-      HI2 _D1 QLO4 NOTHING         _D2(0) LO2 _D3(0)
-      HI2 _D1 QLO4 LOADLEDS3(1)    _D2(3) LO2 _D3(0)
-      HI2 _D1 QLO4 LOADDITHER7(1)  _D2(7) LO2 _D3(0)
-      HI2 _D1 QLO4 DITHER5         _D2(5) LO2 _D3(0)
-      HI2 _D1 QLO4 SCALE4(1)       _D2(4) LO2 _D3(0)
-      HI2 _D1 QLO4 ADJDITHER7(1)   _D2(7) LO2 _D3(0)
-      HI2 _D1 QLO4 NOTHING         _D2(0) LO2 _D3(0)
-      HI2 _D1 QLO4 SWAPBBN1        _D2(1) LO2 _D3(0)
-
-      // Write out byte 1, prepping byte 2
-      HI2 _D1 QLO4 NOTHING         _D2(0) LO2 _D3(0)
-      HI2 _D1 QLO4 LOADLEDS3(2)    _D2(3) LO2 _D3(0)
-      HI2 _D1 QLO4 LOADDITHER7(2)  _D2(7) LO2 _D3(0)
-      HI2 _D1 QLO4 DITHER5         _D2(5) LO2 _D3(0)
-      HI2 _D1 QLO4 SCALE4(2)       _D2(4) LO2 _D3(0)
-      HI2 _D1 QLO4 ADJDITHER7(2)   _D2(7) LO2 _D3(0)
-      HI2 _D1 QLO4 NOTHING         _D2(0) LO2 _D3(0)
-      HI2 _D1 QLO4 SWAPBBN1        _D2(1) LO2 _D3(0)
-
-      // Write out byte 2, prepping byte 0
-      HI2 _D1 QLO4 INCLEDS3        _D2(3) LO2 _D3(0)
-      HI2 _D1 QLO4 LOADLEDS3(0)    _D2(3) LO2 _D3(0)
-      HI2 _D1 QLO4 LOADDITHER7(0)  _D2(7) LO2 _D3(0)
-      HI2 _D1 QLO4 DITHER5         _D2(5) LO2 _D3(0)
-      HI2 _D1 QLO4 SCALE4(0)       _D2(4) LO2 _D3(0)
-      HI2 _D1 QLO4 ADJDITHER7(0)   _D2(7) LO2 _D3(0)
-      HI2 _D1 QLO4 NOTHING         _D2(0) LO2 _D3(0)
-      HI2 _D1 QLO4 SWAPBBN1        _D2(1) LO2 _D3(5)
-
-      M0_ASM_ARGS
-      );
-      SEI_CHK; INNER_SEI; --counter; CLI_CHK;
+        asm __volatile__ (
+            // Write out byte 0, prepping byte 1
+            HI2 _D1 QLO4 NOTHING         _D2(0) LO2 _D3(0)
+            HI2 _D1 QLO4 LOADLEDS3(1)    _D2(3) LO2 _D3(0)
+            HI2 _D1 QLO4 LOADDITHER7(1)  _D2(7) LO2 _D3(0)
+            HI2 _D1 QLO4 DITHER5         _D2(5) LO2 _D3(0)
+            HI2 _D1 QLO4 SCALE4(1)       _D2(4) LO2 _D3(0)
+            HI2 _D1 QLO4 ADJDITHER7(1)   _D2(7) LO2 _D3(0)
+            HI2 _D1 QLO4 NOTHING         _D2(0) LO2 _D3(0)
+            HI2 _D1 QLO4 SWAPBBN1        _D2(1) LO2 _D3(0)
+
+            // Write out byte 1, prepping byte 2
+            HI2 _D1 QLO4 NOTHING         _D2(0) LO2 _D3(0)
+            HI2 _D1 QLO4 LOADLEDS3(2)    _D2(3) LO2 _D3(0)
+            HI2 _D1 QLO4 LOADDITHER7(2)  _D2(7) LO2 _D3(0)
+            HI2 _D1 QLO4 DITHER5         _D2(5) LO2 _D3(0)
+            HI2 _D1 QLO4 SCALE4(2)       _D2(4) LO2 _D3(0)
+            HI2 _D1 QLO4 ADJDITHER7(2)   _D2(7) LO2 _D3(0)
+            HI2 _D1 QLO4 NOTHING         _D2(0) LO2 _D3(0)
+            HI2 _D1 QLO4 SWAPBBN1        _D2(1) LO2 _D3(0)
+
+            // Write out byte 2, prepping byte 0
+            HI2 _D1 QLO4 INCLEDS3        _D2(3) LO2 _D3(0)
+            HI2 _D1 QLO4 LOADLEDS3(0)    _D2(3) LO2 _D3(0)
+            HI2 _D1 QLO4 LOADDITHER7(0)  _D2(7) LO2 _D3(0)
+            HI2 _D1 QLO4 DITHER5         _D2(5) LO2 _D3(0)
+            HI2 _D1 QLO4 SCALE4(0)       _D2(4) LO2 _D3(0)
+            HI2 _D1 QLO4 ADJDITHER7(0)   _D2(7) LO2 _D3(0)
+            HI2 _D1 QLO4 NOTHING         _D2(0) LO2 _D3(0)
+            HI2 _D1 QLO4 SWAPBBN1        _D2(1) LO2 _D3(5)
+
+            M0_ASM_ARGS
+        );
+        SEI_CHK; INNER_SEI; --counter; CLI_CHK;
     } while(counter);
 #elif (FASTLED_ALLOW_INTERRUPTS == 1)
     // We're allowing interrupts - track the loop outside the asm code, and
     // re-enable interrupts in between each iteration.
     asm __volatile__ (
-      // pre-load byte 0
-      LOADLEDS3(0) LOADDITHER7(0) DITHER5 SCALE4(0) ADJDITHER7(0) SWAPBBN1
-      M0_ASM_ARGS);
+        // pre-load byte 0
+        LOADLEDS3(0) LOADDITHER7(0) DITHER5 SCALE4(0) ADJDITHER7(0) SWAPBBN1
+        M0_ASM_ARGS);
 
     do {
-      asm __volatile__ (
-      // Write out byte 0, prepping byte 1
-      HI2 _D1 QLO4 NOTHING         _D2(0) LO2 _D3(0)
-      HI2 _D1 QLO4 LOADLEDS3(1)    _D2(3) LO2 _D3(0)
-      HI2 _D1 QLO4 LOADDITHER7(1)  _D2(7) LO2 _D3(0)
-      HI2 _D1 QLO4 DITHER5         _D2(5) LO2 _D3(0)
-      HI2 _D1 QLO4 SCALE4(1)       _D2(4) LO2 _D3(0)
-      HI2 _D1 QLO4 ADJDITHER7(1)   _D2(7) LO2 _D3(0)
-      HI2 _D1 QLO4 NOTHING         _D2(0) LO2 _D3(0)
-      HI2 _D1 QLO4 SWAPBBN1        _D2(1) LO2 _D3(0)
-
-      // Write out byte 1, prepping byte 2
-      HI2 _D1 QLO4 NOTHING         _D2(0) LO2 _D3(0)
-      HI2 _D1 QLO4 LOADLEDS3(2)    _D2(3) LO2 _D3(0)
-      HI2 _D1 QLO4 LOADDITHER7(2)  _D2(7) LO2 _D3(0)
-      HI2 _D1 QLO4 DITHER5         _D2(5) LO2 _D3(0)
-      HI2 _D1 QLO4 SCALE4(2)       _D2(4) LO2 _D3(0)
-      HI2 _D1 QLO4 ADJDITHER7(2)   _D2(7) LO2 _D3(0)
-      HI2 _D1 QLO4 INCLEDS3        _D2(3) LO2 _D3(0)
-      HI2 _D1 QLO4 SWAPBBN1        _D2(1) LO2 _D3(0)
-
-      // Write out byte 2, prepping byte 0
-      HI2 _D1 QLO4 NOTHING         _D2(0) LO2 _D3(0)
-      HI2 _D1 QLO4 LOADLEDS3(0)    _D2(3) LO2 _D3(0)
-      HI2 _D1 QLO4 LOADDITHER7(0)  _D2(7) LO2 _D3(0)
-      HI2 _D1 QLO4 DITHER5         _D2(5) LO2 _D3(0)
-      HI2 _D1 QLO4 SCALE4(0)       _D2(4) LO2 _D3(0)
-      HI2 _D1 QLO4 ADJDITHER7(0)   _D2(7) LO2 _D3(0)
-      HI2 _D1 QLO4 NOTHING         _D2(0) LO2 _D3(0)
-      HI2 _D1 QLO4 SWAPBBN1        _D2(1) LO2 _D3(5)
-
-      M0_ASM_ARGS
-      );
-
-      uint32_t ticksBeforeInterrupts = SysTick->VAL;
-      sei();
-      --counter;
-      cli();
-
-      // If more than 45 uSecs have elapsed, give up on this frame and start over.
-      // Note: this isn't completely correct. It's possible that more than one
-      // millisecond will elapse, and so SysTick->VAL will lap
-      // ticksBeforeInterrupts.
-      // Note: ticksBeforeInterrupts DECREASES
-      const uint32_t kTicksPerMs = VARIANT_MCK / 1000;
-      const uint32_t kTicksPerUs = kTicksPerMs / 1000;
-      const uint32_t kTicksIn45us = kTicksPerUs * 45;
-
-      const uint32_t currentTicks = SysTick->VAL;
-
-      if (ticksBeforeInterrupts < currentTicks) {
-        // Timer started over
-        if ((ticksBeforeInterrupts + (kTicksPerMs - currentTicks)) > kTicksIn45us) {
-          return 0;
-        }
-      } else {
-        if ((ticksBeforeInterrupts - currentTicks) > kTicksIn45us) {
-          return 0;
+        asm __volatile__ (
+            // Write out byte 0, prepping byte 1
+            HI2 _D1 QLO4 NOTHING         _D2(0) LO2 _D3(0)
+            HI2 _D1 QLO4 LOADLEDS3(1)    _D2(3) LO2 _D3(0)
+            HI2 _D1 QLO4 LOADDITHER7(1)  _D2(7) LO2 _D3(0)
+            HI2 _D1 QLO4 DITHER5         _D2(5) LO2 _D3(0)
+            HI2 _D1 QLO4 SCALE4(1)       _D2(4) LO2 _D3(0)
+            HI2 _D1 QLO4 ADJDITHER7(1)   _D2(7) LO2 _D3(0)
+            HI2 _D1 QLO4 NOTHING         _D2(0) LO2 _D3(0)
+            HI2 _D1 QLO4 SWAPBBN1        _D2(1) LO2 _D3(0)
+
+            // Write out byte 1, prepping byte 2
+            HI2 _D1 QLO4 NOTHING         _D2(0) LO2 _D3(0)
+            HI2 _D1 QLO4 LOADLEDS3(2)    _D2(3) LO2 _D3(0)
+            HI2 _D1 QLO4 LOADDITHER7(2)  _D2(7) LO2 _D3(0)
+            HI2 _D1 QLO4 DITHER5         _D2(5) LO2 _D3(0)
+            HI2 _D1 QLO4 SCALE4(2)       _D2(4) LO2 _D3(0)
+            HI2 _D1 QLO4 ADJDITHER7(2)   _D2(7) LO2 _D3(0)
+            HI2 _D1 QLO4 INCLEDS3        _D2(3) LO2 _D3(0)
+            HI2 _D1 QLO4 SWAPBBN1        _D2(1) LO2 _D3(0)
+
+            // Write out byte 2, prepping byte 0
+            HI2 _D1 QLO4 NOTHING         _D2(0) LO2 _D3(0)
+            HI2 _D1 QLO4 LOADLEDS3(0)    _D2(3) LO2 _D3(0)
+            HI2 _D1 QLO4 LOADDITHER7(0)  _D2(7) LO2 _D3(0)
+            HI2 _D1 QLO4 DITHER5         _D2(5) LO2 _D3(0)
+            HI2 _D1 QLO4 SCALE4(0)       _D2(4) LO2 _D3(0)
+            HI2 _D1 QLO4 ADJDITHER7(0)   _D2(7) LO2 _D3(0)
+            HI2 _D1 QLO4 NOTHING         _D2(0) LO2 _D3(0)
+            HI2 _D1 QLO4 SWAPBBN1        _D2(1) LO2 _D3(5)
+
+            M0_ASM_ARGS
+        );
+
+        uint32_t ticksBeforeInterrupts = SysTick->VAL;
+        sei();
+        --counter;
+        cli();
+
+        // If more than 45 uSecs have elapsed, give up on this frame and start over.
+        // Note: this isn't completely correct. It's possible that more than one
+        // millisecond will elapse, and so SysTick->VAL will lap
+        // ticksBeforeInterrupts.
+        // Note: SysTick->VAL counts DOWN, so currentTicks only exceeds ticksBeforeInterrupts if the timer reloaded in between.
+        const uint32_t kTicksPerMs = VARIANT_MCK / 1000;
+        const uint32_t kTicksPerUs = kTicksPerMs / 1000;
+        const uint32_t kTicksIn45us = kTicksPerUs * 45;
+
+        const uint32_t currentTicks = SysTick->VAL;
+
+        if (ticksBeforeInterrupts < currentTicks) {
+            // Timer started over
+            if ((ticksBeforeInterrupts + (kTicksPerMs - currentTicks)) > kTicksIn45us) {
+                return 0;
+            }
+        } else {
+            if ((ticksBeforeInterrupts - currentTicks) > kTicksIn45us) {
+                return 0;
+            }
         }
-      }
     } while(counter);
 #else
     // We're not allowing interrupts - run the entire loop in asm to keep things
     // as tight as possible.  In an ideal world, we should be pushing out ws281x
     // leds (or other 3-wire leds) with zero gaps between pixels.
     asm __volatile__ (
-      // pre-load byte 0
-    LOADLEDS3(0) LOADDITHER7(0) DITHER5 SCALE4(0) ADJDITHER7(0) SWAPBBN1
-
-    // loop over writing out the data
-    LOOP
-      // Write out byte 0, prepping byte 1
-      HI2 _D1 QLO4 NOTHING         _D2(0) LO2 _D3(0)
-      HI2 _D1 QLO4 LOADLEDS3(1)    _D2(3) LO2 _D3(0)
-      HI2 _D1 QLO4 LOADDITHER7(1)  _D2(7) LO2 _D3(0)
-      HI2 _D1 QLO4 DITHER5         _D2(5) LO2 _D3(0)
-      HI2 _D1 QLO4 SCALE4(1)       _D2(4) LO2 _D3(0)
-      HI2 _D1 QLO4 ADJDITHER7(1)   _D2(7) LO2 _D3(0)
-      HI2 _D1 QLO4 NOTHING         _D2(0) LO2 _D3(0)
-      HI2 _D1 QLO4 SWAPBBN1        _D2(1) LO2 _D3(0)
-
-      // Write out byte 1, prepping byte 2
-      HI2 _D1 QLO4 NOTHING         _D2(0) LO2 _D3(0)
-      HI2 _D1 QLO4 LOADLEDS3(2)    _D2(3) LO2 _D3(0)
-      HI2 _D1 QLO4 LOADDITHER7(2)  _D2(7) LO2 _D3(0)
-      HI2 _D1 QLO4 DITHER5         _D2(5) LO2 _D3(0)
-      HI2 _D1 QLO4 SCALE4(2)       _D2(4) LO2 _D3(0)
-      HI2 _D1 QLO4 ADJDITHER7(2)   _D2(7) LO2 _D3(0)
-      HI2 _D1 QLO4 INCLEDS3        _D2(3) LO2 _D3(0)
-      HI2 _D1 QLO4 SWAPBBN1        _D2(1) LO2 _D3(0)
-
-      // Write out byte 2, prepping byte 0
-      HI2 _D1 QLO4 NOTHING         _D2(0) LO2 _D3(0)
-      HI2 _D1 QLO4 LOADLEDS3(0)    _D2(3) LO2 _D3(0)
-      HI2 _D1 QLO4 LOADDITHER7(0)  _D2(7) LO2 _D3(0)
-      HI2 _D1 QLO4 DITHER5         _D2(5) LO2 _D3(0)
-      HI2 _D1 QLO4 SCALE4(0)       _D2(4) LO2 _D3(0)
-      HI2 _D1 QLO4 ADJDITHER7(0)   _D2(7) LO2 _D3(0)
-      HI2 _D1 QLO4 NOTHING         _D2(0) LO2 _D3(0)
-      HI2 _D1 QLO4 SWAPBBN1        _D2(1) LO2 _D3(5) CMPLOOP5
-
-      M0_ASM_ARGS
+        // pre-load byte 0
+        LOADLEDS3(0) LOADDITHER7(0) DITHER5 SCALE4(0) ADJDITHER7(0) SWAPBBN1
+
+        // loop over writing out the data
+        LOOP
+            // Write out byte 0, prepping byte 1
+            HI2 _D1 QLO4 NOTHING         _D2(0) LO2 _D3(0)
+            HI2 _D1 QLO4 LOADLEDS3(1)    _D2(3) LO2 _D3(0)
+            HI2 _D1 QLO4 LOADDITHER7(1)  _D2(7) LO2 _D3(0)
+            HI2 _D1 QLO4 DITHER5         _D2(5) LO2 _D3(0)
+            HI2 _D1 QLO4 SCALE4(1)       _D2(4) LO2 _D3(0)
+            HI2 _D1 QLO4 ADJDITHER7(1)   _D2(7) LO2 _D3(0)
+            HI2 _D1 QLO4 NOTHING         _D2(0) LO2 _D3(0)
+            HI2 _D1 QLO4 SWAPBBN1        _D2(1) LO2 _D3(0)
+
+            // Write out byte 1, prepping byte 2
+            HI2 _D1 QLO4 NOTHING         _D2(0) LO2 _D3(0)
+            HI2 _D1 QLO4 LOADLEDS3(2)    _D2(3) LO2 _D3(0)
+            HI2 _D1 QLO4 LOADDITHER7(2)  _D2(7) LO2 _D3(0)
+            HI2 _D1 QLO4 DITHER5         _D2(5) LO2 _D3(0)
+            HI2 _D1 QLO4 SCALE4(2)       _D2(4) LO2 _D3(0)
+            HI2 _D1 QLO4 ADJDITHER7(2)   _D2(7) LO2 _D3(0)
+            HI2 _D1 QLO4 INCLEDS3        _D2(3) LO2 _D3(0)
+            HI2 _D1 QLO4 SWAPBBN1        _D2(1) LO2 _D3(0)
+
+            // Write out byte 2, prepping byte 0
+            HI2 _D1 QLO4 NOTHING         _D2(0) LO2 _D3(0)
+            HI2 _D1 QLO4 LOADLEDS3(0)    _D2(3) LO2 _D3(0)
+            HI2 _D1 QLO4 LOADDITHER7(0)  _D2(7) LO2 _D3(0)
+            HI2 _D1 QLO4 DITHER5         _D2(5) LO2 _D3(0)
+            HI2 _D1 QLO4 SCALE4(0)       _D2(4) LO2 _D3(0)
+            HI2 _D1 QLO4 ADJDITHER7(0)   _D2(7) LO2 _D3(0)
+            HI2 _D1 QLO4 NOTHING         _D2(0) LO2 _D3(0)
+            HI2 _D1 QLO4 SWAPBBN1        _D2(1) LO2 _D3(5) CMPLOOP5
+
+            M0_ASM_ARGS
     );
 #endif
     return num_leds;
diff --git a/platforms/arm/d21/clockless_arm_d21.h b/platforms/arm/d21/clockless_arm_d21.h
index 366a6bdeb4..16526ed674 100644
--- a/platforms/arm/d21/clockless_arm_d21.h
+++ b/platforms/arm/d21/clockless_arm_d21.h
@@ -7,51 +7,51 @@ FASTLED_NAMESPACE_BEGIN
 
 template <uint8_t DATA_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = RGB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 50>
 class ClocklessController : public CPixelLEDController<RGB_ORDER> {
-  typedef typename FastPinBB<DATA_PIN>::port_ptr_t data_ptr_t;
-  typedef typename FastPinBB<DATA_PIN>::port_t data_t;
+    typedef typename FastPinBB<DATA_PIN>::port_ptr_t data_ptr_t;
+    typedef typename FastPinBB<DATA_PIN>::port_t data_t;
+
+    data_t mPinMask;
+    data_ptr_t mPort;
+    CMinWait<WAIT_TIME> mWait;
 
-  data_t mPinMask;
-  data_ptr_t mPort;
-  CMinWait<WAIT_TIME> mWait;
 public:
-  virtual void init() {
-    FastPinBB<DATA_PIN>::setOutput();
-    mPinMask = FastPinBB<DATA_PIN>::mask();
-    mPort = FastPinBB<DATA_PIN>::port();
-  }
-
-	virtual uint16_t getMaxRefreshRate() const { return 400; }
-
-  virtual void showPixels(PixelController<RGB_ORDER> & pixels) {
-    mWait.wait();
-    cli();
-    if(!showRGBInternal(pixels)) {
-      sei(); delayMicroseconds(WAIT_TIME); cli();
-      showRGBInternal(pixels);
+    virtual void init() {
+        FastPinBB<DATA_PIN>::setOutput();
+        mPinMask = FastPinBB<DATA_PIN>::mask();
+        mPort = FastPinBB<DATA_PIN>::port();
+    }
+
+    virtual uint16_t getMaxRefreshRate() const { return 400; }
+
+    virtual void showPixels(PixelController<RGB_ORDER> & pixels) {
+        mWait.wait();
+        cli();
+        if(!showRGBInternal(pixels)) {
+            sei(); delayMicroseconds(WAIT_TIME); cli();
+            showRGBInternal(pixels);
+        }
+        sei();
+        mWait.mark();
     }
-    sei();
-    mWait.mark();
-  }
-
-  // This method is made static to force making register Y available to use for data on AVR - if the method is non-static, then
-  // gcc will use register Y for the this pointer.
-  static uint32_t showRGBInternal(PixelController<RGB_ORDER> pixels) {
-    struct M0ClocklessData data;
-    data.d[0] = pixels.d[0];
-    data.d[1] = pixels.d[1];
-    data.d[2] = pixels.d[2];
-    data.s[0] = pixels.mScale[0];
-    data.s[1] = pixels.mScale[1];
-    data.s[2] = pixels.mScale[2];
-    data.e[0] = pixels.e[0];
-    data.e[1] = pixels.e[1];
-    data.e[2] = pixels.e[2];
-    data.adj = pixels.mAdvance;
-
-    typename FastPin<DATA_PIN>::port_ptr_t portBase = FastPin<DATA_PIN>::port();
-    return showLedData<8,4,T1,T2,T3,RGB_ORDER, WAIT_TIME>(portBase, FastPin<DATA_PIN>::mask(), pixels.mData, pixels.mLen, &data);
-  }
 
+    // This method is made static to force making register Y available to use for data on AVR - if the method is non-static, then
+    // gcc will use register Y for the this pointer.
+    static uint32_t showRGBInternal(PixelController<RGB_ORDER> pixels) {
+        struct M0ClocklessData data;
+        data.d[0] = pixels.d[0];
+        data.d[1] = pixels.d[1];
+        data.d[2] = pixels.d[2];
+        data.s[0] = pixels.mScale[0];
+        data.s[1] = pixels.mScale[1];
+        data.s[2] = pixels.mScale[2];
+        data.e[0] = pixels.e[0];
+        data.e[1] = pixels.e[1];
+        data.e[2] = pixels.e[2];
+        data.adj = pixels.mAdvance;
+
+        typename FastPin<DATA_PIN>::port_ptr_t portBase = FastPin<DATA_PIN>::port();
+        return showLedData<8,4,T1,T2,T3,RGB_ORDER, WAIT_TIME>(portBase, FastPin<DATA_PIN>::mask(), pixels.mData, pixels.mLen, &data);
+    }
 
 };
 
diff --git a/platforms/arm/d21/fastpin_arm_d21.h b/platforms/arm/d21/fastpin_arm_d21.h
index cbfe5dbb2f..9f9ef869b4 100644
--- a/platforms/arm/d21/fastpin_arm_d21.h
+++ b/platforms/arm/d21/fastpin_arm_d21.h
@@ -17,41 +17,41 @@ FASTLED_NAMESPACE_BEGIN
 
 template<uint8_t PIN, uint8_t _BIT, uint32_t _MASK, int _GRP> class _ARMPIN {
 public:
-  typedef volatile uint32_t * port_ptr_t;
-  typedef uint32_t port_t;
-
-  #if 0
-  inline static void setOutput() {
-    if(_BIT<8) {
-      _CRL::r() = (_CRL::r() & (0xF << (_BIT*4)) | (0x1 << (_BIT*4));
-    } else {
-      _CRH::r() = (_CRH::r() & (0xF << ((_BIT-8)*4))) | (0x1 << ((_BIT-8)*4));
+    typedef volatile uint32_t * port_ptr_t;
+    typedef uint32_t port_t;
+
+    #if 0
+    inline static void setOutput() {
+        if(_BIT<8) {
+            _CRL::r() = (_CRL::r() & (0xF << (_BIT*4)) | (0x1 << (_BIT*4));
+        } else {
+            _CRH::r() = (_CRH::r() & (0xF << ((_BIT-8)*4))) | (0x1 << ((_BIT-8)*4));
+        }
     }
-  }
-  inline static void setInput() { /* TODO */ } // TODO: preform MUX config { _PDDR::r() &= ~_MASK; }
-  #endif
+    inline static void setInput() { /* TODO */ } // TODO: perform MUX config { _PDDR::r() &= ~_MASK; }
+    #endif
 
-  inline static void setOutput() { pinMode(PIN, OUTPUT); } // TODO: perform MUX config { _PDDR::r() |= _MASK; }
-  inline static void setInput() { pinMode(PIN, INPUT); } // TODO: preform MUX config { _PDDR::r() &= ~_MASK; }
+    inline static void setOutput() { pinMode(PIN, OUTPUT); } // TODO: perform MUX config { _PDDR::r() |= _MASK; }
+    inline static void setInput() { pinMode(PIN, INPUT); } // TODO: perform MUX config { _PDDR::r() &= ~_MASK; }
 
-  inline static void hi() __attribute__ ((always_inline)) { PORT_IOBUS->Group[_GRP].OUTSET.reg = _MASK; }
-  inline static void lo() __attribute__ ((always_inline)) { PORT_IOBUS->Group[_GRP].OUTCLR.reg = _MASK; }
-  inline static void set(register port_t val) __attribute__ ((always_inline)) { PORT_IOBUS->Group[_GRP].OUT.reg = val; }
+    inline static void hi() __attribute__ ((always_inline)) { PORT_IOBUS->Group[_GRP].OUTSET.reg = _MASK; }
+    inline static void lo() __attribute__ ((always_inline)) { PORT_IOBUS->Group[_GRP].OUTCLR.reg = _MASK; }
+    inline static void set(register port_t val) __attribute__ ((always_inline)) { PORT_IOBUS->Group[_GRP].OUT.reg = val; }
 
-  inline static void strobe() __attribute__ ((always_inline)) { toggle(); toggle(); }
+    inline static void strobe() __attribute__ ((always_inline)) { toggle(); toggle(); }
 
-  inline static void toggle() __attribute__ ((always_inline)) { PORT_IOBUS->Group[_GRP].OUTTGL.reg = _MASK; }
+    inline static void toggle() __attribute__ ((always_inline)) { PORT_IOBUS->Group[_GRP].OUTTGL.reg = _MASK; }
 
-  inline static void hi(register port_ptr_t port) __attribute__ ((always_inline)) { hi(); }
-  inline static void lo(register port_ptr_t port) __attribute__ ((always_inline)) { lo(); }
-  inline static void fastset(register port_ptr_t port, register port_t val) __attribute__ ((always_inline)) { *port = val; }
+    inline static void hi(register port_ptr_t port) __attribute__ ((always_inline)) { hi(); }
+    inline static void lo(register port_ptr_t port) __attribute__ ((always_inline)) { lo(); }
+    inline static void fastset(register port_ptr_t port, register port_t val) __attribute__ ((always_inline)) { *port = val; }
 
-  inline static port_t hival() __attribute__ ((always_inline)) { return PORT_IOBUS->Group[_GRP].OUT.reg | _MASK; }
-  inline static port_t loval() __attribute__ ((always_inline)) { return PORT_IOBUS->Group[_GRP].OUT.reg & ~_MASK; }
-  inline static port_ptr_t port() __attribute__ ((always_inline)) { return &PORT_IOBUS->Group[_GRP].OUT.reg; }
-  inline static port_ptr_t sport() __attribute__ ((always_inline)) { return &PORT_IOBUS->Group[_GRP].OUTSET.reg; }
-  inline static port_ptr_t cport() __attribute__ ((always_inline)) { return &PORT_IOBUS->Group[_GRP].OUTCLR.reg; }
-  inline static port_t mask() __attribute__ ((always_inline)) { return _MASK; }
+    inline static port_t hival() __attribute__ ((always_inline)) { return PORT_IOBUS->Group[_GRP].OUT.reg | _MASK; }
+    inline static port_t loval() __attribute__ ((always_inline)) { return PORT_IOBUS->Group[_GRP].OUT.reg & ~_MASK; }
+    inline static port_ptr_t port() __attribute__ ((always_inline)) { return &PORT_IOBUS->Group[_GRP].OUT.reg; }
+    inline static port_ptr_t sport() __attribute__ ((always_inline)) { return &PORT_IOBUS->Group[_GRP].OUTSET.reg; }
+    inline static port_ptr_t cport() __attribute__ ((always_inline)) { return &PORT_IOBUS->Group[_GRP].OUTCLR.reg; }
+    inline static port_t mask() __attribute__ ((always_inline)) { return _MASK; }
 };
 
 #define _R(T) struct __gen_struct_ ## T
diff --git a/platforms/arm/d51/clockless_arm_d51.h b/platforms/arm/d51/clockless_arm_d51.h
index 0c3f6d4dac..a5c7f68023 100644
--- a/platforms/arm/d51/clockless_arm_d51.h
+++ b/platforms/arm/d51/clockless_arm_d51.h
@@ -22,6 +22,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 	data_t mPinMask;
 	data_ptr_t mPort;
 	CMinWait<WAIT_TIME> mWait;
+
 public:
 	virtual void init() {
 		FastPin<DATA_PIN>::setOutput();
@@ -32,15 +33,14 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 	virtual uint16_t getMaxRefreshRate() const { return 400; }
 
 protected:
-
 	virtual void showPixels(PixelController<RGB_ORDER> & pixels) {
-    mWait.wait();
+    	mWait.wait();
 		if(!showRGBInternal(pixels)) {
-      sei(); delayMicroseconds(WAIT_TIME); cli();
-      showRGBInternal(pixels);
-    }
-    mWait.mark();
-  }
+			sei(); delayMicroseconds(WAIT_TIME); cli();
+			showRGBInternal(pixels);
+		}
+		mWait.mark();
+	}
 
 	template<int BITS> __attribute__ ((always_inline)) inline static void writeBits(register uint32_t & next_mark, register data_ptr_t port, register data_t hi, register data_t lo, register uint8_t & b)  {
 		for(register uint32_t i = BITS-1; i > 0; i--) {
diff --git a/platforms/arm/d51/fastpin_arm_d51.h b/platforms/arm/d51/fastpin_arm_d51.h
index dd40dbfd02..9d31cedb4b 100644
--- a/platforms/arm/d51/fastpin_arm_d51.h
+++ b/platforms/arm/d51/fastpin_arm_d51.h
@@ -17,41 +17,41 @@ FASTLED_NAMESPACE_BEGIN
 
 template<uint8_t PIN, uint8_t _BIT, uint32_t _MASK, int _GRP> class _ARMPIN {
 public:
-  typedef volatile uint32_t * port_ptr_t;
-  typedef uint32_t port_t;
-
-  #if 0
-  inline static void setOutput() {
-    if(_BIT<8) {
-      _CRL::r() = (_CRL::r() & (0xF << (_BIT*4)) | (0x1 << (_BIT*4));
-    } else {
-      _CRH::r() = (_CRH::r() & (0xF << ((_BIT-8)*4))) | (0x1 << ((_BIT-8)*4));
+    typedef volatile uint32_t * port_ptr_t;
+    typedef uint32_t port_t;
+
+    #if 0
+    inline static void setOutput() {
+        if(_BIT<8) {
+            _CRL::r() = (_CRL::r() & (0xF << (_BIT*4)) | (0x1 << (_BIT*4));
+        } else {
+            _CRH::r() = (_CRH::r() & (0xF << ((_BIT-8)*4))) | (0x1 << ((_BIT-8)*4));
+        }
     }
-  }
-  inline static void setInput() { /* TODO */ } // TODO: preform MUX config { _PDDR::r() &= ~_MASK; }
-  #endif
+    inline static void setInput() { /* TODO */ } // TODO: perform MUX config { _PDDR::r() &= ~_MASK; }
+    #endif
 
-  inline static void setOutput() { pinMode(PIN, OUTPUT); } // TODO: perform MUX config { _PDDR::r() |= _MASK; }
-  inline static void setInput() { pinMode(PIN, INPUT); } // TODO: preform MUX config { _PDDR::r() &= ~_MASK; }
+    inline static void setOutput() { pinMode(PIN, OUTPUT); } // TODO: perform MUX config { _PDDR::r() |= _MASK; }
+    inline static void setInput() { pinMode(PIN, INPUT); } // TODO: perform MUX config { _PDDR::r() &= ~_MASK; }
 
-  inline static void hi() __attribute__ ((always_inline)) { PORT->Group[_GRP].OUTSET.reg = _MASK; }
-  inline static void lo() __attribute__ ((always_inline)) { PORT->Group[_GRP].OUTCLR.reg = _MASK; }
-  inline static void set(register port_t val) __attribute__ ((always_inline)) { PORT->Group[_GRP].OUT.reg = val; }
+    inline static void hi() __attribute__ ((always_inline)) { PORT->Group[_GRP].OUTSET.reg = _MASK; }
+    inline static void lo() __attribute__ ((always_inline)) { PORT->Group[_GRP].OUTCLR.reg = _MASK; }
+    inline static void set(register port_t val) __attribute__ ((always_inline)) { PORT->Group[_GRP].OUT.reg = val; }
 
-  inline static void strobe() __attribute__ ((always_inline)) { toggle(); toggle(); }
+    inline static void strobe() __attribute__ ((always_inline)) { toggle(); toggle(); }
 
-  inline static void toggle() __attribute__ ((always_inline)) { PORT->Group[_GRP].OUTTGL.reg = _MASK; }
+    inline static void toggle() __attribute__ ((always_inline)) { PORT->Group[_GRP].OUTTGL.reg = _MASK; }
 
-  inline static void hi(register port_ptr_t port) __attribute__ ((always_inline)) { hi(); }
-  inline static void lo(register port_ptr_t port) __attribute__ ((always_inline)) { lo(); }
-  inline static void fastset(register port_ptr_t port, register port_t val) __attribute__ ((always_inline)) { *port = val; }
+    inline static void hi(register port_ptr_t port) __attribute__ ((always_inline)) { hi(); }
+    inline static void lo(register port_ptr_t port) __attribute__ ((always_inline)) { lo(); }
+    inline static void fastset(register port_ptr_t port, register port_t val) __attribute__ ((always_inline)) { *port = val; }
 
-  inline static port_t hival() __attribute__ ((always_inline)) { return PORT->Group[_GRP].OUT.reg | _MASK; }
-  inline static port_t loval() __attribute__ ((always_inline)) { return PORT->Group[_GRP].OUT.reg & ~_MASK; }
-  inline static port_ptr_t port() __attribute__ ((always_inline)) { return &PORT->Group[_GRP].OUT.reg; }
-  inline static port_ptr_t sport() __attribute__ ((always_inline)) { return &PORT->Group[_GRP].OUTSET.reg; }
-  inline static port_ptr_t cport() __attribute__ ((always_inline)) { return &PORT->Group[_GRP].OUTCLR.reg; }
-  inline static port_t mask() __attribute__ ((always_inline)) { return _MASK; }
+    inline static port_t hival() __attribute__ ((always_inline)) { return PORT->Group[_GRP].OUT.reg | _MASK; }
+    inline static port_t loval() __attribute__ ((always_inline)) { return PORT->Group[_GRP].OUT.reg & ~_MASK; }
+    inline static port_ptr_t port() __attribute__ ((always_inline)) { return &PORT->Group[_GRP].OUT.reg; }
+    inline static port_ptr_t sport() __attribute__ ((always_inline)) { return &PORT->Group[_GRP].OUTSET.reg; }
+    inline static port_ptr_t cport() __attribute__ ((always_inline)) { return &PORT->Group[_GRP].OUTCLR.reg; }
+    inline static port_t mask() __attribute__ ((always_inline)) { return _MASK; }
 };
 
 #define _R(T) struct __gen_struct_ ## T
@@ -130,7 +130,6 @@ _FL_DEFPIN(23, 22, 1); _FL_DEFPIN(24,  23, 1); _FL_DEFPIN(25,  17, 0);
 #endif
 
 
-
 #endif // FASTLED_FORCE_SOFTWARE_PINS
 
 FASTLED_NAMESPACE_END
diff --git a/platforms/arm/k20/clockless_arm_k20.h b/platforms/arm/k20/clockless_arm_k20.h
index bc2090b3ad..87e8634573 100644
--- a/platforms/arm/k20/clockless_arm_k20.h
+++ b/platforms/arm/k20/clockless_arm_k20.h
@@ -17,6 +17,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 	data_t mPinMask;
 	data_ptr_t mPort;
 	CMinWait<WAIT_TIME> mWait;
+
 public:
 	virtual void init() {
 		FastPin<DATA_PIN>::setOutput();
@@ -27,15 +28,14 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 	virtual uint16_t getMaxRefreshRate() const { return 400; }
 
 protected:
-
 	virtual void showPixels(PixelController<RGB_ORDER> & pixels) {
-    mWait.wait();
+		mWait.wait();
 		if(!showRGBInternal(pixels)) {
-      sei(); delayMicroseconds(WAIT_TIME); cli();
-      showRGBInternal(pixels);
-    }
-    mWait.mark();
-  }
+			sei(); delayMicroseconds(WAIT_TIME); cli();
+			showRGBInternal(pixels);
+		}
+		mWait.mark();
+	}
 
 	template<int BITS> __attribute__ ((always_inline)) inline static void writeBits(register uint32_t & next_mark, register data_ptr_t port, register data_t hi, register data_t lo, register uint8_t & b)  {
 		for(register uint32_t i = BITS-1; i > 0; i--) {
diff --git a/platforms/arm/k20/clockless_block_arm_k20.h b/platforms/arm/k20/clockless_block_arm_k20.h
index 66c6191c77..c0d838d278 100644
--- a/platforms/arm/k20/clockless_block_arm_k20.h
+++ b/platforms/arm/k20/clockless_block_arm_k20.h
@@ -27,6 +27,7 @@ class InlineBlockClocklessController : public CPixelLEDController<RGB_ORDER, LAN
 	data_t mPinMask;
 	data_ptr_t mPort;
 	CMinWait<WAIT_TIME> mWait;
+
 public:
 	virtual int size() { return CLEDController::size() * LANES; }
 
@@ -197,6 +198,7 @@ class SixteenWayInlineBlockClocklessController : public CPixelLEDController<RGB_
 	data_t mPinMask;
 	data_ptr_t mPort;
 	CMinWait<WAIT_TIME> mWait;
+
 public:
 	virtual void init() {
 		static_assert(LANES <= 16, "Maximum of 16 lanes for Teensy parallel controllers!");
diff --git a/platforms/arm/k20/fastspi_arm_k20.h b/platforms/arm/k20/fastspi_arm_k20.h
index 0512324368..cbb72a9f6b 100644
--- a/platforms/arm/k20/fastspi_arm_k20.h
+++ b/platforms/arm/k20/fastspi_arm_k20.h
@@ -29,6 +29,7 @@ template<int VAL, int BIT> class BitWork {
 public:
 	static int highestBit() __attribute__((always_inline)) { return (VAL & 1 << BIT) ? BIT : BitWork<VAL, BIT-1>::highestBit(); }
 };
+
 template<int VAL> class BitWork<VAL, 0> {
 public:
 	static int highestBit() __attribute__((always_inline)) { return 0; }
@@ -246,7 +247,6 @@ class ARMHardwareSPIOutput {
 	ARMHardwareSPIOutput(Selectable *pSelect) { m_pSelect = pSelect; }
 	void setSelect(Selectable *pSelect) { m_pSelect = pSelect; }
 
-
 	void init() {
 		// set the pins to output
 		FastPin<_DATA_PIN>::setOutput();
@@ -316,8 +316,8 @@ class ARMHardwareSPIOutput {
 			if(WAIT_STATE == PRE) { wait(); }
 			cli();
 			SPIX.PUSHR = ((LAST_STATE == LAST) ? SPI_PUSHR_EOQ : 0) |
-			((CONT_STATE == CONT) ? SPI_PUSHR_CONT : 0) |
-			SPI_PUSHR_CTAS(1) | (w & 0xFFFF);
+						 ((CONT_STATE == CONT) ? SPI_PUSHR_CONT : 0) |
+						 SPI_PUSHR_CTAS(1) | (w & 0xFFFF);
 			SPIX.SR |= SPI_SR_TCF;
 			sei();
 			if(WAIT_STATE == POST) { wait(); }
@@ -327,8 +327,8 @@ class ARMHardwareSPIOutput {
 			if(WAIT_STATE == PRE) { wait(); }
 			cli();
 			SPIX.PUSHR = ((LAST_STATE == LAST) ? SPI_PUSHR_EOQ : 0) |
-			((CONT_STATE == CONT) ? SPI_PUSHR_CONT : 0) |
-			SPI_PUSHR_CTAS(0) | (b & 0xFF);
+						 ((CONT_STATE == CONT) ? SPI_PUSHR_CONT : 0) |
+						 SPI_PUSHR_CTAS(0) | (b & 0xFF);
 			SPIX.SR |= SPI_SR_TCF;
 			sei();
 			if(WAIT_STATE == POST) { wait(); }
diff --git a/platforms/arm/k20/octows2811_controller.h b/platforms/arm/k20/octows2811_controller.h
index 84c28667d1..749e18e1a6 100644
--- a/platforms/arm/k20/octows2811_controller.h
+++ b/platforms/arm/k20/octows2811_controller.h
@@ -9,54 +9,54 @@ FASTLED_NAMESPACE_BEGIN
 
 template<EOrder RGB_ORDER = GRB, uint8_t CHIP = WS2811_800kHz>
 class COctoWS2811Controller : public CPixelLEDController<RGB_ORDER, 8, 0xFF> {
-  OctoWS2811  *pocto;
-  uint8_t *drawbuffer,*framebuffer;
+    OctoWS2811  *pocto;
+    uint8_t *drawbuffer,*framebuffer;
 
-  void _init(int nLeds) {
-    if(pocto == NULL) {
-      drawbuffer = (uint8_t*)malloc(nLeds * 8 * 3);
-      framebuffer = (uint8_t*)malloc(nLeds * 8 * 3);
+    void _init(int nLeds) {
+        if(pocto == NULL) {
+            drawbuffer = (uint8_t*)malloc(nLeds * 8 * 3);
+            framebuffer = (uint8_t*)malloc(nLeds * 8 * 3);
 
-      // byte ordering is handled in show by the pixel controller
-      int config = WS2811_RGB;
-      config |= CHIP;
+            // byte ordering is handled in show by the pixel controller
+            int config = WS2811_RGB;
+            config |= CHIP;
 
-      pocto = new OctoWS2811(nLeds, framebuffer, drawbuffer, config);
+            pocto = new OctoWS2811(nLeds, framebuffer, drawbuffer, config);
 
-      pocto->begin();
+            pocto->begin();
+        }
     }
-  }
+
 public:
-  COctoWS2811Controller() { pocto = NULL; }
-  virtual int size() { return CLEDController::size() * 8; }
-
-  virtual void init() { /* do nothing yet */ }
-
-  typedef union {
-    uint8_t bytes[8];
-    uint32_t raw[2];
-  } Lines;
-
-  virtual void showPixels(PixelController<RGB_ORDER, 8, 0xFF> & pixels) {
-    _init(pixels.size());
-
-    uint8_t *pData = drawbuffer;
-    while(pixels.has(1)) {
-      Lines b;
-
-      for(int i = 0; i < 8; i++) { b.bytes[i] = pixels.loadAndScale0(i); }
-      transpose8x1_MSB(b.bytes,pData); pData += 8;
-      for(int i = 0; i < 8; i++) { b.bytes[i] = pixels.loadAndScale1(i); }
-      transpose8x1_MSB(b.bytes,pData); pData += 8;
-      for(int i = 0; i < 8; i++) { b.bytes[i] = pixels.loadAndScale2(i); }
-      transpose8x1_MSB(b.bytes,pData); pData += 8;
-      pixels.stepDithering();
-      pixels.advanceData();
+    COctoWS2811Controller() { pocto = NULL; }
+    virtual int size() { return CLEDController::size() * 8; }
+
+    virtual void init() { /* do nothing yet */ }
+
+    typedef union {
+        uint8_t bytes[8];
+        uint32_t raw[2];
+    } Lines;
+
+    virtual void showPixels(PixelController<RGB_ORDER, 8, 0xFF> & pixels) {
+        _init(pixels.size());
+
+        uint8_t *pData = drawbuffer;
+        while(pixels.has(1)) {
+            Lines b;
+
+            for(int i = 0; i < 8; i++) { b.bytes[i] = pixels.loadAndScale0(i); }
+            transpose8x1_MSB(b.bytes,pData); pData += 8;
+            for(int i = 0; i < 8; i++) { b.bytes[i] = pixels.loadAndScale1(i); }
+            transpose8x1_MSB(b.bytes,pData); pData += 8;
+            for(int i = 0; i < 8; i++) { b.bytes[i] = pixels.loadAndScale2(i); }
+            transpose8x1_MSB(b.bytes,pData); pData += 8;
+            pixels.stepDithering();
+            pixels.advanceData();
+        }
+
+        pocto->show();
     }
-
-    pocto->show();
-  }
-
 };
 
 FASTLED_NAMESPACE_END
diff --git a/platforms/arm/k20/smartmatrix_t3.h b/platforms/arm/k20/smartmatrix_t3.h
index 95af46cf0a..c9747f0b41 100644
--- a/platforms/arm/k20/smartmatrix_t3.h
+++ b/platforms/arm/k20/smartmatrix_t3.h
@@ -10,42 +10,41 @@ extern SmartMatrix *pSmartMatrix;
 
 // note - dmx simple must be included before FastSPI for this code to be enabled
 class CSmartMatrixController : public CPixelLEDController<RGB_ORDER> {
-  SmartMatrix matrix;
+    SmartMatrix matrix;
 
 public:
-  // initialize the LED controller
-  virtual void init() {
-      // Initialize 32x32 LED Matrix
-    matrix.begin();
-    matrix.setBrightness(255);
-    matrix.setColorCorrection(ccNone);
-
-    // Clear screen
-    clearLeds(0);
-    matrix.swapBuffers();
-    pSmartMatrix = &matrix;
-  }
-
-  virtual void showPixels(PixelController<RGB_ORDER> & pixels) {
-    if(SMART_MATRIX_CAN_TRIPLE_BUFFER) {
-      rgb24 *md = matrix.getRealBackBuffer();
-    } else {
-      rgb24 *md = matrix.backBuffer();
+    // initialize the LED controller
+    virtual void init() {
+        // Initialize 32x32 LED Matrix
+        matrix.begin();
+        matrix.setBrightness(255);
+        matrix.setColorCorrection(ccNone);
+
+        // Clear screen
+        clearLeds(0);
+        matrix.swapBuffers();
+        pSmartMatrix = &matrix;
     }
-    while(pixels.has(1)) {
-      md->red = pixels.loadAndScale0();
-      md->green = pixels.loadAndScale1();
-      md->blue = pixels.loadAndScale2();
-      md++;
-      pixels.advanceData();
-      pixels.stepDithering();
-    }
-    matrix.swapBuffers();
-    if(SMART_MATRIX_CAN_TRIPLE_BUFFER && pixels.advanceBy() > 0) {
-      matrix.setBackBuffer(pixels.mData);
-    }
-  }
 
+    virtual void showPixels(PixelController<RGB_ORDER> & pixels) {
+        if(SMART_MATRIX_CAN_TRIPLE_BUFFER) {
+            rgb24 *md = matrix.getRealBackBuffer();
+        } else {
+            rgb24 *md = matrix.backBuffer();
+        }
+        while(pixels.has(1)) {
+            md->red = pixels.loadAndScale0();
+            md->green = pixels.loadAndScale1();
+            md->blue = pixels.loadAndScale2();
+            md++;
+            pixels.advanceData();
+            pixels.stepDithering();
+        }
+        matrix.swapBuffers();
+        if(SMART_MATRIX_CAN_TRIPLE_BUFFER && pixels.advanceBy() > 0) {
+            matrix.setBackBuffer(pixels.mData);
+        }
+    }
 };
 
 FASTLED_NAMESPACE_END
diff --git a/platforms/arm/k20/ws2812serial_controller.h b/platforms/arm/k20/ws2812serial_controller.h
index 0bca7d5eab..a761dd49ee 100644
--- a/platforms/arm/k20/ws2812serial_controller.h
+++ b/platforms/arm/k20/ws2812serial_controller.h
@@ -7,36 +7,37 @@ FASTLED_NAMESPACE_BEGIN
 
 template<int DATA_PIN, EOrder RGB_ORDER>
 class CWS2812SerialController : public CPixelLEDController<RGB_ORDER, 8, 0xFF> {
-  WS2812Serial *pserial;
-  uint8_t *drawbuffer,*framebuffer;
-
-  void _init(int nLeds) {
-    if (pserial == NULL) {
-      drawbuffer = (uint8_t*)malloc(nLeds * 3);
-      framebuffer = (uint8_t*)malloc(nLeds * 12);
-      pserial = new WS2812Serial(nLeds, framebuffer, drawbuffer, DATA_PIN, WS2812_RGB);
-      pserial->begin();
+    WS2812Serial *pserial;
+    uint8_t *drawbuffer,*framebuffer;
+
+    void _init(int nLeds) {
+        if (pserial == NULL) {
+            drawbuffer = (uint8_t*)malloc(nLeds * 3);
+            framebuffer = (uint8_t*)malloc(nLeds * 12);
+            pserial = new WS2812Serial(nLeds, framebuffer, drawbuffer, DATA_PIN, WS2812_RGB);
+            pserial->begin();
+        }
     }
-  }
+
 public:
-  CWS2812SerialController() { pserial = NULL; }
+    CWS2812SerialController() { pserial = NULL; }
 
-  virtual void init() { /* do nothing yet */ }
+    virtual void init() { /* do nothing yet */ }
 
-  virtual void showPixels(PixelController<RGB_ORDER, 8, 0xFF> & pixels) {
-    _init(pixels.size());
+    virtual void showPixels(PixelController<RGB_ORDER, 8, 0xFF> & pixels) {
+        _init(pixels.size());
 
-    uint8_t *p = drawbuffer;
+        uint8_t *p = drawbuffer;
 
-    while(pixels.has(1)) {
-      *p++ = pixels.loadAndScale0();
-      *p++ = pixels.loadAndScale1();
-      *p++ = pixels.loadAndScale2();
-      pixels.stepDithering();
-      pixels.advanceData();
+        while(pixels.has(1)) {
+            *p++ = pixels.loadAndScale0();
+            *p++ = pixels.loadAndScale1();
+            *p++ = pixels.loadAndScale2();
+            pixels.stepDithering();
+            pixels.advanceData();
+        }
+        pserial->show();
     }
-    pserial->show();
-  }
 
 };
 
diff --git a/platforms/arm/k66/clockless_arm_k66.h b/platforms/arm/k66/clockless_arm_k66.h
index 6102105754..ec4241f701 100644
--- a/platforms/arm/k66/clockless_arm_k66.h
+++ b/platforms/arm/k66/clockless_arm_k66.h
@@ -17,6 +17,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 	data_t mPinMask;
 	data_ptr_t mPort;
 	CMinWait<WAIT_TIME> mWait;
+
 public:
 	virtual void init() {
 		FastPin<DATA_PIN>::setOutput();
@@ -27,15 +28,14 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 	virtual uint16_t getMaxRefreshRate() const { return 400; }
 
 protected:
-
 	virtual void showPixels(PixelController<RGB_ORDER> & pixels) {
-    mWait.wait();
+		mWait.wait();
 		if(!showRGBInternal(pixels)) {
-      sei(); delayMicroseconds(WAIT_TIME); cli();
-      showRGBInternal(pixels);
-    }
-    mWait.mark();
-  }
+			sei(); delayMicroseconds(WAIT_TIME); cli();
+			showRGBInternal(pixels);
+		}
+		mWait.mark();
+	}
 
 	template<int BITS> __attribute__ ((always_inline)) inline static void writeBits(register uint32_t & next_mark, register data_ptr_t port, register data_t hi, register data_t lo, register uint8_t & b)  {
 		for(register uint32_t i = BITS-1; i > 0; i--) {
diff --git a/platforms/arm/k66/clockless_block_arm_k66.h b/platforms/arm/k66/clockless_block_arm_k66.h
index 85a8cc719d..c7eb99251f 100644
--- a/platforms/arm/k66/clockless_block_arm_k66.h
+++ b/platforms/arm/k66/clockless_block_arm_k66.h
@@ -30,6 +30,7 @@ class InlineBlockClocklessController : public CPixelLEDController<RGB_ORDER, LAN
 	data_t mPinMask;
 	data_ptr_t mPort;
 	CMinWait<WAIT_TIME> mWait;
+
 public:
 	virtual int size() { return CLEDController::size() * LANES; }
 
@@ -153,7 +154,7 @@ class InlineBlockClocklessController : public CPixelLEDController<RGB_ORDER, LAN
 
 	// This method is made static to force making register Y available to use for data on AVR - if the method is non-static, then
 	// gcc will use register Y for the this pointer.
-		static uint32_t showRGBInternal(PixelController<RGB_ORDER, LANES, LANE_MASK> &allpixels) {
+	static uint32_t showRGBInternal(PixelController<RGB_ORDER, LANES, LANE_MASK> &allpixels) {
 		// Get access to the clock
 		ARM_DEMCR    |= ARM_DEMCR_TRCENA;
 		ARM_DWT_CTRL |= ARM_DWT_CTRL_CYCCNTENA;
@@ -211,6 +212,7 @@ class SixteenWayInlineBlockClocklessController : public CPixelLEDController<RGB_
 	data_t mPinMask;
 	data_ptr_t mPort;
 	CMinWait<WAIT_TIME> mWait;
+
 public:
 	virtual void init() {
 		static_assert(LANES <= 16, "Maximum of 16 lanes for Teensy parallel controllers!");
@@ -242,11 +244,11 @@ class SixteenWayInlineBlockClocklessController : public CPixelLEDController<RGB_
 	virtual void showPixels(PixelController<RGB_ORDER, LANES, PMASK> & pixels) { 
 		mWait.wait();
 		uint32_t clocks = showRGBInternal(pixels);
-		#if FASTLED_ALLOW_INTERRUPTS == 0
+	#if FASTLED_ALLOW_INTERRUPTS == 0
 		// Adjust the timer
 		long microsTaken = CLKS_TO_MICROS(clocks);
 		MS_COUNTER += (1 + (microsTaken / 1000));
-		#endif
+	#endif
 
 		mWait.mark();
 	}
@@ -286,10 +288,9 @@ class SixteenWayInlineBlockClocklessController : public CPixelLEDController<RGB_
 	}
 
 
-
 	// This method is made static to force making register Y available to use for data on AVR - if the method is non-static, then
 	// gcc will use register Y for the this pointer.
-		static uint32_t showRGBInternal(PixelController<RGB_ORDER,LANES, PMASK> &allpixels) {
+	static uint32_t showRGBInternal(PixelController<RGB_ORDER,LANES, PMASK> &allpixels) {
 		// Get access to the clock
 		ARM_DEMCR    |= ARM_DEMCR_TRCENA;
 		ARM_DWT_CTRL |= ARM_DWT_CTRL_CYCCNTENA;
@@ -309,13 +310,15 @@ class SixteenWayInlineBlockClocklessController : public CPixelLEDController<RGB_
 
 		while(allpixels.has(1)) {
 			allpixels.stepDithering();
-			#if 0 && (FASTLED_ALLOW_INTERRUPTS == 1)
+		#if 0 && (FASTLED_ALLOW_INTERRUPTS == 1)
 			cli();
 			// if interrupts took longer than 45µs, punt on the current frame
 			if(ARM_DWT_CYCCNT > next_mark) {
-				if((ARM_DWT_CYCCNT-next_mark) > ((WAIT_TIME-INTERRUPT_THRESHOLD)*CLKS_PER_US)) { sei(); return ARM_DWT_CYCCNT; }
+				if((ARM_DWT_CYCCNT-next_mark) > ((WAIT_TIME-INTERRUPT_THRESHOLD)*CLKS_PER_US)) {
+					sei();
+					return ARM_DWT_CYCCNT; }
 			}
-			#endif
+		#endif
 
 			// Write first byte, read next byte
 			writeBits<8+XTRA0,1>(next_mark, b0, allpixels);
@@ -327,9 +330,9 @@ class SixteenWayInlineBlockClocklessController : public CPixelLEDController<RGB_
 			// Write third byte
 			writeBits<8+XTRA0,0>(next_mark, b0, allpixels);
 
-			#if 0 && (FASTLED_ALLOW_INTERRUPTS == 1)
+		#if 0 && (FASTLED_ALLOW_INTERRUPTS == 1)
 			sei();
-			#endif
+		#endif
 		};
 		sei();
 
diff --git a/platforms/arm/k66/fastspi_arm_k66.h b/platforms/arm/k66/fastspi_arm_k66.h
index a40e598522..e0683fa2ee 100644
--- a/platforms/arm/k66/fastspi_arm_k66.h
+++ b/platforms/arm/k66/fastspi_arm_k66.h
@@ -37,6 +37,7 @@ template<int VAL, int BIT> class BitWork {
 public:
 	static int highestBit() __attribute__((always_inline)) { return (VAL & 1 << BIT) ? BIT : BitWork<VAL, BIT-1>::highestBit(); }
 };
+
 template<int VAL> class BitWork<VAL, 0> {
 public:
 	static int highestBit() __attribute__((always_inline)) { return 0; }
@@ -248,7 +249,6 @@ class ARMHardwareSPIOutput {
 		// CORE_PIN14_CONFIG = gState.pins[3];
 	}
 
-
 public:
 	ARMHardwareSPIOutput() { m_pSelect = NULL; }
 	ARMHardwareSPIOutput(Selectable *pSelect) { m_pSelect = pSelect; }
@@ -323,8 +323,8 @@ class ARMHardwareSPIOutput {
 		static void writeWord(uint16_t w) __attribute__((always_inline)) {
 			if(WAIT_STATE == PRE) { wait(); }
 			SPIX.PUSHR = ((LAST_STATE == LAST) ? SPI_PUSHR_EOQ : 0) |
-			((CONT_STATE == CONT) ? SPI_PUSHR_CONT : 0) |
-			SPI_PUSHR_CTAS(1) | (w & 0xFFFF);
+						 ((CONT_STATE == CONT) ? SPI_PUSHR_CONT : 0) |
+						 SPI_PUSHR_CTAS(1) | (w & 0xFFFF);
 			SPIX.SR |= SPI_SR_TCF;
 			if(WAIT_STATE == POST) { wait(); }
 		}
@@ -332,8 +332,8 @@ class ARMHardwareSPIOutput {
 		static void writeByte(uint8_t b) __attribute__((always_inline)) {
 			if(WAIT_STATE == PRE) { wait(); }
 			SPIX.PUSHR = ((LAST_STATE == LAST) ? SPI_PUSHR_EOQ : 0) |
-			((CONT_STATE == CONT) ? SPI_PUSHR_CONT : 0) |
-			SPI_PUSHR_CTAS(0) | (b & 0xFF);
+						 ((CONT_STATE == CONT) ? SPI_PUSHR_CONT : 0) |
+						 SPI_PUSHR_CTAS(0) | (b & 0xFF);
 			SPIX.SR |= SPI_SR_TCF;
 			if(WAIT_STATE == POST) { wait(); }
 		}
diff --git a/platforms/arm/mxrt1062/block_clockless_arm_mxrt1062.h b/platforms/arm/mxrt1062/block_clockless_arm_mxrt1062.h
index 73f73de876..5c878c728b 100644
--- a/platforms/arm/mxrt1062/block_clockless_arm_mxrt1062.h
+++ b/platforms/arm/mxrt1062/block_clockless_arm_mxrt1062.h
@@ -10,197 +10,193 @@ FASTLED_NAMESPACE_BEGIN
 #define __FL_T4_MASK ((1<<(LANES))-1)
 template <uint8_t LANES, int FIRST_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = GRB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 50>
 class FlexibleInlineBlockClocklessController : public CPixelLEDController<RGB_ORDER, LANES, __FL_T4_MASK> {
+    uint8_t m_bitOffsets[16];
+    uint8_t m_nActualLanes;
+    uint8_t m_nLowBit;
+    uint8_t m_nHighBit;
+    uint32_t m_nWriteMask;
+    uint8_t m_nOutBlocks;
+    uint32_t m_offsets[3];
+    CMinWait<WAIT_TIME> mWait;
 
-  uint8_t m_bitOffsets[16];
-  uint8_t m_nActualLanes;
-  uint8_t m_nLowBit;
-  uint8_t m_nHighBit;
-  uint32_t m_nWriteMask;
-  uint8_t m_nOutBlocks;
-  uint32_t m_offsets[3];
-  CMinWait<WAIT_TIME> mWait;
 public:
-
-  virtual int size() { return CLEDController::size() * m_nActualLanes; }
-
-// For each pin, if we've hit our lane count, break, otherwise set the pin to output,
-// store the bit offset in our offset array, add this pin to the write mask, and if this
-// pin ends a block sequence, then break out of the switch as well
-#define _BLOCK_PIN(P) case P: {                           \
-  if(m_nActualLanes == LANES) break;                      \
-  FastPin<P>::setOutput();                                \
-  m_bitOffsets[m_nActualLanes++] = FastPin<P>::pinbit();  \
-  m_nWriteMask |= FastPin<P>::mask();                     \
-  if( P == 27 || P == 7 || P == 30) break;                \
-}
-
-  virtual void init() {
-    // pre-initialize
-    memset(m_bitOffsets,0,16);
-    m_nActualLanes = 0;
-    m_nLowBit = 33;
-    m_nHighBit = 0;
-    m_nWriteMask = 0;
-
-    // setup the bits and data tracking for parallel output
-    switch(FIRST_PIN) {
-      // GPIO6 block output
-      _BLOCK_PIN( 1);
-			_BLOCK_PIN( 0);
-			_BLOCK_PIN(24);
-			_BLOCK_PIN(25);
-			_BLOCK_PIN(19);
-			_BLOCK_PIN(18);
-			_BLOCK_PIN(14);
-			_BLOCK_PIN(15);
-			_BLOCK_PIN(17);
-			_BLOCK_PIN(16);
-			_BLOCK_PIN(22);
-			_BLOCK_PIN(23);
-			_BLOCK_PIN(20);
-			_BLOCK_PIN(21);
-			_BLOCK_PIN(26);
-			_BLOCK_PIN(27);
-      // GPIO7 block output
-			_BLOCK_PIN(10);
-			_BLOCK_PIN(12);
-			_BLOCK_PIN(11);
-			_BLOCK_PIN(13);
-			_BLOCK_PIN( 6);
-			_BLOCK_PIN( 9);
-			_BLOCK_PIN(32);
-			_BLOCK_PIN( 8);
-			_BLOCK_PIN( 7);
-      // GPIO 37 block output
-			_BLOCK_PIN(37);
-			_BLOCK_PIN(36);
-			_BLOCK_PIN(35);
-			_BLOCK_PIN(34);
-			_BLOCK_PIN(39);
-			_BLOCK_PIN(38);
-			_BLOCK_PIN(28);
-			_BLOCK_PIN(31);
-			_BLOCK_PIN(30);
+    virtual int size() { return CLEDController::size() * m_nActualLanes; }
+
+    // For each pin, if we've hit our lane count, break, otherwise set the pin to output,
+    // store the bit offset in our offset array, add this pin to the write mask, and if this
+    // pin ends a block sequence, then break out of the switch as well
+    #define _BLOCK_PIN(P) case P: {                             \
+        if(m_nActualLanes == LANES) break;                      \
+        FastPin<P>::setOutput();                                \
+        m_bitOffsets[m_nActualLanes++] = FastPin<P>::pinbit();  \
+        m_nWriteMask |= FastPin<P>::mask();                     \
+        if( P == 27 || P == 7 || P == 30) break;                \
     }
 
-    for(int i = 0; i < m_nActualLanes; i++) {
-      if(m_bitOffsets[i] < m_nLowBit) { m_nLowBit = m_bitOffsets[i]; }
-      if(m_bitOffsets[i] > m_nHighBit) { m_nHighBit = m_bitOffsets[i]; }
-    }
+    virtual void init() {
+        // pre-initialize
+        memset(m_bitOffsets,0,16);
+        m_nActualLanes = 0;
+        m_nLowBit = 33;
+        m_nHighBit = 0;
+        m_nWriteMask = 0;
+
+        // setup the bits and data tracking for parallel output
+        switch(FIRST_PIN) {
+            // GPIO6 block output
+            _BLOCK_PIN( 1);
+            _BLOCK_PIN( 0);
+            _BLOCK_PIN(24);
+            _BLOCK_PIN(25);
+            _BLOCK_PIN(19);
+            _BLOCK_PIN(18);
+            _BLOCK_PIN(14);
+            _BLOCK_PIN(15);
+            _BLOCK_PIN(17);
+            _BLOCK_PIN(16);
+            _BLOCK_PIN(22);
+            _BLOCK_PIN(23);
+            _BLOCK_PIN(20);
+            _BLOCK_PIN(21);
+            _BLOCK_PIN(26);
+            _BLOCK_PIN(27);
+            // GPIO7 block output
+            _BLOCK_PIN(10);
+            _BLOCK_PIN(12);
+            _BLOCK_PIN(11);
+            _BLOCK_PIN(13);
+            _BLOCK_PIN( 6);
+            _BLOCK_PIN( 9);
+            _BLOCK_PIN(32);
+            _BLOCK_PIN( 8);
+            _BLOCK_PIN( 7);
+            // GPIO8 block output
+            _BLOCK_PIN(37);
+            _BLOCK_PIN(36);
+            _BLOCK_PIN(35);
+            _BLOCK_PIN(34);
+            _BLOCK_PIN(39);
+            _BLOCK_PIN(38);
+            _BLOCK_PIN(28);
+            _BLOCK_PIN(31);
+            _BLOCK_PIN(30);
+        }
 
-    m_nOutBlocks = (m_nHighBit + 8)/8;
+        for(int i = 0; i < m_nActualLanes; i++) {
+            if(m_bitOffsets[i] < m_nLowBit) { m_nLowBit = m_bitOffsets[i]; }
+            if(m_bitOffsets[i] > m_nHighBit) { m_nHighBit = m_bitOffsets[i]; }
+        }
+
+        m_nOutBlocks = (m_nHighBit + 8)/8;
 
-  }
+    }
 
-  virtual uint16_t getMaxRefreshRate() const { return 400; }
+    virtual uint16_t getMaxRefreshRate() const { return 400; }
 
-  virtual void showPixels(PixelController<RGB_ORDER, LANES, __FL_T4_MASK> & pixels) {
-		mWait.wait();
+    virtual void showPixels(PixelController<RGB_ORDER, LANES, __FL_T4_MASK> & pixels) {
+        mWait.wait();
     #if FASTLED_ALLOW_INTERRUPTS == 0
-		uint32_t clocks = showRGBInternal(pixels);
-		// Adjust the timer
-		long microsTaken = CLKS_TO_MICROS(clocks);
-		MS_COUNTER += (1 + (microsTaken / 1000));
-		#else
-      showRGBInternal(pixels);
+        uint32_t clocks = showRGBInternal(pixels);
+        // Adjust the timer
+        long microsTaken = CLKS_TO_MICROS(clocks);
+        MS_COUNTER += (1 + (microsTaken / 1000));
+    #else
+        showRGBInternal(pixels);
     #endif
-
 		mWait.mark();
 	}
 
-  typedef union {
-    uint8_t bytes[32];
-    uint8_t bg[4][8];
-    uint16_t shorts[16];
-    uint32_t raw[8];
-  } _outlines;
-
-
-  template<int BITS,int PX> __attribute__ ((always_inline)) inline void writeBits(register uint32_t & next_mark, register _outlines & b, PixelController<RGB_ORDER, LANES, __FL_T4_MASK> &pixels) {
-    _outlines b2;
-    transpose8x1(b.bg[3], b2.bg[3]);
-    transpose8x1(b.bg[2], b2.bg[2]);
-    transpose8x1(b.bg[1], b2.bg[1]);
-    transpose8x1(b.bg[0], b2.bg[0]);
-
-    register uint8_t d = pixels.template getd<PX>(pixels);
-    register uint8_t scale = pixels.template getscale<PX>(pixels);
-
-    int x = 0;
-    for(uint32_t i = 8; i > 0;) {
-      i--;
-      while(ARM_DWT_CYCCNT < next_mark);
-      *FastPin<FIRST_PIN>::sport() = m_nWriteMask;
-      next_mark = ARM_DWT_CYCCNT + m_offsets[0];
-
-      uint32_t out = (b2.bg[3][i] << 24) | (b2.bg[2][i] << 16) | (b2.bg[1][i] << 8) | b2.bg[0][i];
-
-      out = ((~out) & m_nWriteMask);
-      while((next_mark - ARM_DWT_CYCCNT) > m_offsets[1]);
-      *FastPin<FIRST_PIN>::cport() = out;
-
-      out = m_nWriteMask;
-      while((next_mark - ARM_DWT_CYCCNT) > m_offsets[2]);
-      *FastPin<FIRST_PIN>::cport() = out;
-
-      // Read and store up to two bytes
-      if (x < m_nActualLanes) {
-        b.bytes[m_bitOffsets[x]] = pixels.template loadAndScale<PX>(pixels,x,d,scale);
-        x++;
-        if (x < m_nActualLanes) {
-          b.bytes[m_bitOffsets[x]] = pixels.template loadAndScale<PX>(pixels,x,d,scale);
-          x++;
+    typedef union {
+        uint8_t bytes[32];
+        uint8_t bg[4][8];
+        uint16_t shorts[16];
+        uint32_t raw[8];
+    } _outlines;
+
+    template<int BITS,int PX> __attribute__ ((always_inline)) inline void writeBits(register uint32_t & next_mark, register _outlines & b, PixelController<RGB_ORDER, LANES, __FL_T4_MASK> &pixels) {
+        _outlines b2;
+        transpose8x1(b.bg[3], b2.bg[3]);
+        transpose8x1(b.bg[2], b2.bg[2]);
+        transpose8x1(b.bg[1], b2.bg[1]);
+        transpose8x1(b.bg[0], b2.bg[0]);
+
+        register uint8_t d = pixels.template getd<PX>(pixels);
+        register uint8_t scale = pixels.template getscale<PX>(pixels);
+
+        int x = 0;
+        for(uint32_t i = 8; i > 0;) {
+            i--;
+            while(ARM_DWT_CYCCNT < next_mark);
+            *FastPin<FIRST_PIN>::sport() = m_nWriteMask;
+            next_mark = ARM_DWT_CYCCNT + m_offsets[0];
+
+            uint32_t out = (b2.bg[3][i] << 24) | (b2.bg[2][i] << 16) | (b2.bg[1][i] << 8) | b2.bg[0][i];
+
+            out = ((~out) & m_nWriteMask);
+            while((next_mark - ARM_DWT_CYCCNT) > m_offsets[1]);
+            *FastPin<FIRST_PIN>::cport() = out;
+
+            out = m_nWriteMask;
+            while((next_mark - ARM_DWT_CYCCNT) > m_offsets[2]);
+            *FastPin<FIRST_PIN>::cport() = out;
+
+            // Read and store up to two bytes
+            if (x < m_nActualLanes) {
+                b.bytes[m_bitOffsets[x]] = pixels.template loadAndScale<PX>(pixels,x,d,scale);
+                x++;
+                if (x < m_nActualLanes) {
+                    b.bytes[m_bitOffsets[x]] = pixels.template loadAndScale<PX>(pixels,x,d,scale);
+                    x++;
+                }
+            }
         }
-      }
     }
-  }
 
-  uint32_t showRGBInternal(PixelController<RGB_ORDER,LANES, __FL_T4_MASK> &allpixels) {
-    allpixels.preStepFirstByteDithering();
-    _outlines b0;
-    uint32_t start = ARM_DWT_CYCCNT;
+    uint32_t showRGBInternal(PixelController<RGB_ORDER,LANES, __FL_T4_MASK> &allpixels) {
+        allpixels.preStepFirstByteDithering();
+        _outlines b0;
+        uint32_t start = ARM_DWT_CYCCNT;
 
-    for(int i = 0; i < m_nActualLanes; i++) {
-      b0.bytes[m_bitOffsets[i]] = allpixels.loadAndScale0(i);
-    }
+        for(int i = 0; i < m_nActualLanes; i++) {
+            b0.bytes[m_bitOffsets[i]] = allpixels.loadAndScale0(i);
+        }
 
-    cli();
-    m_offsets[0] = _FASTLED_NS_TO_DWT(T1+T2+T3);
-    m_offsets[1] = _FASTLED_NS_TO_DWT(T2+T3);
-    m_offsets[2] = _FASTLED_NS_TO_DWT(T3);
-    uint32_t wait_off = _FASTLED_NS_TO_DWT((WAIT_TIME-INTERRUPT_THRESHOLD));
-
-    uint32_t next_mark = ARM_DWT_CYCCNT + m_offsets[0];
-
-    while(allpixels.has(1)) {
-      allpixels.stepDithering();
-      #if (FASTLED_ALLOW_INTERRUPTS == 1)
-			cli();
-			// if interrupts took longer than 45µs, punt on the current frame
-			if(ARM_DWT_CYCCNT > next_mark) {
-				if((ARM_DWT_CYCCNT-next_mark) > wait_off) { sei(); return ARM_DWT_CYCCNT - start; }
-			}
-			#endif
-
-			// Write first byte, read next byte
-			writeBits<8+XTRA0,1>(next_mark, b0, allpixels);
-
-			// Write second byte, read 3rd byte
-			writeBits<8+XTRA0,2>(next_mark, b0, allpixels);
-			allpixels.advanceData();
-
-			// Write third byte
-			writeBits<8+XTRA0,0>(next_mark, b0, allpixels);
-
-			#if (FASTLED_ALLOW_INTERRUPTS == 1)
-			sei();
-			#endif
-    }
+        cli();
+
+        m_offsets[0] = _FASTLED_NS_TO_DWT(T1+T2+T3);
+        m_offsets[1] = _FASTLED_NS_TO_DWT(T2+T3);
+        m_offsets[2] = _FASTLED_NS_TO_DWT(T3);
+        uint32_t wait_off = _FASTLED_NS_TO_DWT((WAIT_TIME-INTERRUPT_THRESHOLD));
+
+        uint32_t next_mark = ARM_DWT_CYCCNT + m_offsets[0];
+
+        while(allpixels.has(1)) {
+            allpixels.stepDithering();
+        #if (FASTLED_ALLOW_INTERRUPTS == 1)
+            cli();
+            // if interrupts took longer than 45µs, punt on the current frame
+            if(ARM_DWT_CYCCNT > next_mark) {
+                if((ARM_DWT_CYCCNT-next_mark) > wait_off) { sei(); return ARM_DWT_CYCCNT - start; }
+            }
+        #endif
+            // Write first byte, read next byte
+            writeBits<8+XTRA0,1>(next_mark, b0, allpixels);
+
+            // Write second byte, read 3rd byte
+            writeBits<8+XTRA0,2>(next_mark, b0, allpixels);
+            allpixels.advanceData();
+
+            // Write third byte
+            writeBits<8+XTRA0,0>(next_mark, b0, allpixels);
+        #if (FASTLED_ALLOW_INTERRUPTS == 1)
+            sei();
+        #endif
+        }
 
-    sei();
+        sei();
 
-    return ARM_DWT_CYCCNT - start;
-  }
+        return ARM_DWT_CYCCNT - start;
+    }
 };
 
 template<template<uint8_t DATA_PIN, EOrder RGB_ORDER> class CHIPSET, uint8_t DATA_PIN, int NUM_LANES, EOrder RGB_ORDER=GRB>
diff --git a/platforms/arm/mxrt1062/clockless_arm_mxrt1062.h b/platforms/arm/mxrt1062/clockless_arm_mxrt1062.h
index ed72713aef..dfb772aead 100644
--- a/platforms/arm/mxrt1062/clockless_arm_mxrt1062.h
+++ b/platforms/arm/mxrt1062/clockless_arm_mxrt1062.h
@@ -35,21 +35,20 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 		FastPin<DATA_PIN>::setOutput();
 		mPinMask = FastPin<DATA_PIN>::mask();
 		mPort = FastPin<DATA_PIN>::port();
-    FastPin<DATA_PIN>::lo();
+    	FastPin<DATA_PIN>::lo();
 	}
 
-  virtual uint16_t getMaxRefreshRate() const { return 400; }
+	virtual uint16_t getMaxRefreshRate() const { return 400; }
 
 protected:
-
 	virtual void showPixels(PixelController<RGB_ORDER> & pixels) {
-    mWait.wait();
+    	mWait.wait();
 		if(!showRGBInternal(pixels)) {
-      sei(); delayMicroseconds(WAIT_TIME); cli();
-      showRGBInternal(pixels);
-    }
-    mWait.mark();
-  }
+      		sei(); delayMicroseconds(WAIT_TIME); cli();
+      		showRGBInternal(pixels);
+    	}
+    	mWait.mark();
+  	}
 
 	template<int BITS> __attribute__ ((always_inline)) inline void writeBits(register uint32_t & next_mark, register uint32_t & b)  {
 		for(register uint32_t i = BITS-1; i > 0; i--) {
@@ -87,12 +86,14 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 		register uint32_t b = pixels.loadAndScale0();
 
 		cli();
-    off[0] = _FASTLED_NS_TO_DWT(T1+T2+T3);
-    off[1] = _FASTLED_NS_TO_DWT(T2+T3);
+
+		off[0] = _FASTLED_NS_TO_DWT(T1+T2+T3);
+		off[1] = _FASTLED_NS_TO_DWT(T2+T3);
 		off[2] = _FASTLED_NS_TO_DWT(T3);
-    uint32_t wait_off = _FASTLED_NS_TO_DWT((WAIT_TIME-INTERRUPT_THRESHOLD));
 
-    uint32_t next_mark = ARM_DWT_CYCCNT + off[0];
+    	uint32_t wait_off = _FASTLED_NS_TO_DWT((WAIT_TIME-INTERRUPT_THRESHOLD));
+
+    	uint32_t next_mark = ARM_DWT_CYCCNT + off[0];
 
 		while(pixels.has(1)) {
 			pixels.stepDithering();
diff --git a/platforms/arm/mxrt1062/fastpin_arm_mxrt1062.h b/platforms/arm/mxrt1062/fastpin_arm_mxrt1062.h
index 38c8841023..8960a8c9c6 100644
--- a/platforms/arm/mxrt1062/fastpin_arm_mxrt1062.h
+++ b/platforms/arm/mxrt1062/fastpin_arm_mxrt1062.h
@@ -39,7 +39,7 @@ template<uint8_t PIN, uint32_t _BIT, uint32_t _MASK, typename _GPIO_DR, typename
 	inline static port_ptr_t sport() __attribute__ ((always_inline)) { return &_GPIO_DR_SET::r(); }
 	inline static port_ptr_t cport() __attribute__ ((always_inline)) { return &_GPIO_DR_CLEAR::r(); }
 	inline static port_t mask() __attribute__ ((always_inline)) { return _MASK; }
-  inline static uint32_t pinbit() __attribute__ ((always_inline)) { return _BIT; }
+	inline static uint32_t pinbit() __attribute__ ((always_inline)) { return _BIT; }
 };
 
 
diff --git a/platforms/arm/mxrt1062/fastspi_arm_mxrt1062.h b/platforms/arm/mxrt1062/fastspi_arm_mxrt1062.h
index fa6b81ff4a..068c7be185 100644
--- a/platforms/arm/mxrt1062/fastspi_arm_mxrt1062.h
+++ b/platforms/arm/mxrt1062/fastspi_arm_mxrt1062.h
@@ -9,18 +9,18 @@ FASTLED_NAMESPACE_BEGIN
 template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint32_t _SPI_CLOCK_RATE, SPIClass & _SPIObject, int _SPI_INDEX>
 class Teesy4HardwareSPIOutput {
 	Selectable *m_pSelect;
-  uint32_t  m_bitCount;
-  uint32_t m_bitData;
-  inline IMXRT_LPSPI_t & port() __attribute__((always_inline)) {
-    switch(_SPI_INDEX) {
-      case 0:
-        return IMXRT_LPSPI4_S;
-      case 1:
-        return IMXRT_LPSPI3_S;
-      case 2:
-        return IMXRT_LPSPI1_S;
-    }
-  }
+	uint32_t  m_bitCount;
+	uint32_t m_bitData;
+	inline IMXRT_LPSPI_t & port() __attribute__((always_inline)) {
+		switch(_SPI_INDEX) {
+			case 0:
+			return IMXRT_LPSPI4_S;
+			case 1:
+			return IMXRT_LPSPI3_S;
+			case 2:
+			return IMXRT_LPSPI1_S;
+		}
+	}
 
 public:
 	Teesy4HardwareSPIOutput() { m_pSelect = NULL; m_bitCount = 0;}
@@ -34,42 +34,42 @@ class Teesy4HardwareSPIOutput {
 
 	// latch the CS select
 	void inline select() __attribute__((always_inline)) {
-    // begin the SPI transaction
-    _SPIObject.beginTransaction(SPISettings(_SPI_CLOCK_RATE, MSBFIRST, SPI_MODE0));
-    if(m_pSelect != NULL) { m_pSelect->select(); }
-  }
+		// begin the SPI transaction
+		_SPIObject.beginTransaction(SPISettings(_SPI_CLOCK_RATE, MSBFIRST, SPI_MODE0));
+		if(m_pSelect != NULL) { m_pSelect->select(); }
+	}
 
 	// release the CS select
 	void inline release() __attribute__((always_inline)) {
-    if(m_pSelect != NULL) { m_pSelect->release(); }
-    _SPIObject.endTransaction();
-  }
+		if(m_pSelect != NULL) { m_pSelect->release(); }
+		_SPIObject.endTransaction();
+	}
 
 	// wait until all queued up data has been written
 	static void waitFully() { /* TODO */ }
 
 	// write a byte out via SPI (returns immediately on writing register) -
 	void inline writeByte(uint8_t b) __attribute__((always_inline)) {
-    if(m_bitCount == 0) {
-      _SPIObject.transfer(b);
-    } else {
-      // There's been a bit of data written, add that to the output as well
-      uint32_t outData = (m_bitData << 8) | b;
-      uint32_t tcr = port().TCR;
-      port().TCR = (tcr & 0xfffff000) | LPSPI_TCR_FRAMESZ((8+m_bitCount) - 1);  // turn on 9 bit mode
-      port().TDR = outData;		// output 9 bit data.
-      while ((port().RSR & LPSPI_RSR_RXEMPTY)) ;	// wait while the RSR fifo is empty...
+		if(m_bitCount == 0) {
+			_SPIObject.transfer(b);
+		} else {
+			// There's been a bit of data written, add that to the output as well
+			uint32_t outData = (m_bitData << 8) | b;
+			uint32_t tcr = port().TCR;
+			port().TCR = (tcr & 0xfffff000) | LPSPI_TCR_FRAMESZ((8+m_bitCount) - 1);  // turn on 9 bit mode
+			port().TDR = outData;		// output 9 bit data.
+			while ((port().RSR & LPSPI_RSR_RXEMPTY)) ;	// wait while the RSR fifo is empty...
 			port().TCR = (tcr & 0xfffff000) | LPSPI_TCR_FRAMESZ((8) - 1);  // turn back on 8 bit mode
-      port().RDR;
-      m_bitCount = 0;
-    }
-  }
+			port().RDR;
+			m_bitCount = 0;
+		}
+	}
 
 	// write a word out via SPI (returns immediately on writing register)
 	void inline writeWord(uint16_t w) __attribute__((always_inline)) {
-    writeByte(((w>>8) & 0xFF));
-    _SPIObject.transfer(w & 0xFF);
-  }
+		writeByte(((w>>8) & 0xFF));
+		_SPIObject.transfer(w & 0xFF);
+	}
 
 	// A raw set of writing byte values, assumes setup/init/waiting done elsewhere
 	static void writeBytesValueRaw(uint8_t value, int len) {
@@ -99,16 +99,16 @@ class Teesy4HardwareSPIOutput {
 
 	// write a single bit out, which bit from the passed in byte is determined by template parameter
 	template <uint8_t BIT> inline void writeBit(uint8_t b) {
-    m_bitData = (m_bitData<<1) | ((b&(1<<BIT)) != 0);
-    // If this is the 8th bit we've collected, just write it out raw
-    register uint32_t bc = m_bitCount;
-    bc = (bc + 1) & 0x07;
-    if (!bc) {
-      m_bitCount = 0;
-      _SPIObject.transfer(m_bitData);
-    }
-    m_bitCount = bc;
-  }
+		m_bitData = (m_bitData<<1) | ((b&(1<<BIT)) != 0);
+		// If this is the 8th bit we've collected, just write it out raw
+		register uint32_t bc = m_bitCount;
+		bc = (bc + 1) & 0x07;
+		if (!bc) {
+			m_bitCount = 0;
+			_SPIObject.transfer(m_bitData);
+		}
+		m_bitCount = bc;
+	}
 
 	// write a block of uint8_ts out in groups of three.  len is the total number of uint8_ts to write out.  The template
 	// parameters indicate how many uint8_ts to skip at the beginning and/or end of each grouping
diff --git a/platforms/arm/nrf51/clockless_arm_nrf51.h b/platforms/arm/nrf51/clockless_arm_nrf51.h
index b748957993..c607e61e5f 100644
--- a/platforms/arm/nrf51/clockless_arm_nrf51.h
+++ b/platforms/arm/nrf51/clockless_arm_nrf51.h
@@ -20,62 +20,63 @@
 #include "../common/m0clockless.h"
 template <uint8_t DATA_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = RGB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 75>
 class ClocklessController : public CPixelLEDController<RGB_ORDER> {
-  typedef typename FastPinBB<DATA_PIN>::port_ptr_t data_ptr_t;
-  typedef typename FastPinBB<DATA_PIN>::port_t data_t;
+    typedef typename FastPinBB<DATA_PIN>::port_ptr_t data_ptr_t;
+    typedef typename FastPinBB<DATA_PIN>::port_t data_t;
+
+    data_t mPinMask;
+    data_ptr_t mPort;
+    CMinWait<WAIT_TIME> mWait;
 
-  data_t mPinMask;
-  data_ptr_t mPort;
-  CMinWait<WAIT_TIME> mWait;
 public:
-  virtual void init() {
-    FastPinBB<DATA_PIN>::setOutput();
-    mPinMask = FastPinBB<DATA_PIN>::mask();
-    mPort = FastPinBB<DATA_PIN>::port();
-  }
+    virtual void init() {
+        FastPinBB<DATA_PIN>::setOutput();
+        mPinMask = FastPinBB<DATA_PIN>::mask();
+        mPort = FastPinBB<DATA_PIN>::port();
+    }
 
 	virtual uint16_t getMaxRefreshRate() const { return 400; }
 
-  virtual void showPixels(PixelController<RGB_ORDER> & pixels) {
-    mWait.wait();
-    cli();
-    if(!showRGBInternal(pixels)) {
-      sei(); delayMicroseconds(WAIT_TIME); cli();
-      showRGBInternal(pixels);
+    virtual void showPixels(PixelController<RGB_ORDER> & pixels) {
+        mWait.wait();
+        cli();
+        if(!showRGBInternal(pixels)) {
+            sei(); delayMicroseconds(WAIT_TIME); cli();
+            showRGBInternal(pixels);
+        }
+        sei();
+        mWait.mark();
+    }
+
+    // This method is made static to force making register Y available to use for data on AVR - if the method is non-static, then
+    // gcc will use register Y for the this pointer.
+    static uint32_t showRGBInternal(PixelController<RGB_ORDER> pixels) {
+        struct M0ClocklessData data;
+        data.d[0] = pixels.d[0];
+        data.d[1] = pixels.d[1];
+        data.d[2] = pixels.d[2];
+        data.s[0] = pixels.mScale[0];
+        data.s[1] = pixels.mScale[1];
+        data.s[2] = pixels.mScale[2];
+        data.e[0] = pixels.e[0];
+        data.e[1] = pixels.e[1];
+        data.e[2] = pixels.e[2];
+        data.adj = pixels.mAdvance;
+
+        typename FastPin<DATA_PIN>::port_ptr_t portBase = FastPin<DATA_PIN>::port();
+
+        // timer mode w/prescaler of 0
+        LED_TIMER->MODE = TIMER_MODE_MODE_Timer;
+        LED_TIMER->PRESCALER = 0;
+        LED_TIMER->EVENTS_COMPARE[0] = 0;
+        LED_TIMER->BITMODE = TIMER_BITMODE_BITMODE_16Bit;
+        LED_TIMER->SHORTS = TIMER_SHORTS_COMPARE0_CLEAR_Msk;
+        LED_TIMER->TASKS_START = 1;
+
+        int ret = showLedData<4,8,T1,T2,T3,RGB_ORDER,WAIT_TIME>(portBase, FastPin<DATA_PIN>::mask(), pixels.mData, pixels.mLen, &data);
+
+        LED_TIMER->TASKS_STOP = 1;
+        return ret; // 0x00FFFFFF - _VAL;
     }
-    sei();
-    mWait.mark();
-  }
-
-  // This method is made static to force making register Y available to use for data on AVR - if the method is non-static, then
-  // gcc will use register Y for the this pointer.
-  static uint32_t showRGBInternal(PixelController<RGB_ORDER> pixels) {
-    struct M0ClocklessData data;
-    data.d[0] = pixels.d[0];
-    data.d[1] = pixels.d[1];
-    data.d[2] = pixels.d[2];
-    data.s[0] = pixels.mScale[0];
-    data.s[1] = pixels.mScale[1];
-    data.s[2] = pixels.mScale[2];
-    data.e[0] = pixels.e[0];
-    data.e[1] = pixels.e[1];
-    data.e[2] = pixels.e[2];
-    data.adj = pixels.mAdvance;
-
-    typename FastPin<DATA_PIN>::port_ptr_t portBase = FastPin<DATA_PIN>::port();
-
-    // timer mode w/prescaler of 0
-    LED_TIMER->MODE = TIMER_MODE_MODE_Timer;
-    LED_TIMER->PRESCALER = 0;
-    LED_TIMER->EVENTS_COMPARE[0] = 0;
-    LED_TIMER->BITMODE = TIMER_BITMODE_BITMODE_16Bit;
-    LED_TIMER->SHORTS = TIMER_SHORTS_COMPARE0_CLEAR_Msk;
-    LED_TIMER->TASKS_START = 1;
-
-    int ret = showLedData<4,8,T1,T2,T3,RGB_ORDER,WAIT_TIME>(portBase, FastPin<DATA_PIN>::mask(), pixels.mData, pixels.mLen, &data);
-
-    LED_TIMER->TASKS_STOP = 1;
-    return ret; // 0x00FFFFFF - _VAL;
-  }
 };
 
 
diff --git a/platforms/arm/nrf51/fastpin_arm_nrf51.h b/platforms/arm/nrf51/fastpin_arm_nrf51.h
index 3d02edc1ac..6005c44830 100644
--- a/platforms/arm/nrf51/fastpin_arm_nrf51.h
+++ b/platforms/arm/nrf51/fastpin_arm_nrf51.h
@@ -9,28 +9,28 @@
 #if 0
 template<uint8_t PIN, uint32_t _MASK, typename _DIRSET, typename _DIRCLR, typename _OUTSET, typename _OUTCLR, typename _OUT> class _ARMPIN {
 public:
-  typedef volatile uint32_t * port_ptr_t;
-  typedef uint32_t port_t;
+    typedef volatile uint32_t * port_ptr_t;
+    typedef uint32_t port_t;
 
-  inline static void setOutput() { _DIRSET::r() = _MASK; }
-  inline static void setInput() { _DIRCLR::r() = _MASK; }
+    inline static void setOutput() { _DIRSET::r() = _MASK; }
+    inline static void setInput() { _DIRCLR::r() = _MASK; }
 
-  inline static void hi() __attribute__ ((always_inline)) { _OUTSET::r() = _MASK; }
-  inline static void lo() __attribute__ ((always_inline)) { _OUTCLR::r() = _MASK; }
-  inline static void set(register port_t val) __attribute__ ((always_inline)) { _OUT::r() = val; }
+    inline static void hi() __attribute__ ((always_inline)) { _OUTSET::r() = _MASK; }
+    inline static void lo() __attribute__ ((always_inline)) { _OUTCLR::r() = _MASK; }
+    inline static void set(register port_t val) __attribute__ ((always_inline)) { _OUT::r() = val; }
 
-  inline static void strobe() __attribute__ ((always_inline)) { toggle(); toggle(); }
+    inline static void strobe() __attribute__ ((always_inline)) { toggle(); toggle(); }
 
-  inline static void toggle() __attribute__ ((always_inline)) { _OUT::r() ^= _MASK; }
+    inline static void toggle() __attribute__ ((always_inline)) { _OUT::r() ^= _MASK; }
 
-  inline static void hi(register port_ptr_t port) __attribute__ ((always_inline)) { hi(); }
-  inline static void lo(register port_ptr_t port) __attribute__ ((always_inline)) { lo(); }
-  inline static void fastset(register port_ptr_t port, register port_t val) __attribute__ ((always_inline)) { *port = val; }
+    inline static void hi(register port_ptr_t port) __attribute__ ((always_inline)) { hi(); }
+    inline static void lo(register port_ptr_t port) __attribute__ ((always_inline)) { lo(); }
+    inline static void fastset(register port_ptr_t port, register port_t val) __attribute__ ((always_inline)) { *port = val; }
 
-  inline static port_t hival() __attribute__ ((always_inline)) { return _OUT::r() | _MASK; }
-  inline static port_t loval() __attribute__ ((always_inline)) { return _OUT::r() & ~_MASK; }
-  inline static port_ptr_t port() __attribute__ ((always_inline)) { return &_OUT::r(); }
-  inline static port_t mask() __attribute__ ((always_inline)) { return _MASK; }
+    inline static port_t hival() __attribute__ ((always_inline)) { return _OUT::r() | _MASK; }
+    inline static port_t loval() __attribute__ ((always_inline)) { return _OUT::r() & ~_MASK; }
+    inline static port_ptr_t port() __attribute__ ((always_inline)) { return &_OUT::r(); }
+    inline static port_t mask() __attribute__ ((always_inline)) { return _MASK; }
 };
 
 #define ADDR(X) *(volatile uint32_t*)X
@@ -50,20 +50,20 @@ _RD32_NRF(NR_OUTCLR);
 _RD32_NRF(NR_OUT);
 
 #define _FL_DEFPIN(PIN) template<> class FastPin<PIN> : public _ARMPIN<PIN, 1 << PIN, \
-  _R(NR_DIRSET), _R(NR_DIRCLR), _R(NR_OUTSET), _R(NR_OUTCLR), _R(NR_OUT)> {};
+    _R(NR_DIRSET), _R(NR_DIRCLR), _R(NR_OUTSET), _R(NR_OUTCLR), _R(NR_OUT)> {};
 #else
 
 typedef struct {                                    /*!< GPIO Structure                                                        */
-  // __I  uint32_t  RESERVED0[321];
-  __IO uint32_t  OUT;                               /*!< Write GPIO port.                                                      */
-  __IO uint32_t  OUTSET;                            /*!< Set individual bits in GPIO port.                                     */
-  __IO uint32_t  OUTCLR;                            /*!< Clear individual bits in GPIO port.                                   */
-  __I  uint32_t  IN;                                /*!< Read GPIO port.                                                       */
-  __IO uint32_t  DIR;                               /*!< Direction of GPIO pins.                                               */
-  __IO uint32_t  DIRSET;                            /*!< DIR set register.                                                     */
-  __IO uint32_t  DIRCLR;                            /*!< DIR clear register.                                                   */
-  __I  uint32_t  RESERVED1[120];
-  __IO uint32_t  PIN_CNF[32];                       /*!< Configuration of GPIO pins.                                           */
+    // __I  uint32_t  RESERVED0[321];
+    __IO uint32_t  OUT;                               /*!< Write GPIO port.                                                      */
+    __IO uint32_t  OUTSET;                            /*!< Set individual bits in GPIO port.                                     */
+    __IO uint32_t  OUTCLR;                            /*!< Clear individual bits in GPIO port.                                   */
+    __I  uint32_t  IN;                                /*!< Read GPIO port.                                                       */
+    __IO uint32_t  DIR;                               /*!< Direction of GPIO pins.                                               */
+    __IO uint32_t  DIRSET;                            /*!< DIR set register.                                                     */
+    __IO uint32_t  DIRCLR;                            /*!< DIR clear register.                                                   */
+    __I  uint32_t  RESERVED1[120];
+    __IO uint32_t  PIN_CNF[32];                       /*!< Configuration of GPIO pins.                                           */
 } FL_NRF_GPIO_Type;
 
 #define FL_NRF_GPIO_BASE                   0x50000504UL
@@ -71,30 +71,30 @@ typedef struct {                                    /*!< GPIO Structure
 
 template<uint8_t PIN, uint32_t _MASK> class _ARMPIN {
 public:
-  typedef volatile uint32_t * port_ptr_t;
-  typedef uint32_t port_t;
+    typedef volatile uint32_t * port_ptr_t;
+    typedef uint32_t port_t;
 
-  inline static void setOutput() { FL_NRF_GPIO->DIRSET = _MASK; }
-  inline static void setInput() { FL_NRF_GPIO->DIRCLR = _MASK; }
+    inline static void setOutput() { FL_NRF_GPIO->DIRSET = _MASK; }
+    inline static void setInput() { FL_NRF_GPIO->DIRCLR = _MASK; }
 
-  inline static void hi() __attribute__ ((always_inline)) { FL_NRF_GPIO->OUTSET = _MASK; }
-  inline static void lo() __attribute__ ((always_inline)) { FL_NRF_GPIO->OUTCLR= _MASK; }
-  inline static void set(register port_t val) __attribute__ ((always_inline)) { FL_NRF_GPIO->OUT = val; }
+    inline static void hi() __attribute__ ((always_inline)) { FL_NRF_GPIO->OUTSET = _MASK; }
+    inline static void lo() __attribute__ ((always_inline)) { FL_NRF_GPIO->OUTCLR= _MASK; }
+    inline static void set(register port_t val) __attribute__ ((always_inline)) { FL_NRF_GPIO->OUT = val; }
 
-  inline static void strobe() __attribute__ ((always_inline)) { toggle(); toggle(); }
+    inline static void strobe() __attribute__ ((always_inline)) { toggle(); toggle(); }
 
-  inline static void toggle() __attribute__ ((always_inline)) { FL_NRF_GPIO->OUT ^= _MASK; }
+    inline static void toggle() __attribute__ ((always_inline)) { FL_NRF_GPIO->OUT ^= _MASK; }
 
-  inline static void hi(register port_ptr_t port) __attribute__ ((always_inline)) { hi(); }
-  inline static void lo(register port_ptr_t port) __attribute__ ((always_inline)) { lo(); }
-  inline static void fastset(register port_ptr_t port, register port_t val) __attribute__ ((always_inline)) { *port = val; }
+    inline static void hi(register port_ptr_t port) __attribute__ ((always_inline)) { hi(); }
+    inline static void lo(register port_ptr_t port) __attribute__ ((always_inline)) { lo(); }
+    inline static void fastset(register port_ptr_t port, register port_t val) __attribute__ ((always_inline)) { *port = val; }
 
-  inline static port_t hival() __attribute__ ((always_inline)) { return FL_NRF_GPIO->OUT | _MASK; }
-  inline static port_t loval() __attribute__ ((always_inline)) { return FL_NRF_GPIO->OUT & ~_MASK; }
-  inline static port_ptr_t port() __attribute__ ((always_inline)) { return &FL_NRF_GPIO->OUT; }
-  inline static port_t mask() __attribute__ ((always_inline)) { return _MASK; }
+    inline static port_t hival() __attribute__ ((always_inline)) { return FL_NRF_GPIO->OUT | _MASK; }
+    inline static port_t loval() __attribute__ ((always_inline)) { return FL_NRF_GPIO->OUT & ~_MASK; }
+    inline static port_ptr_t port() __attribute__ ((always_inline)) { return &FL_NRF_GPIO->OUT; }
+    inline static port_t mask() __attribute__ ((always_inline)) { return _MASK; }
 
-  inline static bool isset() __attribute__ ((always_inline)) { return (FL_NRF_GPIO->IN & _MASK) != 0; }
+    inline static bool isset() __attribute__ ((always_inline)) { return (FL_NRF_GPIO->IN & _MASK) != 0; }
 };
 
 
diff --git a/platforms/arm/nrf51/fastspi_arm_nrf51.h b/platforms/arm/nrf51/fastspi_arm_nrf51.h
index 6299e89d96..6826ebcba3 100644
--- a/platforms/arm/nrf51/fastspi_arm_nrf51.h
+++ b/platforms/arm/nrf51/fastspi_arm_nrf51.h
@@ -12,136 +12,135 @@
 template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint32_t _SPI_CLOCK_DIVIDER>
 class NRF51SPIOutput {
 
-  struct saveData {
-    uint32_t sck;
-    uint32_t mosi;
-    uint32_t miso;
-    uint32_t freq;
-    uint32_t enable;
-  } mSavedData;
-
-  void saveSPIData() {
-    mSavedData.sck = NRF_SPI0->PSELSCK;
-    mSavedData.mosi = NRF_SPI0->PSELMOSI;
-    mSavedData.miso = NRF_SPI0->PSELMISO;
-    mSavedData.freq = NRF_SPI0->FREQUENCY;
-    mSavedData.enable = NRF_SPI0->ENABLE;
-  }
-
-  void restoreSPIData() {
-    NRF_SPI0->PSELSCK = mSavedData.sck;
-    NRF_SPI0->PSELMOSI = mSavedData.mosi;
-    NRF_SPI0->PSELMISO = mSavedData.miso;
-    NRF_SPI0->FREQUENCY = mSavedData.freq;
-    mSavedData.enable = NRF_SPI0->ENABLE;
-  }
+    struct saveData {
+        uint32_t sck;
+        uint32_t mosi;
+        uint32_t miso;
+        uint32_t freq;
+        uint32_t enable;
+    } mSavedData;
+
+    void saveSPIData() {
+        mSavedData.sck = NRF_SPI0->PSELSCK;
+        mSavedData.mosi = NRF_SPI0->PSELMOSI;
+        mSavedData.miso = NRF_SPI0->PSELMISO;
+        mSavedData.freq = NRF_SPI0->FREQUENCY;
+        mSavedData.enable = NRF_SPI0->ENABLE;
+    }
+
+    void restoreSPIData() {
+        NRF_SPI0->PSELSCK = mSavedData.sck;
+        NRF_SPI0->PSELMOSI = mSavedData.mosi;
+        NRF_SPI0->PSELMISO = mSavedData.miso;
+        NRF_SPI0->FREQUENCY = mSavedData.freq;
+        mSavedData.enable = NRF_SPI0->ENABLE;
+    }
 
 public:
-  NRF51SPIOutput() { FastPin<_DATA_PIN>::setOutput(); FastPin<_CLOCK_PIN>::setOutput(); }
-  NRF51SPIOutput(Selectable *pSelect) {  FastPin<_DATA_PIN>::setOutput(); FastPin<_CLOCK_PIN>::setOutput();  }
-
-  // set the object representing the selectable
-  void setSelect(Selectable *pSelect) { /* TODO */ }
-
-  // initialize the SPI subssytem
-  void init() {
-    FastPin<_DATA_PIN>::setOutput();
-    FastPin<_CLOCK_PIN>::setOutput();
-    NRF_SPI0->PSELSCK = _CLOCK_PIN;
-    NRF_SPI0->PSELMOSI = _DATA_PIN;
-    NRF_SPI0->PSELMISO = 0xFFFFFFFF;
-    NRF_SPI0->FREQUENCY = 0x80000000;
-    NRF_SPI0->ENABLE = 1;
-    NRF_SPI0->EVENTS_READY = 0;
-  }
-
-  // latch the CS select
-  void select() { saveSPIData(); init(); }
-
-  // release the CS select
-  void release() { shouldWait(); restoreSPIData(); }
-
-  static bool shouldWait(bool wait = false) __attribute__((always_inline)) __attribute__((always_inline)) {
-    // static bool sWait=false;
-    // bool oldWait = sWait;
-    // sWait = wait;
-    // never going to bother with waiting since we're always running the spi clock at max speed on the rfduino
-    // TODO: When we set clock rate, implement/fix waiting properly, otherwise the world hangs up
-    return false;
-  }
-  
-  // wait until all queued up data has been written
-  static void waitFully() __attribute__((always_inline)){ if(shouldWait()) { while(NRF_SPI0->EVENTS_READY==0); } NRF_SPI0->INTENCLR; }
-  static void wait() __attribute__((always_inline)){ if(shouldWait()) { while(NRF_SPI0->EVENTS_READY==0); } NRF_SPI0->INTENCLR; }
-
-  // write a byte out via SPI (returns immediately on writing register)
-  static void writeByte(uint8_t b) __attribute__((always_inline)) { wait(); NRF_SPI0->TXD = b; NRF_SPI0->INTENCLR; shouldWait(true); }
-
-  // write a word out via SPI (returns immediately on writing register)
-  static void writeWord(uint16_t w) __attribute__((always_inline)){ writeByte(w>>8); writeByte(w & 0xFF);  }
-
-  // A raw set of writing byte values, assumes setup/init/waiting done elsewhere (static for use by adjustment classes)
-  static void writeBytesValueRaw(uint8_t value, int len) { while(len--) { writeByte(value);  } }
-
-  // A full cycle of writing a value for len bytes, including select, release, and waiting
-  void writeBytesValue(uint8_t value, int len) {
-    select();
-    while(len--) {
-      writeByte(value);
+    NRF51SPIOutput() { FastPin<_DATA_PIN>::setOutput(); FastPin<_CLOCK_PIN>::setOutput(); }
+    NRF51SPIOutput(Selectable *pSelect) {  FastPin<_DATA_PIN>::setOutput(); FastPin<_CLOCK_PIN>::setOutput();  }
+
+    // set the object representing the selectable
+    void setSelect(Selectable *pSelect) { /* TODO */ }
+
+    // initialize the SPI subssytem
+    void init() {
+        FastPin<_DATA_PIN>::setOutput();
+        FastPin<_CLOCK_PIN>::setOutput();
+        NRF_SPI0->PSELSCK = _CLOCK_PIN;
+        NRF_SPI0->PSELMOSI = _DATA_PIN;
+        NRF_SPI0->PSELMISO = 0xFFFFFFFF;
+        NRF_SPI0->FREQUENCY = 0x80000000;
+        NRF_SPI0->ENABLE = 1;
+        NRF_SPI0->EVENTS_READY = 0;
     }
-    waitFully();
-    release();
-  }
-
-  // A full cycle of writing a raw block of data out, including select, release, and waiting
-  template<class D> void writeBytes(uint8_t *data, int len) {
-    uint8_t *end = data + len;
-    select();
-    while(data != end) {
-      writeByte(D::adjust(*data++));
+
+    // latch the CS select
+    void select() { saveSPIData(); init(); }
+
+    // release the CS select
+    void release() { shouldWait(); restoreSPIData(); }
+
+    static bool shouldWait(bool wait = false) __attribute__((always_inline)) __attribute__((always_inline)) {
+        // static bool sWait=false;
+        // bool oldWait = sWait;
+        // sWait = wait;
+        // never going to bother with waiting since we're always running the spi clock at max speed on the rfduino
+        // TODO: When we set clock rate, implement/fix waiting properly, otherwise the world hangs up
+        return false;
     }
-    D::postBlock(len);
-    waitFully();
-    release();
-  }
-
-  void writeBytes(uint8_t *data, int len) {
-    writeBytes<DATA_NOP>(data, len);
-  }
-
-  // write a single bit out, which bit from the passed in byte is determined by template parameter
-  template <uint8_t BIT> inline static void writeBit(uint8_t b) {
-    waitFully();
-    NRF_SPI0->ENABLE = 0;
-    if(b & 1<<BIT) {
-      FastPin<_DATA_PIN>::hi();
-    } else {
-      FastPin<_DATA_PIN>::lo();
+    
+    // wait until all queued up data has been written
+    static void waitFully() __attribute__((always_inline)){ if(shouldWait()) { while(NRF_SPI0->EVENTS_READY==0); } NRF_SPI0->INTENCLR; }
+    static void wait() __attribute__((always_inline)){ if(shouldWait()) { while(NRF_SPI0->EVENTS_READY==0); } NRF_SPI0->INTENCLR; }
+
+    // write a byte out via SPI (returns immediately on writing register)
+    static void writeByte(uint8_t b) __attribute__((always_inline)) { wait(); NRF_SPI0->TXD = b; NRF_SPI0->INTENCLR; shouldWait(true); }
+
+    // write a word out via SPI (returns immediately on writing register)
+    static void writeWord(uint16_t w) __attribute__((always_inline)){ writeByte(w>>8); writeByte(w & 0xFF);  }
+
+    // A raw set of writing byte values, assumes setup/init/waiting done elsewhere (static for use by adjustment classes)
+    static void writeBytesValueRaw(uint8_t value, int len) { while(len--) { writeByte(value);  } }
+
+    // A full cycle of writing a value for len bytes, including select, release, and waiting
+    void writeBytesValue(uint8_t value, int len) {
+        select();
+        while(len--) {
+            writeByte(value);
+        }
+        waitFully();
+        release();
     }
-    FastPin<_CLOCK_PIN>::toggle();
-    FastPin<_CLOCK_PIN>::toggle();
-    NRF_SPI0->ENABLE = 1;
-  }
-
-  template <uint8_t FLAGS, class D, EOrder RGB_ORDER> void writePixels(PixelController<RGB_ORDER> pixels) {
-    select();
-    int len = pixels.mLen;
-    while(pixels.has(1)) {
-      if(FLAGS & FLAG_START_BIT) {
-				writeBit<0>(1);
-      }
-			writeByte(D::adjust(pixels.loadAndScale0()));
-			writeByte(D::adjust(pixels.loadAndScale1()));
-			writeByte(D::adjust(pixels.loadAndScale2()));
-
-			pixels.advanceData();
-			pixels.stepDithering();
-		}
-		D::postBlock(len);
-		waitFully();
-		release();
-  }
 
+    // A full cycle of writing a raw block of data out, including select, release, and waiting
+    template<class D> void writeBytes(uint8_t *data, int len) {
+        uint8_t *end = data + len;
+        select();
+        while(data != end) {
+            writeByte(D::adjust(*data++));
+        }
+        D::postBlock(len);
+        waitFully();
+        release();
+    }
+
+    void writeBytes(uint8_t *data, int len) {
+        writeBytes<DATA_NOP>(data, len);
+    }
+
+    // write a single bit out, which bit from the passed in byte is determined by template parameter
+    template <uint8_t BIT> inline static void writeBit(uint8_t b) {
+        waitFully();
+        NRF_SPI0->ENABLE = 0;
+        if(b & 1<<BIT) {
+            FastPin<_DATA_PIN>::hi();
+        } else {
+            FastPin<_DATA_PIN>::lo();
+        }
+        FastPin<_CLOCK_PIN>::toggle();
+        FastPin<_CLOCK_PIN>::toggle();
+        NRF_SPI0->ENABLE = 1;
+    }
+
+    template <uint8_t FLAGS, class D, EOrder RGB_ORDER> void writePixels(PixelController<RGB_ORDER> pixels) {
+        select();
+        int len = pixels.mLen;
+        while(pixels.has(1)) {
+            if(FLAGS & FLAG_START_BIT) {
+                writeBit<0>(1);
+            }
+            writeByte(D::adjust(pixels.loadAndScale0()));
+            writeByte(D::adjust(pixels.loadAndScale1()));
+            writeByte(D::adjust(pixels.loadAndScale2()));
+
+            pixels.advanceData();
+            pixels.stepDithering();
+        }
+        D::postBlock(len);
+        waitFully();
+        release();
+    }
 };
 
 #endif
diff --git a/platforms/arm/nrf52/arbiter_nrf52.h b/platforms/arm/nrf52/arbiter_nrf52.h
index 5a6aa92a67..8972d2d233 100644
--- a/platforms/arm/nrf52/arbiter_nrf52.h
+++ b/platforms/arm/nrf52/arbiter_nrf52.h
@@ -36,7 +36,6 @@ static_assert(FASTLED_NRF52_PWM_INSTANCE_COUNT > 0, "Instance count must be grea
 
 template <uint32_t _PWM_ID>
 class PWM_Arbiter {
-
 private:
     static_assert(_PWM_ID < 32, "PWM_ID over 31 breaks current arbitration bitmask");
     //const  uint32_t _ACQUIRE_MASK =             (1u << _PWM_ID) ;
diff --git a/platforms/arm/nrf52/clockless_arm_nrf52.h b/platforms/arm/nrf52/clockless_arm_nrf52.h
index 56a1dbe097..613ff2824a 100644
--- a/platforms/arm/nrf52/clockless_arm_nrf52.h
+++ b/platforms/arm/nrf52/clockless_arm_nrf52.h
@@ -15,7 +15,6 @@
 // NOTE: Update platforms.cpp in root of FastLED library if this changes
 #define FASTLED_NRF52_PWM_ID 0
 
-
 extern uint32_t isrCount;
 
 
diff --git a/platforms/arm/nrf52/fastpin_arm_nrf52.h b/platforms/arm/nrf52/fastpin_arm_nrf52.h
index 7a780876a6..9d0a8ec990 100644
--- a/platforms/arm/nrf52/fastpin_arm_nrf52.h
+++ b/platforms/arm/nrf52/fastpin_arm_nrf52.h
@@ -90,54 +90,54 @@ struct __generated_struct_NRF_P1 {
 // The actual class template can then use a typename, for what is essentially a constexpr NRF_GPIO_Type*
 template <uint32_t _MASK, typename _PORT, uint8_t _PORT_NUMBER, uint8_t _PIN_NUMBER> class _ARMPIN  {
 public:
-  typedef volatile uint32_t * port_ptr_t;
-  typedef uint32_t port_t;
+    typedef volatile uint32_t * port_ptr_t;
+    typedef uint32_t port_t;
 
-  FASTLED_NRF52_INLINE_ATTRIBUTE static void       setOutput() {
-    // OK for this to be more than one instruction, as unusual to quickly switch input/output modes
-    nrf_gpio_cfg(
-        nrf_pin(),
-        NRF_GPIO_PIN_DIR_OUTPUT,        // set pin as output
-        NRF_GPIO_PIN_INPUT_DISCONNECT,  // disconnect the input buffering
-        NRF_GPIO_PIN_NOPULL,            // neither pull-up nor pull-down resistors enabled
-        NRF_GPIO_PIN_H0H1,              // high drive mode required for faster speeds
-        NRF_GPIO_PIN_NOSENSE            // pin sense level disabled
-        );
-  }
-  FASTLED_NRF52_INLINE_ATTRIBUTE static void       setInput()  {
-    // OK for this to be more than one instruction, as unusual to quickly switch input/output modes
-    nrf_gpio_cfg(
-        nrf_pin(),
-        NRF_GPIO_PIN_DIR_INPUT,         // set pin as input
-        NRF_GPIO_PIN_INPUT_DISCONNECT,  // disconnect the input buffering
-        NRF_GPIO_PIN_NOPULL,            // neither pull-up nor pull-down resistors enabled
-        NRF_GPIO_PIN_H0H1,              // high drive mode required for faster speeds
-        NRF_GPIO_PIN_NOSENSE            // pin sense level disabled
-        );
-  }
-  FASTLED_NRF52_INLINE_ATTRIBUTE static void       hi()        { (reinterpret_cast<NRF_GPIO_Type*>(_PORT::r()))->OUTSET = _MASK;            } // sets _MASK in the SET   OUTPUT register (output set high)
-  FASTLED_NRF52_INLINE_ATTRIBUTE static void       lo()        { (reinterpret_cast<NRF_GPIO_Type*>(_PORT::r()))->OUTCLR = _MASK;            } // sets _MASK in the CLEAR OUTPUT register (output set low)
-  FASTLED_NRF52_INLINE_ATTRIBUTE static void       toggle()    { (reinterpret_cast<NRF_GPIO_Type*>(_PORT::r()))->OUT ^= _MASK;              } // toggles _MASK bits in the OUTPUT GPIO port directly
-  FASTLED_NRF52_INLINE_ATTRIBUTE static void       strobe()    { toggle();     toggle();                } // BUGBUG -- Is this used by FastLED?  Without knowing (for example) SPI Speed?
-  FASTLED_NRF52_INLINE_ATTRIBUTE static port_t     hival()     { return (reinterpret_cast<NRF_GPIO_Type*>(_PORT::r()))->OUT | _MASK;        } // sets all _MASK bit(s) in the OUTPUT GPIO port to 1
-  FASTLED_NRF52_INLINE_ATTRIBUTE static port_t     loval()     { return (reinterpret_cast<NRF_GPIO_Type*>(_PORT::r()))->OUT & ~_MASK;       } // sets all _MASK bit(s) in the OUTPUT GPIO port to 0
-  FASTLED_NRF52_INLINE_ATTRIBUTE static port_ptr_t port()      { return &((reinterpret_cast<NRF_GPIO_Type*>(_PORT::r()))->OUT);             } // gets raw pointer to OUTPUT          GPIO port
-  FASTLED_NRF52_INLINE_ATTRIBUTE static port_ptr_t cport()     { return &((reinterpret_cast<NRF_GPIO_Type*>(_PORT::r()))->OUTCLR);          } // gets raw pointer to SET   DIRECTION GPIO port
-  FASTLED_NRF52_INLINE_ATTRIBUTE static port_ptr_t sport()     { return &((reinterpret_cast<NRF_GPIO_Type*>(_PORT::r()))->OUTSET);          } // gets raw pointer to CLEAR DIRECTION GPIO port
-  FASTLED_NRF52_INLINE_ATTRIBUTE static port_t     mask()      { return _MASK;                          } // gets the value of _MASK
-  FASTLED_NRF52_INLINE_ATTRIBUTE static void hi (register port_ptr_t port) { hi();                      } // sets _MASK in the SET   OUTPUT register (output set high)
-  FASTLED_NRF52_INLINE_ATTRIBUTE static void lo (register port_ptr_t port) { lo();                      } // sets _MASK in the CLEAR OUTPUT register (output set low)
-  FASTLED_NRF52_INLINE_ATTRIBUTE static void set(register port_t     val ) { (reinterpret_cast<NRF_GPIO_Type*>(_PORT::r()))->OUT = val;     } // sets entire port's value (optimization used by FastLED)
-  FASTLED_NRF52_INLINE_ATTRIBUTE static void fastset(register port_ptr_t port, register port_t val) { *port = val; }
-  constexpr                      static uint32_t   nrf_pin2() { return NRF_GPIO_PIN_MAP(_PORT_NUMBER, _PIN_NUMBER); }
-  constexpr                      static bool       LowSpeedOnlyRecommended() {
-    // Caller must always determine if high speed use if allowed on a given pin,
-    // because it depends on more than just the chip packaging ... it depends on entire board (and even system) design.
-    return false; // choosing default to be FALSE, to allow users to ATTEMPT to use high-speed on pins where support is not known
-  }
-  // Expose the nrf pin (port/pin combined), port, and pin as properties (e.g., for setting up SPI)
+    FASTLED_NRF52_INLINE_ATTRIBUTE static void       setOutput() {
+        // OK for this to be more than one instruction, as unusual to quickly switch input/output modes
+        nrf_gpio_cfg(
+            nrf_pin(),
+            NRF_GPIO_PIN_DIR_OUTPUT,        // set pin as output
+            NRF_GPIO_PIN_INPUT_DISCONNECT,  // disconnect the input buffering
+            NRF_GPIO_PIN_NOPULL,            // neither pull-up nor pull-down resistors enabled
+            NRF_GPIO_PIN_H0H1,              // high drive mode required for faster speeds
+            NRF_GPIO_PIN_NOSENSE            // pin sense level disabled
+            );
+    }
+    FASTLED_NRF52_INLINE_ATTRIBUTE static void       setInput()  {
+        // OK for this to be more than one instruction, as unusual to quickly switch input/output modes
+        nrf_gpio_cfg(
+            nrf_pin(),
+            NRF_GPIO_PIN_DIR_INPUT,         // set pin as input
+            NRF_GPIO_PIN_INPUT_DISCONNECT,  // disconnect the input buffering
+            NRF_GPIO_PIN_NOPULL,            // neither pull-up nor pull-down resistors enabled
+            NRF_GPIO_PIN_H0H1,              // high drive mode required for faster speeds
+            NRF_GPIO_PIN_NOSENSE            // pin sense level disabled
+            );
+    }
+    FASTLED_NRF52_INLINE_ATTRIBUTE static void       hi()        { (reinterpret_cast<NRF_GPIO_Type*>(_PORT::r()))->OUTSET = _MASK;            } // sets _MASK in the SET   OUTPUT register (output set high)
+    FASTLED_NRF52_INLINE_ATTRIBUTE static void       lo()        { (reinterpret_cast<NRF_GPIO_Type*>(_PORT::r()))->OUTCLR = _MASK;            } // sets _MASK in the CLEAR OUTPUT register (output set low)
+    FASTLED_NRF52_INLINE_ATTRIBUTE static void       toggle()    { (reinterpret_cast<NRF_GPIO_Type*>(_PORT::r()))->OUT ^= _MASK;              } // toggles _MASK bits in the OUTPUT GPIO port directly
+    FASTLED_NRF52_INLINE_ATTRIBUTE static void       strobe()    { toggle();     toggle();                } // BUGBUG -- Is this used by FastLED?  Without knowing (for example) SPI Speed?
+    FASTLED_NRF52_INLINE_ATTRIBUTE static port_t     hival()     { return (reinterpret_cast<NRF_GPIO_Type*>(_PORT::r()))->OUT | _MASK;        } // sets all _MASK bit(s) in the OUTPUT GPIO port to 1
+    FASTLED_NRF52_INLINE_ATTRIBUTE static port_t     loval()     { return (reinterpret_cast<NRF_GPIO_Type*>(_PORT::r()))->OUT & ~_MASK;       } // sets all _MASK bit(s) in the OUTPUT GPIO port to 0
+    FASTLED_NRF52_INLINE_ATTRIBUTE static port_ptr_t port()      { return &((reinterpret_cast<NRF_GPIO_Type*>(_PORT::r()))->OUT);             } // gets raw pointer to OUTPUT          GPIO port
+    FASTLED_NRF52_INLINE_ATTRIBUTE static port_ptr_t cport()     { return &((reinterpret_cast<NRF_GPIO_Type*>(_PORT::r()))->OUTCLR);          } // gets raw pointer to SET   DIRECTION GPIO port
+    FASTLED_NRF52_INLINE_ATTRIBUTE static port_ptr_t sport()     { return &((reinterpret_cast<NRF_GPIO_Type*>(_PORT::r()))->OUTSET);          } // gets raw pointer to CLEAR DIRECTION GPIO port
+    FASTLED_NRF52_INLINE_ATTRIBUTE static port_t     mask()      { return _MASK;                          } // gets the value of _MASK
+    FASTLED_NRF52_INLINE_ATTRIBUTE static void hi (register port_ptr_t port) { hi();                      } // sets _MASK in the SET   OUTPUT register (output set high)
+    FASTLED_NRF52_INLINE_ATTRIBUTE static void lo (register port_ptr_t port) { lo();                      } // sets _MASK in the CLEAR OUTPUT register (output set low)
+    FASTLED_NRF52_INLINE_ATTRIBUTE static void set(register port_t     val ) { (reinterpret_cast<NRF_GPIO_Type*>(_PORT::r()))->OUT = val;     } // sets entire port's value (optimization used by FastLED)
+    FASTLED_NRF52_INLINE_ATTRIBUTE static void fastset(register port_ptr_t port, register port_t val) { *port = val; }
+    constexpr                      static uint32_t   nrf_pin2() { return NRF_GPIO_PIN_MAP(_PORT_NUMBER, _PIN_NUMBER); }
+    constexpr                      static bool       LowSpeedOnlyRecommended() {
+        // Caller must always determine if high speed use if allowed on a given pin,
+        // because it depends on more than just the chip packaging ... it depends on entire board (and even system) design.
+        return false; // choosing default to be FALSE, to allow users to ATTEMPT to use high-speed on pins where support is not known
+    }
+    // Expose the nrf pin (port/pin combined), port, and pin as properties (e.g., for setting up SPI)
 
-  FASTLED_NRF52_INLINE_ATTRIBUTE static uint32_t   nrf_pin()  { return NRF_GPIO_PIN_MAP(_PORT_NUMBER, _PIN_NUMBER); }
+    FASTLED_NRF52_INLINE_ATTRIBUTE static uint32_t   nrf_pin()  { return NRF_GPIO_PIN_MAP(_PORT_NUMBER, _PIN_NUMBER); }
 };
 
 //
@@ -152,7 +152,7 @@ template <uint32_t _MASK, typename _PORT, uint8_t _PORT_NUMBER, uint8_t _PIN_NUM
 //     _FL_DEFPIN(47, 47, 1);
 //
 
-#define _FL_DEFPIN(ARDUINO_PIN, BOARD_PIN, BOARD_PORT)    \
+#define _FL_DEFPIN(ARDUINO_PIN, BOARD_PIN, BOARD_PORT)   \
     template<> class FastPin<ARDUINO_PIN> :              \
     public _ARMPIN<                                      \
         1u << (BOARD_PIN & 31u),                         \
diff --git a/platforms/arm/nrf52/fastspi_arm_nrf52.h b/platforms/arm/nrf52/fastspi_arm_nrf52.h
index 9c1a219826..89d006e398 100644
--- a/platforms/arm/nrf52/fastspi_arm_nrf52.h
+++ b/platforms/arm/nrf52/fastspi_arm_nrf52.h
@@ -23,7 +23,6 @@
     /// SPI_CLOCK_DIVIDER is number of CPU clock cycles per SPI transmission bit?
     template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint32_t _SPI_CLOCK_DIVIDER>
     class NRF52SPIOutput {
-
     private:
         // static variables -- always using same SPIM instance
         static bool s_InUse;
diff --git a/platforms/arm/sam/clockless_arm_sam.h b/platforms/arm/sam/clockless_arm_sam.h
index 0fc621d2aa..737a4555c2 100644
--- a/platforms/arm/sam/clockless_arm_sam.h
+++ b/platforms/arm/sam/clockless_arm_sam.h
@@ -22,6 +22,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 	data_t mPinMask;
 	data_ptr_t mPort;
 	CMinWait<WAIT_TIME> mWait;
+
 public:
 	virtual void init() {
 		FastPinBB<DATA_PIN>::setOutput();
@@ -32,15 +33,14 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 	virtual uint16_t getMaxRefreshRate() const { return 400; }
 
 protected:
-
-	virtual void showPixels(PixelController<RGB_ORDER> & pixels) {
-		mWait.wait();
-		if(!showRGBInternal(pixels)) {
-      sei(); delayMicroseconds(WAIT_TIME); cli();
-      showRGBInternal(pixels);
+    virtual void showPixels(PixelController<RGB_ORDER> & pixels) {
+        mWait.wait();
+        if(!showRGBInternal(pixels)) {
+            sei(); delayMicroseconds(WAIT_TIME); cli();
+            showRGBInternal(pixels);
+        }
+        mWait.mark();
     }
-		mWait.mark();
-	}
 
 	template<int BITS>  __attribute__ ((always_inline)) inline static void writeBits(register uint32_t & next_mark, register data_ptr_t port, register uint8_t & b) {
 		// Make sure we don't slot into a wrapping spot, this will delay up to 12.5µs for WS2812
@@ -90,7 +90,9 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 			#if (FASTLED_ALLOW_INTERRUPTS == 1)
 			cli();
 			if(DUE_TIMER_VAL > next_mark) {
-				if((DUE_TIMER_VAL - next_mark) > ((WAIT_TIME-INTERRUPT_THRESHOLD)*CLKS_PER_US)) { sei(); TC_Stop(DUE_TIMER,DUE_TIMER_CHANNEL); return 0; }
+				if((DUE_TIMER_VAL - next_mark) > ((WAIT_TIME-INTERRUPT_THRESHOLD)*CLKS_PER_US)) {
+                    sei(); TC_Stop(DUE_TIMER,DUE_TIMER_CHANNEL); return 0;
+                }
 			}
 			#endif
 
diff --git a/platforms/arm/sam/clockless_block_arm_sam.h b/platforms/arm/sam/clockless_block_arm_sam.h
index 355f945d0e..a179989150 100644
--- a/platforms/arm/sam/clockless_block_arm_sam.h
+++ b/platforms/arm/sam/clockless_block_arm_sam.h
@@ -21,8 +21,8 @@ FASTLED_NAMESPACE_BEGIN
 #define PORTB_FIRST_PIN 90
 
 typedef union {
-  uint8_t bytes[8];
-  uint32_t raw[2];
+    uint8_t bytes[8];
+    uint32_t raw[2];
 } Lines;
 
 #define TADJUST 0
@@ -37,144 +37,143 @@ class InlineBlockClocklessController : public CPixelLEDController<RGB_ORDER, LAN
 	data_t mPinMask;
 	data_ptr_t mPort;
 	CMinWait<WAIT_TIME> mWait;
+
 public:
 	virtual int size() { return CLEDController::size() * LANES; }
 	virtual void init() {
-    static_assert(LANES <= 8, "Maximum of 8 lanes for Due parallel controllers!");
-    if(FIRST_PIN == PORTA_FIRST_PIN) {
-      switch(LANES) {
-        case 8: FastPin<31>::setOutput();
-        case 7: FastPin<58>::setOutput();
-        case 6: FastPin<100>::setOutput();
-        case 5: FastPin<59>::setOutput();
-        case 4: FastPin<60>::setOutput();
-        case 3: FastPin<61>::setOutput();
-        case 2: FastPin<68>::setOutput();
-        case 1: FastPin<69>::setOutput();
-      }
-    } else if(FIRST_PIN == PORTD_FIRST_PIN) {
-      switch(LANES) {
-        case 8: FastPin<11>::setOutput();
-        case 7: FastPin<29>::setOutput();
-        case 6: FastPin<15>::setOutput();
-        case 5: FastPin<14>::setOutput();
-        case 4: FastPin<28>::setOutput();
-        case 3: FastPin<27>::setOutput();
-        case 2: FastPin<26>::setOutput();
-        case 1: FastPin<25>::setOutput();
-      }
-    } else if(FIRST_PIN == PORTB_FIRST_PIN) {
-      switch(LANES) {
-        case 8: FastPin<97>::setOutput();
-        case 7: FastPin<96>::setOutput();
-        case 6: FastPin<95>::setOutput();
-        case 5: FastPin<94>::setOutput();
-        case 4: FastPin<93>::setOutput();
-        case 3: FastPin<92>::setOutput();
-        case 2: FastPin<91>::setOutput();
-        case 1: FastPin<90>::setOutput();
-      }
-    }
-    mPinMask = FastPin<FIRST_PIN>::mask();
-    mPort = FastPin<FIRST_PIN>::port();
-	}
-
-	virtual uint16_t getMaxRefreshRate() const { return 400; }
-
-  virtual void showPixels(PixelController<RGB_ORDER, LANES, PORT_MASK> & pixels) {
-    mWait.wait();
-    showRGBInternal(pixels);
-    sei();
-    mWait.mark();
-  }
-
-	static uint32_t showRGBInternal(PixelController<RGB_ORDER, LANES, PORT_MASK> &allpixels) {
-		// Serial.println("Entering show");
-
-    int nLeds = allpixels.mLen;
-
-    // Setup the pixel controller and load/scale the first byte
-		Lines b0,b1,b2;
-
-    allpixels.preStepFirstByteDithering();
-		for(uint8_t i = 0; i < LANES; i++) {
-			b0.bytes[i] = allpixels.loadAndScale0(i);
-		}
-
-		// Setup and start the clock
-    TC_Configure(DUE_TIMER,DUE_TIMER_CHANNEL,TC_CMR_TCCLKS_TIMER_CLOCK1);
-    pmc_enable_periph_clk(DUE_TIMER_ID);
-    TC_Start(DUE_TIMER,DUE_TIMER_CHANNEL);
-
-    #if (FASTLED_ALLOW_INTERRUPTS == 1)
-    cli();
-    #endif
-		uint32_t next_mark = (DUE_TIMER_VAL + (TOTAL));
-		while(nLeds--) {
-      allpixels.stepDithering();
-      #if (FASTLED_ALLOW_INTERRUPTS == 1)
-      cli();
-      if(DUE_TIMER_VAL > next_mark) {
-        if((DUE_TIMER_VAL - next_mark) > ((WAIT_TIME-INTERRUPT_THRESHOLD)*CLKS_PER_US)) {
-          sei(); TC_Stop(DUE_TIMER,DUE_TIMER_CHANNEL); return DUE_TIMER_VAL;
+        static_assert(LANES <= 8, "Maximum of 8 lanes for Due parallel controllers!");
+        if(FIRST_PIN == PORTA_FIRST_PIN) {
+            switch(LANES) {
+                case 8: FastPin<31>::setOutput();
+                case 7: FastPin<58>::setOutput();
+                case 6: FastPin<100>::setOutput();
+                case 5: FastPin<59>::setOutput();
+                case 4: FastPin<60>::setOutput();
+                case 3: FastPin<61>::setOutput();
+                case 2: FastPin<68>::setOutput();
+                case 1: FastPin<69>::setOutput();
+            }
+        } else if(FIRST_PIN == PORTD_FIRST_PIN) {
+            switch(LANES) {
+                case 8: FastPin<11>::setOutput();
+                case 7: FastPin<29>::setOutput();
+                case 6: FastPin<15>::setOutput();
+                case 5: FastPin<14>::setOutput();
+                case 4: FastPin<28>::setOutput();
+                case 3: FastPin<27>::setOutput();
+                case 2: FastPin<26>::setOutput();
+                case 1: FastPin<25>::setOutput();
+            }
+        } else if(FIRST_PIN == PORTB_FIRST_PIN) {
+            switch(LANES) {
+                case 8: FastPin<97>::setOutput();
+                case 7: FastPin<96>::setOutput();
+                case 6: FastPin<95>::setOutput();
+                case 5: FastPin<94>::setOutput();
+                case 4: FastPin<93>::setOutput();
+                case 3: FastPin<92>::setOutput();
+                case 2: FastPin<91>::setOutput();
+                case 1: FastPin<90>::setOutput();
+            }
         }
-      }
-      #endif
+        mPinMask = FastPin<FIRST_PIN>::mask();
+        mPort = FastPin<FIRST_PIN>::port();
+    }
 
-			// Write first byte, read next byte
-			writeBits<8+XTRA0,1>(next_mark, b0, b1, allpixels);
+    virtual uint16_t getMaxRefreshRate() const { return 400; }
 
-			// Write second byte, read 3rd byte
-			writeBits<8+XTRA0,2>(next_mark, b1, b2, allpixels);
+    virtual void showPixels(PixelController<RGB_ORDER, LANES, PORT_MASK> & pixels) {
+        mWait.wait();
+        showRGBInternal(pixels);
+        sei();
+        mWait.mark();
+    }
 
-      allpixels.advanceData();
-			// Write third byte
-			writeBits<8+XTRA0,0>(next_mark, b2, b0, allpixels);
+    static uint32_t showRGBInternal(PixelController<RGB_ORDER, LANES, PORT_MASK> &allpixels) {
+        // Serial.println("Entering show");
 
-      #if (FASTLED_ALLOW_INTERRUPTS == 1)
-      sei();
-      #endif
-		}
+        int nLeds = allpixels.mLen;
 
-		return DUE_TIMER_VAL;
-	}
+        // Setup the pixel controller and load/scale the first byte
+        Lines b0,b1,b2;
 
-  template<int BITS,int PX> __attribute__ ((always_inline)) inline static void writeBits(register uint32_t & next_mark, register Lines & b, Lines & b3, PixelController<RGB_ORDER,LANES, PORT_MASK> &pixels) { // , register uint32_t & b2)  {
-    Lines b2;
-    transpose8x1(b.bytes,b2.bytes);
+        allpixels.preStepFirstByteDithering();
+        for(uint8_t i = 0; i < LANES; i++) {
+            b0.bytes[i] = allpixels.loadAndScale0(i);
+        }
 
-    register uint8_t d = pixels.template getd<PX>(pixels);
-    register uint8_t scale = pixels.template getscale<PX>(pixels);
+        // Setup and start the clock
+        TC_Configure(DUE_TIMER,DUE_TIMER_CHANNEL,TC_CMR_TCCLKS_TIMER_CLOCK1);
+        pmc_enable_periph_clk(DUE_TIMER_ID);
+        TC_Start(DUE_TIMER,DUE_TIMER_CHANNEL);
+
+        #if (FASTLED_ALLOW_INTERRUPTS == 1)
+        cli();
+        #endif
+        uint32_t next_mark = (DUE_TIMER_VAL + (TOTAL));
+        while(nLeds--) {
+            allpixels.stepDithering();
+            #if (FASTLED_ALLOW_INTERRUPTS == 1)
+            cli();
+            if(DUE_TIMER_VAL > next_mark) {
+                if((DUE_TIMER_VAL - next_mark) > ((WAIT_TIME-INTERRUPT_THRESHOLD)*CLKS_PER_US)) {
+                    sei(); TC_Stop(DUE_TIMER,DUE_TIMER_CHANNEL); return DUE_TIMER_VAL;
+                }
+            }
+            #endif
+
+            // Write first byte, read next byte
+            writeBits<8+XTRA0,1>(next_mark, b0, b1, allpixels);
+
+            // Write second byte, read 3rd byte
+            writeBits<8+XTRA0,2>(next_mark, b1, b2, allpixels);
+
+            allpixels.advanceData();
+            // Write third byte
+            writeBits<8+XTRA0,0>(next_mark, b2, b0, allpixels);
+
+            #if (FASTLED_ALLOW_INTERRUPTS == 1)
+            sei();
+            #endif
+        }
 
-    for(uint32_t i = 0; (i < LANES) && (i<8); i++) {
-      while(DUE_TIMER_VAL < next_mark);
-      next_mark = (DUE_TIMER_VAL+TOTAL);
+        return DUE_TIMER_VAL;
+    }
 
-      *FastPin<FIRST_PIN>::sport() = PORT_MASK;
+    template<int BITS,int PX> __attribute__ ((always_inline)) inline static void writeBits(register uint32_t & next_mark, register Lines & b, Lines & b3, PixelController<RGB_ORDER,LANES, PORT_MASK> &pixels) { // , register uint32_t & b2)  {
+        Lines b2;
+        transpose8x1(b.bytes,b2.bytes);
 
-      while((next_mark - DUE_TIMER_VAL) > (T2+T3+6));
-      *FastPin<FIRST_PIN>::cport() = (~b2.bytes[7-i]) & PORT_MASK;
+        register uint8_t d = pixels.template getd<PX>(pixels);
+        register uint8_t scale = pixels.template getscale<PX>(pixels);
 
-      while((next_mark - (DUE_TIMER_VAL)) > T3);
-      *FastPin<FIRST_PIN>::cport() = PORT_MASK;
+        for(uint32_t i = 0; (i < LANES) && (i<8); i++) {
+            while(DUE_TIMER_VAL < next_mark);
+            next_mark = (DUE_TIMER_VAL+TOTAL);
 
-      b3.bytes[i] = pixels.template loadAndScale<PX>(pixels,i,d,scale);
-    }
+            *FastPin<FIRST_PIN>::sport() = PORT_MASK;
 
-    for(uint32_t i = LANES; i < 8; i++) {
-      while(DUE_TIMER_VAL < next_mark);
-      next_mark = (DUE_TIMER_VAL+TOTAL);
-      *FastPin<FIRST_PIN>::sport() = PORT_MASK;
+            while((next_mark - DUE_TIMER_VAL) > (T2+T3+6));
+            *FastPin<FIRST_PIN>::cport() = (~b2.bytes[7-i]) & PORT_MASK;
 
-      while((next_mark - DUE_TIMER_VAL) > (T2+T3+6));
-      *FastPin<FIRST_PIN>::cport() = (~b2.bytes[7-i]) & PORT_MASK;
+            while((next_mark - (DUE_TIMER_VAL)) > T3);
+            *FastPin<FIRST_PIN>::cport() = PORT_MASK;
 
-      while((next_mark - DUE_TIMER_VAL) > T3);
-      *FastPin<FIRST_PIN>::cport() = PORT_MASK;
-    }
-  }
+            b3.bytes[i] = pixels.template loadAndScale<PX>(pixels,i,d,scale);
+        }
 
+        for(uint32_t i = LANES; i < 8; i++) {
+            while(DUE_TIMER_VAL < next_mark);
+            next_mark = (DUE_TIMER_VAL+TOTAL);
+            *FastPin<FIRST_PIN>::sport() = PORT_MASK;
 
+            while((next_mark - DUE_TIMER_VAL) > (T2+T3+6));
+            *FastPin<FIRST_PIN>::cport() = (~b2.bytes[7-i]) & PORT_MASK;
+
+            while((next_mark - DUE_TIMER_VAL) > T3);
+            *FastPin<FIRST_PIN>::cport() = PORT_MASK;
+        }
+    }
 };
 
 #endif
diff --git a/platforms/arm/sam/fastpin_arm_sam.h b/platforms/arm/sam/fastpin_arm_sam.h
index 339c5e75a4..e1354c7346 100644
--- a/platforms/arm/sam/fastpin_arm_sam.h
+++ b/platforms/arm/sam/fastpin_arm_sam.h
@@ -92,7 +92,6 @@ _FL_IO(D,3);
 
 #if defined(__SAM3X8E__)
 
-
 #define MAX_PIN 78
 _FL_DEFPIN(0, 8, A); _FL_DEFPIN(1, 9, A); _FL_DEFPIN(2, 25, B); _FL_DEFPIN(3, 28, C);
 _FL_DEFPIN(4, 26, C); _FL_DEFPIN(5, 25, C); _FL_DEFPIN(6, 24, C); _FL_DEFPIN(7, 23, C);
diff --git a/platforms/arm/stm32/clockless_arm_stm32.h b/platforms/arm/stm32/clockless_arm_stm32.h
index e4b4de08a5..1cc1f66785 100644
--- a/platforms/arm/stm32/clockless_arm_stm32.h
+++ b/platforms/arm/stm32/clockless_arm_stm32.h
@@ -9,118 +9,118 @@ FASTLED_NAMESPACE_BEGIN
 
 template <int DATA_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = RGB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 50>
 class ClocklessController : public CPixelLEDController<RGB_ORDER> {
-  typedef typename FastPin<DATA_PIN>::port_ptr_t data_ptr_t;
-  typedef typename FastPin<DATA_PIN>::port_t data_t;
+    typedef typename FastPin<DATA_PIN>::port_ptr_t data_ptr_t;
+    typedef typename FastPin<DATA_PIN>::port_t data_t;
+
+    data_t mPinMask;
+    data_ptr_t mPort;
+    CMinWait<WAIT_TIME> mWait;
 
-  data_t mPinMask;
-  data_ptr_t mPort;
-  CMinWait<WAIT_TIME> mWait;
 public:
-  virtual void init() {
-    FastPin<DATA_PIN>::setOutput();
-    mPinMask = FastPin<DATA_PIN>::mask();
-    mPort = FastPin<DATA_PIN>::port();
-  }
+    virtual void init() {
+        FastPin<DATA_PIN>::setOutput();
+        mPinMask = FastPin<DATA_PIN>::mask();
+        mPort = FastPin<DATA_PIN>::port();
+    }
 
-	virtual uint16_t getMaxRefreshRate() const { return 400; }
+  	virtual uint16_t getMaxRefreshRate() const { return 400; }
 
 protected:
-
-  virtual void showPixels(PixelController<RGB_ORDER> & pixels) {
-    mWait.wait();
-    if(!showRGBInternal(pixels)) {
-      sei(); delayMicroseconds(WAIT_TIME); cli();
-      showRGBInternal(pixels);
+    virtual void showPixels(PixelController<RGB_ORDER> & pixels) {
+        mWait.wait();
+        if(!showRGBInternal(pixels)) {
+            sei(); delayMicroseconds(WAIT_TIME); cli();
+            showRGBInternal(pixels);
+        }
+        mWait.mark();
     }
-    mWait.mark();
-  }
 
 #define _CYCCNT (*(volatile uint32_t*)(0xE0001004UL))
 
-  template<int BITS> __attribute__ ((always_inline)) inline static void writeBits(register uint32_t & next_mark, register data_ptr_t port, register data_t hi, register data_t lo, register uint8_t & b)  {
-    for(register uint32_t i = BITS-1; i > 0; i--) {
-      while(_CYCCNT < (T1+T2+T3-20));
-      FastPin<DATA_PIN>::fastset(port, hi);
-      _CYCCNT = 4;
-      if(b&0x80) {
-        while(_CYCCNT < (T1+T2-20));
-        FastPin<DATA_PIN>::fastset(port, lo);
-      } else {
-        while(_CYCCNT < (T1-10));
-        FastPin<DATA_PIN>::fastset(port, lo);
-      }
-      b <<= 1;
+    template<int BITS> __attribute__ ((always_inline)) inline static void writeBits(register uint32_t & next_mark, register data_ptr_t port, register data_t hi, register data_t lo, register uint8_t & b)  {
+        for(register uint32_t i = BITS-1; i > 0; i--) {
+            while(_CYCCNT < (T1+T2+T3-20));
+            FastPin<DATA_PIN>::fastset(port, hi);
+            _CYCCNT = 4;
+            if(b&0x80) {
+                while(_CYCCNT < (T1+T2-20));
+                FastPin<DATA_PIN>::fastset(port, lo);
+            } else {
+                while(_CYCCNT < (T1-10));
+                FastPin<DATA_PIN>::fastset(port, lo);
+            }
+            b <<= 1;
+        }
+
+        while(_CYCCNT < (T1+T2+T3-20));
+        FastPin<DATA_PIN>::fastset(port, hi);
+        _CYCCNT = 4;
+
+        if(b&0x80) {
+            while(_CYCCNT < (T1+T2-20));
+            FastPin<DATA_PIN>::fastset(port, lo);
+        } else {
+            while(_CYCCNT < (T1-10));
+            FastPin<DATA_PIN>::fastset(port, lo);
+        }
     }
 
-    while(_CYCCNT < (T1+T2+T3-20));
-    FastPin<DATA_PIN>::fastset(port, hi);
-    _CYCCNT = 4;
-
-    if(b&0x80) {
-      while(_CYCCNT < (T1+T2-20));
-      FastPin<DATA_PIN>::fastset(port, lo);
-    } else {
-      while(_CYCCNT < (T1-10));
-      FastPin<DATA_PIN>::fastset(port, lo);
+    // This method is made static to force making register Y available to use for data on AVR - if the method is non-static, then
+    // gcc will use register Y for the this pointer.
+    static uint32_t showRGBInternal(PixelController<RGB_ORDER> pixels) {
+        // Get access to the clock
+        CoreDebug->DEMCR  |= CoreDebug_DEMCR_TRCENA_Msk;
+        DWT->CTRL |= DWT_CTRL_CYCCNTENA_Msk;
+        DWT->CYCCNT = 0;
+
+        register data_ptr_t port = FastPin<DATA_PIN>::port();
+        register data_t hi = *port | FastPin<DATA_PIN>::mask();;
+        register data_t lo = *port & ~FastPin<DATA_PIN>::mask();;
+        *port = lo;
+
+        // Setup the pixel controller and load/scale the first byte
+        pixels.preStepFirstByteDithering();
+        register uint8_t b = pixels.loadAndScale0();
+
+        cli();
+
+        uint32_t next_mark = (T1+T2+T3);
+
+        DWT->CYCCNT = 0;
+        while(pixels.has(1)) {
+            pixels.stepDithering();
+            #if (FASTLED_ALLOW_INTERRUPTS == 1)
+            cli();
+            // if interrupts took longer than 45µs, punt on the current frame
+            if(DWT->CYCCNT > next_mark) {
+                if((DWT->CYCCNT-next_mark) > ((WAIT_TIME-INTERRUPT_THRESHOLD)*CLKS_PER_US)) { sei(); return 0; }
+            }
+
+            hi = *port | FastPin<DATA_PIN>::mask();
+            lo = *port & ~FastPin<DATA_PIN>::mask();
+            #endif
+
+            // Write first byte, read next byte
+            writeBits<8+XTRA0>(next_mark, port, hi, lo, b);
+            b = pixels.loadAndScale1();
+
+            // Write second byte, read 3rd byte
+            writeBits<8+XTRA0>(next_mark, port, hi, lo, b);
+            b = pixels.loadAndScale2();
+
+            // Write third byte, read 1st byte of next pixel
+            writeBits<8+XTRA0>(next_mark, port, hi, lo, b);
+            b = pixels.advanceAndLoadAndScale0();
+            #if (FASTLED_ALLOW_INTERRUPTS == 1)
+            sei();
+            #endif
+        };
+
+        sei();
+        return DWT->CYCCNT;
     }
-  }
-
-  // This method is made static to force making register Y available to use for data on AVR - if the method is non-static, then
-  // gcc will use register Y for the this pointer.
-  static uint32_t showRGBInternal(PixelController<RGB_ORDER> pixels) {
-    // Get access to the clock
-    CoreDebug->DEMCR  |= CoreDebug_DEMCR_TRCENA_Msk;
-    DWT->CTRL |= DWT_CTRL_CYCCNTENA_Msk;
-    DWT->CYCCNT = 0;
-
-    register data_ptr_t port = FastPin<DATA_PIN>::port();
-    register data_t hi = *port | FastPin<DATA_PIN>::mask();;
-    register data_t lo = *port & ~FastPin<DATA_PIN>::mask();;
-    *port = lo;
-
-    // Setup the pixel controller and load/scale the first byte
-    pixels.preStepFirstByteDithering();
-    register uint8_t b = pixels.loadAndScale0();
-
-    cli();
-
-    uint32_t next_mark = (T1+T2+T3);
-
-    DWT->CYCCNT = 0;
-    while(pixels.has(1)) {
-      pixels.stepDithering();
-      #if (FASTLED_ALLOW_INTERRUPTS == 1)
-      cli();
-      // if interrupts took longer than 45µs, punt on the current frame
-      if(DWT->CYCCNT > next_mark) {
-        if((DWT->CYCCNT-next_mark) > ((WAIT_TIME-INTERRUPT_THRESHOLD)*CLKS_PER_US)) { sei(); return 0; }
-      }
-
-      hi = *port | FastPin<DATA_PIN>::mask();
-      lo = *port & ~FastPin<DATA_PIN>::mask();
-      #endif
-
-      // Write first byte, read next byte
-      writeBits<8+XTRA0>(next_mark, port, hi, lo, b);
-      b = pixels.loadAndScale1();
-
-      // Write second byte, read 3rd byte
-      writeBits<8+XTRA0>(next_mark, port, hi, lo, b);
-      b = pixels.loadAndScale2();
-
-      // Write third byte, read 1st byte of next pixel
-      writeBits<8+XTRA0>(next_mark, port, hi, lo, b);
-      b = pixels.advanceAndLoadAndScale0();
-      #if (FASTLED_ALLOW_INTERRUPTS == 1)
-      sei();
-      #endif
-    };
-
-    sei();
-    return DWT->CYCCNT;
-  }
 };
 
 FASTLED_NAMESPACE_END
 
-  #endif
+#endif
diff --git a/platforms/arm/stm32/cm3_regs.h b/platforms/arm/stm32/cm3_regs.h
index f81f24cbd3..7bb7f759cc 100644
--- a/platforms/arm/stm32/cm3_regs.h
+++ b/platforms/arm/stm32/cm3_regs.h
@@ -4,20 +4,20 @@
 #include <stdint.h>
 
 #ifdef __cplusplus
-  #define   __I     volatile             /*!< Defines 'read only' permissions                 */
+#define   __I     volatile             /*!< Defines 'read only' permissions                 */
 #else
-  #define   __I     volatile const       /*!< Defines 'read only' permissions                 */
+#define   __I     volatile const       /*!< Defines 'read only' permissions                 */
 #endif
-#define     __O     volatile             /*!< Defines 'write only' permissions                */
-#define     __IO    volatile             /*!< Defines 'read / write' permissions              */
+#define   __O     volatile             /*!< Defines 'write only' permissions                */
+#define   __IO    volatile             /*!< Defines 'read / write' permissions              */
 
 
 typedef struct
 {
-  __IO uint32_t DHCSR;                   /*!< Offset: 0x000 (R/W)  Debug Halting Control and Status Register    */
-  __O  uint32_t DCRSR;                   /*!< Offset: 0x004 ( /W)  Debug Core Register Selector Register        */
-  __IO uint32_t DCRDR;                   /*!< Offset: 0x008 (R/W)  Debug Core Register Data Register            */
-  __IO uint32_t DEMCR;                   /*!< Offset: 0x00C (R/W)  Debug Exception and Monitor Control Register */
+    __IO uint32_t DHCSR;                   /*!< Offset: 0x000 (R/W)  Debug Halting Control and Status Register    */
+    __O  uint32_t DCRSR;                   /*!< Offset: 0x004 ( /W)  Debug Core Register Selector Register        */
+    __IO uint32_t DCRDR;                   /*!< Offset: 0x008 (R/W)  Debug Core Register Data Register            */
+    __IO uint32_t DEMCR;                   /*!< Offset: 0x00C (R/W)  Debug Exception and Monitor Control Register */
 } CoreDebug_Type;
 
 #define CoreDebug_BASE      (0xE000EDF0UL)                            /*!< Core Debug Base Address            */
@@ -28,29 +28,29 @@ typedef struct
 
 typedef struct
 {
-  __IO uint32_t CTRL;                    /*!< Offset: 0x000 (R/W)  Control Register                          */
-  __IO uint32_t CYCCNT;                  /*!< Offset: 0x004 (R/W)  Cycle Count Register                      */
-  __IO uint32_t CPICNT;                  /*!< Offset: 0x008 (R/W)  CPI Count Register                        */
-  __IO uint32_t EXCCNT;                  /*!< Offset: 0x00C (R/W)  Exception Overhead Count Register         */
-  __IO uint32_t SLEEPCNT;                /*!< Offset: 0x010 (R/W)  Sleep Count Register                      */
-  __IO uint32_t LSUCNT;                  /*!< Offset: 0x014 (R/W)  LSU Count Register                        */
-  __IO uint32_t FOLDCNT;                 /*!< Offset: 0x018 (R/W)  Folded-instruction Count Register         */
-  __I  uint32_t PCSR;                    /*!< Offset: 0x01C (R/ )  Program Counter Sample Register           */
-  __IO uint32_t COMP0;                   /*!< Offset: 0x020 (R/W)  Comparator Register 0                     */
-  __IO uint32_t MASK0;                   /*!< Offset: 0x024 (R/W)  Mask Register 0                           */
-  __IO uint32_t FUNCTION0;               /*!< Offset: 0x028 (R/W)  Function Register 0                       */
-       uint32_t RESERVED0[1];
-  __IO uint32_t COMP1;                   /*!< Offset: 0x030 (R/W)  Comparator Register 1                     */
-  __IO uint32_t MASK1;                   /*!< Offset: 0x034 (R/W)  Mask Register 1                           */
-  __IO uint32_t FUNCTION1;               /*!< Offset: 0x038 (R/W)  Function Register 1                       */
-       uint32_t RESERVED1[1];
-  __IO uint32_t COMP2;                   /*!< Offset: 0x040 (R/W)  Comparator Register 2                     */
-  __IO uint32_t MASK2;                   /*!< Offset: 0x044 (R/W)  Mask Register 2                           */
-  __IO uint32_t FUNCTION2;               /*!< Offset: 0x048 (R/W)  Function Register 2                       */
-       uint32_t RESERVED2[1];
-  __IO uint32_t COMP3;                   /*!< Offset: 0x050 (R/W)  Comparator Register 3                     */
-  __IO uint32_t MASK3;                   /*!< Offset: 0x054 (R/W)  Mask Register 3                           */
-  __IO uint32_t FUNCTION3;               /*!< Offset: 0x058 (R/W)  Function Register 3                       */
+    __IO uint32_t CTRL;                    /*!< Offset: 0x000 (R/W)  Control Register                          */
+    __IO uint32_t CYCCNT;                  /*!< Offset: 0x004 (R/W)  Cycle Count Register                      */
+    __IO uint32_t CPICNT;                  /*!< Offset: 0x008 (R/W)  CPI Count Register                        */
+    __IO uint32_t EXCCNT;                  /*!< Offset: 0x00C (R/W)  Exception Overhead Count Register         */
+    __IO uint32_t SLEEPCNT;                /*!< Offset: 0x010 (R/W)  Sleep Count Register                      */
+    __IO uint32_t LSUCNT;                  /*!< Offset: 0x014 (R/W)  LSU Count Register                        */
+    __IO uint32_t FOLDCNT;                 /*!< Offset: 0x018 (R/W)  Folded-instruction Count Register         */
+    __I  uint32_t PCSR;                    /*!< Offset: 0x01C (R/ )  Program Counter Sample Register           */
+    __IO uint32_t COMP0;                   /*!< Offset: 0x020 (R/W)  Comparator Register 0                     */
+    __IO uint32_t MASK0;                   /*!< Offset: 0x024 (R/W)  Mask Register 0                           */
+    __IO uint32_t FUNCTION0;               /*!< Offset: 0x028 (R/W)  Function Register 0                       */
+          uint32_t RESERVED0[1];
+    __IO uint32_t COMP1;                   /*!< Offset: 0x030 (R/W)  Comparator Register 1                     */
+    __IO uint32_t MASK1;                   /*!< Offset: 0x034 (R/W)  Mask Register 1                           */
+    __IO uint32_t FUNCTION1;               /*!< Offset: 0x038 (R/W)  Function Register 1                       */
+          uint32_t RESERVED1[1];
+    __IO uint32_t COMP2;                   /*!< Offset: 0x040 (R/W)  Comparator Register 2                     */
+    __IO uint32_t MASK2;                   /*!< Offset: 0x044 (R/W)  Mask Register 2                           */
+    __IO uint32_t FUNCTION2;               /*!< Offset: 0x048 (R/W)  Function Register 2                       */
+          uint32_t RESERVED2[1];
+    __IO uint32_t COMP3;                   /*!< Offset: 0x050 (R/W)  Comparator Register 3                     */
+    __IO uint32_t MASK3;                   /*!< Offset: 0x054 (R/W)  Mask Register 3                           */
+    __IO uint32_t FUNCTION3;               /*!< Offset: 0x058 (R/W)  Function Register 3                       */
 } DWT_Type;
 
 
diff --git a/platforms/arm/stm32/fastpin_arm_stm32.h b/platforms/arm/stm32/fastpin_arm_stm32.h
index 274d0f60da..bc69912c38 100644
--- a/platforms/arm/stm32/fastpin_arm_stm32.h
+++ b/platforms/arm/stm32/fastpin_arm_stm32.h
@@ -16,55 +16,56 @@ FASTLED_NAMESPACE_BEGIN
 /// The registers are data output, set output, clear output, toggle output, input, and direction
 
 template<uint8_t PIN, uint8_t _BIT, uint32_t _MASK, typename _GPIO> class _ARMPIN {
+
 public:
-  typedef volatile uint32_t * port_ptr_t;
-  typedef uint32_t port_t;
-
-  #if 0
-  inline static void setOutput() {
-    if(_BIT<8) {
-      _CRL::r() = (_CRL::r() & (0xF << (_BIT*4)) | (0x1 << (_BIT*4));
-    } else {
-      _CRH::r() = (_CRH::r() & (0xF << ((_BIT-8)*4))) | (0x1 << ((_BIT-8)*4));
+    typedef volatile uint32_t * port_ptr_t;
+    typedef uint32_t port_t;
+
+    #if 0
+    inline static void setOutput() {
+        if(_BIT<8) {
+            _CRL::r() = (_CRL::r() & (0xF << (_BIT*4)) | (0x1 << (_BIT*4));
+        } else {
+            _CRH::r() = (_CRH::r() & (0xF << ((_BIT-8)*4))) | (0x1 << ((_BIT-8)*4));
+        }
     }
-  }
-  inline static void setInput() { /* TODO */ } // TODO: preform MUX config { _PDDR::r() &= ~_MASK; }
-  #endif
+    inline static void setInput() { /* TODO */ } // TODO: perform MUX config { _PDDR::r() &= ~_MASK; }
+    #endif
 
-  inline static void setOutput() { pinMode(PIN, OUTPUT); } // TODO: perform MUX config { _PDDR::r() |= _MASK; }
-  inline static void setInput() { pinMode(PIN, INPUT); } // TODO: preform MUX config { _PDDR::r() &= ~_MASK; }
+    inline static void setOutput() { pinMode(PIN, OUTPUT); } // TODO: perform MUX config { _PDDR::r() |= _MASK; }
+    inline static void setInput() { pinMode(PIN, INPUT); } // TODO: perform MUX config { _PDDR::r() &= ~_MASK; }
 
-  inline static void hi() __attribute__ ((always_inline)) { _GPIO::r()->BSRR = _MASK; }
-  inline static void lo() __attribute__ ((always_inline)) { _GPIO::r()->BRR = _MASK; }
-  // inline static void lo() __attribute__ ((always_inline)) { _GPIO::r()->BSRR = (_MASK<<16); }
-  inline static void set(register port_t val) __attribute__ ((always_inline)) { _GPIO::r()->ODR = val; }
+    inline static void hi() __attribute__ ((always_inline)) { _GPIO::r()->BSRR = _MASK; }
+    inline static void lo() __attribute__ ((always_inline)) { _GPIO::r()->BRR = _MASK; }
+    // inline static void lo() __attribute__ ((always_inline)) { _GPIO::r()->BSRR = (_MASK<<16); }
+    inline static void set(register port_t val) __attribute__ ((always_inline)) { _GPIO::r()->ODR = val; }
 
-  inline static void strobe() __attribute__ ((always_inline)) { toggle(); toggle(); }
+    inline static void strobe() __attribute__ ((always_inline)) { toggle(); toggle(); }
 
-  inline static void toggle() __attribute__ ((always_inline)) { if(_GPIO::r()->ODR & _MASK) { lo(); } else { hi(); } }
+    inline static void toggle() __attribute__ ((always_inline)) { if(_GPIO::r()->ODR & _MASK) { lo(); } else { hi(); } }
 
-  inline static void hi(register port_ptr_t port) __attribute__ ((always_inline)) { hi(); }
-  inline static void lo(register port_ptr_t port) __attribute__ ((always_inline)) { lo(); }
-  inline static void fastset(register port_ptr_t port, register port_t val) __attribute__ ((always_inline)) { *port = val; }
+    inline static void hi(register port_ptr_t port) __attribute__ ((always_inline)) { hi(); }
+    inline static void lo(register port_ptr_t port) __attribute__ ((always_inline)) { lo(); }
+    inline static void fastset(register port_ptr_t port, register port_t val) __attribute__ ((always_inline)) { *port = val; }
 
-  inline static port_t hival() __attribute__ ((always_inline)) { return _GPIO::r()->ODR | _MASK; }
-  inline static port_t loval() __attribute__ ((always_inline)) { return _GPIO::r()->ODR & ~_MASK; }
-  inline static port_ptr_t port() __attribute__ ((always_inline)) { return &_GPIO::r()->ODR; }
-  inline static port_ptr_t sport() __attribute__ ((always_inline)) { return &_GPIO::r()->BSRR; }
-  inline static port_ptr_t cport() __attribute__ ((always_inline)) { return &_GPIO::r()->BRR; }
-  inline static port_t mask() __attribute__ ((always_inline)) { return _MASK; }
+    inline static port_t hival() __attribute__ ((always_inline)) { return _GPIO::r()->ODR | _MASK; }
+    inline static port_t loval() __attribute__ ((always_inline)) { return _GPIO::r()->ODR & ~_MASK; }
+    inline static port_ptr_t port() __attribute__ ((always_inline)) { return &_GPIO::r()->ODR; }
+    inline static port_ptr_t sport() __attribute__ ((always_inline)) { return &_GPIO::r()->BSRR; }
+    inline static port_ptr_t cport() __attribute__ ((always_inline)) { return &_GPIO::r()->BRR; }
+    inline static port_t mask() __attribute__ ((always_inline)) { return _MASK; }
 };
 
 #if defined(STM32F10X_MD)
-  #define _R(T) struct __gen_struct_ ## T
-  #define _RD32(T) struct __gen_struct_ ## T { static __attribute__((always_inline)) inline volatile GPIO_TypeDef * r() { return T; } };
-  #define _FL_IO(L,C) _RD32(GPIO ## L);  _FL_DEFINE_PORT3(L, C, _R(GPIO ## L));
+#define _R(T) struct __gen_struct_ ## T
+#define _RD32(T) struct __gen_struct_ ## T { static __attribute__((always_inline)) inline volatile GPIO_TypeDef * r() { return T; } };
+#define _FL_IO(L,C) _RD32(GPIO ## L);  _FL_DEFINE_PORT3(L, C, _R(GPIO ## L));
 #elif defined(__STM32F1__)
-  #define _R(T) struct __gen_struct_ ## T
-  #define _RD32(T) struct __gen_struct_ ## T { static __attribute__((always_inline)) inline gpio_reg_map* r() { return T->regs; } };
-  #define _FL_IO(L,C) _RD32(GPIO ## L); _FL_DEFINE_PORT3(L, C, _R(GPIO ## L));
+#define _R(T) struct __gen_struct_ ## T
+#define _RD32(T) struct __gen_struct_ ## T { static __attribute__((always_inline)) inline gpio_reg_map* r() { return T->regs; } };
+#define _FL_IO(L,C) _RD32(GPIO ## L); _FL_DEFINE_PORT3(L, C, _R(GPIO ## L));
 #else
- #error "Platform not supported"
+#error "Platform not supported"
 #endif
 
 #define _FL_DEFPIN(PIN, BIT, L) template<> class FastPin<PIN> : public _ARMPIN<PIN, BIT, 1 << BIT, _R(GPIO ## L)> {};
@@ -94,8 +95,6 @@ _FL_IO(G,6);
 // Actual pin definitions
 #if defined(SPARK) // Sparkfun STM32F103 based board
 
-
-
 #define MAX_PIN 19
 _FL_DEFPIN(0, 7, B);
 _FL_DEFPIN(1, 6, B);
@@ -118,7 +117,6 @@ _FL_DEFPIN(17, 1, B);
 _FL_DEFPIN(18, 3, A);
 _FL_DEFPIN(19, 2, A);
 
-
 #define SPI_DATA 15
 #define SPI_CLOCK 13
 
diff --git a/platforms/arm/stm32/led_sysdefs_arm_stm32.h b/platforms/arm/stm32/led_sysdefs_arm_stm32.h
index 6b9ce7cad5..afcf178535 100644
--- a/platforms/arm/stm32/led_sysdefs_arm_stm32.h
+++ b/platforms/arm/stm32/led_sysdefs_arm_stm32.h
@@ -3,25 +3,25 @@
 
 #if defined(STM32F10X_MD)
 
- #include <application.h>
+#include <application.h>
 
- #define FASTLED_NAMESPACE_BEGIN namespace NSFastLED {
- #define FASTLED_NAMESPACE_END }
- #define FASTLED_USING_NAMESPACE using namespace NSFastLED;
+#define FASTLED_NAMESPACE_BEGIN namespace NSFastLED {
+#define FASTLED_NAMESPACE_END }
+#define FASTLED_USING_NAMESPACE using namespace NSFastLED;
 
- // reusing/abusing cli/sei defs for due
- #define cli()  __disable_irq(); __disable_fault_irq();
- #define sei() __enable_irq(); __enable_fault_irq();
+// reusing/abusing cli/sei defs for due
+#define cli()  __disable_irq(); __disable_fault_irq();
+#define sei() __enable_irq(); __enable_fault_irq();
 
 #elif defined (__STM32F1__)
 
- #include "cm3_regs.h"
+#include "cm3_regs.h"
 
- #define cli() nvic_globalirq_disable()
- #define sei() nvic_globalirq_enable()
+#define cli() nvic_globalirq_disable()
+#define sei() nvic_globalirq_enable()
 
 #else
- #error "Platform not supported"
+#error "Platform not supported"
 #endif
 
 #define FASTLED_ARM
@@ -56,6 +56,6 @@ typedef volatile       uint8_t RwReg; /**< Read-Write 8-bit register (volatile u
 #define FASTLED_NO_PINMAP
 
 #ifndef F_CPU
- #define F_CPU 72000000
+#define F_CPU 72000000
 #endif
 #endif
diff --git a/platforms/avr/clockless_trinket.h b/platforms/avr/clockless_trinket.h
index 824553feff..b6ff96b936 100644
--- a/platforms/avr/clockless_trinket.h
+++ b/platforms/avr/clockless_trinket.h
@@ -97,6 +97,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 	typedef typename FastPin<DATA_PIN>::port_t data_t;
 
 	CMinWait<WAIT_TIME> mWait;
+
 public:
 	virtual void init() {
 		FastPin<DATA_PIN>::setOutput();
@@ -105,7 +106,6 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 	virtual uint16_t getMaxRefreshRate() const { return 400; }
 
 protected:
-
 	virtual void showPixels(PixelController<RGB_ORDER> & pixels) {
 
 		mWait.wait();
diff --git a/platforms/avr/fastspi_avr.h b/platforms/avr/fastspi_avr.h
index d2edc9660c..245e40654b 100644
--- a/platforms/avr/fastspi_avr.h
+++ b/platforms/avr/fastspi_avr.h
@@ -187,7 +187,6 @@ class AVRUSART0SPIOutput {
 		FastPin<_CLOCK_PIN>::setOutput();
 		FastPin<_DATA_PIN>::setOutput();
 
-
 		// must be done last, see page 206
 		setSPIRate();
 	}
@@ -249,12 +248,12 @@ class AVRUSART0SPIOutput {
 		setSPIRate();
 	}
 
-		void release() {
-			if(m_pSelect != NULL) {
-				m_pSelect->release();
-			}
-			disable_pins();
+	void release() {
+		if(m_pSelect != NULL) {
+			m_pSelect->release();
 		}
+		disable_pins();
+	}
 
 	static void writeBytesValueRaw(uint8_t value, int len) {
 		while(len--) {
@@ -333,6 +332,7 @@ template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint32_t _SPI_CLOCK_DIVIDER>
 class AVRHardwareSPIOutput {
 	Selectable *m_pSelect;
 	bool mWait;
+
 public:
 	AVRHardwareSPIOutput() { m_pSelect = NULL; mWait = false;}
 	AVRHardwareSPIOutput(Selectable *pSelect) { m_pSelect = pSelect; }
@@ -510,6 +510,7 @@ template <uint8_t _DATA_PIN, uint8_t _CLOCK_PIN, uint32_t _SPI_CLOCK_DIVIDER>
 class AVRHardwareSPIOutput {
 	Selectable *m_pSelect;
 	bool mWait;
+
 public:
 	AVRHardwareSPIOutput() { m_pSelect = NULL; mWait = false;}
 	AVRHardwareSPIOutput(Selectable *pSelect) { m_pSelect = pSelect; }
diff --git a/platforms/esp/32/clockless_block_esp32.h b/platforms/esp/32/clockless_block_esp32.h
index 8ab5807af8..41f44be2b6 100644
--- a/platforms/esp/32/clockless_block_esp32.h
+++ b/platforms/esp/32/clockless_block_esp32.h
@@ -24,6 +24,7 @@ class InlineBlockClocklessController : public CPixelLEDController<RGB_ORDER, LAN
     data_t mPinMask;
     data_ptr_t mPort;
     CMinWait<WAIT_TIME> mWait;
+
 public:
     virtual int size() { return CLEDController::size() * LANES; }
 
diff --git a/platforms/esp/32/clockless_esp32.h.orig b/platforms/esp/32/clockless_esp32.h.orig
deleted file mode 100644
index e0cd00dae9..0000000000
--- a/platforms/esp/32/clockless_esp32.h.orig
+++ /dev/null
@@ -1,786 +0,0 @@
-/*
- * Integration into FastLED ClocklessController 2017 Thomas Basler
- *
- * Modifications Copyright (c) 2017 Martin F. Falatic
- *
- * Modifications Copyright (c) 2018 Samuel Z. Guyer
- *
- * ESP32 support is provided using the RMT peripheral device -- a unit
- * on the chip designed specifically for generating (and receiving)
- * precisely-timed digital signals. Nominally for use in infrared
- * remote controls, we use it to generate the signals for clockless
- * LED strips. The main advantage of using the RMT device is that,
- * once programmed, it generates the signal asynchronously, allowing
- * the CPU to continue executing other code. It is also not vulnerable
- * to interrupts or other timing problems that could disrupt the signal.
- *
- * The implementation strategy is borrowed from previous work and from
- * the RMT support built into the ESP32 IDF. The RMT device has 8
- * channels, which can be programmed independently to send sequences
- * of high/low bits. Memory for each channel is limited, however, so
- * in order to send a long sequence of bits, we need to continuously
- * refill the buffer until all the data is sent. To do this, we fill
- * half the buffer and then set an interrupt to go off when that half
- * is sent. Then we refill that half while the second half is being
- * sent. This strategy effectively overlaps computation (by the CPU)
- * and communication (by the RMT).
- *
- * Since the RMT device only has 8 channels, we need a strategy to
- * allow more than 8 LED controllers. Our driver assigns controllers
- * to channels on the fly, queuing up controllers as necessary until a
- * channel is free. The main showPixels routine just fires off the
- * first 8 controllers; the interrupt handler starts new controllers
- * asynchronously as previous ones finish. So, for example, it can
- * send the data for 8 controllers simultaneously, but 16 controllers
- * would take approximately twice as much time.
- *
- * There is a #define that allows a program to control the total
- * number of channels that the driver is allowed to use. It defaults
- * to 8 -- use all the channels. Setting it to 1, for example, results
- * in fully serial output:
- *
- *     #define FASTLED_RMT_MAX_CHANNELS 1
- *
- * OTHER RMT APPLICATIONS
- *
- * The default FastLED driver takes over control of the RMT interrupt
- * handler, making it hard to use the RMT device for other
- * (non-FastLED) purposes. You can change it's behavior to use the ESP
- * core driver instead, allowing other RMT applications to
- * co-exist. To switch to this mode, add the following directive
- * before you include FastLED.h:
- *
- *      #define FASTLED_RMT_BUILTIN_DRIVER
- *
- * There may be a performance penalty for using this mode. We need to
- * compute the RMT signal for the entire LED strip ahead of time,
- * rather than overlapping it with communication. We also need a large
- * buffer to hold the signal specification. Each bit of pixel data is
- * represented by a 32-bit pulse specification, so it is a 32X blow-up
- * in memory use.
- *
- *
- * Based on public domain code created 19 Nov 2016 by Chris Osborn <fozztexx@fozztexx.com>
- * http://insentricity.com *
- *
- */
-/*
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#pragma once
-
-FASTLED_NAMESPACE_BEGIN
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "esp32-hal.h"
-#include "esp_intr.h"
-#include "driver/gpio.h"
-#include "driver/rmt.h"
-#include "driver/periph_ctrl.h"
-#include "freertos/semphr.h"
-#include "soc/rmt_struct.h"
-
-#include "esp_log.h"
-
-#ifdef __cplusplus
-}
-#endif
-
-__attribute__ ((always_inline)) inline static uint32_t __clock_cycles() {
-  uint32_t cyc;
-  __asm__ __volatile__ ("rsr %0,ccount":"=a" (cyc));
-  return cyc;
-}
-
-#define FASTLED_HAS_CLOCKLESS 1
-
-// -- Configuration constants
-#define DIVIDER             2 /* 4, 8 still seem to work, but timings become marginal */
-#define MAX_PULSES         32 /* A channel has a 64 "pulse" buffer - we use half per pass */
-
-// -- Convert ESP32 cycles back into nanoseconds
-#define ESPCLKS_TO_NS(_CLKS) (((long)(_CLKS) * 1000L) / F_CPU_MHZ)
-
-// -- Convert nanoseconds into RMT cycles
-#define F_CPU_RMT       (  80000000L)
-#define NS_PER_SEC      (1000000000L)
-#define CYCLES_PER_SEC  (F_CPU_RMT/DIVIDER)
-#define NS_PER_CYCLE    ( NS_PER_SEC / CYCLES_PER_SEC )
-#define NS_TO_CYCLES(n) ( (n) / NS_PER_CYCLE )
-
-// -- Convert ESP32 cycles to RMT cycles
-#define TO_RMT_CYCLES(_CLKS) NS_TO_CYCLES(ESPCLKS_TO_NS(_CLKS))    
-
-// -- Number of cycles to signal the strip to latch
-#define RMT_RESET_DURATION NS_TO_CYCLES(50000)
-
-// -- Core or custom driver
-#ifndef FASTLED_RMT_BUILTIN_DRIVER
-#define FASTLED_RMT_BUILTIN_DRIVER false
-#endif
-
-// -- Max number of controllers we can support
-#ifndef FASTLED_RMT_MAX_CONTROLLERS
-#define FASTLED_RMT_MAX_CONTROLLERS 32
-#endif
-
-// -- Number of RMT channels to use (up to 8)
-//    Redefine this value to 1 to force serial output
-#ifndef FASTLED_RMT_MAX_CHANNELS
-#define FASTLED_RMT_MAX_CHANNELS 8
-#endif
-
-// -- Array of all controllers
-static CLEDController * gControllers[FASTLED_RMT_MAX_CONTROLLERS];
-
-// -- Current set of active controllers, indexed by the RMT
-//    channel assigned to them.
-static CLEDController * gOnChannel[FASTLED_RMT_MAX_CHANNELS];
-
-static int gNumControllers = 0;
-static int gNumStarted = 0;
-static int gNumDone = 0;
-static int gNext = 0;
-
-static intr_handle_t gRMT_intr_handle = NULL;
-
-// -- Global semaphore for the whole show process
-//    Semaphore is not given until all data has been sent
-static xSemaphoreHandle gTX_sem = NULL;
-
-static bool gInitialized = false;
-
-template <int DATA_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = RGB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 5>
-class ClocklessController : public CPixelLEDController<RGB_ORDER>
-{
-    // -- RMT has 8 channels, numbered 0 to 7
-    rmt_channel_t  mRMT_channel;
-
-    // -- Store the GPIO pin
-    gpio_num_t     mPin;
-<<<<<<< HEAD
-
-    // -- This instantiation forces a check on the pin choice
-    FastPin<DATA_PIN> mFastPin;
-
-    // -- Timing values for zero and one bits, derived from T1, T2, and T3
-    rmt_item32_t   mZero;
-    rmt_item32_t   mOne;
-
-=======
-
-    // -- Timing values for zero and one bits, derived from T1, T2, and T3
-    rmt_item32_t   mZero;
-    rmt_item32_t   mOne;
-
->>>>>>> upstream/master
-    // -- State information for keeping track of where we are in the pixel data
-    PixelController<RGB_ORDER> * mPixels = NULL;
-    void *         mPixelSpace = NULL;
-    uint8_t        mRGB_channel;
-    uint16_t       mCurPulse;
-
-    // -- Buffer to hold all of the pulses. For the version that uses
-    //    the RMT driver built into the ESP core.
-    rmt_item32_t * mBuffer;
-    uint16_t       mBufferSize;
-
-public:
-
-    virtual void init()
-    {
-        // -- Precompute rmt items corresponding to a zero bit and a one bit
-        //    according to the timing values given in the template instantiation
-        // T1H
-        mOne.level0 = 1;
-        mOne.duration0 = TO_RMT_CYCLES(T1+T2);
-        // T1L
-        mOne.level1 = 0;
-        mOne.duration1 = TO_RMT_CYCLES(T3);
-
-        // T0H
-        mZero.level0 = 1;
-        mZero.duration0 = TO_RMT_CYCLES(T1);
-        // T0L
-        mZero.level1 = 0;
-        mZero.duration1 = TO_RMT_CYCLES(T2 + T3);
-
-<<<<<<< HEAD
-        gControllers[gNumControllers] = this;
-        gNumControllers++;
-
-        mPin = gpio_num_t(DATA_PIN);
-=======
-	gControllers[gNumControllers] = this;
-        gNumControllers++;
-
-	mPin = gpio_num_t(DATA_PIN);
->>>>>>> upstream/master
-    }
-
-    virtual uint16_t getMaxRefreshRate() const { return 400; }
-
-protected:
-
-    void initRMT()
-    {
-<<<<<<< HEAD
-        // -- Only need to do this once
-        if (gInitialized) return;
-
-        for (int i = 0; i < FASTLED_RMT_MAX_CHANNELS; i++) {
-            gOnChannel[i] = NULL;
-
-            // -- RMT configuration for transmission
-            rmt_config_t rmt_tx;
-            rmt_tx.channel = rmt_channel_t(i);
-            rmt_tx.rmt_mode = RMT_MODE_TX;
-            rmt_tx.gpio_num = mPin;  // The particular pin will be assigned later
-            rmt_tx.mem_block_num = 1;
-            rmt_tx.clk_div = DIVIDER;
-            rmt_tx.tx_config.loop_en = false;
-            rmt_tx.tx_config.carrier_level = RMT_CARRIER_LEVEL_LOW;
-            rmt_tx.tx_config.carrier_en = false;
-            rmt_tx.tx_config.idle_level = RMT_IDLE_LEVEL_LOW;
-            rmt_tx.tx_config.idle_output_en = true;
-                
-            // -- Apply the configuration
-            rmt_config(&rmt_tx);
-
-            if (FASTLED_RMT_BUILTIN_DRIVER) {
-                rmt_driver_install(rmt_channel_t(i), 0, 0);
-            } else {
-                // -- Set up the RMT to send 1/2 of the pulse buffer and then
-                //    generate an interrupt. When we get this interrupt we
-                //    fill the other half in preparation (kind of like double-buffering)
-                rmt_set_tx_thr_intr_en(rmt_channel_t(i), true, MAX_PULSES);
-            }
-        }
-
-        // -- Create a semaphore to block execution until all the controllers are done
-        if (gTX_sem == NULL) {
-            gTX_sem = xSemaphoreCreateBinary();
-            xSemaphoreGive(gTX_sem);
-        }
-                
-        if ( ! FASTLED_RMT_BUILTIN_DRIVER) {
-            // -- Allocate the interrupt if we have not done so yet. This
-            //    interrupt handler must work for all different kinds of
-            //    strips, so it delegates to the refill function for each
-            //    specific instantiation of ClocklessController.
-            if (gRMT_intr_handle == NULL)
-                esp_intr_alloc(ETS_RMT_INTR_SOURCE, 0, interruptHandler, 0, &gRMT_intr_handle);
-        }
-
-        gInitialized = true;
-    }
-
-    virtual void showPixels(PixelController<RGB_ORDER> & pixels)
-    {
-        if (gNumStarted == 0) {
-            // -- First controller: make sure everything is set up
-            initRMT();
-            xSemaphoreTake(gTX_sem, portMAX_DELAY);
-        }
-
-        // -- Initialize the local state, save a pointer to the pixel
-        //    data. We need to make a copy because pixels is a local
-        //    variable in the calling function, and this data structure
-        //    needs to outlive this call to showPixels.
-
-        if (mPixels != NULL) delete mPixels;
-        mPixels = new PixelController<RGB_ORDER>(pixels);
-        
-        // -- Keep track of the number of strips we've seen
-        gNumStarted++;
-
-        // -- The last call to showPixels is the one responsible for doing
-        //    all of the actual worl
-        if (gNumStarted == gNumControllers) {
-            gNext = 0;
-
-            // -- First, fill all the available channels
-            int channel = 0;
-            while (channel < FASTLED_RMT_MAX_CHANNELS && gNext < gNumControllers) {
-                startNext(channel);
-                channel++;
-            }
-
-            // -- Wait here while the rest of the data is sent. The interrupt handler
-            //    will keep refilling the RMT buffers until it is all sent; then it
-            //    gives the semaphore back.
-            xSemaphoreTake(gTX_sem, portMAX_DELAY);
-            xSemaphoreGive(gTX_sem);
-
-            // -- Reset the counters
-            gNumStarted = 0;
-            gNumDone = 0;
-            gNext = 0;
-        }
-    }
-
-    // -- Start up the next controller
-    //    This method is static so that it can dispatch to the appropriate
-    //    startOnChannel method of the given controller.
-    static void startNext(int channel)
-    {
-        if (gNext < gNumControllers) {
-            ClocklessController * pController = static_cast<ClocklessController*>(gControllers[gNext]);
-            pController->startOnChannel(channel);
-            gNext++;
-        }
-    }
-
-    virtual void startOnChannel(int channel)
-    {
-        // -- Assign this channel and configure the RMT
-        mRMT_channel = rmt_channel_t(channel);
-
-        // -- Store a reference to this controller, so we can get it
-        //    inside the interrupt handler
-        gOnChannel[channel] = this;
-
-        // -- Assign the pin to this channel
-        rmt_set_pin(mRMT_channel, RMT_MODE_TX, mPin);
-
-        if (FASTLED_RMT_BUILTIN_DRIVER) {
-            // -- Use the built-in RMT driver to send all the data in one shot
-            rmt_register_tx_end_callback(doneOnChannel, 0);
-            writeAllRMTItems();
-        } else {
-            // -- Use our custom driver to send the data incrementally
-
-            // -- Turn on the interrupts
-            rmt_set_tx_intr_en(mRMT_channel, true);
-        
-            // -- Initialize the counters that keep track of where we are in
-            //    the pixel data.
-            mCurPulse = 0;
-            mRGB_channel = 0;
-
-            // -- Fill both halves of the buffer
-            fillHalfRMTBuffer();
-            fillHalfRMTBuffer();
-
-            // -- Turn on the interrupts
-            rmt_set_tx_intr_en(mRMT_channel, true);
-            
-            // -- Start the RMT TX operation
-            rmt_tx_start(mRMT_channel, true);
-        }
-    }
-
-    static void doneOnChannel(rmt_channel_t channel, void * arg)
-    {
-        ClocklessController * controller = static_cast<ClocklessController*>(gOnChannel[channel]);
-        portBASE_TYPE HPTaskAwoken = 0;
-
-        // -- Turn off output on the pin
-        gpio_matrix_out(controller->mPin, 0x100, 0, 0);
-
-        gOnChannel[channel] = NULL;
-        gNumDone++;
-
-        if (gNumDone == gNumControllers) {
-            // -- If this is the last controller, signal that we are all done
-            xSemaphoreGiveFromISR(gTX_sem, &HPTaskAwoken);
-            if(HPTaskAwoken == pdTRUE) portYIELD_FROM_ISR();
-        } else {
-            // -- Otherwise, if there are still controllers waiting, then
-            //    start the next one on this channel
-            if (gNext < gNumControllers)
-                startNext(channel);
-        }
-=======
-	// -- Only need to do this once
-	if (gInitialized) return;
-
-	for (int i = 0; i < FASTLED_RMT_MAX_CHANNELS; i++) {
-	    gOnChannel[i] = NULL;
-
-	    // -- RMT configuration for transmission
-	    rmt_config_t rmt_tx;
-	    rmt_tx.channel = rmt_channel_t(i);
-	    rmt_tx.rmt_mode = RMT_MODE_TX;
-	    rmt_tx.gpio_num = mPin;  // The particular pin will be assigned later
-	    rmt_tx.mem_block_num = 1;
-	    rmt_tx.clk_div = DIVIDER;
-	    rmt_tx.tx_config.loop_en = false;
-	    rmt_tx.tx_config.carrier_level = RMT_CARRIER_LEVEL_LOW;
-	    rmt_tx.tx_config.carrier_en = false;
-	    rmt_tx.tx_config.idle_level = RMT_IDLE_LEVEL_LOW;
-	    rmt_tx.tx_config.idle_output_en = true;
-		
-	    // -- Apply the configuration
-	    rmt_config(&rmt_tx);
-
-	    if (FASTLED_RMT_BUILTIN_DRIVER) {
-		rmt_driver_install(rmt_channel_t(i), 0, 0);
-	    } else {
-		// -- Set up the RMT to send 1/2 of the pulse buffer and then
-		//    generate an interrupt. When we get this interrupt we
-		//    fill the other half in preparation (kind of like double-buffering)
-		rmt_set_tx_thr_intr_en(rmt_channel_t(i), true, MAX_PULSES);
-	    }
-	}
-
-	// -- Create a semaphore to block execution until all the controllers are done
-	if (gTX_sem == NULL) {
-	    gTX_sem = xSemaphoreCreateBinary();
-	    xSemaphoreGive(gTX_sem);
-	}
-		
-	if ( ! FASTLED_RMT_BUILTIN_DRIVER) {
-	    // -- Allocate the interrupt if we have not done so yet. This
-	    //    interrupt handler must work for all different kinds of
-	    //    strips, so it delegates to the refill function for each
-	    //    specific instantiation of ClocklessController.
-	    if (gRMT_intr_handle == NULL)
-		esp_intr_alloc(ETS_RMT_INTR_SOURCE, 0, interruptHandler, 0, &gRMT_intr_handle);
-	}
-
-	gInitialized = true;
-    }
-
-    virtual void showPixels(PixelController<RGB_ORDER> & pixels)
-    {
-	if (gNumStarted == 0) {
-	    // -- First controller: make sure everything is set up
-	    initRMT();
-	    xSemaphoreTake(gTX_sem, portMAX_DELAY);
-	}
-
-	// -- Initialize the local state, save a pointer to the pixel
-	//    data. We need to make a copy because pixels is a local
-	//    variable in the calling function, and this data structure
-	//    needs to outlive this call to showPixels.
-
-	if (mPixels != NULL) delete mPixels;
-	mPixels = new PixelController<RGB_ORDER>(pixels);
-	
-	// -- Keep track of the number of strips we've seen
-	gNumStarted++;
-
-	// -- The last call to showPixels is the one responsible for doing
-	//    all of the actual worl
-	if (gNumStarted == gNumControllers) {
-	    gNext = 0;
-
-	    // -- First, fill all the available channels
-	    int channel = 0;
-	    while (channel < FASTLED_RMT_MAX_CHANNELS && gNext < gNumControllers) {
-		startNext(channel);
-		channel++;
-	    }
-
-	    // -- Wait here while the rest of the data is sent. The interrupt handler
-	    //    will keep refilling the RMT buffers until it is all sent; then it
-	    //    gives the semaphore back.
-	    xSemaphoreTake(gTX_sem, portMAX_DELAY);
-	    xSemaphoreGive(gTX_sem);
-
-	    // -- Reset the counters
-	    gNumStarted = 0;
-	    gNumDone = 0;
-	    gNext = 0;
-	}
-    }
-
-    // -- Start up the next controller
-    //    This method is static so that it can dispatch to the appropriate
-    //    startOnChannel method of the given controller.
-    static void startNext(int channel)
-    {
-	if (gNext < gNumControllers) {
-	    ClocklessController * pController = static_cast<ClocklessController*>(gControllers[gNext]);
-	    pController->startOnChannel(channel);
-	    gNext++;
-	}
-    }
-
-    virtual void startOnChannel(int channel)
-    {
-	// -- Assign this channel and configure the RMT
-	mRMT_channel = rmt_channel_t(channel);
-
-	// -- Store a reference to this controller, so we can get it
-	//    inside the interrupt handler
-	gOnChannel[channel] = this;
-
-	// -- Assign the pin to this channel
-	rmt_set_pin(mRMT_channel, RMT_MODE_TX, mPin);
-
-	if (FASTLED_RMT_BUILTIN_DRIVER) {
-	    // -- Use the built-in RMT driver to send all the data in one shot
-	    rmt_register_tx_end_callback(doneOnChannel, 0);
-	    writeAllRMTItems();
-	} else {
-	    // -- Use our custom driver to send the data incrementally
-
-	    // -- Turn on the interrupts
-	    rmt_set_tx_intr_en(mRMT_channel, true);
-	
-	    // -- Initialize the counters that keep track of where we are in
-	    //    the pixel data.
-	    mCurPulse = 0;
-	    mRGB_channel = 0;
-
-	    // -- Fill both halves of the buffer
-	    fillHalfRMTBuffer();
-	    fillHalfRMTBuffer();
-
-	    // -- Turn on the interrupts
-	    rmt_set_tx_intr_en(mRMT_channel, true);
-	    
-	    // -- Start the RMT TX operation
-	    rmt_tx_start(mRMT_channel, true);
-	}
-    }
-
-    static void doneOnChannel(rmt_channel_t channel, void * arg)
-    {
-	ClocklessController * controller = static_cast<ClocklessController*>(gOnChannel[channel]);
-        portBASE_TYPE HPTaskAwoken = 0;
-
-	// -- Turn off output on the pin
-	gpio_matrix_out(controller->mPin, 0x100, 0, 0);
-
-	gOnChannel[channel] = NULL;
-	gNumDone++;
-
-	if (gNumDone == gNumControllers) {
-	    // -- If this is the last controller, signal that we are all done
-	    xSemaphoreGiveFromISR(gTX_sem, &HPTaskAwoken);
-	    if(HPTaskAwoken == pdTRUE) portYIELD_FROM_ISR();
-	} else {
-	    // -- Otherwise, if there are still controllers waiting, then
-	    //    start the next one on this channel
-	    if (gNext < gNumControllers)
-		startNext(channel);
-	}
->>>>>>> upstream/master
-    }
-    
-    static IRAM_ATTR void interruptHandler(void *arg)
-    {
-        // -- The basic structure of this code is borrowed from the
-        //    interrupt handler in esp-idf/components/driver/rmt.c
-        uint32_t intr_st = RMT.int_st.val;
-        uint8_t channel;
-
-        for (channel = 0; channel < FASTLED_RMT_MAX_CHANNELS; channel++) {
-            int tx_done_bit = channel * 3;
-            int tx_next_bit = channel + 24;
-
-            if (gOnChannel[channel] != NULL) {
-
-<<<<<<< HEAD
-                ClocklessController * controller = static_cast<ClocklessController*>(gOnChannel[channel]);
-
-                // -- More to send on this channel
-                if (intr_st & BIT(tx_next_bit)) {
-                    RMT.int_clr.val |= BIT(tx_next_bit);
-
-                    // -- Refill the half of the buffer that we just finished,
-                    //    allowing the other half to proceed.
-                    controller->fillHalfRMTBuffer();
-                }
-
-                // -- Transmission is complete on this channel
-                if (intr_st & BIT(tx_done_bit)) {
-                    RMT.int_clr.val |= BIT(tx_done_bit);
-                    doneOnChannel(rmt_channel_t(channel), 0);
-=======
-		ClocklessController * controller = static_cast<ClocklessController*>(gOnChannel[channel]);
-
-		// -- More to send on this channel
-                if (intr_st & BIT(tx_next_bit)) {
-		    RMT.int_clr.val |= BIT(tx_next_bit);
-
-                    // -- Refill the half of the buffer that we just finished,
-                    //    allowing the other half to proceed.
-		    controller->fillHalfRMTBuffer();
-                }
-
-		// -- Transmission is complete on this channel
-                if (intr_st & BIT(tx_done_bit)) {
-                    RMT.int_clr.val |= BIT(tx_done_bit);
-		    doneOnChannel(rmt_channel_t(channel), 0);
->>>>>>> upstream/master
-                }
-            }
-        }
-    }
-
-    virtual void fillHalfRMTBuffer()
-    {
-        // -- Fill half of the RMT pulse buffer
-
-        //    The buffer holds 64 total pulse items, so this loop converts
-        //    as many pixels as can fit in half of the buffer (MAX_PULSES =
-        //    32 items). In our case, each pixel consists of three bytes,
-        //    each bit turns into one pulse item -- 24 items per pixel. So,
-        //    each half of the buffer can hold 1 and 1/3 of a pixel.
-
-        //    The member variable mCurPulse keeps track of which of the 64
-        //    items we are writing. During the first call to this method it
-        //    fills 0-31; in the second call it fills 32-63, and then wraps
-        //    back around to zero.
-
-        //    When we run out of pixel data, just fill the remaining items
-        //    with zero pulses.
-
-        uint16_t pulse_count = 0; // Ranges from 0-31 (half a buffer)
-        uint32_t byteval = 0;
-        uint32_t one_val = mOne.val;
-        uint32_t zero_val = mZero.val;
-        bool done_strip = false;
-
-        while (pulse_count < MAX_PULSES) {
-            if (! mPixels->has(1)) {
-<<<<<<< HEAD
-                if (mCurPulse > 0) {
-                    // -- Extend the last pulse to force the strip to latch. Honestly, I'm not
-                    //    sure if this is really necessary.
-                    // RMTMEM.chan[mRMT_channel].data32[mCurPulse-1].duration1 = RMT_RESET_DURATION;
-                }
-=======
->>>>>>> upstream/master
-                done_strip = true;
-                break;
-            }
-
-            // -- Cycle through the R,G, and B values in the right order
-            switch (mRGB_channel) {
-            case 0:
-                byteval = mPixels->loadAndScale0();
-                mRGB_channel = 1;
-                break;
-            case 1:
-                byteval = mPixels->loadAndScale1();
-                mRGB_channel = 2;
-                break;
-            case 2:
-                byteval = mPixels->loadAndScale2();
-                mPixels->advanceData();
-                mPixels->stepDithering();
-                mRGB_channel = 0;
-                break;
-            default:
-                break;
-            }
-
-            byteval <<= 24;
-            // Shift bits out, MSB first, setting RMTMEM.chan[n].data32[x] to the 
-            // rmt_item32_t value corresponding to the buffered bit value
-            for (register uint32_t j = 0; j < 8; j++) {
-                uint32_t val = (byteval & 0x80000000L) ? one_val : zero_val;
-                RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = val;
-                byteval <<= 1;
-                mCurPulse++;
-                pulse_count++;
-            }
-<<<<<<< HEAD
-=======
-
-	    if (done_strip)
-		RMTMEM.chan[mRMT_channel].data32[mCurPulse-1].duration1 = RMT_RESET_DURATION;
->>>>>>> upstream/master
-        }
-        
-        if (done_strip) {
-            // -- And fill the remaining items with zero pulses. The zero values triggers
-            //    the tx_done interrupt.
-            while (pulse_count < MAX_PULSES) {
-                RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = 0;
-                mCurPulse++;
-                pulse_count++;
-            }
-        }
-
-        // -- When we have filled the back half the buffer, reset the position to the first half
-        if (mCurPulse >= MAX_PULSES*2)
-            mCurPulse = 0;
-    }
-
-    virtual void writeAllRMTItems()
-    {
-        // -- Compute the pulse values for the whole strip at once.
-        //    Requires a large buffer
-<<<<<<< HEAD
-        mBufferSize = mPixels->size() * 3 * 8;
-=======
-	mBufferSize = mPixels->size() * 3 * 8;
->>>>>>> upstream/master
-
-        // TODO: need a specific number here
-        if (mBuffer == NULL) {
-            mBuffer = (rmt_item32_t *) calloc( mBufferSize, sizeof(rmt_item32_t));
-        }
-
-        mCurPulse = 0;
-        mRGB_channel = 0;
-        uint32_t byteval = 0;
-        while (mPixels->has(1)) {
-            // -- Cycle through the R,G, and B values in the right order
-            switch (mRGB_channel) {
-            case 0:
-                byteval = mPixels->loadAndScale0();
-                mRGB_channel = 1;
-                break;
-            case 1:
-                byteval = mPixels->loadAndScale1();
-                mRGB_channel = 2;
-                break;
-            case 2:
-                byteval = mPixels->loadAndScale2();
-                mPixels->advanceData();
-                mPixels->stepDithering();
-                mRGB_channel = 0;
-                break;
-            default:
-                break;
-            }
-
-            byteval <<= 24;
-            // Shift bits out, MSB first, setting RMTMEM.chan[n].data32[x] to the 
-            // rmt_item32_t value corresponding to the buffered bit value
-            for (register uint32_t j = 0; j < 8; j++) {
-                mBuffer[mCurPulse] = (byteval & 0x80000000L) ? mOne : mZero;
-                byteval <<= 1;
-                mCurPulse++;
-            }
-        }
-
-        mBuffer[mCurPulse-1].duration1 = RMT_RESET_DURATION;
-        assert(mCurPulse == mBufferSize);
-
-<<<<<<< HEAD
-        rmt_write_items(mRMT_channel, mBuffer, mBufferSize, false);
-=======
-	rmt_write_items(mRMT_channel, mBuffer, mBufferSize, false);
->>>>>>> upstream/master
-    }
-};
-
-FASTLED_NAMESPACE_END
diff --git a/platforms/esp/32/clockless_i2s_esp32.h b/platforms/esp/32/clockless_i2s_esp32.h
index 6d3241d9b6..a82e43a6cf 100644
--- a/platforms/esp/32/clockless_i2s_esp32.h
+++ b/platforms/esp/32/clockless_i2s_esp32.h
@@ -171,8 +171,8 @@ static DMABuffer * dmaBuffers[NUM_DMA_BUFFERS];
 //    are global variables.
 
 static int      gPulsesPerBit = 0;
-static uint32_t gOneBit[40] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
-static uint32_t gZeroBit[40]  = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
+static uint32_t gOneBit[40]  = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
+static uint32_t gZeroBit[40] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
 
 // -- Counters to track progress
 static int gCurBuffer = 0;
@@ -202,8 +202,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
     // -- Make sure we can't call show() too quickly
     CMinWait<50>   mWait;
 
- public:
-
+public:
     void init()
     {
         i2sInit();
@@ -233,7 +232,6 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
     virtual uint16_t getMaxRefreshRate() const { return 400; }
     
 protected:
-   
    static int pgcd(int smallest,int precision,int a,int b,int c)
     {
         int pgc_=1;
@@ -393,8 +391,8 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         
         //int ones_for_zero = ((T1ns - 1)/FASTLED_I2S_NS_PER_PULSE) + 1;
         ones_for_zero =T1/pgc_  ;
-       // Serial.print("Zero bit:  target ");
-       // Serial.print(T1ns); Serial.print("ns --- ");
+        // Serial.print("Zero bit:  target ");
+        // Serial.print(T1ns); Serial.print("ns --- ");
         //Serial.print(ones_for_zero); Serial.print(" 1 bits");
         //Serial.print(" = "); Serial.print(ones_for_zero * FASTLED_I2S_NS_PER_PULSE); Serial.println("ns");
         // Serial.printf("Zero bit : target %d ns --- %d pulses  1 bit =   %f ns\n",T1ns,ones_for_zero ,ones_for_zero*pulseduration);
@@ -664,11 +662,11 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
                 uint8_t * row = (uint8_t *) (gPixelBits[channel][bitnum]);
                 uint32_t bit = (row[0] << 24) | (row[1] << 16) | (row[2] << 8) | row[3];
                 
-               /* SZG: More general, but too slow:
-                    for (int pulse_num = 0; pulse_num < gPulsesPerBit; pulse_num++) {
-                        buf[buf_index++] = has_data_mask & ( (bit & gOneBit[pulse_num]) | (~bit & gZeroBit[pulse_num]) );
-                     }
-               */
+                /* SZG: More general, but too slow:
+                for (int pulse_num = 0; pulse_num < gPulsesPerBit; pulse_num++) {
+                    buf[buf_index++] = has_data_mask & ( (bit & gOneBit[pulse_num]) | (~bit & gZeroBit[pulse_num]) );
+                }
+                */
 
                 // -- Only fill in the pulses that are different between the "0" and "1" encodings
                 for(int pulse_num = ones_for_zero; pulse_num < ones_for_one; pulse_num++) {
diff --git a/platforms/esp/32/clockless_rmt_esp32.h b/platforms/esp/32/clockless_rmt_esp32.h
index bf4dd142c1..82a1b3b61a 100644
--- a/platforms/esp/32/clockless_rmt_esp32.h
+++ b/platforms/esp/32/clockless_rmt_esp32.h
@@ -214,7 +214,6 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
     CMinWait<50>   mWait;
 
 public:
-
     void init()
     {
         // -- Allocate space to save the pixel controller
@@ -246,7 +245,6 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
     virtual uint16_t getMaxRefreshRate() const { return 400; }
 
 protected:
-
     void initRMT()
     {
         for (int i = 0; i < FASTLED_RMT_MAX_CHANNELS; i++) {
diff --git a/platforms/esp/32/fastpin_esp32.h b/platforms/esp/32/fastpin_esp32.h
index d54d7fee67..7876b281f9 100644
--- a/platforms/esp/32/fastpin_esp32.h
+++ b/platforms/esp/32/fastpin_esp32.h
@@ -3,7 +3,6 @@
 FASTLED_NAMESPACE_BEGIN
 
 template<uint8_t PIN, uint32_t MASK> class _ESPPIN {
-
 public:
   typedef volatile uint32_t * port_ptr_t;
   typedef uint32_t port_t;
diff --git a/platforms/esp/8266/clockless_block_esp8266.h b/platforms/esp/8266/clockless_block_esp8266.h
index 40c91612f7..d3b1cf9595 100644
--- a/platforms/esp/8266/clockless_block_esp8266.h
+++ b/platforms/esp/8266/clockless_block_esp8266.h
@@ -23,6 +23,7 @@ class InlineBlockClocklessController : public CPixelLEDController<RGB_ORDER, LAN
 	typedef typename FastPin<FIRST_PIN>::port_t data_t;
 
 	CMinWait<WAIT_TIME> mWait;
+
 public:
 	virtual int size() { return CLEDController::size() * LANES; }
 
@@ -31,13 +32,13 @@ class InlineBlockClocklessController : public CPixelLEDController<RGB_ORDER, LAN
 		/*uint32_t clocks = */
 		int cnt=FASTLED_INTERRUPT_RETRY_COUNT;
 		while(!showRGBInternal(pixels) && cnt--) {
-      os_intr_unlock();
+      		os_intr_unlock();
 			#ifdef FASTLED_DEBUG_COUNT_FRAME_RETRIES
 			_retry_cnt++;
 			#endif
-      delayMicroseconds(WAIT_TIME * 10);
-      os_intr_lock();
-    }
+			delayMicroseconds(WAIT_TIME * 10);
+			os_intr_lock();
+		}
 		// #if FASTLED_ALLOW_INTTERUPTS == 0
 		// Adjust the timer
 		// long microsTaken = CLKS_TO_MICROS(clocks);
@@ -47,19 +48,19 @@ class InlineBlockClocklessController : public CPixelLEDController<RGB_ORDER, LAN
 		// mWait.mark();
 	}
 
-  template<int PIN> static void initPin() {
-			_ESPPIN<PIN, 1<<(PIN & 0xFF)>::setOutput();
-  }
+	template<int PIN> static void initPin() {
+		_ESPPIN<PIN, 1<<(PIN & 0xFF)>::setOutput();
+	}
 
-  virtual void init() {
+	virtual void init() {
 		void (* funcs[])() ={initPin<12>, initPin<13>, initPin<14>, initPin<15>, initPin<4>, initPin<5>};
 
 		for (uint8_t i = 0; i < USED_LANES; ++i) {
 			funcs[i]();
 		}
-  }
+	}
 
-  virtual uint16_t getMaxRefreshRate() const { return 400; }
+	virtual uint16_t getMaxRefreshRate() const { return 400; }
 
 	typedef union {
 		uint8_t bytes[8];
@@ -69,8 +70,8 @@ class InlineBlockClocklessController : public CPixelLEDController<RGB_ORDER, LAN
 
 #define ESP_ADJUST 0 // (2*(F_CPU/24000000))
 #define ESP_ADJUST2 0
-  template<int BITS,int PX> __attribute__ ((always_inline)) inline static void writeBits(register uint32_t & last_mark, register Lines & b, PixelController<RGB_ORDER, LANES, PORT_MASK> &pixels) { // , register uint32_t & b2)  {
-	  Lines b2 = b;
+  	template<int BITS,int PX> __attribute__ ((always_inline)) inline static void writeBits(register uint32_t & last_mark, register Lines & b, PixelController<RGB_ORDER, LANES, PORT_MASK> &pixels) { // , register uint32_t & b2)  {
+	  	Lines b2 = b;
 		transpose8x1_noinline(b.bytes,b2.bytes);
 
 		register uint8_t d = pixels.template getd<PX>(pixels);
@@ -105,9 +106,9 @@ class InlineBlockClocklessController : public CPixelLEDController<RGB_ORDER, LAN
 		}
 	}
 
-  // This method is made static to force making register Y available to use for data on AVR - if the method is non-static, then
+  	// This method is made static to force making register Y available to use for data on AVR - if the method is non-static, then
 	// gcc will use register Y for the this pointer.
-		static uint32_t ICACHE_RAM_ATTR showRGBInternal(PixelController<RGB_ORDER, LANES, PORT_MASK> &allpixels) {
+	static uint32_t ICACHE_RAM_ATTR showRGBInternal(PixelController<RGB_ORDER, LANES, PORT_MASK> &allpixels) {
 
 		// Setup the pixel controller and load/scale the first byte
 		Lines b0;
@@ -132,26 +133,26 @@ class InlineBlockClocklessController : public CPixelLEDController<RGB_ORDER, LAN
 			// Write third byte
 			writeBits<8+XTRA0,0>(last_mark, b0, allpixels);
 
-      #if (FASTLED_ALLOW_INTERRUPTS == 1)
+		#if (FASTLED_ALLOW_INTERRUPTS == 1)
 			os_intr_unlock();
-			#endif
+		#endif
 
 			allpixels.stepDithering();
 
-			#if (FASTLED_ALLOW_INTERRUPTS == 1)
-      os_intr_lock();
+		#if (FASTLED_ALLOW_INTERRUPTS == 1)
+			os_intr_lock();
 			// if interrupts took longer than 45µs, punt on the current frame
 			if((int32_t)(__clock_cycles()-last_mark) > 0) {
 				if((int32_t)(__clock_cycles()-last_mark) > (T1+T2+T3+((WAIT_TIME-INTERRUPT_THRESHOLD)*CLKS_PER_US))) { os_intr_unlock(); return 0; }
 			}
-			#endif
+		#endif
 		};
 
-    os_intr_unlock();
+		os_intr_unlock();
 		#ifdef FASTLED_DEBUG_COUNT_FRAME_RETRIES
 		_frame_cnt++;
 		#endif
-    return __clock_cycles() - _start;
+		return __clock_cycles() - _start;
 	}
 };
 
diff --git a/platforms/esp/8266/clockless_esp8266.h b/platforms/esp/8266/clockless_esp8266.h
index 83d05b3f07..504b9f963a 100644
--- a/platforms/esp/8266/clockless_esp8266.h
+++ b/platforms/esp/8266/clockless_esp8266.h
@@ -9,9 +9,9 @@ extern uint32_t _retry_cnt;
 
 // Info on reading cycle counter from https://github.com/kbeckmann/nodemcu-firmware/blob/ws2812-dual/app/modules/ws2812.c
 __attribute__ ((always_inline)) inline static uint32_t __clock_cycles() {
-  uint32_t cyc;
-  __asm__ __volatile__ ("rsr %0,ccount":"=a" (cyc));
-  return cyc;
+	uint32_t cyc;
+	__asm__ __volatile__ ("rsr %0,ccount":"=a" (cyc));
+	return cyc;
 }
 
 #define FASTLED_HAS_CLOCKLESS 1
@@ -24,6 +24,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 	data_t mPinMask;
 	data_ptr_t mPort;
 	CMinWait<WAIT_TIME> mWait;
+
 public:
 	virtual void init() {
 		FastPin<DATA_PIN>::setOutput();
@@ -34,37 +35,36 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 	virtual uint16_t getMaxRefreshRate() const { return 400; }
 
 protected:
-
 	virtual void showPixels(PixelController<RGB_ORDER> & pixels) {
-    // mWait.wait();
-		int cnt = FASTLED_INTERRUPT_RETRY_COUNT;
-    while((showRGBInternal(pixels)==0) && cnt--) {
-      #ifdef FASTLED_DEBUG_COUNT_FRAME_RETRIES
-      _retry_cnt++;
-      #endif
-      os_intr_unlock();
-      delayMicroseconds(WAIT_TIME);
-      os_intr_lock();
-    }
-    // mWait.mark();
+	// mWait.wait();
+	int cnt = FASTLED_INTERRUPT_RETRY_COUNT;
+	while((showRGBInternal(pixels)==0) && cnt--) {
+		#ifdef FASTLED_DEBUG_COUNT_FRAME_RETRIES
+		_retry_cnt++;
+		#endif
+		os_intr_unlock();
+		delayMicroseconds(WAIT_TIME);
+		os_intr_lock();
+	}
+	// mWait.mark();
   }
 
 #define _ESP_ADJ (0)
 #define _ESP_ADJ2 (0)
 
 	template<int BITS> __attribute__ ((always_inline)) inline static void writeBits(register uint32_t & last_mark, register uint32_t b)  {
-    b <<= 24; b = ~b;
-    for(register uint32_t i = BITS; i > 0; i--) {
-      while((__clock_cycles() - last_mark) < (T1+T2+T3));
+		b <<= 24; b = ~b;
+		for(register uint32_t i = BITS; i > 0; i--) {
+			while((__clock_cycles() - last_mark) < (T1+T2+T3));
 			last_mark = __clock_cycles();
-      FastPin<DATA_PIN>::hi();
+			FastPin<DATA_PIN>::hi();
 
-      while((__clock_cycles() - last_mark) < T1);
-      if(b & 0x80000000L) { FastPin<DATA_PIN>::lo(); }
-      b <<= 1;
+			while((__clock_cycles() - last_mark) < T1);
+			if(b & 0x80000000L) { FastPin<DATA_PIN>::lo(); }
+			b <<= 1;
 
-      while((__clock_cycles() - last_mark) < (T1+T2));
-      FastPin<DATA_PIN>::lo();
+			while((__clock_cycles() - last_mark) < (T1+T2));
+			FastPin<DATA_PIN>::lo();
 		}
 	}
 
@@ -74,9 +74,9 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 		// Setup the pixel controller and load/scale the first byte
 		pixels.preStepFirstByteDithering();
 		register uint32_t b = pixels.loadAndScale0();
-    pixels.preStepFirstByteDithering();
+    	pixels.preStepFirstByteDithering();
 		os_intr_lock();
-    uint32_t start = __clock_cycles();
+    	uint32_t start = __clock_cycles();
 		uint32_t last_mark = start;
 		while(pixels.has(1)) {
 			// Write first byte, read next byte
@@ -89,13 +89,13 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 
 			// Write third byte, read 1st byte of next pixel
 			writeBits<8+XTRA0>(last_mark, b);
-      b = pixels.advanceAndLoadAndScale0();
+      		b = pixels.advanceAndLoadAndScale0();
 
 			#if (FASTLED_ALLOW_INTERRUPTS == 1)
 			os_intr_unlock();
 			#endif
 
-      pixels.stepDithering();
+      		pixels.stepDithering();
 
 			#if (FASTLED_ALLOW_INTERRUPTS == 1)
 			os_intr_lock();
@@ -107,9 +107,9 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 		};
 
 		os_intr_unlock();
-    #ifdef FASTLED_DEBUG_COUNT_FRAME_RETRIES
-    _frame_cnt++;
-    #endif
+		#ifdef FASTLED_DEBUG_COUNT_FRAME_RETRIES
+		_frame_cnt++;
+		#endif
 		return __clock_cycles() - start;
 	}
 };
diff --git a/platforms/esp/8266/fastpin_esp8266.h b/platforms/esp/8266/fastpin_esp8266.h
index 1ce7934b04..4d6cbaafc1 100644
--- a/platforms/esp/8266/fastpin_esp8266.h
+++ b/platforms/esp/8266/fastpin_esp8266.h
@@ -3,43 +3,42 @@
 FASTLED_NAMESPACE_BEGIN
 
 struct FASTLED_ESP_IO {
-  volatile uint32_t _GPO;
-  volatile uint32_t _GPOS;
-  volatile uint32_t _GPOC;
+    volatile uint32_t _GPO;
+    volatile uint32_t _GPOS;
+    volatile uint32_t _GPOC;
 };
 
 #define _GPB (*(FASTLED_ESP_IO*)(0x60000000+(0x300)))
 
 
 template<uint8_t PIN, uint32_t MASK> class _ESPPIN {
-
 public:
-  typedef volatile uint32_t * port_ptr_t;
-  typedef uint32_t port_t;
+    typedef volatile uint32_t * port_ptr_t;
+    typedef uint32_t port_t;
 
-  inline static void setOutput() { pinMode(PIN, OUTPUT); }
-  inline static void setInput() { pinMode(PIN, INPUT); }
+    inline static void setOutput() { pinMode(PIN, OUTPUT); }
+    inline static void setInput() { pinMode(PIN, INPUT); }
 
-  inline static void hi() __attribute__ ((always_inline)) { if(PIN < 16) { _GPB._GPOS = MASK; } else { GP16O |= MASK; } }
-  inline static void lo() __attribute__ ((always_inline)) { if(PIN < 16) { _GPB._GPOC = MASK; } else { GP16O &= ~MASK; } }
-  inline static void set(register port_t val) __attribute__ ((always_inline)) { if(PIN < 16) { _GPB._GPO = val; } else { GP16O = val; }}
+    inline static void hi() __attribute__ ((always_inline)) { if(PIN < 16) { _GPB._GPOS = MASK; } else { GP16O |= MASK; } }
+    inline static void lo() __attribute__ ((always_inline)) { if(PIN < 16) { _GPB._GPOC = MASK; } else { GP16O &= ~MASK; } }
+    inline static void set(register port_t val) __attribute__ ((always_inline)) { if(PIN < 16) { _GPB._GPO = val; } else { GP16O = val; }}
 
-  inline static void strobe() __attribute__ ((always_inline)) { toggle(); toggle(); }
+    inline static void strobe() __attribute__ ((always_inline)) { toggle(); toggle(); }
 
-  inline static void toggle() __attribute__ ((always_inline)) { if(PIN < 16) { _GPB._GPO ^= MASK; } else { GP16O ^= MASK; } }
+    inline static void toggle() __attribute__ ((always_inline)) { if(PIN < 16) { _GPB._GPO ^= MASK; } else { GP16O ^= MASK; } }
 
-  inline static void hi(register port_ptr_t port) __attribute__ ((always_inline)) { hi(); }
-  inline static void lo(register port_ptr_t port) __attribute__ ((always_inline)) { lo(); }
-  inline static void fastset(register port_ptr_t port, register port_t val) __attribute__ ((always_inline)) { *port = val; }
+    inline static void hi(register port_ptr_t port) __attribute__ ((always_inline)) { hi(); }
+    inline static void lo(register port_ptr_t port) __attribute__ ((always_inline)) { lo(); }
+    inline static void fastset(register port_ptr_t port, register port_t val) __attribute__ ((always_inline)) { *port = val; }
 
-  inline static port_t hival() __attribute__ ((always_inline)) { if (PIN<16) { return GPO | MASK;  } else { return GP16O | MASK; } }
-  inline static port_t loval() __attribute__ ((always_inline)) { if (PIN<16) { return GPO & ~MASK; } else { return GP16O & ~MASK; } }
-  inline static port_ptr_t port() __attribute__ ((always_inline)) { if(PIN<16) { return &_GPB._GPO; } else { return &GP16O; } }
-  inline static port_ptr_t sport() __attribute__ ((always_inline)) { return &_GPB._GPOS; } // there is no GP160 support for this
-	inline static port_ptr_t cport() __attribute__ ((always_inline)) { return &_GPB._GPOC; }
-  inline static port_t mask() __attribute__ ((always_inline)) { return MASK; }
+    inline static port_t hival() __attribute__ ((always_inline)) { if (PIN<16) { return GPO | MASK;  } else { return GP16O | MASK; } }
+    inline static port_t loval() __attribute__ ((always_inline)) { if (PIN<16) { return GPO & ~MASK; } else { return GP16O & ~MASK; } }
+    inline static port_ptr_t port() __attribute__ ((always_inline)) { if(PIN<16) { return &_GPB._GPO; } else { return &GP16O; } }
+    inline static port_ptr_t sport() __attribute__ ((always_inline)) { return &_GPB._GPOS; } // there is no GP160 support for this
+    inline static port_ptr_t cport() __attribute__ ((always_inline)) { return &_GPB._GPOC; }
+    inline static port_t mask() __attribute__ ((always_inline)) { return MASK; }
 
-  inline static bool isset() __attribute__ ((always_inline)) { return (PIN < 16) ? (GPO & MASK) : (GP16O & MASK); }
+    inline static bool isset() __attribute__ ((always_inline)) { return (PIN < 16) ? (GPO & MASK) : (GP16O & MASK); }
 };
 
 #define _FL_DEFPIN(PIN, REAL_PIN) template<> class FastPin<PIN> : public _ESPPIN<REAL_PIN, (1<<(REAL_PIN & 0xFF))> {};
diff --git a/power_mgt.cpp b/power_mgt.cpp
index 8e46d93f8d..33b4cece1c 100644
--- a/power_mgt.cpp
+++ b/power_mgt.cpp
@@ -88,7 +88,7 @@ uint8_t calculate_max_brightness_for_power_mW(const CRGB* ledbuffer, uint16_t nu
 
 	uint8_t recommended_brightness = target_brightness;
 	if(requested_power_mW > max_power_mW) { 
-    		recommended_brightness = (uint32_t)((uint8_t)(target_brightness) * (uint32_t)(max_power_mW)) / ((uint32_t)(requested_power_mW));
+        recommended_brightness = (uint32_t)((uint8_t)(target_brightness) * (uint32_t)(max_power_mW)) / ((uint32_t)(requested_power_mW));
 	}
 
 	return recommended_brightness;
@@ -163,23 +163,23 @@ void set_max_power_indicator_LED( uint8_t pinNumber)
 
 void set_max_power_in_volts_and_milliamps( uint8_t volts, uint32_t milliamps)
 {
-  FastLED.setMaxPowerInVoltsAndMilliamps(volts, milliamps);
+    FastLED.setMaxPowerInVoltsAndMilliamps(volts, milliamps);
 }
 
 void set_max_power_in_milliwatts( uint32_t powerInmW)
 {
-  FastLED.setMaxPowerInMilliWatts(powerInmW);
+    FastLED.setMaxPowerInMilliWatts(powerInmW);
 }
 
 void show_at_max_brightness_for_power()
 {
-  // power management usage is now in FastLED.show, no need for this function
-  FastLED.show();
+    // power management usage is now in FastLED.show, no need for this function
+    FastLED.show();
 }
 
 void delay_at_max_brightness_for_power( uint16_t ms)
 {
-  FastLED.delay(ms);
+    FastLED.delay(ms);
 }
 
 FASTLED_NAMESPACE_END
diff --git a/wiring.cpp b/wiring.cpp
index b2af51cd08..e366c64c09 100644
--- a/wiring.cpp
+++ b/wiring.cpp
@@ -17,16 +17,16 @@ volatile unsigned long FastLED_timer0_overflow_count=0;
 volatile unsigned long FastLED_timer0_millis = 0;
 
 LIB8STATIC void  __attribute__((always_inline)) fastinc32 (volatile uint32_t & _long) {
-  uint8_t b = ++((tBytesForLong&)_long).raw[0];
-  if(!b) {
-    b = ++((tBytesForLong&)_long).raw[1];
+    uint8_t b = ++((tBytesForLong&)_long).raw[0];
     if(!b) {
-      b = ++((tBytesForLong&)_long).raw[2];
-      if(!b) {
-        ++((tBytesForLong&)_long).raw[3];
-      }
+        b = ++((tBytesForLong&)_long).raw[1];
+        if(!b) {
+            b = ++((tBytesForLong&)_long).raw[2];
+            if(!b) {
+                ++((tBytesForLong&)_long).raw[3];
+            }
+        }
     }
-  }
 }
 
 #if defined(__AVR_ATtiny24__) || defined(__AVR_ATtiny44__) || defined(__AVR_ATtiny84__)
@@ -35,200 +35,200 @@ ISR(TIM0_OVF_vect)
 ISR(TIMER0_OVF_vect)
 #endif
 {
-  fastinc32(FastLED_timer0_overflow_count);
-  // FastLED_timer0_overflow_count++;
+    fastinc32(FastLED_timer0_overflow_count);
+    // FastLED_timer0_overflow_count++;
 }
 
 // there are 1024 microseconds per overflow counter tick.
 unsigned long millis()
 {
-        unsigned long m;
-        uint8_t oldSREG = SREG;
+    unsigned long m;
+    uint8_t oldSREG = SREG;
 
-        // disable interrupts while we read FastLED_timer0_millis or we might get an
-        // inconsistent value (e.g. in the middle of a write to FastLED_timer0_millis)
-        cli();
-        m = FastLED_timer0_overflow_count;  //._long;
-        SREG = oldSREG;
+    // disable interrupts while we read FastLED_timer0_millis or we might get an
+    // inconsistent value (e.g. in the middle of a write to FastLED_timer0_millis)
+    cli();
+    m = FastLED_timer0_overflow_count;  //._long;
+    SREG = oldSREG;
 
-        return (m*(MICROSECONDS_PER_TIMER0_OVERFLOW/8))/(1000/8);
+    return (m*(MICROSECONDS_PER_TIMER0_OVERFLOW/8))/(1000/8);
 }
 
 unsigned long micros() {
-        unsigned long m;
-        uint8_t oldSREG = SREG, t;
+    unsigned long m;
+    uint8_t oldSREG = SREG, t;
 
-        cli();
-        m = FastLED_timer0_overflow_count; // ._long;
+    cli();
+    m = FastLED_timer0_overflow_count; // ._long;
 #if defined(TCNT0)
-        t = TCNT0;
+    t = TCNT0;
 #elif defined(TCNT0L)
-        t = TCNT0L;
+    t = TCNT0L;
 #else
-        #error TIMER 0 not defined
+#error TIMER 0 not defined
 #endif
 
 
 #ifdef TIFR0
-        if ((TIFR0 & _BV(TOV0)) && (t < 255))
-                m++;
+    if ((TIFR0 & _BV(TOV0)) && (t < 255))
+        m++;
 #else
-        if ((TIFR & _BV(TOV0)) && (t < 255))
-                m++;
+    if ((TIFR & _BV(TOV0)) && (t < 255))
+        m++;
 #endif
 
-        SREG = oldSREG;
+    SREG = oldSREG;
 
-        return ((m << 8) + t) * (64 / clockCyclesPerMicrosecond());
+    return ((m << 8) + t) * (64 / clockCyclesPerMicrosecond());
 }
 
 void delay(unsigned long ms)
 {
-        uint16_t start = (uint16_t)micros();
+    uint16_t start = (uint16_t)micros();
 
-        while (ms > 0) {
-                if (((uint16_t)micros() - start) >= 1000) {
-                        ms--;
-                        start += 1000;
-                }
+    while (ms > 0) {
+        if (((uint16_t)micros() - start) >= 1000) {
+            ms--;
+            start += 1000;
         }
+    }
 }
 
 #define sbi(sfr, bit) (_SFR_BYTE(sfr) |= _BV(bit))
 void init()
 {
-  // this needs to be called before setup() or some functions won't
-  // work there
-  sei();
+    // this needs to be called before setup() or some functions won't
+    // work there
+    sei();
 
-  // on the ATmega168, timer 0 is also used for fast hardware pwm
-  // (using phase-correct PWM would mean that timer 0 overflowed half as often
-  // resulting in different millis() behavior on the ATmega8 and ATmega168)
+    // on the ATmega168, timer 0 is also used for fast hardware pwm
+    // (using phase-correct PWM would mean that timer 0 overflowed half as often
+    // resulting in different millis() behavior on the ATmega8 and ATmega168)
 #if defined(TCCR0A) && defined(WGM01)
-  sbi(TCCR0A, WGM01);
-  sbi(TCCR0A, WGM00);
+    sbi(TCCR0A, WGM01);
+    sbi(TCCR0A, WGM00);
 #endif
 
-  // set timer 0 prescale factor to 64
+    // set timer 0 prescale factor to 64
 #if defined(__AVR_ATmega128__)
-  // CPU specific: different values for the ATmega128
-  sbi(TCCR0, CS02);
+    // CPU specific: different values for the ATmega128
+    sbi(TCCR0, CS02);
 #elif defined(TCCR0) && defined(CS01) && defined(CS00)
-  // this combination is for the standard atmega8
-  sbi(TCCR0, CS01);
-  sbi(TCCR0, CS00);
+    // this combination is for the standard atmega8
+    sbi(TCCR0, CS01);
+    sbi(TCCR0, CS00);
 #elif defined(TCCR0B) && defined(CS01) && defined(CS00)
-  // this combination is for the standard 168/328/1280/2560
-  sbi(TCCR0B, CS01);
-  sbi(TCCR0B, CS00);
+    // this combination is for the standard 168/328/1280/2560
+    sbi(TCCR0B, CS01);
+    sbi(TCCR0B, CS00);
 #elif defined(TCCR0A) && defined(CS01) && defined(CS00)
-  // this combination is for the __AVR_ATmega645__ series
-  sbi(TCCR0A, CS01);
-  sbi(TCCR0A, CS00);
+    // this combination is for the __AVR_ATmega645__ series
+    sbi(TCCR0A, CS01);
+    sbi(TCCR0A, CS00);
 #else
-  #error Timer 0 prescale factor 64 not set correctly
+#error Timer 0 prescale factor 64 not set correctly
 #endif
 
-  // enable timer 0 overflow interrupt
+    // enable timer 0 overflow interrupt
 #if defined(TIMSK) && defined(TOIE0)
-  sbi(TIMSK, TOIE0);
+    sbi(TIMSK, TOIE0);
 #elif defined(TIMSK0) && defined(TOIE0)
-  sbi(TIMSK0, TOIE0);
+    sbi(TIMSK0, TOIE0);
 #else
-  #error	Timer 0 overflow interrupt not set correctly
+#error	Timer 0 overflow interrupt not set correctly
 #endif
 
-  // timers 1 and 2 are used for phase-correct hardware pwm
-  // this is better for motors as it ensures an even waveform
-  // note, however, that fast pwm mode can achieve a frequency of up
-  // 8 MHz (with a 16 MHz clock) at 50% duty cycle
+    // timers 1 and 2 are used for phase-correct hardware pwm
+    // this is better for motors as it ensures an even waveform
+    // note, however, that fast pwm mode can achieve a frequency of up
+    // 8 MHz (with a 16 MHz clock) at 50% duty cycle
 
 #if defined(TCCR1B) && defined(CS11) && defined(CS10)
-  TCCR1B = 0;
+    TCCR1B = 0;
 
-  // set timer 1 prescale factor to 64
-  sbi(TCCR1B, CS11);
+    // set timer 1 prescale factor to 64
+    sbi(TCCR1B, CS11);
 #if F_CPU >= 8000000L
-  sbi(TCCR1B, CS10);
+    sbi(TCCR1B, CS10);
 #endif
 #elif defined(TCCR1) && defined(CS11) && defined(CS10)
-  sbi(TCCR1, CS11);
+    sbi(TCCR1, CS11);
 #if F_CPU >= 8000000L
-  sbi(TCCR1, CS10);
+    sbi(TCCR1, CS10);
 #endif
 #endif
-  // put timer 1 in 8-bit phase correct pwm mode
+    // put timer 1 in 8-bit phase correct pwm mode
 #if defined(TCCR1A) && defined(WGM10)
-  sbi(TCCR1A, WGM10);
+    sbi(TCCR1A, WGM10);
 #elif defined(TCCR1)
-  #warning this needs to be finished
+#warning this needs to be finished
 #endif
 
   // set timer 2 prescale factor to 64
 #if defined(TCCR2) && defined(CS22)
-  sbi(TCCR2, CS22);
+    sbi(TCCR2, CS22);
 #elif defined(TCCR2B) && defined(CS22)
-  sbi(TCCR2B, CS22);
+    sbi(TCCR2B, CS22);
 #else
-  #warning Timer 2 not finished (may not be present on this CPU)
+#warning Timer 2 not finished (may not be present on this CPU)
 #endif
 
   // configure timer 2 for phase correct pwm (8-bit)
 #if defined(TCCR2) && defined(WGM20)
-  sbi(TCCR2, WGM20);
+    sbi(TCCR2, WGM20);
 #elif defined(TCCR2A) && defined(WGM20)
-  sbi(TCCR2A, WGM20);
+    sbi(TCCR2A, WGM20);
 #else
-  #warning Timer 2 not finished (may not be present on this CPU)
+#warning Timer 2 not finished (may not be present on this CPU)
 #endif
 
 #if defined(TCCR3B) && defined(CS31) && defined(WGM30)
-  sbi(TCCR3B, CS31);		// set timer 3 prescale factor to 64
-  sbi(TCCR3B, CS30);
-  sbi(TCCR3A, WGM30);		// put timer 3 in 8-bit phase correct pwm mode
+    sbi(TCCR3B, CS31);		// set timer 3 prescale factor to 64
+    sbi(TCCR3B, CS30);
+    sbi(TCCR3A, WGM30);		// put timer 3 in 8-bit phase correct pwm mode
 #endif
 
 #if defined(TCCR4A) && defined(TCCR4B) && defined(TCCR4D) /* beginning of timer4 block for 32U4 and similar */
-  sbi(TCCR4B, CS42);		// set timer4 prescale factor to 64
-  sbi(TCCR4B, CS41);
-  sbi(TCCR4B, CS40);
-  sbi(TCCR4D, WGM40);		// put timer 4 in phase- and frequency-correct PWM mode
-  sbi(TCCR4A, PWM4A);		// enable PWM mode for comparator OCR4A
-  sbi(TCCR4C, PWM4D);		// enable PWM mode for comparator OCR4D
+    sbi(TCCR4B, CS42);		// set timer4 prescale factor to 64
+    sbi(TCCR4B, CS41);
+    sbi(TCCR4B, CS40);
+    sbi(TCCR4D, WGM40);		// put timer 4 in phase- and frequency-correct PWM mode
+    sbi(TCCR4A, PWM4A);		// enable PWM mode for comparator OCR4A
+    sbi(TCCR4C, PWM4D);		// enable PWM mode for comparator OCR4D
 #else /* beginning of timer4 block for ATMEGA1280 and ATMEGA2560 */
 #if defined(TCCR4B) && defined(CS41) && defined(WGM40)
-  sbi(TCCR4B, CS41);		// set timer 4 prescale factor to 64
-  sbi(TCCR4B, CS40);
-  sbi(TCCR4A, WGM40);		// put timer 4 in 8-bit phase correct pwm mode
+    sbi(TCCR4B, CS41);		// set timer 4 prescale factor to 64
+    sbi(TCCR4B, CS40);
+    sbi(TCCR4A, WGM40);		// put timer 4 in 8-bit phase correct pwm mode
 #endif
 #endif /* end timer4 block for ATMEGA1280/2560 and similar */
 
 #if defined(TCCR5B) && defined(CS51) && defined(WGM50)
-  sbi(TCCR5B, CS51);		// set timer 5 prescale factor to 64
-  sbi(TCCR5B, CS50);
-  sbi(TCCR5A, WGM50);		// put timer 5 in 8-bit phase correct pwm mode
+    sbi(TCCR5B, CS51);		// set timer 5 prescale factor to 64
+    sbi(TCCR5B, CS50);
+    sbi(TCCR5A, WGM50);		// put timer 5 in 8-bit phase correct pwm mode
 #endif
 
 #if defined(ADCSRA)
-  // set a2d prescale factor to 128
-  // 16 MHz / 128 = 125 KHz, inside the desired 50-200 KHz range.
-  // XXX: this will not work properly for other clock speeds, and
-  // this code should use F_CPU to determine the prescale factor.
-  sbi(ADCSRA, ADPS2);
-  sbi(ADCSRA, ADPS1);
-  sbi(ADCSRA, ADPS0);
+    // set a2d prescale factor to 128
+    // 16 MHz / 128 = 125 KHz, inside the desired 50-200 KHz range.
+    // XXX: this will not work properly for other clock speeds, and
+    // this code should use F_CPU to determine the prescale factor.
+    sbi(ADCSRA, ADPS2);
+    sbi(ADCSRA, ADPS1);
+    sbi(ADCSRA, ADPS0);
 
-  // enable a2d conversions
-  sbi(ADCSRA, ADEN);
+    // enable a2d conversions
+    sbi(ADCSRA, ADEN);
 #endif
 
-  // the bootloader connects pins 0 and 1 to the USART; disconnect them
-  // here so they can be used as normal digital i/o; they will be
-  // reconnected in Serial.begin()
+    // the bootloader connects pins 0 and 1 to the USART; disconnect them
+    // here so they can be used as normal digital i/o; they will be
+    // reconnected in Serial.begin()
 #if defined(UCSRB)
-  UCSRB = 0;
+    UCSRB = 0;
 #elif defined(UCSR0B)
-  UCSR0B = 0;
+    UCSR0B = 0;
 #endif
 }
 };

From 1946bd0751ab4b7eef6573a8643d8841becb966b Mon Sep 17 00:00:00 2001
From: 5chmidti <44101708+5chmidti@users.noreply.github.com>
Date: Sun, 31 May 2020 16:08:02 +0200
Subject: [PATCH 163/204] fix for newer SoftwareSerial.h using a differently
 named include-guard macro (__SoftwareSerial_h)

---
 chipsets.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/chipsets.h b/chipsets.h
index 8e9051d5cf..a4dc40bd25 100644
--- a/chipsets.h
+++ b/chipsets.h
@@ -16,7 +16,7 @@ FASTLED_NAMESPACE_BEGIN
 #if defined(ARDUINO) //&& defined(SoftwareSerial_h)
 
 
-#if defined(SoftwareSerial_h)
+#if defined(SoftwareSerial_h) || defined(__SoftwareSerial_h)
 #include <SoftwareSerial.h>
 
 #define HAS_PIXIE

From 56a440d6d25ca5516c6b93cf03320ec6e3266c3a Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Fri, 12 Jun 2020 09:23:12 -0400
Subject: [PATCH 164/204] Complete restructuring of the code so that the
 interrupt handler and supporting methods are not part of the controller
 template. The reason is that a bug in gcc causes the IRAM_ATTR to be lost
 during template instantiation.

---
 platforms/esp/32/clockless_rmt_esp32.h | 496 ++++++++++++-------------
 1 file changed, 230 insertions(+), 266 deletions(-)

diff --git a/platforms/esp/32/clockless_rmt_esp32.h b/platforms/esp/32/clockless_rmt_esp32.h
index bf4dd142c1..bdd71fff1b 100644
--- a/platforms/esp/32/clockless_rmt_esp32.h
+++ b/platforms/esp/32/clockless_rmt_esp32.h
@@ -134,7 +134,7 @@ __attribute__ ((always_inline)) inline static uint32_t __clock_cycles() {
 // -- Configuration constants
 #define DIVIDER             2 /* 4, 8 still seem to work, but timings become marginal */
 #define MAX_PULSES         64 /* A channel has a 64 "pulse" buffer */
-#define PULSES_PER_FILL    24 /* One pixel's worth of pulses */
+#define PULSES_PER_FILL    32 /* Half of the channel buffer */
 
 // -- Convert ESP32 CPU cycles to RMT device cycles, taking into account the divider
 #define F_CPU_RMT                   (  80000000L)
@@ -163,12 +163,15 @@ __attribute__ ((always_inline)) inline static uint32_t __clock_cycles() {
 #define FASTLED_RMT_MAX_CHANNELS 8
 #endif
 
+// -- Forward reference
+class ESP32RMTController;
+
 // -- Array of all controllers
-static CLEDController * gControllers[FASTLED_RMT_MAX_CONTROLLERS];
+static ESP32RMTController * gControllers[FASTLED_RMT_MAX_CONTROLLERS];
 
 // -- Current set of active controllers, indexed by the RMT
 //    channel assigned to them.
-static CLEDController * gOnChannel[FASTLED_RMT_MAX_CHANNELS];
+static ESP32RMTController * gOnChannel[FASTLED_RMT_MAX_CHANNELS];
 
 static int gNumControllers = 0;
 static int gNumStarted = 0;
@@ -183,44 +186,40 @@ static xSemaphoreHandle gTX_sem = NULL;
 
 static bool gInitialized = false;
 
-template <int DATA_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = RGB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 5>
-class ClocklessController : public CPixelLEDController<RGB_ORDER>
+class ESP32RMTController
 {
+private:
+
     // -- RMT has 8 channels, numbered 0 to 7
     rmt_channel_t  mRMT_channel;
 
     // -- Store the GPIO pin
     gpio_num_t     mPin;
 
-    // -- This instantiation forces a check on the pin choice
-    FastPin<DATA_PIN> mFastPin;
-
     // -- Timing values for zero and one bits, derived from T1, T2, and T3
     rmt_item32_t   mZero;
     rmt_item32_t   mOne;
 
-    // -- Save the pixel controller
-    PixelController<RGB_ORDER> * mPixels;
-    int            mCurColor;
-    uint16_t       mCurPulse;
+    // -- Pixel data
+    uint32_t *     mPixelData;
+    int            mSize;
+    int            mCur;
+
+    // -- RMT memory
     volatile uint32_t * mRMT_mem_ptr;
+    int                 mWhichHalf;
 
     // -- Buffer to hold all of the pulses. For the version that uses
     //    the RMT driver built into the ESP core.
     rmt_item32_t * mBuffer;
     uint16_t       mBufferSize;
-
-    // -- Make sure we can't call show() too quickly
-    CMinWait<50>   mWait;
+    int            mCurPulse;
 
 public:
 
-    void init()
+    ESP32RMTController(int DATA_PIN, int T1, int T2, int T3)
+        : mPixelData(0), mSize(0), mCur(0), mWhichHalf(0)
     {
-        // -- Allocate space to save the pixel controller
-        //    during parallel output
-        mPixels = (PixelController<RGB_ORDER> *) malloc(sizeof(PixelController<RGB_ORDER>));
-        
         // -- Precompute rmt items corresponding to a zero bit and a one bit
         //    according to the timing values given in the template instantiation
         // T1H
@@ -243,12 +242,23 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         mPin = gpio_num_t(DATA_PIN);
     }
 
-    virtual uint16_t getMaxRefreshRate() const { return 400; }
-
-protected:
+    // -- Getters and setters for use in ClocklessController
+    uint8_t * getPixelData(int size_in_bytes)
+    {
+        if (mPixelData == 0) {
+            // -- Round up
+            mSize = ((size_in_bytes-1) / sizeof(uint32_t)) + 1;
+            mPixelData = (uint32_t *) calloc( mSize, sizeof(uint32_t));
+        }
+        return (uint8_t *) mPixelData;
+    }
 
-    void initRMT()
+    // -- Initialize RMT subsystem
+    //    This only needs to be done once
+    static void init()
     {
+        if (gInitialized) return;
+
         for (int i = 0; i < FASTLED_RMT_MAX_CHANNELS; i++) {
             gOnChannel[i] = NULL;
 
@@ -256,7 +266,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
             rmt_config_t rmt_tx;
             rmt_tx.channel = rmt_channel_t(i);
             rmt_tx.rmt_mode = RMT_MODE_TX;
-            rmt_tx.gpio_num = mPin;  // The particular pin will be assigned later
+            rmt_tx.gpio_num = gpio_num_t(0);  // The particular pin will be assigned later
             rmt_tx.mem_block_num = 1;
             rmt_tx.clk_div = DIVIDER;
             rmt_tx.tx_config.loop_en = false;
@@ -264,7 +274,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
             rmt_tx.tx_config.carrier_en = false;
             rmt_tx.tx_config.idle_level = RMT_IDLE_LEVEL_LOW;
             rmt_tx.tx_config.idle_output_en = true;
-                
+
             // -- Apply the configuration
             rmt_config(&rmt_tx);
 
@@ -296,130 +306,15 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         gInitialized = true;
     }
 
-    // -- Show pixels
-    //    This is the main entry point for the controller.
-    virtual void IRAM_ATTR showPixels(PixelController<RGB_ORDER> & pixels)
-    {
-        if (gNumStarted == 0) {
-            // -- First controller: make sure everything is set up
-            // -- Only need to do this once
-            if ( ! gInitialized) {
-                initRMT();
-            }
-            xSemaphoreTake(gTX_sem, portMAX_DELAY);
-
-#if FASTLED_ESP32_FLASH_LOCK == 1
-            // -- Make sure no flash operations happen right now
-            spi_flash_op_lock();
-#endif
-        }
-
-        if (FASTLED_RMT_BUILTIN_DRIVER)
-            convertAllPixelData(pixels);
-        else {
-            // -- Initialize the local state, save a pointer to the pixel
-            //    data. We need to make a copy because pixels is a local
-            //    variable in the calling function, and this data structure
-            //    needs to outlive this call to showPixels.
-            (*mPixels) = pixels;
-        }
-
-        // -- Keep track of the number of strips we've seen
-        gNumStarted++;
-
-        // -- The last call to showPixels is the one responsible for doing
-        //    all of the actual worl
-        if (gNumStarted == gNumControllers) {
-            gNext = 0;
-
-            // -- First, fill all the available channels
-            int channel = 0;
-            while (channel < FASTLED_RMT_MAX_CHANNELS && gNext < gNumControllers) {
-                startNext(channel);
-                channel++;
-            }
-
-            // -- Make sure it's been at least 50ms since last show
-            mWait.wait();
-
-            // -- Start them all
-            for (int i = 0; i < channel; i++) {
-                ClocklessController * pController = static_cast<ClocklessController*>(gControllers[i]);
-                rmt_tx_start(pController->mRMT_channel, true);
-            }
-
-            // -- Wait here while the rest of the data is sent. The interrupt handler
-            //    will keep refilling the RMT buffers until it is all sent; then it
-            //    gives the semaphore back.
-            xSemaphoreTake(gTX_sem, portMAX_DELAY);
-            xSemaphoreGive(gTX_sem);
-
-            mWait.mark();
-
-            // -- Reset the counters
-            gNumStarted = 0;
-            gNumDone = 0;
-            gNext = 0;
-
-#if FASTLED_ESP32_FLASH_LOCK == 1
-            // -- Release the lock on flash operations
-            spi_flash_op_unlock();
-#endif
-        }
-    }
-
-    // -- Convert all pixels to RMT pulses
-    //    This function is only used when the user chooses to use the
-    //    built-in RMT driver, which needs all of the RMT pulses
-    //    up-front.
-    void convertAllPixelData(PixelController<RGB_ORDER> & pixels)
-    {
-        // -- Compute the pulse values for the whole strip at once.
-        //    Requires a large buffer
-        mBufferSize = pixels.size() * 3 * 8;
-
-        if (mBuffer == NULL) {
-            mBuffer = (rmt_item32_t *) calloc( mBufferSize, sizeof(rmt_item32_t));
-        }
-
-        // -- Cycle through the R,G, and B values in the right order,
-        //    storing the pulses in the big buffer
-        mCurPulse = 0;
-
-        uint32_t byteval;
-        while (pixels.has(1)) {
-            byteval = pixels.loadAndScale0();
-            convertByte(byteval);
-            byteval = pixels.loadAndScale1();
-            convertByte(byteval);
-            byteval = pixels.loadAndScale2();
-            convertByte(byteval);
-            pixels.advanceData();
-            pixels.stepDithering();
-        }
-
-        mBuffer[mCurPulse-1].duration1 = RMT_RESET_DURATION;
-        assert(mCurPulse == mBufferSize);
-    }
-
-    void convertByte(uint32_t byteval)
-    {
-        // -- Write one byte's worth of RMT pulses to the big buffer
-        byteval <<= 24;
-        for (register uint32_t j = 0; j < 8; j++) {
-            mBuffer[mCurPulse] = (byteval & 0x80000000L) ? mOne : mZero;
-            byteval <<= 1;
-            mCurPulse++;
-        }
-    }
+public:
 
     // -- Start up the next controller
     //    This method is static so that it can dispatch to the
     //    appropriate startOnChannel method of the given controller.
-    static void IRAM_ATTR startNext(int channel)
+    static void startNext(int channel)
     {
         if (gNext < gNumControllers) {
-            ClocklessController * pController = static_cast<ClocklessController*>(gControllers[gNext]);
+            ESP32RMTController * pController = gControllers[gNext];
             pController->startOnChannel(channel);
             gNext++;
         }
@@ -428,7 +323,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
     // -- Start this controller on the given channel
     //    This function just initiates the RMT write; it does not wait
     //    for it to finish.
-    void IRAM_ATTR startOnChannel(int channel)
+    void startOnChannel(int channel)
     {
         // -- Assign this channel and configure the RMT
         mRMT_channel = rmt_channel_t(channel);
@@ -450,8 +345,8 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
             // -- Initialize the counters that keep track of where we are in
             //    the pixel data.
             mRMT_mem_ptr = & (RMTMEM.chan[mRMT_channel].data32[0].val);
-            mCurPulse = 0;
-            mCurColor = 0;
+            mCur = 0;
+            mWhichHalf = 0;
 
             // -- Store 2 pixels worth of data (two "buffers" full)
             fillNext();
@@ -462,19 +357,26 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         }
     }
 
+    // -- Start RMT transmission
+    //    Setting this RMT flag is what actually kicks off the peripheral
+    void tx_start()
+    {
+        rmt_tx_start(mRMT_channel, true);
+    }
+
     // -- A controller is done 
     //    This function is called when a controller finishes writing
     //    its data. It is called either by the custom interrupt
     //    handler (below), or as a callback from the built-in
     //    interrupt handler. It is static because we don't know which
     //    controller is done until we look it up.
-    static void IRAM_ATTR doneOnChannel(rmt_channel_t channel, void * arg)
+    static void doneOnChannel(rmt_channel_t channel, void * arg)
     {
-        ClocklessController * controller = static_cast<ClocklessController*>(gOnChannel[channel]);
+        ESP32RMTController * pController = gOnChannel[channel];
         portBASE_TYPE HPTaskAwoken = 0;
 
         // -- Turn off output on the pin
-        gpio_matrix_out(controller->mPin, 0x100, 0, 0);
+        gpio_matrix_out(pController->mPin, 0x100, 0, 0);
 
         gOnChannel[channel] = NULL;
         gNumDone++;
@@ -492,9 +394,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
             //    start the next one on this channel
             if (gNext < gNumControllers) {
                 startNext(channel);
-                // -- Start the RMT TX operation
-                //    (I'm not sure if this is necessary here)
-                rmt_tx_start(controller->mRMT_channel, true);
+                pController->tx_start();
             }
         }
     }
@@ -514,7 +414,8 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
             int tx_done_bit = channel * 3;
             int tx_next_bit = channel + 24;
 
-            if (gOnChannel[channel] != NULL) {
+            ESP32RMTController * pController = gOnChannel[channel];
+            if (pController != NULL) {
 
                 // -- More to send on this channel
                 if (intr_st & BIT(tx_next_bit)) {
@@ -522,8 +423,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
                     
                     // -- Refill the half of the buffer that we just finished,
                     //    allowing the other half to proceed.
-                    ClocklessController * controller = static_cast<ClocklessController*>(gOnChannel[channel]);
-                    controller->fillNext();
+                    pController->fillNext();
                 } else {
                     // -- Transmission is complete on this channel
                     if (intr_st & BIT(tx_done_bit)) {
@@ -536,155 +436,219 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
     }
 
     // -- Fill RMT buffer
-    //    Puts one pixel's worth of data into the next 24 slots in the RMT memory
-    void IRAM_ATTR fillNext()
+    //    Puts 32 bits of pixel data into the next 32 slots in the RMT memory
+    //    Each data bit is represented by a 32-bit RMT item that specifies how
+    //    long to hold the signal high, followed by how long to hold it low.
+    void fillNext()
     {
-        if (mPixels->has(1)) {
-            uint32_t t1 = __clock_cycles();
-            
+        if (mCur < mSize) {
+            // -- Get the zero and one values into local variables
             uint32_t one_val = mOne.val;
             uint32_t zero_val = mZero.val;
 
-            // -- Get a pixel's worth of data
-            uint8_t byte0 = mPixels->loadAndScale0();
-            uint8_t byte1 = mPixels->loadAndScale1();
-            uint8_t byte2 = mPixels->loadAndScale2();
-            mPixels->advanceData();
-            mPixels->stepDithering();
-
-            // -- Fill 24 slots in the RMT memory
-            register uint32_t pixel = byte0 << 24 | byte1 << 16 | byte2 << 8;
+            // -- Fill 32 slots in the RMT memory
+            register uint32_t pixeldata = mPixelData[mCur];
 
             // -- Use locals for speed
             volatile register uint32_t * pItem =  mRMT_mem_ptr;
-            register uint16_t curPulse = mCurPulse;
             
             // Shift bits out, MSB first, setting RMTMEM.chan[n].data32[x] to the 
             // rmt_item32_t value corresponding to the buffered bit value
-            for (register uint32_t j = 0; j < 24; j++) {
-                uint32_t val = (pixel & 0x80000000L) ? one_val : zero_val;
+            for (register uint32_t j = 0; j < PULSES_PER_FILL; j++) {
+                uint32_t val = (pixeldata & 0x80000000L) ? one_val : zero_val;
                 *pItem++ = val;
                 // Replaces: RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = val;
 
-                pixel <<= 1;
-                curPulse++;
+                pixeldata <<= 1;
+            }
 
-                if (curPulse == MAX_PULSES) {
-                    pItem = & (RMTMEM.chan[mRMT_channel].data32[0].val);
-                    curPulse = 0;
-                }
+            // -- Flip to the other half, resetting the pointer if necessary
+            mWhichHalf++;
+            if (mWhichHalf == 2) {
+                pItem = & (RMTMEM.chan[mRMT_channel].data32[0].val);
+                mWhichHalf = 0;
             }
 
-            // -- Store the new values back into the object
-            mCurPulse = curPulse;
+            // -- Store the new pointer back into the object
             mRMT_mem_ptr = pItem;
         } else {
             // -- No more data; signal to the RMT we are done
-            for (uint32_t j = 0; j < 8; j++) {
+            for (uint32_t j = 0; j < PULSES_PER_FILL; j++) {
                 * mRMT_mem_ptr++ = 0;
             }
         }   
     }
 
-    // NO LONGER USED
-    uint8_t IRAM_ATTR getNextByte() __attribute__ ((always_inline))
+    // -- Init pulse buffer
+    //    Set up the buffer that will hold all of the pulse items for this
+    //    controller. 
+    //    This function is only used when the built-in RMT driver is chosen
+    void initPulseBuffer(int size_in_bytes)
     {
-        uint8_t byte;
-
-        // -- Cycle through the color channels
-        switch (mCurColor) {
-        case 0: 
-            byte = mPixels->loadAndScale0();
-            break;
-        case 1: 
-            byte = mPixels->loadAndScale1();
-            break;
-        case 2: 
-            byte = mPixels->loadAndScale2();
-            mPixels->advanceData();
-            mPixels->stepDithering();
-            break;
-        default:
-            // -- This is bad!
-            byte = 0;
+        if (mBuffer == 0) {
+            // -- Each byte has 8 bits, each bit needs a 32-bit RMT item
+            int size = size_in_bytes * 8 * 4;
+
+            mBuffer = (rmt_item32_t *) calloc( mBufferSize, sizeof(rmt_item32_t));
         }
+        mCurPulse = 0;
+    }
 
-        mCurColor++;
-        if (mCurColor == NUM_COLOR_CHANNELS) mCurColor = 0;
+    // -- Convert a byte into RMT pulses
+    //    This function is only used when the built-in RMT driver is chosen
+    void convertByte(uint32_t byteval)
+    {
+        // -- Write one byte's worth of RMT pulses to the big buffer
+        byteval <<= 24;
+        for (register uint32_t j = 0; j < 8; j++) {
+            mBuffer[mCurPulse] = (byteval & 0x80000000L) ? mOne : mZero;
+            byteval <<= 1;
+            mCurPulse++;
+        }
+    }
+};
+
+template <int DATA_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = RGB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 5>
+class ClocklessController : public CPixelLEDController<RGB_ORDER>
+{
+private:
+
+    // -- The actual controller object for ESP32
+    ESP32RMTController * mRMTController;
 
-        return byte;
+    // -- This instantiation forces a check on the pin choice
+    FastPin<DATA_PIN> mFastPin;
+
+    // -- Make sure we can't call show() too quickly
+    CMinWait<50>   mWait;
+
+public:
+
+    void init()
+    {
+        mRMTController = new ESP32RMTController(DATA_PIN, T1, T2, T3);
     }
 
+    virtual uint16_t getMaxRefreshRate() const { return 400; }
+
+protected:
 
-    // NO LONGER USED
-    // -- Fill the RMT buffer
-    //    This function fills the next 32 slots in the RMT write
-    //    buffer with pixel data. It also handles the case where the
-    //    pixel data is exhausted, so we need to fill the RMT buffer
-    //    with zeros to signal that it's done.
-    virtual void IRAM_ATTR fillHalfRMTBuffer()
+    // -- Load pixel data
+    //    This method loads all of the pixel data into a separate buffer for use by
+    //    by the RMT driver. Copying does two important jobs: it fixes the color
+    //    order for the pixels, and it performs the scaling/adjusting ahead of time.
+    void loadPixelData(PixelController<RGB_ORDER> & pixels)
     {
-        uint32_t one_val = mOne.val;
-        uint32_t zero_val = mZero.val;
-
-        // -- Convert (up to) 32 bits of the raw pixel data into
-        //    into RMT pulses that encode the zeros and ones.
-        int pulses = 0;
-        register uint32_t byteval;
-        while (pulses < 32 && mPixels->has(1)) {
-            // -- Get one byte
-            // -- Cycle through the color channels
-            switch (mCurColor) {
-            case 0: 
-                byteval = mPixels->loadAndScale0();
-                break;
-            case 1: 
-                byteval = mPixels->loadAndScale1();
-                break;
-            case 2: 
-                byteval = mPixels->loadAndScale2();
-                mPixels->advanceData();
-                mPixels->stepDithering();
-                break;
-            default:
-                // -- This is bad!
-                byteval = 0;
-            }
+        // -- Make sure the buffer is allocated
+        int size = pixels.size() * 3;
+        uint8_t * pData = mRMTController->getPixelData(size);
+
+        // -- Read out the pixel data using the pixel controller methods that
+        //    perform the scaling and adjustments 
+        int count = 0;
+        uint8_t byteval;
+        while (pixels.has(1)) {
+            *pData++ = pixels.loadAndScale0();
+            *pData++ = pixels.loadAndScale1();
+            *pData++ = pixels.loadAndScale2();
+            pixels.advanceData();
+            pixels.stepDithering();
+            count += 3;
+        }
 
-            mCurColor++;
-            if (mCurColor == NUM_COLOR_CHANNELS) mCurColor = 0;
-        
-            // byteval = getNextByte();
-            byteval <<= 24;
-            // Shift bits out, MSB first, setting RMTMEM.chan[n].data32[x] to the 
-            // rmt_item32_t value corresponding to the buffered bit value
-            for (register uint32_t j = 0; j < 8; j++) {
-                uint32_t val = (byteval & 0x80000000L) ? one_val : zero_val;
-                * mRMT_mem_ptr++ = val;
-                // Replaces: RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = val;
-                byteval <<= 1;
-                mCurPulse++;
-            }
-            pulses += 8;
+        assert(count == size);
+    }
+
+    // -- Show pixels
+    //    This is the main entry point for the controller.
+    virtual void showPixels(PixelController<RGB_ORDER> & pixels)
+    {
+        if (gNumStarted == 0) {
+            // -- First controller: make sure everything is set up
+            ESP32RMTController::init();
+            xSemaphoreTake(gTX_sem, portMAX_DELAY);
+
+#if FASTLED_ESP32_FLASH_LOCK == 1
+            // -- Make sure no flash operations happen right now
+            spi_flash_op_lock();
+#endif
         }
 
-        // -- When we reach the end of the pixel data, fill the rest of the
-        //    RMT buffer with 0's, which signals to the device that we're done.
-        if ( ! mPixels->has(1) ) {
-            while (pulses < 32) {
-                * mRMT_mem_ptr++ = 0;
-                // Replaces: RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = 0;
-                mCurPulse++;
-                pulses++;
+        if (FASTLED_RMT_BUILTIN_DRIVER) {
+            convertAllPixelData(pixels);
+        } else {
+            loadPixelData(pixels);
+        }
+
+        // -- Keep track of the number of strips we've seen
+        gNumStarted++;
+
+        // -- The last call to showPixels is the one responsible for doing
+        //    all of the actual worl
+        if (gNumStarted == gNumControllers) {
+            gNext = 0;
+
+            // -- First, fill all the available channels
+            int channel = 0;
+            while (channel < FASTLED_RMT_MAX_CHANNELS && gNext < gNumControllers) {
+                ESP32RMTController::startNext(channel);
+                channel++;
+            }
+
+            // -- Make sure it's been at least 50ms since last show
+            mWait.wait();
+
+            // -- Start them all
+            for (int i = 0; i < channel; i++) {
+                ESP32RMTController * pController = gControllers[i];
+                pController->tx_start();
             }
+
+            // -- Wait here while the rest of the data is sent. The interrupt handler
+            //    will keep refilling the RMT buffers until it is all sent; then it
+            //    gives the semaphore back.
+            xSemaphoreTake(gTX_sem, portMAX_DELAY);
+            xSemaphoreGive(gTX_sem);
+
+            mWait.mark();
+
+            // -- Reset the counters
+            gNumStarted = 0;
+            gNumDone = 0;
+            gNext = 0;
+
+#if FASTLED_ESP32_FLASH_LOCK == 1
+            // -- Release the lock on flash operations
+            spi_flash_op_unlock();
+#endif
         }
-        
-        // -- When we have filled the back half the buffer, reset the position to the first half
-        if (mCurPulse == MAX_PULSES) {
-            mRMT_mem_ptr = & (RMTMEM.chan[mRMT_channel].data32[0].val);
-            mCurPulse = 0;
+    }
+
+    // -- Convert all pixels to RMT pulses
+    //    This function is only used when the user chooses to use the
+    //    built-in RMT driver, which needs all of the RMT pulses
+    //    up-front.
+    void convertAllPixelData(PixelController<RGB_ORDER> & pixels)
+    {
+        // -- Make sure the data buffer is allocated
+        mRMTController->initPulseBuffer(pixels.size() * 3);
+
+        // -- Cycle through the R,G, and B values in the right order,
+        //    storing the pulses in the big buffer
+
+        uint32_t byteval;
+        while (pixels.has(1)) {
+            byteval = pixels.loadAndScale0();
+            mRMTController->convertByte(byteval);
+            byteval = pixels.loadAndScale1();
+            mRMTController->convertByte(byteval);
+            byteval = pixels.loadAndScale2();
+            mRMTController->convertByte(byteval);
+            pixels.advanceData();
+            pixels.stepDithering();
         }
     }
 };
 
+
 FASTLED_NAMESPACE_END

From 8db0db5e303c04f4e09522fc40ed75ac9458dfdc Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Fri, 12 Jun 2020 23:06:51 -0400
Subject: [PATCH 165/204] Total redesign of the RMT driver using a separate
 class that handles all of the interactions with the RMT peripheral. This
 design gets around the problem that methods in template classes cannot have
 the IRAM_ATTR attribute, which is crucial for the interrupt handler.

---
 platforms/esp/32/clockless_rmt_esp32.cpp | 379 +++++++++++++++++++++++
 platforms/esp/32/clockless_rmt_esp32.h   | 368 ++--------------------
 2 files changed, 412 insertions(+), 335 deletions(-)
 create mode 100644 platforms/esp/32/clockless_rmt_esp32.cpp

diff --git a/platforms/esp/32/clockless_rmt_esp32.cpp b/platforms/esp/32/clockless_rmt_esp32.cpp
new file mode 100644
index 0000000000..b3f106da5e
--- /dev/null
+++ b/platforms/esp/32/clockless_rmt_esp32.cpp
@@ -0,0 +1,379 @@
+
+#ifdef ESP32
+
+#define FASTLED_INTERNAL
+#include "FastLED.h"
+
+// -- Forward reference
+class ESP32RMTController;
+
+// -- Array of all controllers
+static ESP32RMTController * gControllers[FASTLED_RMT_MAX_CONTROLLERS];
+
+// -- Current set of active controllers, indexed by the RMT
+//    channel assigned to them.
+static ESP32RMTController * gOnChannel[FASTLED_RMT_MAX_CHANNELS];
+
+static int gNumControllers = 0;
+static int gNumStarted = 0;
+static int gNumDone = 0;
+static int gNext = 0;
+
+static intr_handle_t gRMT_intr_handle = NULL;
+
+// -- Global semaphore for the whole show process
+//    Semaphore is not given until all data has been sent
+static xSemaphoreHandle gTX_sem = NULL;
+
+static bool gInitialized = false;
+
+ESP32RMTController::ESP32RMTController(int DATA_PIN, int T1, int T2, int T3)
+    : mPixelData(0), mSize(0), mCur(0), mWhichHalf(0)
+{
+    // -- Precompute rmt items corresponding to a zero bit and a one bit
+    //    according to the timing values given in the template instantiation
+    // T1H
+    mOne.level0 = 1;
+    mOne.duration0 = ESP_TO_RMT_CYCLES(T1+T2); // TO_RMT_CYCLES(T1+T2);
+    // T1L
+    mOne.level1 = 0;
+    mOne.duration1 = ESP_TO_RMT_CYCLES(T3); // TO_RMT_CYCLES(T3);
+
+    // T0H
+    mZero.level0 = 1;
+    mZero.duration0 = ESP_TO_RMT_CYCLES(T1); // TO_RMT_CYCLES(T1);
+    // T0L
+    mZero.level1 = 0;
+    mZero.duration1 = ESP_TO_RMT_CYCLES(T2+T3); // TO_RMT_CYCLES(T2 + T3);
+
+    gControllers[gNumControllers] = this;
+    gNumControllers++;
+
+    mPin = gpio_num_t(DATA_PIN);
+}
+
+// -- Getters and setters for use in ClocklessController
+uint8_t * ESP32RMTController::getPixelData(int size_in_bytes)
+{
+    if (mPixelData == 0) {
+        mSize = size_in_bytes;
+        mPixelData = (uint8_t *) calloc( mSize, sizeof(uint8_t));
+    }
+    return mPixelData;
+}
+
+// -- Initialize RMT subsystem
+//    This only needs to be done once
+void ESP32RMTController::init()
+{
+    if (gInitialized) return;
+
+    for (int i = 0; i < FASTLED_RMT_MAX_CHANNELS; i++) {
+        gOnChannel[i] = NULL;
+
+        // -- RMT configuration for transmission
+        rmt_config_t rmt_tx;
+        rmt_tx.channel = rmt_channel_t(i);
+        rmt_tx.rmt_mode = RMT_MODE_TX;
+        rmt_tx.gpio_num = gpio_num_t(0);  // The particular pin will be assigned later
+        rmt_tx.mem_block_num = 1;
+        rmt_tx.clk_div = DIVIDER;
+        rmt_tx.tx_config.loop_en = false;
+        rmt_tx.tx_config.carrier_level = RMT_CARRIER_LEVEL_LOW;
+        rmt_tx.tx_config.carrier_en = false;
+        rmt_tx.tx_config.idle_level = RMT_IDLE_LEVEL_LOW;
+        rmt_tx.tx_config.idle_output_en = true;
+
+        // -- Apply the configuration
+        rmt_config(&rmt_tx);
+
+        if (FASTLED_RMT_BUILTIN_DRIVER) {
+            rmt_driver_install(rmt_channel_t(i), 0, 0);
+        } else {
+            // -- Set up the RMT to send 1 pixel of the pulse buffer and then
+            //    generate an interrupt. When we get this interrupt we
+            //    fill the other part in preparation (kind of like double-buffering)
+            rmt_set_tx_thr_intr_en(rmt_channel_t(i), true, PULSES_PER_FILL);
+        }
+    }
+
+    // -- Create a semaphore to block execution until all the controllers are done
+    if (gTX_sem == NULL) {
+        gTX_sem = xSemaphoreCreateBinary();
+        xSemaphoreGive(gTX_sem);
+    }
+                
+    if ( ! FASTLED_RMT_BUILTIN_DRIVER) {
+        // -- Allocate the interrupt if we have not done so yet. This
+        //    interrupt handler must work for all different kinds of
+        //    strips, so it delegates to the refill function for each
+        //    specific instantiation of ClocklessController.
+        if (gRMT_intr_handle == NULL)
+            esp_intr_alloc(ETS_RMT_INTR_SOURCE, ESP_INTR_FLAG_LEVEL3, interruptHandler, 0, &gRMT_intr_handle);
+    }
+
+    gInitialized = true;
+}
+
+// -- Show this string of pixels
+//    This is the main entry point for the pixel controller
+void ESP32RMTController::showPixels()
+{
+    if (gNumStarted == 0) {
+        // -- First controller: make sure everything is set up
+        ESP32RMTController::init();
+        xSemaphoreTake(gTX_sem, portMAX_DELAY);
+
+#if FASTLED_ESP32_FLASH_LOCK == 1
+        // -- Make sure no flash operations happen right now
+        spi_flash_op_lock();
+#endif
+    }
+
+    // -- Keep track of the number of strips we've seen
+    gNumStarted++;
+
+    // -- The last call to showPixels is the one responsible for doing
+    //    all of the actual worl
+    if (gNumStarted == gNumControllers) {
+        gNext = 0;
+
+        // -- First, fill all the available channels
+        int channel = 0;
+        while (channel < FASTLED_RMT_MAX_CHANNELS && gNext < gNumControllers) {
+            ESP32RMTController::startNext(channel);
+            channel++;
+        }
+
+        // -- Make sure it's been at least 50us since last show
+        mWait.wait();
+
+        // -- Start them all
+        for (int i = 0; i < channel; i++) {
+            ESP32RMTController * pController = gControllers[i];
+            pController->tx_start();
+        }
+
+        // -- Wait here while the rest of the data is sent. The interrupt handler
+        //    will keep refilling the RMT buffers until it is all sent; then it
+        //    gives the semaphore back.
+        xSemaphoreTake(gTX_sem, portMAX_DELAY);
+        xSemaphoreGive(gTX_sem);
+
+        mWait.mark();
+
+        // -- Reset the counters
+        gNumStarted = 0;
+        gNumDone = 0;
+        gNext = 0;
+
+#if FASTLED_ESP32_FLASH_LOCK == 1
+        // -- Release the lock on flash operations
+        spi_flash_op_unlock();
+#endif
+    }
+}
+
+// -- Start up the next controller
+//    This method is static so that it can dispatch to the
+//    appropriate startOnChannel method of the given controller.
+void ESP32RMTController::startNext(int channel)
+{
+    if (gNext < gNumControllers) {
+        ESP32RMTController * pController = gControllers[gNext];
+        pController->startOnChannel(channel);
+        gNext++;
+    }
+}
+
+// -- Start this controller on the given channel
+//    This function only prepares the RMT write (custom driver) or hands
+//    it to the built-in driver; it does not wait for it to finish.
+void ESP32RMTController::startOnChannel(int channel)
+{
+    // -- Assign this channel and configure the RMT
+    mRMT_channel = rmt_channel_t(channel);
+
+    // -- Store a reference to this controller, so we can get it
+    //    inside the interrupt handler
+    gOnChannel[channel] = this;
+
+    // -- Assign the pin to this channel
+    rmt_set_pin(mRMT_channel, RMT_MODE_TX, mPin);
+
+    if (FASTLED_RMT_BUILTIN_DRIVER) {
+        // -- Use the built-in RMT driver to send all the data in one shot
+        rmt_register_tx_end_callback(doneOnChannel, 0);
+        rmt_write_items(mRMT_channel, mBuffer, mBufferSize, false);  // 'false' presumably means "do not block until sent" -- confirm
+    } else {
+        // -- Use our custom driver to send the data incrementally
+
+        // -- Reset the write pointer to the start of this channel's RMT
+        //    memory, and rewind the pixel data position and half counter.
+        mRMT_mem_ptr = & (RMTMEM.chan[mRMT_channel].data32[0].val);
+        mCur = 0;
+        mWhichHalf = 0;
+
+        // -- Pre-load both halves of the channel memory (one fillNext() each)
+        fillNext();
+        fillNext();
+
+        // -- Turn on the interrupts
+        rmt_set_tx_intr_en(mRMT_channel, true);
+    }
+}
+
+// -- Start RMT transmission
+//    Setting this RMT flag is what actually kicks off the peripheral
+void ESP32RMTController::tx_start()
+{
+    rmt_tx_start(mRMT_channel, true);  // 'true' presumably resets the TX read index (tx_idx_rst) -- confirm
+}
+
+// -- A controller is done
+//    This function is called when a controller finishes writing
+//    its data. It is called either by the custom interrupt
+//    handler (below), or as a callback from the built-in
+//    interrupt handler. It is static because we don't know which
+//    controller is done until we look it up.
+void ESP32RMTController::doneOnChannel(rmt_channel_t channel, void * arg)
+{
+    ESP32RMTController * pController = gOnChannel[channel];  // controller that just finished; 'arg' is unused
+    portBASE_TYPE HPTaskAwoken = 0;
+
+    // -- Turn off output on the pin
+    gpio_matrix_out(pController->mPin, 0x100, 0, 0);  // presumably 0x100 detaches the RMT signal from the pin -- confirm
+
+    gOnChannel[channel] = NULL;  // the channel no longer has an active controller
+    gNumDone++;
+
+    if (gNumDone == gNumControllers) {
+        // -- If this is the last controller, signal that we are all done
+        if (FASTLED_RMT_BUILTIN_DRIVER) {
+            xSemaphoreGive(gTX_sem);  // NOTE(review): if the driver calls this from its ISR, the FromISR variant is required -- confirm
+        } else {
+            xSemaphoreGiveFromISR(gTX_sem, &HPTaskAwoken);
+            if (HPTaskAwoken == pdTRUE) portYIELD_FROM_ISR();
+        }
+    } else {
+        // -- Otherwise, if there are still controllers waiting, then
+        //    start the next one on this channel
+        if (gNext < gNumControllers) {
+            startNext(channel);
+            pController->tx_start();  // the finished controller still holds this channel number, so this starts the new one
+        }
+    }
+}
+    
+// -- Custom interrupt handler
+//    This interrupt handler handles two cases: a controller is
+//    done writing its data, or a controller needs to fill the
+//    next half of the RMT buffer with data.
+void IRAM_ATTR ESP32RMTController::interruptHandler(void *arg)
+{
+    // -- The basic structure of this code is borrowed from the
+    //    interrupt handler in esp-idf/components/driver/rmt.c
+    uint32_t intr_st = RMT.int_st.val;  // one snapshot of the status register; 'arg' is unused
+    uint8_t channel;
+
+    for (channel = 0; channel < FASTLED_RMT_MAX_CHANNELS; channel++) {
+        int tx_done_bit = channel * 3;   // status bit tested below for "transmission complete"
+        int tx_next_bit = channel + 24;  // status bit tested below for "more to send"
+
+        ESP32RMTController * pController = gOnChannel[channel];
+        if (pController != NULL) {  // ignore channels with no active controller
+
+            // -- More to send on this channel
+            if (intr_st & BIT(tx_next_bit)) {
+                RMT.int_clr.val |= BIT(tx_next_bit);
+                    
+                // -- Refill the half of the buffer that we just finished,
+                //    allowing the other half to proceed.
+                pController->fillNext();
+            } else {
+                // -- Transmission is complete on this channel
+                if (intr_st & BIT(tx_done_bit)) {
+                    RMT.int_clr.val |= BIT(tx_done_bit);
+                    doneOnChannel(rmt_channel_t(channel), 0);
+                }
+            }
+        }
+    }
+}
+
+// -- Fill RMT buffer
+//    Puts up to 32 bits of pixel data into the next PULSES_PER_FILL slots of
+//    this channel's RMT memory. Each bit becomes one 32-bit RMT item (high
+//    time, then low time). With no data left the slots are zeroed instead.
+//    Called from startOnChannel() and from the interrupt handler.
+void IRAM_ATTR ESP32RMTController::fillNext()
+{
+    // -- Work on a local copy of the write pointer for speed
+    volatile register uint32_t * pItem = mRMT_mem_ptr;
+
+    if (mCur < mSize) {
+        // -- Get the zero and one values into local variables
+        uint32_t one_val = mOne.val;
+        uint32_t zero_val = mZero.val;
+
+        // -- Gather the next four bytes, first byte in the top bits. mSize
+        //    need not be a multiple of four, so pad with zeros rather than
+        //    reading past the end of mPixelData.
+        uint32_t a = mPixelData[mCur++];
+        uint32_t b = (mCur < mSize) ? mPixelData[mCur++] : 0;
+        uint32_t c = (mCur < mSize) ? mPixelData[mCur++] : 0;
+        uint32_t d = (mCur < mSize) ? mPixelData[mCur++] : 0;
+        register uint32_t pixeldata = (a << 24) | (b << 16) | (c << 8) | d;
+
+        // -- Shift bits out, MSB first, storing the RMT item that
+        //    corresponds to each bit value
+        for (register uint32_t j = 0; j < PULSES_PER_FILL; j++) {
+            *pItem++ = (pixeldata & 0x80000000L) ? one_val : zero_val;
+            pixeldata <<= 1;
+        }
+    } else {
+        // -- No more data; signal to the RMT we are done
+        for (uint32_t j = 0; j < PULSES_PER_FILL; j++) {
+            *pItem++ = 0;
+        }
+    }
+
+    // -- Flip to the other half, wrapping back to the start of this channel's
+    //    memory after the second half. This is done on the "no more data"
+    //    path too, so repeated calls never write beyond the channel's block.
+    if (++mWhichHalf == 2) {
+        pItem = & (RMTMEM.chan[mRMT_channel].data32[0].val);
+        mWhichHalf = 0;
+    }
+    mRMT_mem_ptr = pItem;
+}
+
+// -- Init pulse buffer
+//    Set up the buffer that will hold all of the pulse items for this
+//    controller. The buffer is allocated once, on the first call.
+//    This function is only used when the built-in RMT driver is chosen
+void ESP32RMTController::initPulseBuffer(int size_in_bytes)
+{
+    if (mBuffer == 0) {
+        // -- Each byte has 8 bits, each bit needs one 32-bit RMT item.
+        //    mBufferSize is the item count later given to rmt_write_items().
+        mBufferSize = size_in_bytes * 8;  // NOTE: mBufferSize is uint16_t, so this caps at 8191 bytes
+        mBuffer = (rmt_item32_t *) calloc( mBufferSize, sizeof(rmt_item32_t));
+    }
+    mCurPulse = 0;  // convertByte() starts writing at the beginning again
+}
+
+// -- Convert a byte into RMT pulses
+//    This function is only used when the built-in RMT driver is chosen
+void ESP32RMTController::convertByte(uint32_t byteval)
+{
+    // -- Write one byte's worth of RMT pulses to the big buffer
+    byteval <<= 24;  // move the byte to the top so that bit 31 is its MSB
+    for (register uint32_t j = 0; j < 8; j++) {
+        mBuffer[mCurPulse] = (byteval & 0x80000000L) ? mOne : mZero;  // no bounds check against the buffer size here
+        byteval <<= 1;
+        mCurPulse++;
+    }
+}
+
+#endif
diff --git a/platforms/esp/32/clockless_rmt_esp32.h b/platforms/esp/32/clockless_rmt_esp32.h
index bdd71fff1b..de70a00ae5 100644
--- a/platforms/esp/32/clockless_rmt_esp32.h
+++ b/platforms/esp/32/clockless_rmt_esp32.h
@@ -163,29 +163,6 @@ __attribute__ ((always_inline)) inline static uint32_t __clock_cycles() {
 #define FASTLED_RMT_MAX_CHANNELS 8
 #endif
 
-// -- Forward reference
-class ESP32RMTController;
-
-// -- Array of all controllers
-static ESP32RMTController * gControllers[FASTLED_RMT_MAX_CONTROLLERS];
-
-// -- Current set of active controllers, indexed by the RMT
-//    channel assigned to them.
-static ESP32RMTController * gOnChannel[FASTLED_RMT_MAX_CHANNELS];
-
-static int gNumControllers = 0;
-static int gNumStarted = 0;
-static int gNumDone = 0;
-static int gNext = 0;
-
-static intr_handle_t gRMT_intr_handle = NULL;
-
-// -- Global semaphore for the whole show process
-//    Semaphore is not given until all data has been sent
-static xSemaphoreHandle gTX_sem = NULL;
-
-static bool gInitialized = false;
-
 class ESP32RMTController
 {
 private:
@@ -201,7 +178,7 @@ class ESP32RMTController
     rmt_item32_t   mOne;
 
     // -- Pixel data
-    uint32_t *     mPixelData;
+    uint8_t *      mPixelData;
     int            mSize;
     int            mCur;
 
@@ -215,154 +192,40 @@ class ESP32RMTController
     uint16_t       mBufferSize;
     int            mCurPulse;
 
+    // -- Make sure we can't call show() too quickly
+    CMinWait<50>   mWait;
+
 public:
 
-    ESP32RMTController(int DATA_PIN, int T1, int T2, int T3)
-        : mPixelData(0), mSize(0), mCur(0), mWhichHalf(0)
-    {
-        // -- Precompute rmt items corresponding to a zero bit and a one bit
-        //    according to the timing values given in the template instantiation
-        // T1H
-        mOne.level0 = 1;
-        mOne.duration0 = ESP_TO_RMT_CYCLES(T1+T2); // TO_RMT_CYCLES(T1+T2);
-        // T1L
-        mOne.level1 = 0;
-        mOne.duration1 = ESP_TO_RMT_CYCLES(T3); // TO_RMT_CYCLES(T3);
-
-        // T0H
-        mZero.level0 = 1;
-        mZero.duration0 = ESP_TO_RMT_CYCLES(T1); // TO_RMT_CYCLES(T1);
-        // T0L
-        mZero.level1 = 0;
-        mZero.duration1 = ESP_TO_RMT_CYCLES(T2+T3); // TO_RMT_CYCLES(T2 + T3);
-
-        gControllers[gNumControllers] = this;
-        gNumControllers++;
-
-        mPin = gpio_num_t(DATA_PIN);
-    }
+    // -- Constructor
+    //    Mainly just stores the template parameters from the LEDController as
+    //    member variables.
+    ESP32RMTController(int DATA_PIN, int T1, int T2, int T3);
 
     // -- Getters and setters for use in ClocklessController
-    uint8_t * getPixelData(int size_in_bytes)
-    {
-        if (mPixelData == 0) {
-            // -- Round up
-            mSize = ((size_in_bytes-1) / sizeof(uint32_t)) + 1;
-            mPixelData = (uint32_t *) calloc( mSize, sizeof(uint32_t));
-        }
-        return (uint8_t *) mPixelData;
-    }
+    uint8_t * getPixelData(int size_in_bytes);
 
     // -- Initialize RMT subsystem
     //    This only needs to be done once
-    static void init()
-    {
-        if (gInitialized) return;
-
-        for (int i = 0; i < FASTLED_RMT_MAX_CHANNELS; i++) {
-            gOnChannel[i] = NULL;
-
-            // -- RMT configuration for transmission
-            rmt_config_t rmt_tx;
-            rmt_tx.channel = rmt_channel_t(i);
-            rmt_tx.rmt_mode = RMT_MODE_TX;
-            rmt_tx.gpio_num = gpio_num_t(0);  // The particular pin will be assigned later
-            rmt_tx.mem_block_num = 1;
-            rmt_tx.clk_div = DIVIDER;
-            rmt_tx.tx_config.loop_en = false;
-            rmt_tx.tx_config.carrier_level = RMT_CARRIER_LEVEL_LOW;
-            rmt_tx.tx_config.carrier_en = false;
-            rmt_tx.tx_config.idle_level = RMT_IDLE_LEVEL_LOW;
-            rmt_tx.tx_config.idle_output_en = true;
-
-            // -- Apply the configuration
-            rmt_config(&rmt_tx);
-
-            if (FASTLED_RMT_BUILTIN_DRIVER) {
-                rmt_driver_install(rmt_channel_t(i), 0, 0);
-            } else {
-                // -- Set up the RMT to send 1 pixel of the pulse buffer and then
-                //    generate an interrupt. When we get this interrupt we
-                //    fill the other part in preparation (kind of like double-buffering)
-                rmt_set_tx_thr_intr_en(rmt_channel_t(i), true, PULSES_PER_FILL);
-            }
-        }
+    static void init();
 
-        // -- Create a semaphore to block execution until all the controllers are done
-        if (gTX_sem == NULL) {
-            gTX_sem = xSemaphoreCreateBinary();
-            xSemaphoreGive(gTX_sem);
-        }
-                
-        if ( ! FASTLED_RMT_BUILTIN_DRIVER) {
-            // -- Allocate the interrupt if we have not done so yet. This
-            //    interrupt handler must work for all different kinds of
-            //    strips, so it delegates to the refill function for each
-            //    specific instantiation of ClocklessController.
-            if (gRMT_intr_handle == NULL)
-                esp_intr_alloc(ETS_RMT_INTR_SOURCE, ESP_INTR_FLAG_LEVEL3, interruptHandler, 0, &gRMT_intr_handle);
-        }
-
-        gInitialized = true;
-    }
-
-public:
+    // -- Show this string of pixels
+    //    This is the main entry point for the pixel controller
+    void IRAM_ATTR showPixels();
 
     // -- Start up the next controller
     //    This method is static so that it can dispatch to the
     //    appropriate startOnChannel method of the given controller.
-    static void startNext(int channel)
-    {
-        if (gNext < gNumControllers) {
-            ESP32RMTController * pController = gControllers[gNext];
-            pController->startOnChannel(channel);
-            gNext++;
-        }
-    }
+    static void IRAM_ATTR startNext(int channel);
 
     // -- Start this controller on the given channel
     //    This function just initiates the RMT write; it does not wait
     //    for it to finish.
-    void startOnChannel(int channel)
-    {
-        // -- Assign this channel and configure the RMT
-        mRMT_channel = rmt_channel_t(channel);
-
-        // -- Store a reference to this controller, so we can get it
-        //    inside the interrupt handler
-        gOnChannel[channel] = this;
-
-        // -- Assign the pin to this channel
-        rmt_set_pin(mRMT_channel, RMT_MODE_TX, mPin);
-
-        if (FASTLED_RMT_BUILTIN_DRIVER) {
-            // -- Use the built-in RMT driver to send all the data in one shot
-            rmt_register_tx_end_callback(doneOnChannel, 0);
-            rmt_write_items(mRMT_channel, mBuffer, mBufferSize, false);
-        } else {
-            // -- Use our custom driver to send the data incrementally
-
-            // -- Initialize the counters that keep track of where we are in
-            //    the pixel data.
-            mRMT_mem_ptr = & (RMTMEM.chan[mRMT_channel].data32[0].val);
-            mCur = 0;
-            mWhichHalf = 0;
-
-            // -- Store 2 pixels worth of data (two "buffers" full)
-            fillNext();
-            fillNext();
-
-            // -- Turn on the interrupts
-            rmt_set_tx_intr_en(mRMT_channel, true);
-        }
-    }
+    void IRAM_ATTR startOnChannel(int channel);
 
     // -- Start RMT transmission
     //    Setting this RMT flag is what actually kicks off the peripheral
-    void tx_start()
-    {
-        rmt_tx_start(mRMT_channel, true);
-    }
+    void IRAM_ATTR tx_start();
 
     // -- A controller is done 
     //    This function is called when a controller finishes writing
@@ -370,142 +233,29 @@ class ESP32RMTController
     //    handler (below), or as a callback from the built-in
     //    interrupt handler. It is static because we don't know which
     //    controller is done until we look it up.
-    static void doneOnChannel(rmt_channel_t channel, void * arg)
-    {
-        ESP32RMTController * pController = gOnChannel[channel];
-        portBASE_TYPE HPTaskAwoken = 0;
-
-        // -- Turn off output on the pin
-        gpio_matrix_out(pController->mPin, 0x100, 0, 0);
-
-        gOnChannel[channel] = NULL;
-        gNumDone++;
-
-        if (gNumDone == gNumControllers) {
-            // -- If this is the last controller, signal that we are all done
-            if (FASTLED_RMT_BUILTIN_DRIVER) {
-                xSemaphoreGive(gTX_sem);
-            } else {
-                xSemaphoreGiveFromISR(gTX_sem, &HPTaskAwoken);
-                if (HPTaskAwoken == pdTRUE) portYIELD_FROM_ISR();
-            }
-        } else {
-            // -- Otherwise, if there are still controllers waiting, then
-            //    start the next one on this channel
-            if (gNext < gNumControllers) {
-                startNext(channel);
-                pController->tx_start();
-            }
-        }
-    }
+    static void IRAM_ATTR doneOnChannel(rmt_channel_t channel, void * arg);
     
     // -- Custom interrupt handler
     //    This interrupt handler handles two cases: a controller is
     //    done writing its data, or a controller needs to fill the
     //    next half of the RMT buffer with data.
-    static void IRAM_ATTR interruptHandler(void *arg)
-    {
-        // -- The basic structure of this code is borrowed from the
-        //    interrupt handler in esp-idf/components/driver/rmt.c
-        uint32_t intr_st = RMT.int_st.val;
-        uint8_t channel;
-
-        for (channel = 0; channel < FASTLED_RMT_MAX_CHANNELS; channel++) {
-            int tx_done_bit = channel * 3;
-            int tx_next_bit = channel + 24;
-
-            ESP32RMTController * pController = gOnChannel[channel];
-            if (pController != NULL) {
-
-                // -- More to send on this channel
-                if (intr_st & BIT(tx_next_bit)) {
-                    RMT.int_clr.val |= BIT(tx_next_bit);
-                    
-                    // -- Refill the half of the buffer that we just finished,
-                    //    allowing the other half to proceed.
-                    pController->fillNext();
-                } else {
-                    // -- Transmission is complete on this channel
-                    if (intr_st & BIT(tx_done_bit)) {
-                        RMT.int_clr.val |= BIT(tx_done_bit);
-                        doneOnChannel(rmt_channel_t(channel), 0);
-                    }
-                }
-            }
-        }
-    }
+    static void IRAM_ATTR interruptHandler(void *arg);
 
     // -- Fill RMT buffer
     //    Puts 32 bits of pixel data into the next 32 slots in the RMT memory
     //    Each data bit is represented by a 32-bit RMT item that specifies how
     //    long to hold the signal high, followed by how long to hold it low.
-    void fillNext()
-    {
-        if (mCur < mSize) {
-            // -- Get the zero and one values into local variables
-            uint32_t one_val = mOne.val;
-            uint32_t zero_val = mZero.val;
-
-            // -- Fill 32 slots in the RMT memory
-            register uint32_t pixeldata = mPixelData[mCur];
-
-            // -- Use locals for speed
-            volatile register uint32_t * pItem =  mRMT_mem_ptr;
-            
-            // Shift bits out, MSB first, setting RMTMEM.chan[n].data32[x] to the 
-            // rmt_item32_t value corresponding to the buffered bit value
-            for (register uint32_t j = 0; j < PULSES_PER_FILL; j++) {
-                uint32_t val = (pixeldata & 0x80000000L) ? one_val : zero_val;
-                *pItem++ = val;
-                // Replaces: RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = val;
-
-                pixeldata <<= 1;
-            }
-
-            // -- Flip to the other half, resetting the pointer if necessary
-            mWhichHalf++;
-            if (mWhichHalf == 2) {
-                pItem = & (RMTMEM.chan[mRMT_channel].data32[0].val);
-                mWhichHalf = 0;
-            }
-
-            // -- Store the new pointer back into the object
-            mRMT_mem_ptr = pItem;
-        } else {
-            // -- No more data; signal to the RMT we are done
-            for (uint32_t j = 0; j < PULSES_PER_FILL; j++) {
-                * mRMT_mem_ptr++ = 0;
-            }
-        }   
-    }
+    void IRAM_ATTR fillNext();
 
     // -- Init pulse buffer
     //    Set up the buffer that will hold all of the pulse items for this
     //    controller. 
     //    This function is only used when the built-in RMT driver is chosen
-    void initPulseBuffer(int size_in_bytes)
-    {
-        if (mBuffer == 0) {
-            // -- Each byte has 8 bits, each bit needs a 32-bit RMT item
-            int size = size_in_bytes * 8 * 4;
-
-            mBuffer = (rmt_item32_t *) calloc( mBufferSize, sizeof(rmt_item32_t));
-        }
-        mCurPulse = 0;
-    }
+    void initPulseBuffer(int size_in_bytes);
 
     // -- Convert a byte into RMT pulses
     //    This function is only used when the built-in RMT driver is chosen
-    void convertByte(uint32_t byteval)
-    {
-        // -- Write one byte's worth of RMT pulses to the big buffer
-        byteval <<= 24;
-        for (register uint32_t j = 0; j < 8; j++) {
-            mBuffer[mCurPulse] = (byteval & 0x80000000L) ? mOne : mZero;
-            byteval <<= 1;
-            mCurPulse++;
-        }
-    }
+    void convertByte(uint32_t byteval);
 };
 
 template <int DATA_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = RGB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 5>
@@ -514,19 +264,20 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
 private:
 
     // -- The actual controller object for ESP32
-    ESP32RMTController * mRMTController;
+    ESP32RMTController mRMTController;
 
     // -- This instantiation forces a check on the pin choice
     FastPin<DATA_PIN> mFastPin;
 
-    // -- Make sure we can't call show() too quickly
-    CMinWait<50>   mWait;
-
 public:
 
+    ClocklessController()
+        : mRMTController(DATA_PIN, T1, T2, T3)
+        {}
+
     void init()
     {
-        mRMTController = new ESP32RMTController(DATA_PIN, T1, T2, T3);
+        // mRMTController = new ESP32RMTController(DATA_PIN, T1, T2, T3);
     }
 
     virtual uint16_t getMaxRefreshRate() const { return 400; }
@@ -541,12 +292,11 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
     {
         // -- Make sure the buffer is allocated
         int size = pixels.size() * 3;
-        uint8_t * pData = mRMTController->getPixelData(size);
+        uint8_t * pData = mRMTController.getPixelData(size);
 
         // -- Read out the pixel data using the pixel controller methods that
         //    perform the scaling and adjustments 
         int count = 0;
-        uint8_t byteval;
         while (pixels.has(1)) {
             *pData++ = pixels.loadAndScale0();
             *pData++ = pixels.loadAndScale1();
@@ -563,65 +313,13 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
     //    This is the main entry point for the controller.
     virtual void showPixels(PixelController<RGB_ORDER> & pixels)
     {
-        if (gNumStarted == 0) {
-            // -- First controller: make sure everything is set up
-            ESP32RMTController::init();
-            xSemaphoreTake(gTX_sem, portMAX_DELAY);
-
-#if FASTLED_ESP32_FLASH_LOCK == 1
-            // -- Make sure no flash operations happen right now
-            spi_flash_op_lock();
-#endif
-        }
-
         if (FASTLED_RMT_BUILTIN_DRIVER) {
             convertAllPixelData(pixels);
         } else {
             loadPixelData(pixels);
         }
 
-        // -- Keep track of the number of strips we've seen
-        gNumStarted++;
-
-        // -- The last call to showPixels is the one responsible for doing
-        //    all of the actual worl
-        if (gNumStarted == gNumControllers) {
-            gNext = 0;
-
-            // -- First, fill all the available channels
-            int channel = 0;
-            while (channel < FASTLED_RMT_MAX_CHANNELS && gNext < gNumControllers) {
-                ESP32RMTController::startNext(channel);
-                channel++;
-            }
-
-            // -- Make sure it's been at least 50ms since last show
-            mWait.wait();
-
-            // -- Start them all
-            for (int i = 0; i < channel; i++) {
-                ESP32RMTController * pController = gControllers[i];
-                pController->tx_start();
-            }
-
-            // -- Wait here while the rest of the data is sent. The interrupt handler
-            //    will keep refilling the RMT buffers until it is all sent; then it
-            //    gives the semaphore back.
-            xSemaphoreTake(gTX_sem, portMAX_DELAY);
-            xSemaphoreGive(gTX_sem);
-
-            mWait.mark();
-
-            // -- Reset the counters
-            gNumStarted = 0;
-            gNumDone = 0;
-            gNext = 0;
-
-#if FASTLED_ESP32_FLASH_LOCK == 1
-            // -- Release the lock on flash operations
-            spi_flash_op_unlock();
-#endif
-        }
+        mRMTController.showPixels();
     }
 
     // -- Convert all pixels to RMT pulses
@@ -631,7 +329,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
     void convertAllPixelData(PixelController<RGB_ORDER> & pixels)
     {
         // -- Make sure the data buffer is allocated
-        mRMTController->initPulseBuffer(pixels.size() * 3);
+        mRMTController.initPulseBuffer(pixels.size() * 3);
 
         // -- Cycle through the R,G, and B values in the right order,
         //    storing the pulses in the big buffer
@@ -639,11 +337,11 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         uint32_t byteval;
         while (pixels.has(1)) {
             byteval = pixels.loadAndScale0();
-            mRMTController->convertByte(byteval);
+            mRMTController.convertByte(byteval);
             byteval = pixels.loadAndScale1();
-            mRMTController->convertByte(byteval);
+            mRMTController.convertByte(byteval);
             byteval = pixels.loadAndScale2();
-            mRMTController->convertByte(byteval);
+            mRMTController.convertByte(byteval);
             pixels.advanceData();
             pixels.stepDithering();
         }

From 3b7a63feebf69c32b216ec41b12c13f21e9962ba Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Sat, 13 Jun 2020 21:57:00 -0400
Subject: [PATCH 166/204] Cleanup

---
 platforms/esp/32/clockless_esp32.h.orig | 786 ------------------------
 1 file changed, 786 deletions(-)
 delete mode 100644 platforms/esp/32/clockless_esp32.h.orig

diff --git a/platforms/esp/32/clockless_esp32.h.orig b/platforms/esp/32/clockless_esp32.h.orig
deleted file mode 100644
index e0cd00dae9..0000000000
--- a/platforms/esp/32/clockless_esp32.h.orig
+++ /dev/null
@@ -1,786 +0,0 @@
-/*
- * Integration into FastLED ClocklessController 2017 Thomas Basler
- *
- * Modifications Copyright (c) 2017 Martin F. Falatic
- *
- * Modifications Copyright (c) 2018 Samuel Z. Guyer
- *
- * ESP32 support is provided using the RMT peripheral device -- a unit
- * on the chip designed specifically for generating (and receiving)
- * precisely-timed digital signals. Nominally for use in infrared
- * remote controls, we use it to generate the signals for clockless
- * LED strips. The main advantage of using the RMT device is that,
- * once programmed, it generates the signal asynchronously, allowing
- * the CPU to continue executing other code. It is also not vulnerable
- * to interrupts or other timing problems that could disrupt the signal.
- *
- * The implementation strategy is borrowed from previous work and from
- * the RMT support built into the ESP32 IDF. The RMT device has 8
- * channels, which can be programmed independently to send sequences
- * of high/low bits. Memory for each channel is limited, however, so
- * in order to send a long sequence of bits, we need to continuously
- * refill the buffer until all the data is sent. To do this, we fill
- * half the buffer and then set an interrupt to go off when that half
- * is sent. Then we refill that half while the second half is being
- * sent. This strategy effectively overlaps computation (by the CPU)
- * and communication (by the RMT).
- *
- * Since the RMT device only has 8 channels, we need a strategy to
- * allow more than 8 LED controllers. Our driver assigns controllers
- * to channels on the fly, queuing up controllers as necessary until a
- * channel is free. The main showPixels routine just fires off the
- * first 8 controllers; the interrupt handler starts new controllers
- * asynchronously as previous ones finish. So, for example, it can
- * send the data for 8 controllers simultaneously, but 16 controllers
- * would take approximately twice as much time.
- *
- * There is a #define that allows a program to control the total
- * number of channels that the driver is allowed to use. It defaults
- * to 8 -- use all the channels. Setting it to 1, for example, results
- * in fully serial output:
- *
- *     #define FASTLED_RMT_MAX_CHANNELS 1
- *
- * OTHER RMT APPLICATIONS
- *
- * The default FastLED driver takes over control of the RMT interrupt
- * handler, making it hard to use the RMT device for other
- * (non-FastLED) purposes. You can change it's behavior to use the ESP
- * core driver instead, allowing other RMT applications to
- * co-exist. To switch to this mode, add the following directive
- * before you include FastLED.h:
- *
- *      #define FASTLED_RMT_BUILTIN_DRIVER
- *
- * There may be a performance penalty for using this mode. We need to
- * compute the RMT signal for the entire LED strip ahead of time,
- * rather than overlapping it with communication. We also need a large
- * buffer to hold the signal specification. Each bit of pixel data is
- * represented by a 32-bit pulse specification, so it is a 32X blow-up
- * in memory use.
- *
- *
- * Based on public domain code created 19 Nov 2016 by Chris Osborn <fozztexx@fozztexx.com>
- * http://insentricity.com *
- *
- */
-/*
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#pragma once
-
-FASTLED_NAMESPACE_BEGIN
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "esp32-hal.h"
-#include "esp_intr.h"
-#include "driver/gpio.h"
-#include "driver/rmt.h"
-#include "driver/periph_ctrl.h"
-#include "freertos/semphr.h"
-#include "soc/rmt_struct.h"
-
-#include "esp_log.h"
-
-#ifdef __cplusplus
-}
-#endif
-
-__attribute__ ((always_inline)) inline static uint32_t __clock_cycles() {
-  uint32_t cyc;
-  __asm__ __volatile__ ("rsr %0,ccount":"=a" (cyc));
-  return cyc;
-}
-
-#define FASTLED_HAS_CLOCKLESS 1
-
-// -- Configuration constants
-#define DIVIDER             2 /* 4, 8 still seem to work, but timings become marginal */
-#define MAX_PULSES         32 /* A channel has a 64 "pulse" buffer - we use half per pass */
-
-// -- Convert ESP32 cycles back into nanoseconds
-#define ESPCLKS_TO_NS(_CLKS) (((long)(_CLKS) * 1000L) / F_CPU_MHZ)
-
-// -- Convert nanoseconds into RMT cycles
-#define F_CPU_RMT       (  80000000L)
-#define NS_PER_SEC      (1000000000L)
-#define CYCLES_PER_SEC  (F_CPU_RMT/DIVIDER)
-#define NS_PER_CYCLE    ( NS_PER_SEC / CYCLES_PER_SEC )
-#define NS_TO_CYCLES(n) ( (n) / NS_PER_CYCLE )
-
-// -- Convert ESP32 cycles to RMT cycles
-#define TO_RMT_CYCLES(_CLKS) NS_TO_CYCLES(ESPCLKS_TO_NS(_CLKS))    
-
-// -- Number of cycles to signal the strip to latch
-#define RMT_RESET_DURATION NS_TO_CYCLES(50000)
-
-// -- Core or custom driver
-#ifndef FASTLED_RMT_BUILTIN_DRIVER
-#define FASTLED_RMT_BUILTIN_DRIVER false
-#endif
-
-// -- Max number of controllers we can support
-#ifndef FASTLED_RMT_MAX_CONTROLLERS
-#define FASTLED_RMT_MAX_CONTROLLERS 32
-#endif
-
-// -- Number of RMT channels to use (up to 8)
-//    Redefine this value to 1 to force serial output
-#ifndef FASTLED_RMT_MAX_CHANNELS
-#define FASTLED_RMT_MAX_CHANNELS 8
-#endif
-
-// -- Array of all controllers
-static CLEDController * gControllers[FASTLED_RMT_MAX_CONTROLLERS];
-
-// -- Current set of active controllers, indexed by the RMT
-//    channel assigned to them.
-static CLEDController * gOnChannel[FASTLED_RMT_MAX_CHANNELS];
-
-static int gNumControllers = 0;
-static int gNumStarted = 0;
-static int gNumDone = 0;
-static int gNext = 0;
-
-static intr_handle_t gRMT_intr_handle = NULL;
-
-// -- Global semaphore for the whole show process
-//    Semaphore is not given until all data has been sent
-static xSemaphoreHandle gTX_sem = NULL;
-
-static bool gInitialized = false;
-
-template <int DATA_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = RGB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 5>
-class ClocklessController : public CPixelLEDController<RGB_ORDER>
-{
-    // -- RMT has 8 channels, numbered 0 to 7
-    rmt_channel_t  mRMT_channel;
-
-    // -- Store the GPIO pin
-    gpio_num_t     mPin;
-<<<<<<< HEAD
-
-    // -- This instantiation forces a check on the pin choice
-    FastPin<DATA_PIN> mFastPin;
-
-    // -- Timing values for zero and one bits, derived from T1, T2, and T3
-    rmt_item32_t   mZero;
-    rmt_item32_t   mOne;
-
-=======
-
-    // -- Timing values for zero and one bits, derived from T1, T2, and T3
-    rmt_item32_t   mZero;
-    rmt_item32_t   mOne;
-
->>>>>>> upstream/master
-    // -- State information for keeping track of where we are in the pixel data
-    PixelController<RGB_ORDER> * mPixels = NULL;
-    void *         mPixelSpace = NULL;
-    uint8_t        mRGB_channel;
-    uint16_t       mCurPulse;
-
-    // -- Buffer to hold all of the pulses. For the version that uses
-    //    the RMT driver built into the ESP core.
-    rmt_item32_t * mBuffer;
-    uint16_t       mBufferSize;
-
-public:
-
-    virtual void init()
-    {
-        // -- Precompute rmt items corresponding to a zero bit and a one bit
-        //    according to the timing values given in the template instantiation
-        // T1H
-        mOne.level0 = 1;
-        mOne.duration0 = TO_RMT_CYCLES(T1+T2);
-        // T1L
-        mOne.level1 = 0;
-        mOne.duration1 = TO_RMT_CYCLES(T3);
-
-        // T0H
-        mZero.level0 = 1;
-        mZero.duration0 = TO_RMT_CYCLES(T1);
-        // T0L
-        mZero.level1 = 0;
-        mZero.duration1 = TO_RMT_CYCLES(T2 + T3);
-
-<<<<<<< HEAD
-        gControllers[gNumControllers] = this;
-        gNumControllers++;
-
-        mPin = gpio_num_t(DATA_PIN);
-=======
-	gControllers[gNumControllers] = this;
-        gNumControllers++;
-
-	mPin = gpio_num_t(DATA_PIN);
->>>>>>> upstream/master
-    }
-
-    virtual uint16_t getMaxRefreshRate() const { return 400; }
-
-protected:
-
-    void initRMT()
-    {
-<<<<<<< HEAD
-        // -- Only need to do this once
-        if (gInitialized) return;
-
-        for (int i = 0; i < FASTLED_RMT_MAX_CHANNELS; i++) {
-            gOnChannel[i] = NULL;
-
-            // -- RMT configuration for transmission
-            rmt_config_t rmt_tx;
-            rmt_tx.channel = rmt_channel_t(i);
-            rmt_tx.rmt_mode = RMT_MODE_TX;
-            rmt_tx.gpio_num = mPin;  // The particular pin will be assigned later
-            rmt_tx.mem_block_num = 1;
-            rmt_tx.clk_div = DIVIDER;
-            rmt_tx.tx_config.loop_en = false;
-            rmt_tx.tx_config.carrier_level = RMT_CARRIER_LEVEL_LOW;
-            rmt_tx.tx_config.carrier_en = false;
-            rmt_tx.tx_config.idle_level = RMT_IDLE_LEVEL_LOW;
-            rmt_tx.tx_config.idle_output_en = true;
-                
-            // -- Apply the configuration
-            rmt_config(&rmt_tx);
-
-            if (FASTLED_RMT_BUILTIN_DRIVER) {
-                rmt_driver_install(rmt_channel_t(i), 0, 0);
-            } else {
-                // -- Set up the RMT to send 1/2 of the pulse buffer and then
-                //    generate an interrupt. When we get this interrupt we
-                //    fill the other half in preparation (kind of like double-buffering)
-                rmt_set_tx_thr_intr_en(rmt_channel_t(i), true, MAX_PULSES);
-            }
-        }
-
-        // -- Create a semaphore to block execution until all the controllers are done
-        if (gTX_sem == NULL) {
-            gTX_sem = xSemaphoreCreateBinary();
-            xSemaphoreGive(gTX_sem);
-        }
-                
-        if ( ! FASTLED_RMT_BUILTIN_DRIVER) {
-            // -- Allocate the interrupt if we have not done so yet. This
-            //    interrupt handler must work for all different kinds of
-            //    strips, so it delegates to the refill function for each
-            //    specific instantiation of ClocklessController.
-            if (gRMT_intr_handle == NULL)
-                esp_intr_alloc(ETS_RMT_INTR_SOURCE, 0, interruptHandler, 0, &gRMT_intr_handle);
-        }
-
-        gInitialized = true;
-    }
-
-    virtual void showPixels(PixelController<RGB_ORDER> & pixels)
-    {
-        if (gNumStarted == 0) {
-            // -- First controller: make sure everything is set up
-            initRMT();
-            xSemaphoreTake(gTX_sem, portMAX_DELAY);
-        }
-
-        // -- Initialize the local state, save a pointer to the pixel
-        //    data. We need to make a copy because pixels is a local
-        //    variable in the calling function, and this data structure
-        //    needs to outlive this call to showPixels.
-
-        if (mPixels != NULL) delete mPixels;
-        mPixels = new PixelController<RGB_ORDER>(pixels);
-        
-        // -- Keep track of the number of strips we've seen
-        gNumStarted++;
-
-        // -- The last call to showPixels is the one responsible for doing
-        //    all of the actual worl
-        if (gNumStarted == gNumControllers) {
-            gNext = 0;
-
-            // -- First, fill all the available channels
-            int channel = 0;
-            while (channel < FASTLED_RMT_MAX_CHANNELS && gNext < gNumControllers) {
-                startNext(channel);
-                channel++;
-            }
-
-            // -- Wait here while the rest of the data is sent. The interrupt handler
-            //    will keep refilling the RMT buffers until it is all sent; then it
-            //    gives the semaphore back.
-            xSemaphoreTake(gTX_sem, portMAX_DELAY);
-            xSemaphoreGive(gTX_sem);
-
-            // -- Reset the counters
-            gNumStarted = 0;
-            gNumDone = 0;
-            gNext = 0;
-        }
-    }
-
-    // -- Start up the next controller
-    //    This method is static so that it can dispatch to the appropriate
-    //    startOnChannel method of the given controller.
-    static void startNext(int channel)
-    {
-        if (gNext < gNumControllers) {
-            ClocklessController * pController = static_cast<ClocklessController*>(gControllers[gNext]);
-            pController->startOnChannel(channel);
-            gNext++;
-        }
-    }
-
-    virtual void startOnChannel(int channel)
-    {
-        // -- Assign this channel and configure the RMT
-        mRMT_channel = rmt_channel_t(channel);
-
-        // -- Store a reference to this controller, so we can get it
-        //    inside the interrupt handler
-        gOnChannel[channel] = this;
-
-        // -- Assign the pin to this channel
-        rmt_set_pin(mRMT_channel, RMT_MODE_TX, mPin);
-
-        if (FASTLED_RMT_BUILTIN_DRIVER) {
-            // -- Use the built-in RMT driver to send all the data in one shot
-            rmt_register_tx_end_callback(doneOnChannel, 0);
-            writeAllRMTItems();
-        } else {
-            // -- Use our custom driver to send the data incrementally
-
-            // -- Turn on the interrupts
-            rmt_set_tx_intr_en(mRMT_channel, true);
-        
-            // -- Initialize the counters that keep track of where we are in
-            //    the pixel data.
-            mCurPulse = 0;
-            mRGB_channel = 0;
-
-            // -- Fill both halves of the buffer
-            fillHalfRMTBuffer();
-            fillHalfRMTBuffer();
-
-            // -- Turn on the interrupts
-            rmt_set_tx_intr_en(mRMT_channel, true);
-            
-            // -- Start the RMT TX operation
-            rmt_tx_start(mRMT_channel, true);
-        }
-    }
-
-    static void doneOnChannel(rmt_channel_t channel, void * arg)
-    {
-        ClocklessController * controller = static_cast<ClocklessController*>(gOnChannel[channel]);
-        portBASE_TYPE HPTaskAwoken = 0;
-
-        // -- Turn off output on the pin
-        gpio_matrix_out(controller->mPin, 0x100, 0, 0);
-
-        gOnChannel[channel] = NULL;
-        gNumDone++;
-
-        if (gNumDone == gNumControllers) {
-            // -- If this is the last controller, signal that we are all done
-            xSemaphoreGiveFromISR(gTX_sem, &HPTaskAwoken);
-            if(HPTaskAwoken == pdTRUE) portYIELD_FROM_ISR();
-        } else {
-            // -- Otherwise, if there are still controllers waiting, then
-            //    start the next one on this channel
-            if (gNext < gNumControllers)
-                startNext(channel);
-        }
-=======
-	// -- Only need to do this once
-	if (gInitialized) return;
-
-	for (int i = 0; i < FASTLED_RMT_MAX_CHANNELS; i++) {
-	    gOnChannel[i] = NULL;
-
-	    // -- RMT configuration for transmission
-	    rmt_config_t rmt_tx;
-	    rmt_tx.channel = rmt_channel_t(i);
-	    rmt_tx.rmt_mode = RMT_MODE_TX;
-	    rmt_tx.gpio_num = mPin;  // The particular pin will be assigned later
-	    rmt_tx.mem_block_num = 1;
-	    rmt_tx.clk_div = DIVIDER;
-	    rmt_tx.tx_config.loop_en = false;
-	    rmt_tx.tx_config.carrier_level = RMT_CARRIER_LEVEL_LOW;
-	    rmt_tx.tx_config.carrier_en = false;
-	    rmt_tx.tx_config.idle_level = RMT_IDLE_LEVEL_LOW;
-	    rmt_tx.tx_config.idle_output_en = true;
-		
-	    // -- Apply the configuration
-	    rmt_config(&rmt_tx);
-
-	    if (FASTLED_RMT_BUILTIN_DRIVER) {
-		rmt_driver_install(rmt_channel_t(i), 0, 0);
-	    } else {
-		// -- Set up the RMT to send 1/2 of the pulse buffer and then
-		//    generate an interrupt. When we get this interrupt we
-		//    fill the other half in preparation (kind of like double-buffering)
-		rmt_set_tx_thr_intr_en(rmt_channel_t(i), true, MAX_PULSES);
-	    }
-	}
-
-	// -- Create a semaphore to block execution until all the controllers are done
-	if (gTX_sem == NULL) {
-	    gTX_sem = xSemaphoreCreateBinary();
-	    xSemaphoreGive(gTX_sem);
-	}
-		
-	if ( ! FASTLED_RMT_BUILTIN_DRIVER) {
-	    // -- Allocate the interrupt if we have not done so yet. This
-	    //    interrupt handler must work for all different kinds of
-	    //    strips, so it delegates to the refill function for each
-	    //    specific instantiation of ClocklessController.
-	    if (gRMT_intr_handle == NULL)
-		esp_intr_alloc(ETS_RMT_INTR_SOURCE, 0, interruptHandler, 0, &gRMT_intr_handle);
-	}
-
-	gInitialized = true;
-    }
-
-    virtual void showPixels(PixelController<RGB_ORDER> & pixels)
-    {
-	if (gNumStarted == 0) {
-	    // -- First controller: make sure everything is set up
-	    initRMT();
-	    xSemaphoreTake(gTX_sem, portMAX_DELAY);
-	}
-
-	// -- Initialize the local state, save a pointer to the pixel
-	//    data. We need to make a copy because pixels is a local
-	//    variable in the calling function, and this data structure
-	//    needs to outlive this call to showPixels.
-
-	if (mPixels != NULL) delete mPixels;
-	mPixels = new PixelController<RGB_ORDER>(pixels);
-	
-	// -- Keep track of the number of strips we've seen
-	gNumStarted++;
-
-	// -- The last call to showPixels is the one responsible for doing
-	//    all of the actual worl
-	if (gNumStarted == gNumControllers) {
-	    gNext = 0;
-
-	    // -- First, fill all the available channels
-	    int channel = 0;
-	    while (channel < FASTLED_RMT_MAX_CHANNELS && gNext < gNumControllers) {
-		startNext(channel);
-		channel++;
-	    }
-
-	    // -- Wait here while the rest of the data is sent. The interrupt handler
-	    //    will keep refilling the RMT buffers until it is all sent; then it
-	    //    gives the semaphore back.
-	    xSemaphoreTake(gTX_sem, portMAX_DELAY);
-	    xSemaphoreGive(gTX_sem);
-
-	    // -- Reset the counters
-	    gNumStarted = 0;
-	    gNumDone = 0;
-	    gNext = 0;
-	}
-    }
-
-    // -- Start up the next controller
-    //    This method is static so that it can dispatch to the appropriate
-    //    startOnChannel method of the given controller.
-    static void startNext(int channel)
-    {
-	if (gNext < gNumControllers) {
-	    ClocklessController * pController = static_cast<ClocklessController*>(gControllers[gNext]);
-	    pController->startOnChannel(channel);
-	    gNext++;
-	}
-    }
-
-    virtual void startOnChannel(int channel)
-    {
-	// -- Assign this channel and configure the RMT
-	mRMT_channel = rmt_channel_t(channel);
-
-	// -- Store a reference to this controller, so we can get it
-	//    inside the interrupt handler
-	gOnChannel[channel] = this;
-
-	// -- Assign the pin to this channel
-	rmt_set_pin(mRMT_channel, RMT_MODE_TX, mPin);
-
-	if (FASTLED_RMT_BUILTIN_DRIVER) {
-	    // -- Use the built-in RMT driver to send all the data in one shot
-	    rmt_register_tx_end_callback(doneOnChannel, 0);
-	    writeAllRMTItems();
-	} else {
-	    // -- Use our custom driver to send the data incrementally
-
-	    // -- Turn on the interrupts
-	    rmt_set_tx_intr_en(mRMT_channel, true);
-	
-	    // -- Initialize the counters that keep track of where we are in
-	    //    the pixel data.
-	    mCurPulse = 0;
-	    mRGB_channel = 0;
-
-	    // -- Fill both halves of the buffer
-	    fillHalfRMTBuffer();
-	    fillHalfRMTBuffer();
-
-	    // -- Turn on the interrupts
-	    rmt_set_tx_intr_en(mRMT_channel, true);
-	    
-	    // -- Start the RMT TX operation
-	    rmt_tx_start(mRMT_channel, true);
-	}
-    }
-
-    static void doneOnChannel(rmt_channel_t channel, void * arg)
-    {
-	ClocklessController * controller = static_cast<ClocklessController*>(gOnChannel[channel]);
-        portBASE_TYPE HPTaskAwoken = 0;
-
-	// -- Turn off output on the pin
-	gpio_matrix_out(controller->mPin, 0x100, 0, 0);
-
-	gOnChannel[channel] = NULL;
-	gNumDone++;
-
-	if (gNumDone == gNumControllers) {
-	    // -- If this is the last controller, signal that we are all done
-	    xSemaphoreGiveFromISR(gTX_sem, &HPTaskAwoken);
-	    if(HPTaskAwoken == pdTRUE) portYIELD_FROM_ISR();
-	} else {
-	    // -- Otherwise, if there are still controllers waiting, then
-	    //    start the next one on this channel
-	    if (gNext < gNumControllers)
-		startNext(channel);
-	}
->>>>>>> upstream/master
-    }
-    
-    static IRAM_ATTR void interruptHandler(void *arg)
-    {
-        // -- The basic structure of this code is borrowed from the
-        //    interrupt handler in esp-idf/components/driver/rmt.c
-        uint32_t intr_st = RMT.int_st.val;
-        uint8_t channel;
-
-        for (channel = 0; channel < FASTLED_RMT_MAX_CHANNELS; channel++) {
-            int tx_done_bit = channel * 3;
-            int tx_next_bit = channel + 24;
-
-            if (gOnChannel[channel] != NULL) {
-
-<<<<<<< HEAD
-                ClocklessController * controller = static_cast<ClocklessController*>(gOnChannel[channel]);
-
-                // -- More to send on this channel
-                if (intr_st & BIT(tx_next_bit)) {
-                    RMT.int_clr.val |= BIT(tx_next_bit);
-
-                    // -- Refill the half of the buffer that we just finished,
-                    //    allowing the other half to proceed.
-                    controller->fillHalfRMTBuffer();
-                }
-
-                // -- Transmission is complete on this channel
-                if (intr_st & BIT(tx_done_bit)) {
-                    RMT.int_clr.val |= BIT(tx_done_bit);
-                    doneOnChannel(rmt_channel_t(channel), 0);
-=======
-		ClocklessController * controller = static_cast<ClocklessController*>(gOnChannel[channel]);
-
-		// -- More to send on this channel
-                if (intr_st & BIT(tx_next_bit)) {
-		    RMT.int_clr.val |= BIT(tx_next_bit);
-
-                    // -- Refill the half of the buffer that we just finished,
-                    //    allowing the other half to proceed.
-		    controller->fillHalfRMTBuffer();
-                }
-
-		// -- Transmission is complete on this channel
-                if (intr_st & BIT(tx_done_bit)) {
-                    RMT.int_clr.val |= BIT(tx_done_bit);
-		    doneOnChannel(rmt_channel_t(channel), 0);
->>>>>>> upstream/master
-                }
-            }
-        }
-    }
-
-    virtual void fillHalfRMTBuffer()
-    {
-        // -- Fill half of the RMT pulse buffer
-
-        //    The buffer holds 64 total pulse items, so this loop converts
-        //    as many pixels as can fit in half of the buffer (MAX_PULSES =
-        //    32 items). In our case, each pixel consists of three bytes,
-        //    each bit turns into one pulse item -- 24 items per pixel. So,
-        //    each half of the buffer can hold 1 and 1/3 of a pixel.
-
-        //    The member variable mCurPulse keeps track of which of the 64
-        //    items we are writing. During the first call to this method it
-        //    fills 0-31; in the second call it fills 32-63, and then wraps
-        //    back around to zero.
-
-        //    When we run out of pixel data, just fill the remaining items
-        //    with zero pulses.
-
-        uint16_t pulse_count = 0; // Ranges from 0-31 (half a buffer)
-        uint32_t byteval = 0;
-        uint32_t one_val = mOne.val;
-        uint32_t zero_val = mZero.val;
-        bool done_strip = false;
-
-        while (pulse_count < MAX_PULSES) {
-            if (! mPixels->has(1)) {
-<<<<<<< HEAD
-                if (mCurPulse > 0) {
-                    // -- Extend the last pulse to force the strip to latch. Honestly, I'm not
-                    //    sure if this is really necessary.
-                    // RMTMEM.chan[mRMT_channel].data32[mCurPulse-1].duration1 = RMT_RESET_DURATION;
-                }
-=======
->>>>>>> upstream/master
-                done_strip = true;
-                break;
-            }
-
-            // -- Cycle through the R,G, and B values in the right order
-            switch (mRGB_channel) {
-            case 0:
-                byteval = mPixels->loadAndScale0();
-                mRGB_channel = 1;
-                break;
-            case 1:
-                byteval = mPixels->loadAndScale1();
-                mRGB_channel = 2;
-                break;
-            case 2:
-                byteval = mPixels->loadAndScale2();
-                mPixels->advanceData();
-                mPixels->stepDithering();
-                mRGB_channel = 0;
-                break;
-            default:
-                break;
-            }
-
-            byteval <<= 24;
-            // Shift bits out, MSB first, setting RMTMEM.chan[n].data32[x] to the 
-            // rmt_item32_t value corresponding to the buffered bit value
-            for (register uint32_t j = 0; j < 8; j++) {
-                uint32_t val = (byteval & 0x80000000L) ? one_val : zero_val;
-                RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = val;
-                byteval <<= 1;
-                mCurPulse++;
-                pulse_count++;
-            }
-<<<<<<< HEAD
-=======
-
-	    if (done_strip)
-		RMTMEM.chan[mRMT_channel].data32[mCurPulse-1].duration1 = RMT_RESET_DURATION;
->>>>>>> upstream/master
-        }
-        
-        if (done_strip) {
-            // -- And fill the remaining items with zero pulses. The zero values triggers
-            //    the tx_done interrupt.
-            while (pulse_count < MAX_PULSES) {
-                RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = 0;
-                mCurPulse++;
-                pulse_count++;
-            }
-        }
-
-        // -- When we have filled the back half the buffer, reset the position to the first half
-        if (mCurPulse >= MAX_PULSES*2)
-            mCurPulse = 0;
-    }
-
-    virtual void writeAllRMTItems()
-    {
-        // -- Compute the pulse values for the whole strip at once.
-        //    Requires a large buffer
-<<<<<<< HEAD
-        mBufferSize = mPixels->size() * 3 * 8;
-=======
-	mBufferSize = mPixels->size() * 3 * 8;
->>>>>>> upstream/master
-
-        // TODO: need a specific number here
-        if (mBuffer == NULL) {
-            mBuffer = (rmt_item32_t *) calloc( mBufferSize, sizeof(rmt_item32_t));
-        }
-
-        mCurPulse = 0;
-        mRGB_channel = 0;
-        uint32_t byteval = 0;
-        while (mPixels->has(1)) {
-            // -- Cycle through the R,G, and B values in the right order
-            switch (mRGB_channel) {
-            case 0:
-                byteval = mPixels->loadAndScale0();
-                mRGB_channel = 1;
-                break;
-            case 1:
-                byteval = mPixels->loadAndScale1();
-                mRGB_channel = 2;
-                break;
-            case 2:
-                byteval = mPixels->loadAndScale2();
-                mPixels->advanceData();
-                mPixels->stepDithering();
-                mRGB_channel = 0;
-                break;
-            default:
-                break;
-            }
-
-            byteval <<= 24;
-            // Shift bits out, MSB first, setting RMTMEM.chan[n].data32[x] to the 
-            // rmt_item32_t value corresponding to the buffered bit value
-            for (register uint32_t j = 0; j < 8; j++) {
-                mBuffer[mCurPulse] = (byteval & 0x80000000L) ? mOne : mZero;
-                byteval <<= 1;
-                mCurPulse++;
-            }
-        }
-
-        mBuffer[mCurPulse-1].duration1 = RMT_RESET_DURATION;
-        assert(mCurPulse == mBufferSize);
-
-<<<<<<< HEAD
-        rmt_write_items(mRMT_channel, mBuffer, mBufferSize, false);
-=======
-	rmt_write_items(mRMT_channel, mBuffer, mBufferSize, false);
->>>>>>> upstream/master
-    }
-};
-
-FASTLED_NAMESPACE_END

From b70d8689c0451a742689899ad103bc0e7851d857 Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Sat, 13 Jun 2020 22:11:39 -0400
Subject: [PATCH 167/204] Cleanup and commenting

---
 platforms/esp/32/clockless_rmt_esp32.cpp | 10 +++++++++-
 platforms/esp/32/clockless_rmt_esp32.h   | 23 +++++++++++++++++++----
 2 files changed, 28 insertions(+), 5 deletions(-)

diff --git a/platforms/esp/32/clockless_rmt_esp32.cpp b/platforms/esp/32/clockless_rmt_esp32.cpp
index b3f106da5e..db9c334126 100644
--- a/platforms/esp/32/clockless_rmt_esp32.cpp
+++ b/platforms/esp/32/clockless_rmt_esp32.cpp
@@ -8,6 +8,8 @@
 class ESP32RMTController;
 
 // -- Array of all controllers
+//    This array is filled at the time controllers are registered 
+//    (Usually when the sketch calls addLeds)
 static ESP32RMTController * gControllers[FASTLED_RMT_MAX_CONTROLLERS];
 
 // -- Current set of active controllers, indexed by the RMT
@@ -28,7 +30,13 @@ static xSemaphoreHandle gTX_sem = NULL;
 static bool gInitialized = false;
 
 ESP32RMTController::ESP32RMTController(int DATA_PIN, int T1, int T2, int T3)
-    : mPixelData(0), mSize(0), mCur(0), mWhichHalf(0)
+    : mPixelData(0), 
+      mSize(0), 
+      mCur(0), 
+      mWhichHalf(0),
+      mBuffer(0),
+      mBufferSize(0),
+      mCurPulse(0)
 {
     // -- Precompute rmt items corresponding to a zero bit and a one bit
     //    according to the timing values given in the template instantiation
diff --git a/platforms/esp/32/clockless_rmt_esp32.h b/platforms/esp/32/clockless_rmt_esp32.h
index de70a00ae5..2f02ac8b9e 100644
--- a/platforms/esp/32/clockless_rmt_esp32.h
+++ b/platforms/esp/32/clockless_rmt_esp32.h
@@ -1,6 +1,6 @@
 /*
  * Integration into FastLED ClocklessController
- * Copyright (c) 2018 Samuel Z. Guyer
+ * Copyright (c) 2018,2019,2020 Samuel Z. Guyer
  * Copyright (c) 2017 Thomas Basler
  * Copyright (c) 2017 Martin F. Falatic
  *
@@ -66,6 +66,20 @@
  *
  * #define FASTLED_ESP32_FLASH_LOCK 1
  *
+ * NEW (June 2020): The RMT controller has been split into two
+ *      classes: ClocklessController, which is an instantiation of the
+ *      FastLED CPixelLEDController template, and ESP32RMTController,
+ *      which just handles driving the RMT peripheral. One benefit of
+ *      this design is that ESP32RMTContoller is not a template, so
+ *      its methods can be marked with the IRAM_ATTR and end up in
+ *      IRAM memory. Another benefit is that all of the color channel
+ *      processing is done up-front, in the templated class, so we
+ *      can fill the RMT buffers more quickly.
+ *
+ *      IN THEORY, this design would also allow FastLED.show() to 
+ *      send the data while the program continues to prepare the next
+ *      frame of data.
+ *
  * Based on public domain code created 19 Nov 2016 by Chris Osborn <fozztexx@fozztexx.com>
  * http://insentricity.com *
  *
@@ -124,12 +138,13 @@ __attribute__ ((always_inline)) inline static uint32_t __clock_cycles() {
 #define FASTLED_HAS_CLOCKLESS 1
 #define NUM_COLOR_CHANNELS 3
 
+// NOT CURRENTLY IMPLEMENTED:
 // -- Set to true to print debugging information about timing
 //    Useful for finding out if timing is being messed up by other things
 //    on the processor (WiFi, for example)
-#ifndef FASTLED_RMT_SHOW_TIMER
-#define FASTLED_RMT_SHOW_TIMER false
-#endif
+//#ifndef FASTLED_RMT_SHOW_TIMER
+//#define FASTLED_RMT_SHOW_TIMER false
+//#endif
 
 // -- Configuration constants
 #define DIVIDER             2 /* 4, 8 still seem to work, but timings become marginal */

From cb435ef80603d8b59dff5fc589071abcffaaaedf Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Sat, 13 Jun 2020 22:15:40 -0400
Subject: [PATCH 168/204] Restructure for 1.5.x Arduino libraries: all of the
 code goes under the src directory

---
 FastLED.cpp => src/FastLED.cpp                                    | 0
 FastLED.h => src/FastLED.h                                        | 0
 bitswap.cpp => src/bitswap.cpp                                    | 0
 bitswap.h => src/bitswap.h                                        | 0
 chipsets.h => src/chipsets.h                                      | 0
 color.h => src/color.h                                            | 0
 colorpalettes.cpp => src/colorpalettes.cpp                        | 0
 colorpalettes.h => src/colorpalettes.h                            | 0
 colorutils.cpp => src/colorutils.cpp                              | 0
 colorutils.h => src/colorutils.h                                  | 0
 controller.h => src/controller.h                                  | 0
 cpp_compat.h => src/cpp_compat.h                                  | 0
 dmx.h => src/dmx.h                                                | 0
 fastled_config.h => src/fastled_config.h                          | 0
 fastled_delay.h => src/fastled_delay.h                            | 0
 fastled_progmem.h => src/fastled_progmem.h                        | 0
 fastpin.h => src/fastpin.h                                        | 0
 fastspi.h => src/fastspi.h                                        | 0
 fastspi_bitbang.h => src/fastspi_bitbang.h                        | 0
 fastspi_dma.h => src/fastspi_dma.h                                | 0
 fastspi_nop.h => src/fastspi_nop.h                                | 0
 fastspi_ref.h => src/fastspi_ref.h                                | 0
 fastspi_types.h => src/fastspi_types.h                            | 0
 hsv2rgb.cpp => src/hsv2rgb.cpp                                    | 0
 hsv2rgb.h => src/hsv2rgb.h                                        | 0
 led_sysdefs.h => src/led_sysdefs.h                                | 0
 lib8tion.cpp => src/lib8tion.cpp                                  | 0
 lib8tion.h => src/lib8tion.h                                      | 0
 {lib8tion => src/lib8tion}/math8.h                                | 0
 {lib8tion => src/lib8tion}/random8.h                              | 0
 {lib8tion => src/lib8tion}/scale8.h                               | 0
 {lib8tion => src/lib8tion}/trig8.h                                | 0
 noise.cpp => src/noise.cpp                                        | 0
 noise.h => src/noise.h                                            | 0
 pixelset.h => src/pixelset.h                                      | 0
 pixeltypes.h => src/pixeltypes.h                                  | 0
 platforms.cpp => src/platforms.cpp                                | 0
 platforms.h => src/platforms.h                                    | 0
 {platforms => src/platforms}/apollo3/clockless_apollo3.h          | 0
 {platforms => src/platforms}/apollo3/fastled_apollo3.h            | 0
 {platforms => src/platforms}/apollo3/fastpin_apollo3.h            | 0
 {platforms => src/platforms}/apollo3/fastspi_apollo3.h            | 0
 {platforms => src/platforms}/apollo3/led_sysdefs_apollo3.h        | 0
 {platforms => src/platforms}/arm/common/m0clockless.h             | 0
 {platforms => src/platforms}/arm/d21/clockless_arm_d21.h          | 0
 {platforms => src/platforms}/arm/d21/fastled_arm_d21.h            | 0
 {platforms => src/platforms}/arm/d21/fastpin_arm_d21.h            | 0
 {platforms => src/platforms}/arm/d21/led_sysdefs_arm_d21.h        | 0
 {platforms => src/platforms}/arm/d51/README.txt                   | 0
 {platforms => src/platforms}/arm/d51/clockless_arm_d51.h          | 0
 {platforms => src/platforms}/arm/d51/fastled_arm_d51.h            | 0
 {platforms => src/platforms}/arm/d51/fastpin_arm_d51.h            | 0
 {platforms => src/platforms}/arm/d51/led_sysdefs_arm_d51.h        | 0
 {platforms => src/platforms}/arm/k20/clockless_arm_k20.h          | 0
 {platforms => src/platforms}/arm/k20/clockless_block_arm_k20.h    | 0
 {platforms => src/platforms}/arm/k20/fastled_arm_k20.h            | 0
 {platforms => src/platforms}/arm/k20/fastpin_arm_k20.h            | 0
 {platforms => src/platforms}/arm/k20/fastspi_arm_k20.h            | 0
 {platforms => src/platforms}/arm/k20/led_sysdefs_arm_k20.h        | 0
 {platforms => src/platforms}/arm/k20/octows2811_controller.h      | 0
 {platforms => src/platforms}/arm/k20/smartmatrix_t3.h             | 0
 {platforms => src/platforms}/arm/k20/ws2812serial_controller.h    | 0
 {platforms => src/platforms}/arm/k66/clockless_arm_k66.h          | 0
 {platforms => src/platforms}/arm/k66/clockless_block_arm_k66.h    | 0
 {platforms => src/platforms}/arm/k66/fastled_arm_k66.h            | 0
 {platforms => src/platforms}/arm/k66/fastpin_arm_k66.h            | 0
 {platforms => src/platforms}/arm/k66/fastspi_arm_k66.h            | 0
 {platforms => src/platforms}/arm/k66/led_sysdefs_arm_k66.h        | 0
 {platforms => src/platforms}/arm/kl26/clockless_arm_kl26.h        | 0
 {platforms => src/platforms}/arm/kl26/fastled_arm_kl26.h          | 0
 {platforms => src/platforms}/arm/kl26/fastpin_arm_kl26.h          | 0
 {platforms => src/platforms}/arm/kl26/fastspi_arm_kl26.h          | 0
 {platforms => src/platforms}/arm/kl26/led_sysdefs_arm_kl26.h      | 0
 .../platforms}/arm/mxrt1062/block_clockless_arm_mxrt1062.h        | 0
 .../platforms}/arm/mxrt1062/clockless_arm_mxrt1062.h              | 0
 {platforms => src/platforms}/arm/mxrt1062/fastled_arm_mxrt1062.h  | 0
 {platforms => src/platforms}/arm/mxrt1062/fastpin_arm_mxrt1062.h  | 0
 {platforms => src/platforms}/arm/mxrt1062/fastspi_arm_mxrt1062.h  | 0
 .../platforms}/arm/mxrt1062/led_sysdefs_arm_mxrt1062.h            | 0
 {platforms => src/platforms}/arm/nrf51/clockless_arm_nrf51.h      | 0
 {platforms => src/platforms}/arm/nrf51/fastled_arm_nrf51.h        | 0
 {platforms => src/platforms}/arm/nrf51/fastpin_arm_nrf51.h        | 0
 {platforms => src/platforms}/arm/nrf51/fastspi_arm_nrf51.h        | 0
 {platforms => src/platforms}/arm/nrf51/led_sysdefs_arm_nrf51.h    | 0
 {platforms => src/platforms}/arm/nrf52/arbiter_nrf52.h            | 0
 {platforms => src/platforms}/arm/nrf52/clockless_arm_nrf52.h      | 0
 {platforms => src/platforms}/arm/nrf52/fastled_arm_nrf52.h        | 0
 {platforms => src/platforms}/arm/nrf52/fastpin_arm_nrf52.h        | 0
 .../platforms}/arm/nrf52/fastpin_arm_nrf52_variants.h             | 0
 {platforms => src/platforms}/arm/nrf52/fastspi_arm_nrf52.h        | 0
 {platforms => src/platforms}/arm/nrf52/led_sysdefs_arm_nrf52.h    | 0
 {platforms => src/platforms}/arm/sam/clockless_arm_sam.h          | 0
 {platforms => src/platforms}/arm/sam/clockless_block_arm_sam.h    | 0
 {platforms => src/platforms}/arm/sam/fastled_arm_sam.h            | 0
 {platforms => src/platforms}/arm/sam/fastpin_arm_sam.h            | 0
 {platforms => src/platforms}/arm/sam/fastspi_arm_sam.h            | 0
 {platforms => src/platforms}/arm/sam/led_sysdefs_arm_sam.h        | 0
 {platforms => src/platforms}/arm/stm32/clockless_arm_stm32.h      | 0
 {platforms => src/platforms}/arm/stm32/cm3_regs.h                 | 0
 {platforms => src/platforms}/arm/stm32/fastled_arm_stm32.h        | 0
 {platforms => src/platforms}/arm/stm32/fastpin_arm_stm32.h        | 0
 {platforms => src/platforms}/arm/stm32/led_sysdefs_arm_stm32.h    | 0
 {platforms => src/platforms}/avr/clockless_trinket.h              | 0
 {platforms => src/platforms}/avr/fastled_avr.h                    | 0
 {platforms => src/platforms}/avr/fastpin_avr.h                    | 0
 {platforms => src/platforms}/avr/fastspi_avr.h                    | 0
 {platforms => src/platforms}/avr/led_sysdefs_avr.h                | 0
 {platforms => src/platforms}/esp/32/clockless_block_esp32.h       | 0
 {platforms => src/platforms}/esp/32/clockless_i2s_esp32.h         | 0
 {platforms => src/platforms}/esp/32/clockless_rmt_esp32.cpp       | 0
 {platforms => src/platforms}/esp/32/clockless_rmt_esp32.h         | 0
 {platforms => src/platforms}/esp/32/fastled_esp32.h               | 0
 {platforms => src/platforms}/esp/32/fastpin_esp32.h               | 0
 {platforms => src/platforms}/esp/32/led_sysdefs_esp32.h           | 0
 {platforms => src/platforms}/esp/8266/clockless_block_esp8266.h   | 0
 {platforms => src/platforms}/esp/8266/clockless_esp8266.h         | 0
 {platforms => src/platforms}/esp/8266/fastled_esp8266.h           | 0
 {platforms => src/platforms}/esp/8266/fastpin_esp8266.h           | 0
 {platforms => src/platforms}/esp/8266/led_sysdefs_esp8266.h       | 0
 power_mgt.cpp => src/power_mgt.cpp                                | 0
 power_mgt.h => src/power_mgt.h                                    | 0
 wiring.cpp => src/wiring.cpp                                      | 0
 122 files changed, 0 insertions(+), 0 deletions(-)
 rename FastLED.cpp => src/FastLED.cpp (100%)
 rename FastLED.h => src/FastLED.h (100%)
 rename bitswap.cpp => src/bitswap.cpp (100%)
 rename bitswap.h => src/bitswap.h (100%)
 rename chipsets.h => src/chipsets.h (100%)
 rename color.h => src/color.h (100%)
 rename colorpalettes.cpp => src/colorpalettes.cpp (100%)
 rename colorpalettes.h => src/colorpalettes.h (100%)
 rename colorutils.cpp => src/colorutils.cpp (100%)
 rename colorutils.h => src/colorutils.h (100%)
 rename controller.h => src/controller.h (100%)
 rename cpp_compat.h => src/cpp_compat.h (100%)
 rename dmx.h => src/dmx.h (100%)
 rename fastled_config.h => src/fastled_config.h (100%)
 rename fastled_delay.h => src/fastled_delay.h (100%)
 rename fastled_progmem.h => src/fastled_progmem.h (100%)
 rename fastpin.h => src/fastpin.h (100%)
 rename fastspi.h => src/fastspi.h (100%)
 rename fastspi_bitbang.h => src/fastspi_bitbang.h (100%)
 rename fastspi_dma.h => src/fastspi_dma.h (100%)
 rename fastspi_nop.h => src/fastspi_nop.h (100%)
 rename fastspi_ref.h => src/fastspi_ref.h (100%)
 rename fastspi_types.h => src/fastspi_types.h (100%)
 rename hsv2rgb.cpp => src/hsv2rgb.cpp (100%)
 rename hsv2rgb.h => src/hsv2rgb.h (100%)
 rename led_sysdefs.h => src/led_sysdefs.h (100%)
 rename lib8tion.cpp => src/lib8tion.cpp (100%)
 rename lib8tion.h => src/lib8tion.h (100%)
 rename {lib8tion => src/lib8tion}/math8.h (100%)
 rename {lib8tion => src/lib8tion}/random8.h (100%)
 rename {lib8tion => src/lib8tion}/scale8.h (100%)
 rename {lib8tion => src/lib8tion}/trig8.h (100%)
 rename noise.cpp => src/noise.cpp (100%)
 rename noise.h => src/noise.h (100%)
 rename pixelset.h => src/pixelset.h (100%)
 rename pixeltypes.h => src/pixeltypes.h (100%)
 rename platforms.cpp => src/platforms.cpp (100%)
 rename platforms.h => src/platforms.h (100%)
 rename {platforms => src/platforms}/apollo3/clockless_apollo3.h (100%)
 rename {platforms => src/platforms}/apollo3/fastled_apollo3.h (100%)
 rename {platforms => src/platforms}/apollo3/fastpin_apollo3.h (100%)
 rename {platforms => src/platforms}/apollo3/fastspi_apollo3.h (100%)
 rename {platforms => src/platforms}/apollo3/led_sysdefs_apollo3.h (100%)
 rename {platforms => src/platforms}/arm/common/m0clockless.h (100%)
 rename {platforms => src/platforms}/arm/d21/clockless_arm_d21.h (100%)
 rename {platforms => src/platforms}/arm/d21/fastled_arm_d21.h (100%)
 rename {platforms => src/platforms}/arm/d21/fastpin_arm_d21.h (100%)
 rename {platforms => src/platforms}/arm/d21/led_sysdefs_arm_d21.h (100%)
 rename {platforms => src/platforms}/arm/d51/README.txt (100%)
 rename {platforms => src/platforms}/arm/d51/clockless_arm_d51.h (100%)
 rename {platforms => src/platforms}/arm/d51/fastled_arm_d51.h (100%)
 rename {platforms => src/platforms}/arm/d51/fastpin_arm_d51.h (100%)
 rename {platforms => src/platforms}/arm/d51/led_sysdefs_arm_d51.h (100%)
 rename {platforms => src/platforms}/arm/k20/clockless_arm_k20.h (100%)
 rename {platforms => src/platforms}/arm/k20/clockless_block_arm_k20.h (100%)
 rename {platforms => src/platforms}/arm/k20/fastled_arm_k20.h (100%)
 rename {platforms => src/platforms}/arm/k20/fastpin_arm_k20.h (100%)
 rename {platforms => src/platforms}/arm/k20/fastspi_arm_k20.h (100%)
 rename {platforms => src/platforms}/arm/k20/led_sysdefs_arm_k20.h (100%)
 rename {platforms => src/platforms}/arm/k20/octows2811_controller.h (100%)
 rename {platforms => src/platforms}/arm/k20/smartmatrix_t3.h (100%)
 rename {platforms => src/platforms}/arm/k20/ws2812serial_controller.h (100%)
 rename {platforms => src/platforms}/arm/k66/clockless_arm_k66.h (100%)
 rename {platforms => src/platforms}/arm/k66/clockless_block_arm_k66.h (100%)
 rename {platforms => src/platforms}/arm/k66/fastled_arm_k66.h (100%)
 rename {platforms => src/platforms}/arm/k66/fastpin_arm_k66.h (100%)
 rename {platforms => src/platforms}/arm/k66/fastspi_arm_k66.h (100%)
 rename {platforms => src/platforms}/arm/k66/led_sysdefs_arm_k66.h (100%)
 rename {platforms => src/platforms}/arm/kl26/clockless_arm_kl26.h (100%)
 rename {platforms => src/platforms}/arm/kl26/fastled_arm_kl26.h (100%)
 rename {platforms => src/platforms}/arm/kl26/fastpin_arm_kl26.h (100%)
 rename {platforms => src/platforms}/arm/kl26/fastspi_arm_kl26.h (100%)
 rename {platforms => src/platforms}/arm/kl26/led_sysdefs_arm_kl26.h (100%)
 rename {platforms => src/platforms}/arm/mxrt1062/block_clockless_arm_mxrt1062.h (100%)
 rename {platforms => src/platforms}/arm/mxrt1062/clockless_arm_mxrt1062.h (100%)
 rename {platforms => src/platforms}/arm/mxrt1062/fastled_arm_mxrt1062.h (100%)
 rename {platforms => src/platforms}/arm/mxrt1062/fastpin_arm_mxrt1062.h (100%)
 rename {platforms => src/platforms}/arm/mxrt1062/fastspi_arm_mxrt1062.h (100%)
 rename {platforms => src/platforms}/arm/mxrt1062/led_sysdefs_arm_mxrt1062.h (100%)
 rename {platforms => src/platforms}/arm/nrf51/clockless_arm_nrf51.h (100%)
 rename {platforms => src/platforms}/arm/nrf51/fastled_arm_nrf51.h (100%)
 rename {platforms => src/platforms}/arm/nrf51/fastpin_arm_nrf51.h (100%)
 rename {platforms => src/platforms}/arm/nrf51/fastspi_arm_nrf51.h (100%)
 rename {platforms => src/platforms}/arm/nrf51/led_sysdefs_arm_nrf51.h (100%)
 rename {platforms => src/platforms}/arm/nrf52/arbiter_nrf52.h (100%)
 rename {platforms => src/platforms}/arm/nrf52/clockless_arm_nrf52.h (100%)
 rename {platforms => src/platforms}/arm/nrf52/fastled_arm_nrf52.h (100%)
 rename {platforms => src/platforms}/arm/nrf52/fastpin_arm_nrf52.h (100%)
 rename {platforms => src/platforms}/arm/nrf52/fastpin_arm_nrf52_variants.h (100%)
 rename {platforms => src/platforms}/arm/nrf52/fastspi_arm_nrf52.h (100%)
 rename {platforms => src/platforms}/arm/nrf52/led_sysdefs_arm_nrf52.h (100%)
 rename {platforms => src/platforms}/arm/sam/clockless_arm_sam.h (100%)
 rename {platforms => src/platforms}/arm/sam/clockless_block_arm_sam.h (100%)
 rename {platforms => src/platforms}/arm/sam/fastled_arm_sam.h (100%)
 rename {platforms => src/platforms}/arm/sam/fastpin_arm_sam.h (100%)
 rename {platforms => src/platforms}/arm/sam/fastspi_arm_sam.h (100%)
 rename {platforms => src/platforms}/arm/sam/led_sysdefs_arm_sam.h (100%)
 rename {platforms => src/platforms}/arm/stm32/clockless_arm_stm32.h (100%)
 rename {platforms => src/platforms}/arm/stm32/cm3_regs.h (100%)
 rename {platforms => src/platforms}/arm/stm32/fastled_arm_stm32.h (100%)
 rename {platforms => src/platforms}/arm/stm32/fastpin_arm_stm32.h (100%)
 rename {platforms => src/platforms}/arm/stm32/led_sysdefs_arm_stm32.h (100%)
 rename {platforms => src/platforms}/avr/clockless_trinket.h (100%)
 rename {platforms => src/platforms}/avr/fastled_avr.h (100%)
 rename {platforms => src/platforms}/avr/fastpin_avr.h (100%)
 rename {platforms => src/platforms}/avr/fastspi_avr.h (100%)
 rename {platforms => src/platforms}/avr/led_sysdefs_avr.h (100%)
 rename {platforms => src/platforms}/esp/32/clockless_block_esp32.h (100%)
 rename {platforms => src/platforms}/esp/32/clockless_i2s_esp32.h (100%)
 rename {platforms => src/platforms}/esp/32/clockless_rmt_esp32.cpp (100%)
 rename {platforms => src/platforms}/esp/32/clockless_rmt_esp32.h (100%)
 rename {platforms => src/platforms}/esp/32/fastled_esp32.h (100%)
 rename {platforms => src/platforms}/esp/32/fastpin_esp32.h (100%)
 rename {platforms => src/platforms}/esp/32/led_sysdefs_esp32.h (100%)
 rename {platforms => src/platforms}/esp/8266/clockless_block_esp8266.h (100%)
 rename {platforms => src/platforms}/esp/8266/clockless_esp8266.h (100%)
 rename {platforms => src/platforms}/esp/8266/fastled_esp8266.h (100%)
 rename {platforms => src/platforms}/esp/8266/fastpin_esp8266.h (100%)
 rename {platforms => src/platforms}/esp/8266/led_sysdefs_esp8266.h (100%)
 rename power_mgt.cpp => src/power_mgt.cpp (100%)
 rename power_mgt.h => src/power_mgt.h (100%)
 rename wiring.cpp => src/wiring.cpp (100%)

diff --git a/FastLED.cpp b/src/FastLED.cpp
similarity index 100%
rename from FastLED.cpp
rename to src/FastLED.cpp
diff --git a/FastLED.h b/src/FastLED.h
similarity index 100%
rename from FastLED.h
rename to src/FastLED.h
diff --git a/bitswap.cpp b/src/bitswap.cpp
similarity index 100%
rename from bitswap.cpp
rename to src/bitswap.cpp
diff --git a/bitswap.h b/src/bitswap.h
similarity index 100%
rename from bitswap.h
rename to src/bitswap.h
diff --git a/chipsets.h b/src/chipsets.h
similarity index 100%
rename from chipsets.h
rename to src/chipsets.h
diff --git a/color.h b/src/color.h
similarity index 100%
rename from color.h
rename to src/color.h
diff --git a/colorpalettes.cpp b/src/colorpalettes.cpp
similarity index 100%
rename from colorpalettes.cpp
rename to src/colorpalettes.cpp
diff --git a/colorpalettes.h b/src/colorpalettes.h
similarity index 100%
rename from colorpalettes.h
rename to src/colorpalettes.h
diff --git a/colorutils.cpp b/src/colorutils.cpp
similarity index 100%
rename from colorutils.cpp
rename to src/colorutils.cpp
diff --git a/colorutils.h b/src/colorutils.h
similarity index 100%
rename from colorutils.h
rename to src/colorutils.h
diff --git a/controller.h b/src/controller.h
similarity index 100%
rename from controller.h
rename to src/controller.h
diff --git a/cpp_compat.h b/src/cpp_compat.h
similarity index 100%
rename from cpp_compat.h
rename to src/cpp_compat.h
diff --git a/dmx.h b/src/dmx.h
similarity index 100%
rename from dmx.h
rename to src/dmx.h
diff --git a/fastled_config.h b/src/fastled_config.h
similarity index 100%
rename from fastled_config.h
rename to src/fastled_config.h
diff --git a/fastled_delay.h b/src/fastled_delay.h
similarity index 100%
rename from fastled_delay.h
rename to src/fastled_delay.h
diff --git a/fastled_progmem.h b/src/fastled_progmem.h
similarity index 100%
rename from fastled_progmem.h
rename to src/fastled_progmem.h
diff --git a/fastpin.h b/src/fastpin.h
similarity index 100%
rename from fastpin.h
rename to src/fastpin.h
diff --git a/fastspi.h b/src/fastspi.h
similarity index 100%
rename from fastspi.h
rename to src/fastspi.h
diff --git a/fastspi_bitbang.h b/src/fastspi_bitbang.h
similarity index 100%
rename from fastspi_bitbang.h
rename to src/fastspi_bitbang.h
diff --git a/fastspi_dma.h b/src/fastspi_dma.h
similarity index 100%
rename from fastspi_dma.h
rename to src/fastspi_dma.h
diff --git a/fastspi_nop.h b/src/fastspi_nop.h
similarity index 100%
rename from fastspi_nop.h
rename to src/fastspi_nop.h
diff --git a/fastspi_ref.h b/src/fastspi_ref.h
similarity index 100%
rename from fastspi_ref.h
rename to src/fastspi_ref.h
diff --git a/fastspi_types.h b/src/fastspi_types.h
similarity index 100%
rename from fastspi_types.h
rename to src/fastspi_types.h
diff --git a/hsv2rgb.cpp b/src/hsv2rgb.cpp
similarity index 100%
rename from hsv2rgb.cpp
rename to src/hsv2rgb.cpp
diff --git a/hsv2rgb.h b/src/hsv2rgb.h
similarity index 100%
rename from hsv2rgb.h
rename to src/hsv2rgb.h
diff --git a/led_sysdefs.h b/src/led_sysdefs.h
similarity index 100%
rename from led_sysdefs.h
rename to src/led_sysdefs.h
diff --git a/lib8tion.cpp b/src/lib8tion.cpp
similarity index 100%
rename from lib8tion.cpp
rename to src/lib8tion.cpp
diff --git a/lib8tion.h b/src/lib8tion.h
similarity index 100%
rename from lib8tion.h
rename to src/lib8tion.h
diff --git a/lib8tion/math8.h b/src/lib8tion/math8.h
similarity index 100%
rename from lib8tion/math8.h
rename to src/lib8tion/math8.h
diff --git a/lib8tion/random8.h b/src/lib8tion/random8.h
similarity index 100%
rename from lib8tion/random8.h
rename to src/lib8tion/random8.h
diff --git a/lib8tion/scale8.h b/src/lib8tion/scale8.h
similarity index 100%
rename from lib8tion/scale8.h
rename to src/lib8tion/scale8.h
diff --git a/lib8tion/trig8.h b/src/lib8tion/trig8.h
similarity index 100%
rename from lib8tion/trig8.h
rename to src/lib8tion/trig8.h
diff --git a/noise.cpp b/src/noise.cpp
similarity index 100%
rename from noise.cpp
rename to src/noise.cpp
diff --git a/noise.h b/src/noise.h
similarity index 100%
rename from noise.h
rename to src/noise.h
diff --git a/pixelset.h b/src/pixelset.h
similarity index 100%
rename from pixelset.h
rename to src/pixelset.h
diff --git a/pixeltypes.h b/src/pixeltypes.h
similarity index 100%
rename from pixeltypes.h
rename to src/pixeltypes.h
diff --git a/platforms.cpp b/src/platforms.cpp
similarity index 100%
rename from platforms.cpp
rename to src/platforms.cpp
diff --git a/platforms.h b/src/platforms.h
similarity index 100%
rename from platforms.h
rename to src/platforms.h
diff --git a/platforms/apollo3/clockless_apollo3.h b/src/platforms/apollo3/clockless_apollo3.h
similarity index 100%
rename from platforms/apollo3/clockless_apollo3.h
rename to src/platforms/apollo3/clockless_apollo3.h
diff --git a/platforms/apollo3/fastled_apollo3.h b/src/platforms/apollo3/fastled_apollo3.h
similarity index 100%
rename from platforms/apollo3/fastled_apollo3.h
rename to src/platforms/apollo3/fastled_apollo3.h
diff --git a/platforms/apollo3/fastpin_apollo3.h b/src/platforms/apollo3/fastpin_apollo3.h
similarity index 100%
rename from platforms/apollo3/fastpin_apollo3.h
rename to src/platforms/apollo3/fastpin_apollo3.h
diff --git a/platforms/apollo3/fastspi_apollo3.h b/src/platforms/apollo3/fastspi_apollo3.h
similarity index 100%
rename from platforms/apollo3/fastspi_apollo3.h
rename to src/platforms/apollo3/fastspi_apollo3.h
diff --git a/platforms/apollo3/led_sysdefs_apollo3.h b/src/platforms/apollo3/led_sysdefs_apollo3.h
similarity index 100%
rename from platforms/apollo3/led_sysdefs_apollo3.h
rename to src/platforms/apollo3/led_sysdefs_apollo3.h
diff --git a/platforms/arm/common/m0clockless.h b/src/platforms/arm/common/m0clockless.h
similarity index 100%
rename from platforms/arm/common/m0clockless.h
rename to src/platforms/arm/common/m0clockless.h
diff --git a/platforms/arm/d21/clockless_arm_d21.h b/src/platforms/arm/d21/clockless_arm_d21.h
similarity index 100%
rename from platforms/arm/d21/clockless_arm_d21.h
rename to src/platforms/arm/d21/clockless_arm_d21.h
diff --git a/platforms/arm/d21/fastled_arm_d21.h b/src/platforms/arm/d21/fastled_arm_d21.h
similarity index 100%
rename from platforms/arm/d21/fastled_arm_d21.h
rename to src/platforms/arm/d21/fastled_arm_d21.h
diff --git a/platforms/arm/d21/fastpin_arm_d21.h b/src/platforms/arm/d21/fastpin_arm_d21.h
similarity index 100%
rename from platforms/arm/d21/fastpin_arm_d21.h
rename to src/platforms/arm/d21/fastpin_arm_d21.h
diff --git a/platforms/arm/d21/led_sysdefs_arm_d21.h b/src/platforms/arm/d21/led_sysdefs_arm_d21.h
similarity index 100%
rename from platforms/arm/d21/led_sysdefs_arm_d21.h
rename to src/platforms/arm/d21/led_sysdefs_arm_d21.h
diff --git a/platforms/arm/d51/README.txt b/src/platforms/arm/d51/README.txt
similarity index 100%
rename from platforms/arm/d51/README.txt
rename to src/platforms/arm/d51/README.txt
diff --git a/platforms/arm/d51/clockless_arm_d51.h b/src/platforms/arm/d51/clockless_arm_d51.h
similarity index 100%
rename from platforms/arm/d51/clockless_arm_d51.h
rename to src/platforms/arm/d51/clockless_arm_d51.h
diff --git a/platforms/arm/d51/fastled_arm_d51.h b/src/platforms/arm/d51/fastled_arm_d51.h
similarity index 100%
rename from platforms/arm/d51/fastled_arm_d51.h
rename to src/platforms/arm/d51/fastled_arm_d51.h
diff --git a/platforms/arm/d51/fastpin_arm_d51.h b/src/platforms/arm/d51/fastpin_arm_d51.h
similarity index 100%
rename from platforms/arm/d51/fastpin_arm_d51.h
rename to src/platforms/arm/d51/fastpin_arm_d51.h
diff --git a/platforms/arm/d51/led_sysdefs_arm_d51.h b/src/platforms/arm/d51/led_sysdefs_arm_d51.h
similarity index 100%
rename from platforms/arm/d51/led_sysdefs_arm_d51.h
rename to src/platforms/arm/d51/led_sysdefs_arm_d51.h
diff --git a/platforms/arm/k20/clockless_arm_k20.h b/src/platforms/arm/k20/clockless_arm_k20.h
similarity index 100%
rename from platforms/arm/k20/clockless_arm_k20.h
rename to src/platforms/arm/k20/clockless_arm_k20.h
diff --git a/platforms/arm/k20/clockless_block_arm_k20.h b/src/platforms/arm/k20/clockless_block_arm_k20.h
similarity index 100%
rename from platforms/arm/k20/clockless_block_arm_k20.h
rename to src/platforms/arm/k20/clockless_block_arm_k20.h
diff --git a/platforms/arm/k20/fastled_arm_k20.h b/src/platforms/arm/k20/fastled_arm_k20.h
similarity index 100%
rename from platforms/arm/k20/fastled_arm_k20.h
rename to src/platforms/arm/k20/fastled_arm_k20.h
diff --git a/platforms/arm/k20/fastpin_arm_k20.h b/src/platforms/arm/k20/fastpin_arm_k20.h
similarity index 100%
rename from platforms/arm/k20/fastpin_arm_k20.h
rename to src/platforms/arm/k20/fastpin_arm_k20.h
diff --git a/platforms/arm/k20/fastspi_arm_k20.h b/src/platforms/arm/k20/fastspi_arm_k20.h
similarity index 100%
rename from platforms/arm/k20/fastspi_arm_k20.h
rename to src/platforms/arm/k20/fastspi_arm_k20.h
diff --git a/platforms/arm/k20/led_sysdefs_arm_k20.h b/src/platforms/arm/k20/led_sysdefs_arm_k20.h
similarity index 100%
rename from platforms/arm/k20/led_sysdefs_arm_k20.h
rename to src/platforms/arm/k20/led_sysdefs_arm_k20.h
diff --git a/platforms/arm/k20/octows2811_controller.h b/src/platforms/arm/k20/octows2811_controller.h
similarity index 100%
rename from platforms/arm/k20/octows2811_controller.h
rename to src/platforms/arm/k20/octows2811_controller.h
diff --git a/platforms/arm/k20/smartmatrix_t3.h b/src/platforms/arm/k20/smartmatrix_t3.h
similarity index 100%
rename from platforms/arm/k20/smartmatrix_t3.h
rename to src/platforms/arm/k20/smartmatrix_t3.h
diff --git a/platforms/arm/k20/ws2812serial_controller.h b/src/platforms/arm/k20/ws2812serial_controller.h
similarity index 100%
rename from platforms/arm/k20/ws2812serial_controller.h
rename to src/platforms/arm/k20/ws2812serial_controller.h
diff --git a/platforms/arm/k66/clockless_arm_k66.h b/src/platforms/arm/k66/clockless_arm_k66.h
similarity index 100%
rename from platforms/arm/k66/clockless_arm_k66.h
rename to src/platforms/arm/k66/clockless_arm_k66.h
diff --git a/platforms/arm/k66/clockless_block_arm_k66.h b/src/platforms/arm/k66/clockless_block_arm_k66.h
similarity index 100%
rename from platforms/arm/k66/clockless_block_arm_k66.h
rename to src/platforms/arm/k66/clockless_block_arm_k66.h
diff --git a/platforms/arm/k66/fastled_arm_k66.h b/src/platforms/arm/k66/fastled_arm_k66.h
similarity index 100%
rename from platforms/arm/k66/fastled_arm_k66.h
rename to src/platforms/arm/k66/fastled_arm_k66.h
diff --git a/platforms/arm/k66/fastpin_arm_k66.h b/src/platforms/arm/k66/fastpin_arm_k66.h
similarity index 100%
rename from platforms/arm/k66/fastpin_arm_k66.h
rename to src/platforms/arm/k66/fastpin_arm_k66.h
diff --git a/platforms/arm/k66/fastspi_arm_k66.h b/src/platforms/arm/k66/fastspi_arm_k66.h
similarity index 100%
rename from platforms/arm/k66/fastspi_arm_k66.h
rename to src/platforms/arm/k66/fastspi_arm_k66.h
diff --git a/platforms/arm/k66/led_sysdefs_arm_k66.h b/src/platforms/arm/k66/led_sysdefs_arm_k66.h
similarity index 100%
rename from platforms/arm/k66/led_sysdefs_arm_k66.h
rename to src/platforms/arm/k66/led_sysdefs_arm_k66.h
diff --git a/platforms/arm/kl26/clockless_arm_kl26.h b/src/platforms/arm/kl26/clockless_arm_kl26.h
similarity index 100%
rename from platforms/arm/kl26/clockless_arm_kl26.h
rename to src/platforms/arm/kl26/clockless_arm_kl26.h
diff --git a/platforms/arm/kl26/fastled_arm_kl26.h b/src/platforms/arm/kl26/fastled_arm_kl26.h
similarity index 100%
rename from platforms/arm/kl26/fastled_arm_kl26.h
rename to src/platforms/arm/kl26/fastled_arm_kl26.h
diff --git a/platforms/arm/kl26/fastpin_arm_kl26.h b/src/platforms/arm/kl26/fastpin_arm_kl26.h
similarity index 100%
rename from platforms/arm/kl26/fastpin_arm_kl26.h
rename to src/platforms/arm/kl26/fastpin_arm_kl26.h
diff --git a/platforms/arm/kl26/fastspi_arm_kl26.h b/src/platforms/arm/kl26/fastspi_arm_kl26.h
similarity index 100%
rename from platforms/arm/kl26/fastspi_arm_kl26.h
rename to src/platforms/arm/kl26/fastspi_arm_kl26.h
diff --git a/platforms/arm/kl26/led_sysdefs_arm_kl26.h b/src/platforms/arm/kl26/led_sysdefs_arm_kl26.h
similarity index 100%
rename from platforms/arm/kl26/led_sysdefs_arm_kl26.h
rename to src/platforms/arm/kl26/led_sysdefs_arm_kl26.h
diff --git a/platforms/arm/mxrt1062/block_clockless_arm_mxrt1062.h b/src/platforms/arm/mxrt1062/block_clockless_arm_mxrt1062.h
similarity index 100%
rename from platforms/arm/mxrt1062/block_clockless_arm_mxrt1062.h
rename to src/platforms/arm/mxrt1062/block_clockless_arm_mxrt1062.h
diff --git a/platforms/arm/mxrt1062/clockless_arm_mxrt1062.h b/src/platforms/arm/mxrt1062/clockless_arm_mxrt1062.h
similarity index 100%
rename from platforms/arm/mxrt1062/clockless_arm_mxrt1062.h
rename to src/platforms/arm/mxrt1062/clockless_arm_mxrt1062.h
diff --git a/platforms/arm/mxrt1062/fastled_arm_mxrt1062.h b/src/platforms/arm/mxrt1062/fastled_arm_mxrt1062.h
similarity index 100%
rename from platforms/arm/mxrt1062/fastled_arm_mxrt1062.h
rename to src/platforms/arm/mxrt1062/fastled_arm_mxrt1062.h
diff --git a/platforms/arm/mxrt1062/fastpin_arm_mxrt1062.h b/src/platforms/arm/mxrt1062/fastpin_arm_mxrt1062.h
similarity index 100%
rename from platforms/arm/mxrt1062/fastpin_arm_mxrt1062.h
rename to src/platforms/arm/mxrt1062/fastpin_arm_mxrt1062.h
diff --git a/platforms/arm/mxrt1062/fastspi_arm_mxrt1062.h b/src/platforms/arm/mxrt1062/fastspi_arm_mxrt1062.h
similarity index 100%
rename from platforms/arm/mxrt1062/fastspi_arm_mxrt1062.h
rename to src/platforms/arm/mxrt1062/fastspi_arm_mxrt1062.h
diff --git a/platforms/arm/mxrt1062/led_sysdefs_arm_mxrt1062.h b/src/platforms/arm/mxrt1062/led_sysdefs_arm_mxrt1062.h
similarity index 100%
rename from platforms/arm/mxrt1062/led_sysdefs_arm_mxrt1062.h
rename to src/platforms/arm/mxrt1062/led_sysdefs_arm_mxrt1062.h
diff --git a/platforms/arm/nrf51/clockless_arm_nrf51.h b/src/platforms/arm/nrf51/clockless_arm_nrf51.h
similarity index 100%
rename from platforms/arm/nrf51/clockless_arm_nrf51.h
rename to src/platforms/arm/nrf51/clockless_arm_nrf51.h
diff --git a/platforms/arm/nrf51/fastled_arm_nrf51.h b/src/platforms/arm/nrf51/fastled_arm_nrf51.h
similarity index 100%
rename from platforms/arm/nrf51/fastled_arm_nrf51.h
rename to src/platforms/arm/nrf51/fastled_arm_nrf51.h
diff --git a/platforms/arm/nrf51/fastpin_arm_nrf51.h b/src/platforms/arm/nrf51/fastpin_arm_nrf51.h
similarity index 100%
rename from platforms/arm/nrf51/fastpin_arm_nrf51.h
rename to src/platforms/arm/nrf51/fastpin_arm_nrf51.h
diff --git a/platforms/arm/nrf51/fastspi_arm_nrf51.h b/src/platforms/arm/nrf51/fastspi_arm_nrf51.h
similarity index 100%
rename from platforms/arm/nrf51/fastspi_arm_nrf51.h
rename to src/platforms/arm/nrf51/fastspi_arm_nrf51.h
diff --git a/platforms/arm/nrf51/led_sysdefs_arm_nrf51.h b/src/platforms/arm/nrf51/led_sysdefs_arm_nrf51.h
similarity index 100%
rename from platforms/arm/nrf51/led_sysdefs_arm_nrf51.h
rename to src/platforms/arm/nrf51/led_sysdefs_arm_nrf51.h
diff --git a/platforms/arm/nrf52/arbiter_nrf52.h b/src/platforms/arm/nrf52/arbiter_nrf52.h
similarity index 100%
rename from platforms/arm/nrf52/arbiter_nrf52.h
rename to src/platforms/arm/nrf52/arbiter_nrf52.h
diff --git a/platforms/arm/nrf52/clockless_arm_nrf52.h b/src/platforms/arm/nrf52/clockless_arm_nrf52.h
similarity index 100%
rename from platforms/arm/nrf52/clockless_arm_nrf52.h
rename to src/platforms/arm/nrf52/clockless_arm_nrf52.h
diff --git a/platforms/arm/nrf52/fastled_arm_nrf52.h b/src/platforms/arm/nrf52/fastled_arm_nrf52.h
similarity index 100%
rename from platforms/arm/nrf52/fastled_arm_nrf52.h
rename to src/platforms/arm/nrf52/fastled_arm_nrf52.h
diff --git a/platforms/arm/nrf52/fastpin_arm_nrf52.h b/src/platforms/arm/nrf52/fastpin_arm_nrf52.h
similarity index 100%
rename from platforms/arm/nrf52/fastpin_arm_nrf52.h
rename to src/platforms/arm/nrf52/fastpin_arm_nrf52.h
diff --git a/platforms/arm/nrf52/fastpin_arm_nrf52_variants.h b/src/platforms/arm/nrf52/fastpin_arm_nrf52_variants.h
similarity index 100%
rename from platforms/arm/nrf52/fastpin_arm_nrf52_variants.h
rename to src/platforms/arm/nrf52/fastpin_arm_nrf52_variants.h
diff --git a/platforms/arm/nrf52/fastspi_arm_nrf52.h b/src/platforms/arm/nrf52/fastspi_arm_nrf52.h
similarity index 100%
rename from platforms/arm/nrf52/fastspi_arm_nrf52.h
rename to src/platforms/arm/nrf52/fastspi_arm_nrf52.h
diff --git a/platforms/arm/nrf52/led_sysdefs_arm_nrf52.h b/src/platforms/arm/nrf52/led_sysdefs_arm_nrf52.h
similarity index 100%
rename from platforms/arm/nrf52/led_sysdefs_arm_nrf52.h
rename to src/platforms/arm/nrf52/led_sysdefs_arm_nrf52.h
diff --git a/platforms/arm/sam/clockless_arm_sam.h b/src/platforms/arm/sam/clockless_arm_sam.h
similarity index 100%
rename from platforms/arm/sam/clockless_arm_sam.h
rename to src/platforms/arm/sam/clockless_arm_sam.h
diff --git a/platforms/arm/sam/clockless_block_arm_sam.h b/src/platforms/arm/sam/clockless_block_arm_sam.h
similarity index 100%
rename from platforms/arm/sam/clockless_block_arm_sam.h
rename to src/platforms/arm/sam/clockless_block_arm_sam.h
diff --git a/platforms/arm/sam/fastled_arm_sam.h b/src/platforms/arm/sam/fastled_arm_sam.h
similarity index 100%
rename from platforms/arm/sam/fastled_arm_sam.h
rename to src/platforms/arm/sam/fastled_arm_sam.h
diff --git a/platforms/arm/sam/fastpin_arm_sam.h b/src/platforms/arm/sam/fastpin_arm_sam.h
similarity index 100%
rename from platforms/arm/sam/fastpin_arm_sam.h
rename to src/platforms/arm/sam/fastpin_arm_sam.h
diff --git a/platforms/arm/sam/fastspi_arm_sam.h b/src/platforms/arm/sam/fastspi_arm_sam.h
similarity index 100%
rename from platforms/arm/sam/fastspi_arm_sam.h
rename to src/platforms/arm/sam/fastspi_arm_sam.h
diff --git a/platforms/arm/sam/led_sysdefs_arm_sam.h b/src/platforms/arm/sam/led_sysdefs_arm_sam.h
similarity index 100%
rename from platforms/arm/sam/led_sysdefs_arm_sam.h
rename to src/platforms/arm/sam/led_sysdefs_arm_sam.h
diff --git a/platforms/arm/stm32/clockless_arm_stm32.h b/src/platforms/arm/stm32/clockless_arm_stm32.h
similarity index 100%
rename from platforms/arm/stm32/clockless_arm_stm32.h
rename to src/platforms/arm/stm32/clockless_arm_stm32.h
diff --git a/platforms/arm/stm32/cm3_regs.h b/src/platforms/arm/stm32/cm3_regs.h
similarity index 100%
rename from platforms/arm/stm32/cm3_regs.h
rename to src/platforms/arm/stm32/cm3_regs.h
diff --git a/platforms/arm/stm32/fastled_arm_stm32.h b/src/platforms/arm/stm32/fastled_arm_stm32.h
similarity index 100%
rename from platforms/arm/stm32/fastled_arm_stm32.h
rename to src/platforms/arm/stm32/fastled_arm_stm32.h
diff --git a/platforms/arm/stm32/fastpin_arm_stm32.h b/src/platforms/arm/stm32/fastpin_arm_stm32.h
similarity index 100%
rename from platforms/arm/stm32/fastpin_arm_stm32.h
rename to src/platforms/arm/stm32/fastpin_arm_stm32.h
diff --git a/platforms/arm/stm32/led_sysdefs_arm_stm32.h b/src/platforms/arm/stm32/led_sysdefs_arm_stm32.h
similarity index 100%
rename from platforms/arm/stm32/led_sysdefs_arm_stm32.h
rename to src/platforms/arm/stm32/led_sysdefs_arm_stm32.h
diff --git a/platforms/avr/clockless_trinket.h b/src/platforms/avr/clockless_trinket.h
similarity index 100%
rename from platforms/avr/clockless_trinket.h
rename to src/platforms/avr/clockless_trinket.h
diff --git a/platforms/avr/fastled_avr.h b/src/platforms/avr/fastled_avr.h
similarity index 100%
rename from platforms/avr/fastled_avr.h
rename to src/platforms/avr/fastled_avr.h
diff --git a/platforms/avr/fastpin_avr.h b/src/platforms/avr/fastpin_avr.h
similarity index 100%
rename from platforms/avr/fastpin_avr.h
rename to src/platforms/avr/fastpin_avr.h
diff --git a/platforms/avr/fastspi_avr.h b/src/platforms/avr/fastspi_avr.h
similarity index 100%
rename from platforms/avr/fastspi_avr.h
rename to src/platforms/avr/fastspi_avr.h
diff --git a/platforms/avr/led_sysdefs_avr.h b/src/platforms/avr/led_sysdefs_avr.h
similarity index 100%
rename from platforms/avr/led_sysdefs_avr.h
rename to src/platforms/avr/led_sysdefs_avr.h
diff --git a/platforms/esp/32/clockless_block_esp32.h b/src/platforms/esp/32/clockless_block_esp32.h
similarity index 100%
rename from platforms/esp/32/clockless_block_esp32.h
rename to src/platforms/esp/32/clockless_block_esp32.h
diff --git a/platforms/esp/32/clockless_i2s_esp32.h b/src/platforms/esp/32/clockless_i2s_esp32.h
similarity index 100%
rename from platforms/esp/32/clockless_i2s_esp32.h
rename to src/platforms/esp/32/clockless_i2s_esp32.h
diff --git a/platforms/esp/32/clockless_rmt_esp32.cpp b/src/platforms/esp/32/clockless_rmt_esp32.cpp
similarity index 100%
rename from platforms/esp/32/clockless_rmt_esp32.cpp
rename to src/platforms/esp/32/clockless_rmt_esp32.cpp
diff --git a/platforms/esp/32/clockless_rmt_esp32.h b/src/platforms/esp/32/clockless_rmt_esp32.h
similarity index 100%
rename from platforms/esp/32/clockless_rmt_esp32.h
rename to src/platforms/esp/32/clockless_rmt_esp32.h
diff --git a/platforms/esp/32/fastled_esp32.h b/src/platforms/esp/32/fastled_esp32.h
similarity index 100%
rename from platforms/esp/32/fastled_esp32.h
rename to src/platforms/esp/32/fastled_esp32.h
diff --git a/platforms/esp/32/fastpin_esp32.h b/src/platforms/esp/32/fastpin_esp32.h
similarity index 100%
rename from platforms/esp/32/fastpin_esp32.h
rename to src/platforms/esp/32/fastpin_esp32.h
diff --git a/platforms/esp/32/led_sysdefs_esp32.h b/src/platforms/esp/32/led_sysdefs_esp32.h
similarity index 100%
rename from platforms/esp/32/led_sysdefs_esp32.h
rename to src/platforms/esp/32/led_sysdefs_esp32.h
diff --git a/platforms/esp/8266/clockless_block_esp8266.h b/src/platforms/esp/8266/clockless_block_esp8266.h
similarity index 100%
rename from platforms/esp/8266/clockless_block_esp8266.h
rename to src/platforms/esp/8266/clockless_block_esp8266.h
diff --git a/platforms/esp/8266/clockless_esp8266.h b/src/platforms/esp/8266/clockless_esp8266.h
similarity index 100%
rename from platforms/esp/8266/clockless_esp8266.h
rename to src/platforms/esp/8266/clockless_esp8266.h
diff --git a/platforms/esp/8266/fastled_esp8266.h b/src/platforms/esp/8266/fastled_esp8266.h
similarity index 100%
rename from platforms/esp/8266/fastled_esp8266.h
rename to src/platforms/esp/8266/fastled_esp8266.h
diff --git a/platforms/esp/8266/fastpin_esp8266.h b/src/platforms/esp/8266/fastpin_esp8266.h
similarity index 100%
rename from platforms/esp/8266/fastpin_esp8266.h
rename to src/platforms/esp/8266/fastpin_esp8266.h
diff --git a/platforms/esp/8266/led_sysdefs_esp8266.h b/src/platforms/esp/8266/led_sysdefs_esp8266.h
similarity index 100%
rename from platforms/esp/8266/led_sysdefs_esp8266.h
rename to src/platforms/esp/8266/led_sysdefs_esp8266.h
diff --git a/power_mgt.cpp b/src/power_mgt.cpp
similarity index 100%
rename from power_mgt.cpp
rename to src/power_mgt.cpp
diff --git a/power_mgt.h b/src/power_mgt.h
similarity index 100%
rename from power_mgt.h
rename to src/power_mgt.h
diff --git a/wiring.cpp b/src/wiring.cpp
similarity index 100%
rename from wiring.cpp
rename to src/wiring.cpp

From 050c0b3cbc3c99566fdd1a0698cf6c238f8312da Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Sat, 13 Jun 2020 22:46:14 -0400
Subject: [PATCH 169/204] One more crucial change: tell the ESP-IDF that our
 interrupt resides in IRAM and therefore does not need to be disabled during
 flash operations

---
 src/platforms/esp/32/clockless_rmt_esp32.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/platforms/esp/32/clockless_rmt_esp32.cpp b/src/platforms/esp/32/clockless_rmt_esp32.cpp
index db9c334126..a5dde1bee6 100644
--- a/src/platforms/esp/32/clockless_rmt_esp32.cpp
+++ b/src/platforms/esp/32/clockless_rmt_esp32.cpp
@@ -117,7 +117,7 @@ void ESP32RMTController::init()
         //    strips, so it delegates to the refill function for each
         //    specific instantiation of ClocklessController.
         if (gRMT_intr_handle == NULL)
-            esp_intr_alloc(ETS_RMT_INTR_SOURCE, ESP_INTR_FLAG_LEVEL3, interruptHandler, 0, &gRMT_intr_handle);
+            esp_intr_alloc(ETS_RMT_INTR_SOURCE, ESP_INTR_FLAG_IRAM | ESP_INTR_FLAG_LEVEL3, interruptHandler, 0, &gRMT_intr_handle);
     }
 
     gInitialized = true;

From f5fff3db79a276111085f7316f347416327a54b7 Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Mon, 15 Jun 2020 22:02:38 -0400
Subject: [PATCH 170/204] Update the config file for PlatformIO, so it finds
 the new cpp file in platforms/esp/32

---
 library.json | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/library.json b/library.json
index bcb24070f2..7b1dd4710e 100644
--- a/library.json
+++ b/library.json
@@ -48,7 +48,8 @@
         "srcFilter": [
             "+<*.c>",
             "+<*.cpp>",
-            "+<*.h>"
+            "+<*.h>",
+            "+<platforms/esp/32/*.cpp>"
         ],
         "libArchive": false
     }

From fbd9dea985bc402bafc923c1d404c22c75684125 Mon Sep 17 00:00:00 2001
From: ngyl88 <ng_yl88@hotmail.com>
Date: Sat, 11 Jul 2020 13:08:36 +0800
Subject: [PATCH 171/204] Put back the logic, explicitly intended for AVR,
 that silences compiler warnings

- See commits
  - 5387b844f44b9d0554b5c374f974f75b33a23eb8
    (un-silenced the warnings without explanation, perhaps while debugging work in progress?)
  - b7e967263943a56b246d2c7b81e85d99a1c94322
    (silences compiler warnings about switch/case that is intended to fall through)
---
 platforms/avr/clockless_trinket.h | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/platforms/avr/clockless_trinket.h b/platforms/avr/clockless_trinket.h
index 824553feff..ae963e223e 100644
--- a/platforms/avr/clockless_trinket.h
+++ b/platforms/avr/clockless_trinket.h
@@ -323,10 +323,10 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 #define DUSE (0xFF - (DADVANCE-1))
 
 // Silence compiler warnings about switch/case that is explicitly intended to fall through.
-//#define FL_FALLTHROUGH __attribute__ ((fallthrough));
+#define FL_FALLTHROUGH __attribute__ ((fallthrough));
 
-// This method is made static to force making register Y available to use for data on AVR - if the method is non-static, then
-// gcc will use register Y for the this pointer.
+	// This method is made static to force making register Y available to use for data on AVR - if the method is non-static, then
+	// gcc will use register Y for the this pointer.
 	static void /*__attribute__((optimize("O0")))*/  /*__attribute__ ((always_inline))*/  showRGBInternal(PixelController<RGB_ORDER> & pixels)  {
 		uint8_t *data = (uint8_t*)pixels.mData;
 		data_ptr_t port = FastPin<DATA_PIN>::port();
@@ -406,9 +406,9 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 				HI1 _D1(1) QLO2(b0, 1) RORSC14(b1,7) 	_D2(4)	LO1 RORCLC2(b1) 	_D3(2)
 				HI1 _D1(1) QLO2(b0, 0)
 				switch(XTRA0) {
-					case 4: _D2(0) LO1 _D3(0) HI1 _D1(1) QLO2(b0,0)  /* fall through */
-					case 3: _D2(0) LO1 _D3(0) HI1 _D1(1) QLO2(b0,0)  /* fall through */
-					case 2: _D2(0) LO1 _D3(0) HI1 _D1(1) QLO2(b0,0)  /* fall through */
+					case 4: _D2(0) LO1 _D3(0) HI1 _D1(1) QLO2(b0,0)  FL_FALLTHROUGH
+					case 3: _D2(0) LO1 _D3(0) HI1 _D1(1) QLO2(b0,0)  FL_FALLTHROUGH
+					case 2: _D2(0) LO1 _D3(0) HI1 _D1(1) QLO2(b0,0)  FL_FALLTHROUGH
 					case 1: _D2(0) LO1 _D3(0) HI1 _D1(1) QLO2(b0,0)
 				}
 				MOV_ADDDE14(b0,b1,d1,e1) _D2(4) LO1 _D3(0)
@@ -422,9 +422,9 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 				HI1 _D1(1) QLO2(b0, 1) RORSC24(b1,7) 	_D2(4)	LO1 RORCLC2(b1) 	_D3(2)
 				HI1 _D1(1) QLO2(b0, 0)
 				switch(XTRA0) {
-					case 4: _D2(0) LO1 _D3(0) HI1 _D1(1) QLO2(b0,0)  /* fall through */
-					case 3: _D2(0) LO1 _D3(0) HI1 _D1(1) QLO2(b0,0)  /* fall through */
-					case 2: _D2(0) LO1 _D3(0) HI1 _D1(1) QLO2(b0,0)  /* fall through */
+					case 4: _D2(0) LO1 _D3(0) HI1 _D1(1) QLO2(b0,0)  FL_FALLTHROUGH
+					case 3: _D2(0) LO1 _D3(0) HI1 _D1(1) QLO2(b0,0)  FL_FALLTHROUGH
+					case 2: _D2(0) LO1 _D3(0) HI1 _D1(1) QLO2(b0,0)  FL_FALLTHROUGH
 					case 1: _D2(0) LO1 _D3(0) HI1 _D1(1) QLO2(b0,0)
 				}
 
@@ -441,9 +441,9 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 				HI1 _D1(1) QLO2(b0, 1) RORSC04(b1,7) 	_D2(4)	LO1 RORCLC2(b1) 	_D3(2)
 				HI1 _D1(1) QLO2(b0, 0)
 				switch(XTRA0) {
-					case 4: _D2(0) LO1 _D3(0) HI1 _D1(1) QLO2(b0,0)  /* fall through */
-					case 3: _D2(0) LO1 _D3(0) HI1 _D1(1) QLO2(b0,0)  /* fall through */
-					case 2: _D2(0) LO1 _D3(0) HI1 _D1(1) QLO2(b0,0)  /* fall through */
+					case 4: _D2(0) LO1 _D3(0) HI1 _D1(1) QLO2(b0,0)  FL_FALLTHROUGH
+					case 3: _D2(0) LO1 _D3(0) HI1 _D1(1) QLO2(b0,0)  FL_FALLTHROUGH
+					case 2: _D2(0) LO1 _D3(0) HI1 _D1(1) QLO2(b0,0)  FL_FALLTHROUGH
 					case 1: _D2(0) LO1 _D3(0) HI1 _D1(1) QLO2(b0,0)
 				}
 				MOV_ADDDE04(b0,b1,d0,e0) _D2(4) LO1 _D3(5)

From 46c51f13909a05df76330a7b9db3084df1359e2d Mon Sep 17 00:00:00 2001
From: ngyl88 <ng_yl88@hotmail.com>
Date: Sat, 11 Jul 2020 12:49:51 +0800
Subject: [PATCH 172/204] Merge changes from issue #716 for ATmega4809

- Credit to @Jueff
  (https://github.com/FastLED/FastLED/issues/716#issuecomment-620912730)
  - `fastpin_avr.h`: SPI support is commented out, due to no test case
    - SPI_DATA, SPI_CLOCK, SPI_SELECT pins in the comments are updated
      according to
      (https://github.com/FastLED/FastLED/issues/716#issuecomment-620720120)
    - Logic flipped for `// Register name equivalent (using flat names)`
      to avoid changes in `#define _FL_IO` and `#define _FL_DEFPIN` for
      AVR platforms that are not explicitly mentioned
  - changes in `platform.cpp` shifted to `led_sysdefs_avr.h`, absorbing
    @david-nc 's comment
    (https://github.com/FastLED/FastLED/issues/716#issuecomment-647146670)
---
 led_sysdefs.h                     |  2 +-
 platforms/avr/clockless_trinket.h | 10 +++++++---
 platforms/avr/fastpin_avr.h       | 29 +++++++++++++++++++++++++++++
 platforms/avr/led_sysdefs_avr.h   |  5 +++++
 4 files changed, 42 insertions(+), 4 deletions(-)

diff --git a/led_sysdefs.h b/led_sysdefs.h
index 1301a1a034..8ee568bfca 100644
--- a/led_sysdefs.h
+++ b/led_sysdefs.h
@@ -32,7 +32,7 @@
 #include "platforms/esp/8266/led_sysdefs_esp8266.h"
 #elif defined(ESP32)
 #include "platforms/esp/32/led_sysdefs_esp32.h"
-#elif defined(__AVR__)
+#elif defined(__AVR__) || defined(__AVR_ATmega4809__)
 // AVR platforms
 #include "platforms/avr/led_sysdefs_avr.h"
 #elif defined(ARDUINO_ARCH_APOLLO3)
diff --git a/platforms/avr/clockless_trinket.h b/platforms/avr/clockless_trinket.h
index ae963e223e..6a8eb0299e 100644
--- a/platforms/avr/clockless_trinket.h
+++ b/platforms/avr/clockless_trinket.h
@@ -167,6 +167,12 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 	}
 #define USE_ASM_MACROS
 
+#if defined(__AVR_ATmega4809__)
+#define ASM_VAR_PORT "r" (((PORT_t*)FastPin<DATA_PIN>::port())->OUT)
+#else
+#define ASM_VAR_PORT "M" (FastPin<DATA_PIN>::port() - 0x20)
+#endif
+
 // The variables that our various asm statements use.  The same block of variables needs to be declared for
 // all the asm blocks because GCC is pretty stupid and it would clobber variables happily or optimize code away too aggressively
 #define ASM_VARS : /* write variables */				\
@@ -189,13 +195,11 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 				[e0] "r" (e0),							\
 				[e1] "r" (e1),							\
 				[e2] "r" (e2),							\
-				[PORT] "M" (FastPin<DATA_PIN>::port()-0x20),		\
+				[PORT] ASM_VAR_PORT,                    \
 				[O0] "M" (RGB_BYTE0(RGB_ORDER)),		\
 				[O1] "M" (RGB_BYTE1(RGB_ORDER)),		\
 				[O2] "M" (RGB_BYTE2(RGB_ORDER))		\
 				: "cc" /* clobber registers */
-
-
 // Note: the code in the else in HI1/LO1 will be turned into an sts (2 cycle, 2 word) opcode
 // 1 cycle, write hi to the port
 #define HI1 FASTLED_SLOW_CLOCK_ADJUST if((int)(FastPin<DATA_PIN>::port())-0x20 < 64) { asm __volatile__("out %[PORT], %[hi]" ASM_VARS ); } else { *FastPin<DATA_PIN>::port()=hi; }
diff --git a/platforms/avr/fastpin_avr.h b/platforms/avr/fastpin_avr.h
index 956e00a9d5..6589c7c773 100644
--- a/platforms/avr/fastpin_avr.h
+++ b/platforms/avr/fastpin_avr.h
@@ -48,8 +48,17 @@ template<uint8_t PIN, uint8_t _MASK, typename _PORT, typename _DDR, typename _PI
 typedef volatile uint8_t & reg8_t;
 #define _R(T) struct __gen_struct_ ## T
 #define _RD8(T) struct __gen_struct_ ## T { static inline reg8_t r() { return T; }};
+
+// Register name equivalent (using flat names)
+#if defined(AVR_ATtinyxy7) || defined(AVR_ATtinyxy6) || defined(AVR_ATtinyxy4) || defined(AVR_ATtinyxy2) || defined(__AVR_ATmega4809__)
+// ATtiny series 0/1 and ATmega series 0
+#define _FL_IO(L,C) _RD8(PORT ## L ## _DIR); _RD8(PORT ## L ## _OUT); _RD8(PORT ## L ## _IN); _FL_DEFINE_PORT3(L, C, _R(PORT ## L ## _OUT));
+#define _FL_DEFPIN(_PIN, BIT, L) template<> class FastPin<_PIN> : public _AVRPIN<_PIN, 1<<BIT, _R(PORT ## L ## _OUT), _R(PORT ## L ## _DIR), _R(PORT ## L ## _IN)> {};
+#else
+// Others
 #define _FL_IO(L,C) _RD8(DDR ## L); _RD8(PORT ## L); _RD8(PIN ## L); _FL_DEFINE_PORT3(L, C, _R(PORT ## L));
 #define _FL_DEFPIN(_PIN, BIT, L) template<> class FastPin<_PIN> : public _AVRPIN<_PIN, 1<<BIT, _R(PORT ## L), _R(DDR ## L), _R(PIN ## L)> {};
+#endif
 
 // Pre-do all the port definitions
 #ifdef PORTA
@@ -195,6 +204,26 @@ _FL_DEFPIN(16, 2, C); _FL_DEFPIN(17, 3, C); _FL_DEFPIN(18, 4, C); _FL_DEFPIN(19,
 #define SPI_UART0_CLOCK 12
 #endif
 
+#elif defined(__AVR_ATmega4809__)
+
+#define MAX_PIN 21
+_FL_DEFPIN(0, 4, C); _FL_DEFPIN(1, 5, C); _FL_DEFPIN(2, 0, A); _FL_DEFPIN(3, 5, F);
+_FL_DEFPIN(4, 6, C); _FL_DEFPIN(5, 2, B); _FL_DEFPIN(6, 4, F); _FL_DEFPIN(7, 1, A);
+_FL_DEFPIN(8, 3, E); _FL_DEFPIN(9, 0, B); _FL_DEFPIN(10, 1, B); _FL_DEFPIN(11, 0, E);
+_FL_DEFPIN(12, 1, E); _FL_DEFPIN(13, 2, E); _FL_DEFPIN(14, 3, D); _FL_DEFPIN(15, 2, D);
+_FL_DEFPIN(16, 1, D); _FL_DEFPIN(17, 0, D); _FL_DEFPIN(18, 2, A); _FL_DEFPIN(19, 3, A);
+_FL_DEFPIN(20, 4, D); _FL_DEFPIN(21, 5, D);
+
+// To confirm for the SPI interfaces
+//#define SPI_DATA 18
+//#define SPI_CLOCK 13
+//#define SPI_SELECT 19
+//#define AVR_HARDWARE_SPI 1
+#define HAS_HARDWARE_PIN_SUPPORT 1
+
+//#define SPI_UART0_DATA 1
+//#define SPI_UART0_CLOCK 4
+
 #elif defined(__AVR_ATmega328P__) || defined(__AVR_ATmega328PB__) || defined(__AVR_ATmega328__) || defined(__AVR_ATmega168__) || defined(__AVR_ATmega168P__) || defined(__AVR_ATmega8__)
 
 #define MAX_PIN 19
diff --git a/platforms/avr/led_sysdefs_avr.h b/platforms/avr/led_sysdefs_avr.h
index 2d9722d077..5c51833a67 100644
--- a/platforms/avr/led_sysdefs_avr.h
+++ b/platforms/avr/led_sysdefs_avr.h
@@ -52,6 +52,11 @@ extern volatile unsigned long timer0_millis;
 #  endif
 };
 
+// special defs for mega environments
+#if defined(__AVR_ATmega4809__)
+    volatile unsigned long timer0_millis = 0;
+#endif
+
 // special defs for the tiny environments
 #if defined(__AVR_ATmega32U2__) || defined(__AVR_ATmega16U2__) || defined(__AVR_ATmega8U2__) || defined(__AVR_AT90USB162__) || defined(__AVR_ATtiny24__) || defined(__AVR_ATtiny44__) || defined(__AVR_ATtiny84__) || defined(__AVR_ATtiny25__) || defined(__AVR_ATtiny45__) || defined(__AVR_ATtiny85__) || defined(__AVR_ATtiny167__) || defined(__AVR_ATtiny87__) || defined(__AVR_ATtinyX41__) || defined(__AVR_ATtiny841__) || defined(__AVR_ATtiny441__)
 #define LIB8_ATTINY 1

From 9da981f7b3c456455a8b8ca0ba21b683367bf219 Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Sat, 11 Jul 2020 22:56:09 -0400
Subject: [PATCH 173/204] Removed a function call in the interrupt handler to a
 function that is not in IRAM

---
 src/platforms/esp/32/clockless_rmt_esp32.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/platforms/esp/32/clockless_rmt_esp32.cpp b/src/platforms/esp/32/clockless_rmt_esp32.cpp
index a5dde1bee6..3cf77ed960 100644
--- a/src/platforms/esp/32/clockless_rmt_esp32.cpp
+++ b/src/platforms/esp/32/clockless_rmt_esp32.cpp
@@ -235,6 +235,7 @@ void ESP32RMTController::startOnChannel(int channel)
 //    Setting this RMT flag is what actually kicks off the peripheral
 void ESP32RMTController::tx_start()
 {
+    // dev->conf_ch[channel].conf1.tx_start = 1;
     rmt_tx_start(mRMT_channel, true);
 }
 
@@ -250,7 +251,8 @@ void ESP32RMTController::doneOnChannel(rmt_channel_t channel, void * arg)
     portBASE_TYPE HPTaskAwoken = 0;
 
     // -- Turn off output on the pin
-    gpio_matrix_out(pController->mPin, 0x100, 0, 0);
+    // SZG: Do I really need to do this?
+    // gpio_matrix_out(pController->mPin, 0x100, 0, 0);
 
     gOnChannel[channel] = NULL;
     gNumDone++;

From 96608342147167ca45289b96e687ab2d73cbaf1c Mon Sep 17 00:00:00 2001
From: 5chmidti <44101708+5chmidti@users.noreply.github.com>
Date: Thu, 16 Jul 2020 18:11:42 +0200
Subject: [PATCH 174/204] make default constructor default for CRGB and CHSV

---
 pixeltypes.h | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/pixeltypes.h b/pixeltypes.h
index 4abba01ca2..bd98f4c3e8 100644
--- a/pixeltypes.h
+++ b/pixeltypes.h
@@ -51,9 +51,7 @@ struct CHSV {
     }
 
     /// default values are UNITIALIZED
-    inline CHSV() __attribute__((always_inline))
-    {
-    }
+    inline CHSV() __attribute__((always_inline)) = default;
 
     /// allow construction from H, S, V
     inline CHSV( uint8_t ih, uint8_t is, uint8_t iv) __attribute__((always_inline))
@@ -120,9 +118,7 @@ struct CRGB {
     }
 
     // default values are UNINITIALIZED
-	inline CRGB() __attribute__((always_inline))
-    {
-    }
+    inline CRGB() __attribute__((always_inline)) = default;
 
     /// allow construction from R, G, B
     inline CRGB( uint8_t ir, uint8_t ig, uint8_t ib)  __attribute__((always_inline))

From 0a96b861901f45a2a241556007e9f1f4e7b12404 Mon Sep 17 00:00:00 2001
From: Brian Bulkowski <brian@bulkowski.org>
Date: Sat, 18 Jul 2020 19:17:07 -0700
Subject: [PATCH 175/204] Fix zero-size allocation of the RMT pulse buffer

---
 src/platforms/esp/32/clockless_rmt_esp32.cpp | 2 +-
 src/platforms/esp/32/clockless_rmt_esp32.h   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/platforms/esp/32/clockless_rmt_esp32.cpp b/src/platforms/esp/32/clockless_rmt_esp32.cpp
index 3cf77ed960..34395995a2 100644
--- a/src/platforms/esp/32/clockless_rmt_esp32.cpp
+++ b/src/platforms/esp/32/clockless_rmt_esp32.cpp
@@ -366,7 +366,7 @@ void ESP32RMTController::initPulseBuffer(int size_in_bytes)
 {
     if (mBuffer == 0) {
         // -- Each byte has 8 bits, each bit needs a 32-bit RMT item
-        int size = size_in_bytes * 8 * 4;
+        mBufferSize size = size_in_bytes * 8 * 4;
 
         mBuffer = (rmt_item32_t *) calloc( mBufferSize, sizeof(rmt_item32_t));
     }
diff --git a/src/platforms/esp/32/clockless_rmt_esp32.h b/src/platforms/esp/32/clockless_rmt_esp32.h
index 2f02ac8b9e..1900fd756f 100644
--- a/src/platforms/esp/32/clockless_rmt_esp32.h
+++ b/src/platforms/esp/32/clockless_rmt_esp32.h
@@ -204,7 +204,7 @@ class ESP32RMTController
     // -- Buffer to hold all of the pulses. For the version that uses
     //    the RMT driver built into the ESP core.
     rmt_item32_t * mBuffer;
-    uint16_t       mBufferSize;
+    uint16_t       mBufferSize; // bytes
     int            mCurPulse;
 
     // -- Make sure we can't call show() too quickly

From 92a62102ad7ea4a01ad5039cab083627a52e759f Mon Sep 17 00:00:00 2001
From: ngyl88 <ng_yl88@hotmail.com>
Date: Sun, 19 Jul 2020 14:58:39 +0800
Subject: [PATCH 176/204] Remove redundant definition of `timer0_millis`, as
 it is already declared extern

---
 platforms/avr/led_sysdefs_avr.h | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/platforms/avr/led_sysdefs_avr.h b/platforms/avr/led_sysdefs_avr.h
index 5c51833a67..2d9722d077 100644
--- a/platforms/avr/led_sysdefs_avr.h
+++ b/platforms/avr/led_sysdefs_avr.h
@@ -52,11 +52,6 @@ extern volatile unsigned long timer0_millis;
 #  endif
 };
 
-// special defs for mega environments
-#if defined(__AVR_ATmega4809__)
-    volatile unsigned long timer0_millis = 0;
-#endif
-
 // special defs for the tiny environments
 #if defined(__AVR_ATmega32U2__) || defined(__AVR_ATmega16U2__) || defined(__AVR_ATmega8U2__) || defined(__AVR_AT90USB162__) || defined(__AVR_ATtiny24__) || defined(__AVR_ATtiny44__) || defined(__AVR_ATtiny84__) || defined(__AVR_ATtiny25__) || defined(__AVR_ATtiny45__) || defined(__AVR_ATtiny85__) || defined(__AVR_ATtiny167__) || defined(__AVR_ATtiny87__) || defined(__AVR_ATtinyX41__) || defined(__AVR_ATtiny841__) || defined(__AVR_ATtiny441__)
 #define LIB8_ATTINY 1

From 55c0243dba4d6f5a5f341f1cbe54d9c5f832f8c2 Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Wed, 22 Jul 2020 09:29:32 -0400
Subject: [PATCH 177/204] Major change to the strategy: in this version the
 interrupt handler just records which channels need more data and which
 channels are done; the actual work is done outside the handler

---
 src/platforms/esp/32/clockless_rmt_esp32.cpp | 86 ++++++++++++++++----
 1 file changed, 72 insertions(+), 14 deletions(-)

diff --git a/src/platforms/esp/32/clockless_rmt_esp32.cpp b/src/platforms/esp/32/clockless_rmt_esp32.cpp
index 3cf77ed960..da7da09acd 100644
--- a/src/platforms/esp/32/clockless_rmt_esp32.cpp
+++ b/src/platforms/esp/32/clockless_rmt_esp32.cpp
@@ -16,6 +16,12 @@ static ESP32RMTController * gControllers[FASTLED_RMT_MAX_CONTROLLERS];
 //    channel assigned to them.
 static ESP32RMTController * gOnChannel[FASTLED_RMT_MAX_CHANNELS];
 
+// -- Channels that need a buffer refill
+static bool gRefillChannel[FASTLED_RMT_MAX_CHANNELS];
+
+// -- Channels that are done
+static bool gDoneChannel[FASTLED_RMT_MAX_CHANNELS];
+
 static int gNumControllers = 0;
 static int gNumStarted = 0;
 static int gNumDone = 0;
@@ -29,6 +35,12 @@ static xSemaphoreHandle gTX_sem = NULL;
 
 static bool gInitialized = false;
 
+// -- Timing stuff
+static uint32_t gTiming[500];
+static int gTimeIndex;
+static uint32_t gLastTime;
+ 
+
 ESP32RMTController::ESP32RMTController(int DATA_PIN, int T1, int T2, int T3)
     : mPixelData(0), 
       mSize(0), 
@@ -132,6 +144,11 @@ void ESP32RMTController::showPixels()
         ESP32RMTController::init();
         xSemaphoreTake(gTX_sem, portMAX_DELAY);
 
+        for (int i = 0; i < FASTLED_RMT_MAX_CHANNELS; i++) {
+            gRefillChannel[i] = false;
+            gDoneChannel[i] = false;
+        }
+
 #if FASTLED_ESP32_FLASH_LOCK == 1
         // -- Make sure no flash operations happen right now
         spi_flash_op_lock();
@@ -146,6 +163,10 @@ void ESP32RMTController::showPixels()
     if (gNumStarted == gNumControllers) {
         gNext = 0;
 
+#if FASTLED_ESP32_SHOWTIMING == 1
+        gTimeIndex = 0;
+#endif
+
         // -- First, fill all the available channels
         int channel = 0;
         while (channel < FASTLED_RMT_MAX_CHANNELS && gNext < gNumControllers) {
@@ -160,13 +181,32 @@ void ESP32RMTController::showPixels()
         for (int i = 0; i < channel; i++) {
             ESP32RMTController * pController = gControllers[i];
             pController->tx_start();
+#if FASTLED_ESP32_SHOWTIMING == 1
+            gLastTime = __clock_cycles();
+#endif
         }
 
-        // -- Wait here while the rest of the data is sent. The interrupt handler
-        //    will keep refilling the RMT buffers until it is all sent; then it
-        //    gives the semaphore back.
-        xSemaphoreTake(gTX_sem, portMAX_DELAY);
-        xSemaphoreGive(gTX_sem);
+        bool all_done = false;
+        do {
+            xSemaphoreTake(gTX_sem, portMAX_DELAY);
+
+            for (int i = 0; i < FASTLED_RMT_MAX_CHANNELS; i++) {
+                if (gRefillChannel[i]) {
+                    gOnChannel[i]->fillNext();
+                    gRefillChannel[i] = false;
+                }
+
+                if (gDoneChannel[i]) {
+                    doneOnChannel(rmt_channel_t(i), 0);
+                    if (gNumDone == gNumControllers) {
+                        all_done = true;
+                    }
+                    gDoneChannel[i] = false;
+                }
+            }
+
+            xSemaphoreGive(gTX_sem);
+        } while ( ! all_done);
 
         mWait.mark();
 
@@ -180,6 +220,16 @@ void ESP32RMTController::showPixels()
         spi_flash_op_unlock();
 #endif
     }
+
+#if FASTLED_ESP32_SHOWTIMING == 1
+    for (int i = 0; i < gTimeIndex; i++) {
+        if (gTiming[i] > 10000) {
+            Serial.print(i);
+            Serial.print(" ");
+            Serial.println(gTiming[i]);
+        }
+    }
+#endif
 }
 
 // -- Start up the next controller
@@ -248,7 +298,6 @@ void ESP32RMTController::tx_start()
 void ESP32RMTController::doneOnChannel(rmt_channel_t channel, void * arg)
 {
     ESP32RMTController * pController = gOnChannel[channel];
-    portBASE_TYPE HPTaskAwoken = 0;
 
     // -- Turn off output on the pin
     // SZG: Do I really need to do this?
@@ -261,9 +310,6 @@ void ESP32RMTController::doneOnChannel(rmt_channel_t channel, void * arg)
         // -- If this is the last controller, signal that we are all done
         if (FASTLED_RMT_BUILTIN_DRIVER) {
             xSemaphoreGive(gTX_sem);
-        } else {
-            xSemaphoreGiveFromISR(gTX_sem, &HPTaskAwoken);
-            if (HPTaskAwoken == pdTRUE) portYIELD_FROM_ISR();
         }
     } else {
         // -- Otherwise, if there are still controllers waiting, then
@@ -286,6 +332,13 @@ void IRAM_ATTR ESP32RMTController::interruptHandler(void *arg)
     uint32_t intr_st = RMT.int_st.val;
     uint8_t channel;
 
+#if FASTLED_ESP32_SHOWTIMING == 1
+    uint32_t curt = __clock_cycles();
+    gTiming[gTimeIndex++] = curt - gLastTime;
+    gLastTime = curt;
+#endif
+
+    bool stuff_to_do = false;
     for (channel = 0; channel < FASTLED_RMT_MAX_CHANNELS; channel++) {
         int tx_done_bit = channel * 3;
         int tx_next_bit = channel + 24;
@@ -296,19 +349,24 @@ void IRAM_ATTR ESP32RMTController::interruptHandler(void *arg)
             // -- More to send on this channel
             if (intr_st & BIT(tx_next_bit)) {
                 RMT.int_clr.val |= BIT(tx_next_bit);
-                    
-                // -- Refill the half of the buffer that we just finished,
-                //    allowing the other half to proceed.
-                pController->fillNext();
+                gRefillChannel[channel] = true;
+                stuff_to_do = true;
             } else {
                 // -- Transmission is complete on this channel
                 if (intr_st & BIT(tx_done_bit)) {
                     RMT.int_clr.val |= BIT(tx_done_bit);
-                    doneOnChannel(rmt_channel_t(channel), 0);
+                    gDoneChannel[channel] = true;
+                    stuff_to_do = true;
                 }
             }
         }
     }
+
+    if (stuff_to_do) {
+        portBASE_TYPE HPTaskAwoken = 0;
+        xSemaphoreGiveFromISR(gTX_sem, &HPTaskAwoken);
+        if (HPTaskAwoken == pdTRUE) portYIELD_FROM_ISR();
+    }
 }
 
 // -- Fill RMT buffer

From e1d2fae82b8ec0674f1f78048b9b959d5c10c1b0 Mon Sep 17 00:00:00 2001
From: ngyl88 <ng_yl88@hotmail.com>
Date: Sat, 25 Jul 2020 13:35:22 +0800
Subject: [PATCH 178/204] Add pin mappings for Arduino Nano Every

- RX/TX pins are swapped when using the definition for __AVR_ATmega4809__
https://github.com/ngyl88-arduino/FastLED/issues/2
- Pin mappings were taken from the working avrmega branch
https://github.com/FastLED/FastLED/blob/avrmega/platforms/avrmega/fastpin_avrmega.h
---
 platforms/avr/fastpin_avr.h | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/platforms/avr/fastpin_avr.h b/platforms/avr/fastpin_avr.h
index 6589c7c773..10e1d1b9a0 100644
--- a/platforms/avr/fastpin_avr.h
+++ b/platforms/avr/fastpin_avr.h
@@ -204,6 +204,26 @@ _FL_DEFPIN(16, 2, C); _FL_DEFPIN(17, 3, C); _FL_DEFPIN(18, 4, C); _FL_DEFPIN(19,
 #define SPI_UART0_CLOCK 12
 #endif
 
+#elif defined(ARDUINO_AVR_NANO_EVERY)
+
+#define MAX_PIN 22
+_FL_DEFPIN(0, 5, C); _FL_DEFPIN(1, 4, C); _FL_DEFPIN(2, 0, A); _FL_DEFPIN(3, 5, F);
+_FL_DEFPIN(4, 6, C); _FL_DEFPIN(5, 2, B); _FL_DEFPIN(6, 4, F); _FL_DEFPIN(7, 1, A);
+_FL_DEFPIN(8, 3, E); _FL_DEFPIN(9, 0, B); _FL_DEFPIN(10, 1, B); _FL_DEFPIN(11, 0, E);
+_FL_DEFPIN(12, 1, E); _FL_DEFPIN(13, 2, E); _FL_DEFPIN(14, 3, D); _FL_DEFPIN(15, 2, D);
+_FL_DEFPIN(16, 1, D); _FL_DEFPIN(17, 0, D); _FL_DEFPIN(18, 2, A); _FL_DEFPIN(19, 3, A);
+_FL_DEFPIN(20, 4, D); _FL_DEFPIN(21, 5, D); _FL_DEFPIN(22, 2, A);
+
+// To confirm for the SPI interfaces
+//#define SPI_DATA 18
+//#define SPI_CLOCK 13
+//#define SPI_SELECT 19
+//#define AVR_HARDWARE_SPI 1
+#define HAS_HARDWARE_PIN_SUPPORT 1
+
+//#define SPI_UART0_DATA 1
+//#define SPI_UART0_CLOCK 4
+
 #elif defined(__AVR_ATmega4809__)
 
 #define MAX_PIN 21

From d0149ad9e0eec05d684db34334d9457776e7a1ab Mon Sep 17 00:00:00 2001
From: ngyl88 <ng_yl88@hotmail.com>
Date: Sat, 25 Jul 2020 14:08:29 +0800
Subject: [PATCH 179/204] Use `timer_millis` instead of `timer0_millis` for
 ATmega4809

https://github.com/ngyl88-arduino/FastLED/issues/1#issuecomment-663815766
---
 platforms/avr/led_sysdefs_avr.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/platforms/avr/led_sysdefs_avr.h b/platforms/avr/led_sysdefs_avr.h
index 2d9722d077..05d6e5ed56 100644
--- a/platforms/avr/led_sysdefs_avr.h
+++ b/platforms/avr/led_sysdefs_avr.h
@@ -46,6 +46,9 @@ extern volatile unsigned long timer0_millis_count;
 #  elif defined(ATTINY_CORE)
 extern volatile unsigned long millis_timer_millis;
 #    define MS_COUNTER millis_timer_millis
+#  elif defined(__AVR_ATmega4809__)
+extern volatile unsigned long timer_millis;
+#    define MS_COUNTER timer_millis
 #  else
 extern volatile unsigned long timer0_millis;
 #    define MS_COUNTER timer0_millis

From 78b69ef48e216cd4f3d89f2df0cdf901944167a5 Mon Sep 17 00:00:00 2001
From: Oliver <jayzakk@gmail.com>
Date: Sun, 2 Aug 2020 23:20:41 +0200
Subject: [PATCH 180/204] added support for LGT8F arduino nano and pro clones

---
 chipsets.h                        | 2 +-
 platforms/avr/clockless_trinket.h | 4 ++++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/chipsets.h b/chipsets.h
index 208b37b884..60457254e2 100644
--- a/chipsets.h
+++ b/chipsets.h
@@ -454,7 +454,7 @@ class SM16716Controller : public CPixelLEDController<RGB_ORDER> {
 
 // We want to force all avr's to use the Trinket controller when running at 8Mhz, because even the 328's at 8Mhz
 // need the more tightly defined timeframes.
-#if (CLOCKLESS_FREQUENCY == 8000000 || CLOCKLESS_FREQUENCY == 16000000 || CLOCKLESS_FREQUENCY == 24000000) //  || CLOCKLESS_FREQUENCY == 48000000 || CLOCKLESS_FREQUENCY == 96000000) // 125ns/clock
+#if defined(__LGT8F__) || (CLOCKLESS_FREQUENCY == 8000000 || CLOCKLESS_FREQUENCY == 16000000 || CLOCKLESS_FREQUENCY == 24000000) //  || CLOCKLESS_FREQUENCY == 48000000 || CLOCKLESS_FREQUENCY == 96000000) // 125ns/clock
 #define FMUL (CLOCKLESS_FREQUENCY/8000000)
 
 // GE8822
diff --git a/platforms/avr/clockless_trinket.h b/platforms/avr/clockless_trinket.h
index b6ff96b936..6a019cd46c 100644
--- a/platforms/avr/clockless_trinket.h
+++ b/platforms/avr/clockless_trinket.h
@@ -49,7 +49,11 @@ template<> __attribute__((always_inline)) inline void _dc<-2>(register uint8_t &
 template<> __attribute__((always_inline)) inline void _dc<-1>(register uint8_t & ) {}
 template<> __attribute__((always_inline)) inline void _dc< 0>(register uint8_t & ) {}
 template<> __attribute__((always_inline)) inline void _dc< 1>(register uint8_t & ) {asm __volatile__("mov r0,r0":::);}
+#if defined(__LGT8F__) 
+template<> __attribute__((always_inline)) inline void _dc< 2>(register uint8_t & loopvar) { _dc<1>(loopvar); _dc<1>(loopvar); }
+#else
 template<> __attribute__((always_inline)) inline void _dc< 2>(register uint8_t & ) {asm __volatile__("rjmp .+0":::);}
+#endif
 template<> __attribute__((always_inline)) inline void _dc< 3>(register uint8_t & loopvar) { _dc<2>(loopvar); _dc<1>(loopvar); }
 template<> __attribute__((always_inline)) inline void _dc< 4>(register uint8_t & loopvar) { _dc<2>(loopvar); _dc<2>(loopvar); }
 template<> __attribute__((always_inline)) inline void _dc< 5>(register uint8_t & loopvar) { _dc<2>(loopvar); _dc<3>(loopvar); }

From 8c8fc6090e07cec2f6faafabc197f4b94cf0fb7e Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Fri, 14 Aug 2020 22:10:20 -0400
Subject: [PATCH 181/204] More performance tweaks to the ESP32 driver. Getting
 the synchronization right and making sure the interrupt handler is faster

---
 .gitignore                                   |   1 +
 src/platforms/esp/32/clockless_rmt_esp32.cpp | 179 +++++++++----------
 src/platforms/esp/32/clockless_rmt_esp32.h   |  54 ++++--
 3 files changed, 126 insertions(+), 108 deletions(-)

diff --git a/.gitignore b/.gitignore
index 60b7a717bd..1f554d4581 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,3 @@
 html/
 *.gch
+*~
diff --git a/src/platforms/esp/32/clockless_rmt_esp32.cpp b/src/platforms/esp/32/clockless_rmt_esp32.cpp
index da7da09acd..20d2b217ca 100644
--- a/src/platforms/esp/32/clockless_rmt_esp32.cpp
+++ b/src/platforms/esp/32/clockless_rmt_esp32.cpp
@@ -16,12 +16,6 @@ static ESP32RMTController * gControllers[FASTLED_RMT_MAX_CONTROLLERS];
 //    channel assigned to them.
 static ESP32RMTController * gOnChannel[FASTLED_RMT_MAX_CHANNELS];
 
-// -- Channels that need a buffer refill
-static bool gRefillChannel[FASTLED_RMT_MAX_CHANNELS];
-
-// -- Channels that are done
-static bool gDoneChannel[FASTLED_RMT_MAX_CHANNELS];
-
 static int gNumControllers = 0;
 static int gNumStarted = 0;
 static int gNumDone = 0;
@@ -33,13 +27,17 @@ static intr_handle_t gRMT_intr_handle = NULL;
 //    Semaphore is not given until all data has been sent
 static xSemaphoreHandle gTX_sem = NULL;
 
+// -- Make sure we can't call show() too quickly
+CMinWait<50>   gWait;
+
 static bool gInitialized = false;
 
-// -- Timing stuff
-static uint32_t gTiming[500];
-static int gTimeIndex;
-static uint32_t gLastTime;
- 
+// -- SZG: For debugging purposes
+#if FASTLED_ESP32_SHOWTIMING == 1
+static uint32_t gLastFill[8];
+static int gTooSlow[8];
+static uint32_t gTotalTime[8];
+#endif
 
 ESP32RMTController::ESP32RMTController(int DATA_PIN, int T1, int T2, int T3)
     : mPixelData(0), 
@@ -72,12 +70,14 @@ ESP32RMTController::ESP32RMTController(int DATA_PIN, int T1, int T2, int T3)
     mPin = gpio_num_t(DATA_PIN);
 }
 
-// -- Getters and setters for use in ClocklessController
-uint8_t * ESP32RMTController::getPixelData(int size_in_bytes)
+// -- Get or create the buffer for the pixel data
+//    We can't allocate it ahead of time because we don't have
+//    the PixelController object until show is called.
+uint32_t * ESP32RMTController::getPixelBuffer(int size_in_bytes)
 {
     if (mPixelData == 0) {
-        mSize = size_in_bytes;
-        mPixelData = (uint8_t *) calloc( mSize, sizeof(uint8_t));
+        mSize = ((size_in_bytes-1) / sizeof(uint32_t)) + 1;
+        mPixelData = (uint32_t *) calloc( mSize, sizeof(uint32_t));
     }
     return mPixelData;
 }
@@ -110,9 +110,9 @@ void ESP32RMTController::init()
         if (FASTLED_RMT_BUILTIN_DRIVER) {
             rmt_driver_install(rmt_channel_t(i), 0, 0);
         } else {
-            // -- Set up the RMT to send 1 pixel of the pulse buffer and then
+            // -- Set up the RMT to send 32 bits of the pulse buffer and then
             //    generate an interrupt. When we get this interrupt we
-            //    fill the other part in preparation (kind of like double-buffering)
+            //    fill the other part in preparation (like double-buffering)
             rmt_set_tx_thr_intr_en(rmt_channel_t(i), true, PULSES_PER_FILL);
         }
     }
@@ -142,12 +142,6 @@ void ESP32RMTController::showPixels()
     if (gNumStarted == 0) {
         // -- First controller: make sure everything is set up
         ESP32RMTController::init();
-        xSemaphoreTake(gTX_sem, portMAX_DELAY);
-
-        for (int i = 0; i < FASTLED_RMT_MAX_CHANNELS; i++) {
-            gRefillChannel[i] = false;
-            gDoneChannel[i] = false;
-        }
 
 #if FASTLED_ESP32_FLASH_LOCK == 1
         // -- Make sure no flash operations happen right now
@@ -163,9 +157,8 @@ void ESP32RMTController::showPixels()
     if (gNumStarted == gNumControllers) {
         gNext = 0;
 
-#if FASTLED_ESP32_SHOWTIMING == 1
-        gTimeIndex = 0;
-#endif
+        // -- This Take always succeeds immediately
+        xSemaphoreTake(gTX_sem, portMAX_DELAY);
 
         // -- First, fill all the available channels
         int channel = 0;
@@ -175,40 +168,25 @@ void ESP32RMTController::showPixels()
         }
 
         // -- Make sure it's been at least 50us since last show
-        mWait.wait();
+        gWait.wait();
 
         // -- Start them all
+        /* This turns out to be a bad idea. We don't want all of the interrupts
+           coming in at the same time.
         for (int i = 0; i < channel; i++) {
             ESP32RMTController * pController = gControllers[i];
             pController->tx_start();
-#if FASTLED_ESP32_SHOWTIMING == 1
-            gLastTime = __clock_cycles();
-#endif
         }
+        */
 
-        bool all_done = false;
-        do {
-            xSemaphoreTake(gTX_sem, portMAX_DELAY);
-
-            for (int i = 0; i < FASTLED_RMT_MAX_CHANNELS; i++) {
-                if (gRefillChannel[i]) {
-                    gOnChannel[i]->fillNext();
-                    gRefillChannel[i] = false;
-                }
-
-                if (gDoneChannel[i]) {
-                    doneOnChannel(rmt_channel_t(i), 0);
-                    if (gNumDone == gNumControllers) {
-                        all_done = true;
-                    }
-                    gDoneChannel[i] = false;
-                }
-            }
-
-            xSemaphoreGive(gTX_sem);
-        } while ( ! all_done);
+        // -- Wait here while the data is sent. The interrupt handler
+        //    will keep refilling the RMT buffers until it is all
+        //    done; then it gives the semaphore back.
+        xSemaphoreTake(gTX_sem, portMAX_DELAY);
+        xSemaphoreGive(gTX_sem);
 
-        mWait.mark();
+        // -- Make sure we don't call showPixels too quickly
+        gWait.mark();
 
         // -- Reset the counters
         gNumStarted = 0;
@@ -219,17 +197,23 @@ void ESP32RMTController::showPixels()
         // -- Release the lock on flash operations
         spi_flash_op_unlock();
 #endif
-    }
 
 #if FASTLED_ESP32_SHOWTIMING == 1
-    for (int i = 0; i < gTimeIndex; i++) {
-        if (gTiming[i] > 10000) {
-            Serial.print(i);
-            Serial.print(" ");
-            Serial.println(gTiming[i]);
+        // uint32_t expected = (2080000L / (1000000000L/F_CPU));
+        for (int i = 0; i < gNumControllers; i++) {
+            if (gTooSlow[i] > 0) {
+                Serial.print("Channel ");
+                Serial.print(i);
+                Serial.print(" total time ");
+                Serial.print(gTotalTime[i]);
+                Serial.print(" too slow ");
+                Serial.print(gTooSlow[i]);
+                Serial.println();
+            }
         }
-    }
 #endif
+
+    }
 }
 
 // -- Start up the next controller
@@ -267,17 +251,21 @@ void ESP32RMTController::startOnChannel(int channel)
         // -- Use our custom driver to send the data incrementally
 
         // -- Initialize the counters that keep track of where we are in
-        //    the pixel data.
-        mRMT_mem_ptr = & (RMTMEM.chan[mRMT_channel].data32[0].val);
+        //    the pixel data and the RMT buffer
+        mRMT_mem_start = & (RMTMEM.chan[mRMT_channel].data32[0].val);
+        mRMT_mem_ptr = mRMT_mem_start;
         mCur = 0;
         mWhichHalf = 0;
 
-        // -- Store 2 pixels worth of data (two "buffers" full)
+        // -- Fill both halves of the RMT buffer (a totaly of 64 bits of pixel data)
         fillNext();
         fillNext();
 
         // -- Turn on the interrupts
         rmt_set_tx_intr_en(mRMT_channel, true);
+
+        // -- Kick off the transmission
+        tx_start();
     }
 }
 
@@ -285,8 +273,13 @@ void ESP32RMTController::startOnChannel(int channel)
 //    Setting this RMT flag is what actually kicks off the peripheral
 void ESP32RMTController::tx_start()
 {
-    // dev->conf_ch[channel].conf1.tx_start = 1;
     rmt_tx_start(mRMT_channel, true);
+
+#if FASTLED_ESP32_SHOWTIMING == 1
+    gLastFill[mRMT_channel] = __clock_cycles();
+    gTooSlow[mRMT_channel] = 0;
+    gTotalTime[mRMT_channel] = 0;
+#endif
 }
 
 // -- A controller is done 
@@ -310,13 +303,16 @@ void ESP32RMTController::doneOnChannel(rmt_channel_t channel, void * arg)
         // -- If this is the last controller, signal that we are all done
         if (FASTLED_RMT_BUILTIN_DRIVER) {
             xSemaphoreGive(gTX_sem);
+        } else {
+            portBASE_TYPE HPTaskAwoken = 0;
+            xSemaphoreGiveFromISR(gTX_sem, &HPTaskAwoken);
+            if (HPTaskAwoken == pdTRUE) portYIELD_FROM_ISR();
         }
     } else {
         // -- Otherwise, if there are still controllers waiting, then
         //    start the next one on this channel
         if (gNext < gNumControllers) {
             startNext(channel);
-            pController->tx_start();
         }
     }
 }
@@ -327,17 +323,15 @@ void ESP32RMTController::doneOnChannel(rmt_channel_t channel, void * arg)
 //    next half of the RMT buffer with data.
 void IRAM_ATTR ESP32RMTController::interruptHandler(void *arg)
 {
+#if FASTLED_ESP32_SHOWTIMING == 1
+    uint32_t now = __clock_cycles();
+#endif
+
     // -- The basic structure of this code is borrowed from the
     //    interrupt handler in esp-idf/components/driver/rmt.c
     uint32_t intr_st = RMT.int_st.val;
     uint8_t channel;
 
-#if FASTLED_ESP32_SHOWTIMING == 1
-    uint32_t curt = __clock_cycles();
-    gTiming[gTimeIndex++] = curt - gLastTime;
-    gLastTime = curt;
-#endif
-
     bool stuff_to_do = false;
     for (channel = 0; channel < FASTLED_RMT_MAX_CHANNELS; channel++) {
         int tx_done_bit = channel * 3;
@@ -345,28 +339,32 @@ void IRAM_ATTR ESP32RMTController::interruptHandler(void *arg)
 
         ESP32RMTController * pController = gOnChannel[channel];
         if (pController != NULL) {
-
-            // -- More to send on this channel
             if (intr_st & BIT(tx_next_bit)) {
+                // -- More to send on this channel
                 RMT.int_clr.val |= BIT(tx_next_bit);
-                gRefillChannel[channel] = true;
-                stuff_to_do = true;
+                pController->fillNext();
+
+#if FASTLED_ESP32_SHOWTIMING == 1
+                uint32_t delta = (now - gLastFill[channel]);
+                if (delta > C_NS(50500)) {
+                    gTooSlow[channel]++;
+                }
+                gTotalTime[channel] += delta;
+                gLastFill[channel] = now;
+#endif
             } else {
                 // -- Transmission is complete on this channel
                 if (intr_st & BIT(tx_done_bit)) {
                     RMT.int_clr.val |= BIT(tx_done_bit);
-                    gDoneChannel[channel] = true;
-                    stuff_to_do = true;
+#if FASTLED_ESP32_SHOWTIMING == 1
+                    uint32_t delta = (now - gLastFill[channel]);
+                    gTotalTime[channel] += delta;
+#endif
+                    doneOnChannel(rmt_channel_t(channel), 0);
                 }
             }
         }
     }
-
-    if (stuff_to_do) {
-        portBASE_TYPE HPTaskAwoken = 0;
-        xSemaphoreGiveFromISR(gTX_sem, &HPTaskAwoken);
-        if (HPTaskAwoken == pdTRUE) portYIELD_FROM_ISR();
-    }
 }
 
 // -- Fill RMT buffer
@@ -377,18 +375,15 @@ void IRAM_ATTR ESP32RMTController::fillNext()
 {
     if (mCur < mSize) {
         // -- Get the zero and one values into local variables
-        uint32_t one_val = mOne.val;
-        uint32_t zero_val = mZero.val;
-
-        // -- Fill 32 slots in the RMT memory
-        uint8_t a = mPixelData[mCur++];
-        uint8_t b = mPixelData[mCur++];
-        uint8_t c = mPixelData[mCur++];
-        uint8_t d = mPixelData[mCur++];
-        register uint32_t pixeldata = a << 24 | b << 16 | c << 8 | d;
+        register uint32_t one_val = mOne.val;
+        register uint32_t zero_val = mZero.val;
 
         // -- Use locals for speed
         volatile register uint32_t * pItem =  mRMT_mem_ptr;
+
+        // -- Get the next four bytes of pixel data
+        register uint32_t pixeldata = mPixelData[mCur];
+        mCur++;
             
         // Shift bits out, MSB first, setting RMTMEM.chan[n].data32[x] to the 
         // rmt_item32_t value corresponding to the buffered bit value
@@ -402,7 +397,7 @@ void IRAM_ATTR ESP32RMTController::fillNext()
         // -- Flip to the other half, resetting the pointer if necessary
         mWhichHalf++;
         if (mWhichHalf == 2) {
-            pItem = & (RMTMEM.chan[mRMT_channel].data32[0].val);
+            pItem = mRMT_mem_start;
             mWhichHalf = 0;
         }
 
diff --git a/src/platforms/esp/32/clockless_rmt_esp32.h b/src/platforms/esp/32/clockless_rmt_esp32.h
index 2f02ac8b9e..604bd3d184 100644
--- a/src/platforms/esp/32/clockless_rmt_esp32.h
+++ b/src/platforms/esp/32/clockless_rmt_esp32.h
@@ -193,12 +193,13 @@ class ESP32RMTController
     rmt_item32_t   mOne;
 
     // -- Pixel data
-    uint8_t *      mPixelData;
+    uint32_t *     mPixelData;
     int            mSize;
     int            mCur;
 
     // -- RMT memory
     volatile uint32_t * mRMT_mem_ptr;
+    volatile uint32_t * mRMT_mem_start;
     int                 mWhichHalf;
 
     // -- Buffer to hold all of the pulses. For the version that uses
@@ -207,9 +208,6 @@ class ESP32RMTController
     uint16_t       mBufferSize;
     int            mCurPulse;
 
-    // -- Make sure we can't call show() too quickly
-    CMinWait<50>   mWait;
-
 public:
 
     // -- Constructor
@@ -217,8 +215,8 @@ class ESP32RMTController
     //    member variables.
     ESP32RMTController(int DATA_PIN, int T1, int T2, int T3);
 
-    // -- Getters and setters for use in ClocklessController
-    uint8_t * getPixelData(int size_in_bytes);
+    // -- Get or create the pixel data buffer
+    uint32_t * getPixelBuffer(int size_in_bytes);
 
     // -- Initialize RMT subsystem
     //    This only needs to be done once
@@ -303,25 +301,49 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
     //    This method loads all of the pixel data into a separate buffer for use by
     //    by the RMT driver. Copying does two important jobs: it fixes the color
     //    order for the pixels, and it performs the scaling/adjusting ahead of time.
+    //    It also packs the bytes into 32 bit chunks with the right bit order.
     void loadPixelData(PixelController<RGB_ORDER> & pixels)
     {
         // -- Make sure the buffer is allocated
-        int size = pixels.size() * 3;
-        uint8_t * pData = mRMTController.getPixelData(size);
+        int size_in_bytes = pixels.size() * 3;
+        uint32_t * pData = mRMTController.getPixelBuffer(size_in_bytes);
 
         // -- Read out the pixel data using the pixel controller methods that
         //    perform the scaling and adjustments 
         int count = 0;
+        int which = 0;
         while (pixels.has(1)) {
-            *pData++ = pixels.loadAndScale0();
-            *pData++ = pixels.loadAndScale1();
-            *pData++ = pixels.loadAndScale2();
-            pixels.advanceData();
-            pixels.stepDithering();
-            count += 3;
+            // -- Get the next four bytes of data
+            uint8_t four[4] = {0,0,0,0};
+            for (int i = 0; i < 4; i++) {
+                switch (which) {
+                case 0: 
+                    four[i] = pixels.loadAndScale0();
+                    break;
+                case 1:
+                    four[i] = pixels.loadAndScale1();
+                    break;
+                case 2:
+                    four[i] = pixels.loadAndScale2();
+                    pixels.advanceData();
+                    pixels.stepDithering();
+                    break;
+                }
+                // -- Move to the next color
+                which++;
+                if (which > 2) which = 0;
+
+                // -- Stop if there's no more data
+                if ( ! pixels.has(1)) break;
+            }
+
+            // -- Pack the four bytes into a 32-bit value with the right bit order
+            uint8_t a = four[0];
+            uint8_t b = four[1];
+            uint8_t c = four[2];
+            uint8_t d = four[3];
+            pData[count++] = a << 24 | b << 16 | c << 8 | d;
         }
-
-        assert(count == size);
     }
 
     // -- Show pixels

From c8bd7832d030427bb6059a5bcc10ded3b9eeaa79 Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Fri, 21 Aug 2020 22:23:45 -0400
Subject: [PATCH 182/204] Added logic to time the interval between buffer
 fills, and bail out if the interval is more than +50% of what is expected

---
 src/platforms/esp/32/clockless_rmt_esp32.cpp | 25 +++++++++++++++-----
 src/platforms/esp/32/clockless_rmt_esp32.h   |  8 +++++++
 2 files changed, 27 insertions(+), 6 deletions(-)

diff --git a/src/platforms/esp/32/clockless_rmt_esp32.cpp b/src/platforms/esp/32/clockless_rmt_esp32.cpp
index 20d2b217ca..907e37bfb8 100644
--- a/src/platforms/esp/32/clockless_rmt_esp32.cpp
+++ b/src/platforms/esp/32/clockless_rmt_esp32.cpp
@@ -32,9 +32,13 @@ CMinWait<50>   gWait;
 
 static bool gInitialized = false;
 
+// -- Bailout metric
+//    Keep track of how long it's been between buffer fills, and
+//    bail out on showing them if there's a big gap
+static uint32_t gLastFill[8];
+
 // -- SZG: For debugging purposes
 #if FASTLED_ESP32_SHOWTIMING == 1
-static uint32_t gLastFill[8];
 static int gTooSlow[8];
 static uint32_t gTotalTime[8];
 #endif
@@ -67,6 +71,13 @@ ESP32RMTController::ESP32RMTController(int DATA_PIN, int T1, int T2, int T3)
     gControllers[gNumControllers] = this;
     gNumControllers++;
 
+    // -- Expected number of CPU cycles between buffer fills
+    mCyclesPerFill = (T1 + T2 + T3) * PULSES_PER_FILL;
+
+    // -- If there is ever an interval greater than 1.5 times
+    //    the expected time, then bail out.
+    mMaxCyclesPerFill = mCyclesPerFill + mCyclesPerFill/2;
+
     mPin = gpio_num_t(DATA_PIN);
 }
 
@@ -274,9 +285,9 @@ void ESP32RMTController::startOnChannel(int channel)
 void ESP32RMTController::tx_start()
 {
     rmt_tx_start(mRMT_channel, true);
+    gLastFill[mRMT_channel] = __clock_cycles();
 
 #if FASTLED_ESP32_SHOWTIMING == 1
-    gLastFill[mRMT_channel] = __clock_cycles();
     gTooSlow[mRMT_channel] = 0;
     gTotalTime[mRMT_channel] = 0;
 #endif
@@ -323,9 +334,7 @@ void ESP32RMTController::doneOnChannel(rmt_channel_t channel, void * arg)
 //    next half of the RMT buffer with data.
 void IRAM_ATTR ESP32RMTController::interruptHandler(void *arg)
 {
-#if FASTLED_ESP32_SHOWTIMING == 1
     uint32_t now = __clock_cycles();
-#endif
 
     // -- The basic structure of this code is borrowed from the
     //    interrupt handler in esp-idf/components/driver/rmt.c
@@ -342,10 +351,14 @@ void IRAM_ATTR ESP32RMTController::interruptHandler(void *arg)
             if (intr_st & BIT(tx_next_bit)) {
                 // -- More to send on this channel
                 RMT.int_clr.val |= BIT(tx_next_bit);
-                pController->fillNext();
+                uint32_t delta = (now - gLastFill[channel]);
+                if (delta > pController->getMaxCyclesPerFill()) {
+                    doneOnChannel(rmt_channel_t(channel), 0);
+                } else {
+                    pController->fillNext();
+                }
 
 #if FASTLED_ESP32_SHOWTIMING == 1
-                uint32_t delta = (now - gLastFill[channel]);
                 if (delta > C_NS(50500)) {
                     gTooSlow[channel]++;
                 }
diff --git a/src/platforms/esp/32/clockless_rmt_esp32.h b/src/platforms/esp/32/clockless_rmt_esp32.h
index 604bd3d184..e8ee0f9ce2 100644
--- a/src/platforms/esp/32/clockless_rmt_esp32.h
+++ b/src/platforms/esp/32/clockless_rmt_esp32.h
@@ -192,6 +192,11 @@ class ESP32RMTController
     rmt_item32_t   mZero;
     rmt_item32_t   mOne;
 
+    // -- Total expected time to send 32 bits
+    //    Each strip should get an interrupt roughly at this interval
+    uint32_t       mCyclesPerFill;
+    uint32_t       mMaxCyclesPerFill;
+
     // -- Pixel data
     uint32_t *     mPixelData;
     int            mSize;
@@ -215,6 +220,9 @@ class ESP32RMTController
     //    member variables.
     ESP32RMTController(int DATA_PIN, int T1, int T2, int T3);
 
+    // -- Get max cycles per fill
+    uint32_t IRAM_ATTR getMaxCyclesPerFill() const { return mMaxCyclesPerFill; }
+
     // -- Get or create the pixel data buffer
     uint32_t * getPixelBuffer(int size_in_bytes);
 

From 99d8256d27ea61301810adabd7dab88b45cc5352 Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Fri, 21 Aug 2020 23:11:36 -0400
Subject: [PATCH 183/204] Better version (maybe) of the bailout code. This
 version does print a message when it bails, which can occasionally crash the
 processor. Comment it out to get better behavior

---
 src/platforms/esp/32/clockless_rmt_esp32.cpp | 40 ++++++++++++--------
 src/platforms/esp/32/clockless_rmt_esp32.h   |  3 +-
 2 files changed, 27 insertions(+), 16 deletions(-)

diff --git a/src/platforms/esp/32/clockless_rmt_esp32.cpp b/src/platforms/esp/32/clockless_rmt_esp32.cpp
index 907e37bfb8..5424eb4efd 100644
--- a/src/platforms/esp/32/clockless_rmt_esp32.cpp
+++ b/src/platforms/esp/32/clockless_rmt_esp32.cpp
@@ -32,13 +32,9 @@ CMinWait<50>   gWait;
 
 static bool gInitialized = false;
 
-// -- Bailout metric
-//    Keep track of how long it's been between buffer fills, and
-//    bail out on showing them if there's a big gap
-static uint32_t gLastFill[8];
-
 // -- SZG: For debugging purposes
 #if FASTLED_ESP32_SHOWTIMING == 1
+static uint32_t gLastFill[8];
 static int gTooSlow[8];
 static uint32_t gTotalTime[8];
 #endif
@@ -269,8 +265,8 @@ void ESP32RMTController::startOnChannel(int channel)
         mWhichHalf = 0;
 
         // -- Fill both halves of the RMT buffer (a totaly of 64 bits of pixel data)
-        fillNext();
-        fillNext();
+        fillNext(false);
+        fillNext(false);
 
         // -- Turn on the interrupts
         rmt_set_tx_intr_en(mRMT_channel, true);
@@ -285,9 +281,10 @@ void ESP32RMTController::startOnChannel(int channel)
 void ESP32RMTController::tx_start()
 {
     rmt_tx_start(mRMT_channel, true);
-    gLastFill[mRMT_channel] = __clock_cycles();
+    mLastFill = __clock_cycles();
 
 #if FASTLED_ESP32_SHOWTIMING == 1
+    gLastFill[mRMT_channel] = __clock_cycles();
     gTooSlow[mRMT_channel] = 0;
     gTotalTime[mRMT_channel] = 0;
 #endif
@@ -334,7 +331,9 @@ void ESP32RMTController::doneOnChannel(rmt_channel_t channel, void * arg)
 //    next half of the RMT buffer with data.
 void IRAM_ATTR ESP32RMTController::interruptHandler(void *arg)
 {
+#if FASTLED_ESP32_SHOWTIMING == 1
     uint32_t now = __clock_cycles();
+#endif
 
     // -- The basic structure of this code is borrowed from the
     //    interrupt handler in esp-idf/components/driver/rmt.c
@@ -351,14 +350,10 @@ void IRAM_ATTR ESP32RMTController::interruptHandler(void *arg)
             if (intr_st & BIT(tx_next_bit)) {
                 // -- More to send on this channel
                 RMT.int_clr.val |= BIT(tx_next_bit);
-                uint32_t delta = (now - gLastFill[channel]);
-                if (delta > pController->getMaxCyclesPerFill()) {
-                    doneOnChannel(rmt_channel_t(channel), 0);
-                } else {
-                    pController->fillNext();
-                }
+                pController->fillNext(true);
 
 #if FASTLED_ESP32_SHOWTIMING == 1
+                uint32_t delta = (now - gLastFill[channel]);
                 if (delta > C_NS(50500)) {
                     gTooSlow[channel]++;
                 }
@@ -384,8 +379,23 @@ void IRAM_ATTR ESP32RMTController::interruptHandler(void *arg)
 //    Puts 32 bits of pixel data into the next 32 slots in the RMT memory
 //    Each data bit is represented by a 32-bit RMT item that specifies how
 //    long to hold the signal high, followed by how long to hold it low.
-void IRAM_ATTR ESP32RMTController::fillNext()
+void IRAM_ATTR ESP32RMTController::fillNext(bool check_time)
 {
+    uint32_t now = __clock_cycles();
+    if (check_time) {
+        if (now > mLastFill) {
+            uint32_t delta = (now - mLastFill);
+            if (delta > mMaxCyclesPerFill) {
+                Serial.print(delta);
+                Serial.print(" BAIL ");
+                Serial.println(mCur);
+                mCur = mSize;
+                rmt_tx_stop(mRMT_channel);
+            }
+        }
+    }
+    mLastFill = now;
+
     if (mCur < mSize) {
         // -- Get the zero and one values into local variables
         register uint32_t one_val = mOne.val;
diff --git a/src/platforms/esp/32/clockless_rmt_esp32.h b/src/platforms/esp/32/clockless_rmt_esp32.h
index e8ee0f9ce2..f6842278d7 100644
--- a/src/platforms/esp/32/clockless_rmt_esp32.h
+++ b/src/platforms/esp/32/clockless_rmt_esp32.h
@@ -196,6 +196,7 @@ class ESP32RMTController
     //    Each strip should get an interrupt roughly at this interval
     uint32_t       mCyclesPerFill;
     uint32_t       mMaxCyclesPerFill;
+    uint32_t       mLastFill;
 
     // -- Pixel data
     uint32_t *     mPixelData;
@@ -266,7 +267,7 @@ class ESP32RMTController
     //    Puts 32 bits of pixel data into the next 32 slots in the RMT memory
     //    Each data bit is represented by a 32-bit RMT item that specifies how
     //    long to hold the signal high, followed by how long to hold it low.
-    void IRAM_ATTR fillNext();
+    void IRAM_ATTR fillNext(bool check_time);
 
     // -- Init pulse buffer
     //    Set up the buffer that will hold all of the pulse items for this

From f6553c25aa26b5c535b8ffc17ac2a6cb7fe10da3 Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Fri, 21 Aug 2020 23:17:51 -0400
Subject: [PATCH 184/204] Probably better not to print anything

---
 src/platforms/esp/32/clockless_rmt_esp32.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/platforms/esp/32/clockless_rmt_esp32.cpp b/src/platforms/esp/32/clockless_rmt_esp32.cpp
index 5424eb4efd..f974075e9a 100644
--- a/src/platforms/esp/32/clockless_rmt_esp32.cpp
+++ b/src/platforms/esp/32/clockless_rmt_esp32.cpp
@@ -386,11 +386,11 @@ void IRAM_ATTR ESP32RMTController::fillNext(bool check_time)
         if (now > mLastFill) {
             uint32_t delta = (now - mLastFill);
             if (delta > mMaxCyclesPerFill) {
-                Serial.print(delta);
-                Serial.print(" BAIL ");
-                Serial.println(mCur);
+                //Serial.print(delta);
+                //Serial.print(" BAIL ");
+                //Serial.println(mCur);
                 mCur = mSize;
-                rmt_tx_stop(mRMT_channel);
+                //rmt_tx_stop(mRMT_channel);
             }
         }
     }

From 51683f97738ea10e40ab5df0d728da3ee13bb89c Mon Sep 17 00:00:00 2001
From: mwhch <manuel.weiss@protonmail.ch>
Date: Sat, 12 Sep 2020 04:32:16 +0200
Subject: [PATCH 185/204] implemented the option to place the matrix vertically

---
 examples/XYMatrix/XYMatrix.ino | 29 +++++++++++++++++++++--------
 1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/examples/XYMatrix/XYMatrix.ino b/examples/XYMatrix/XYMatrix.ino
index 53c21411f7..010ffe7cf1 100644
--- a/examples/XYMatrix/XYMatrix.ino
+++ b/examples/XYMatrix/XYMatrix.ino
@@ -30,6 +30,7 @@ const uint8_t kMatrixHeight = 16;
 
 // Param for different pixel layouts
 const bool    kMatrixSerpentineLayout = true;
+const bool    kMatrixVertical = false;
 // Set 'kMatrixSerpentineLayout' to false if your pixels are 
 // laid out all running the same way, like this:
 //
@@ -88,17 +89,29 @@ uint16_t XY( uint8_t x, uint8_t y)
   uint16_t i;
   
   if( kMatrixSerpentineLayout == false) {
-    i = (y * kMatrixWidth) + x;
+    if (kMatrixVertical == false) {
+      i = (y * kMatrixWidth) + x;
+    } else {
+      i = kMatrixHeight * (kMatrixWidth - (x+1))+y;
+    }
   }
 
   if( kMatrixSerpentineLayout == true) {
-    if( y & 0x01) {
-      // Odd rows run backwards
-      uint8_t reverseX = (kMatrixWidth - 1) - x;
-      i = (y * kMatrixWidth) + reverseX;
-    } else {
-      // Even rows run forwards
-      i = (y * kMatrixWidth) + x;
+    if (kMatrixVertical == false) {
+      if( y & 0x01) {
+        // Odd rows run backwards
+        uint8_t reverseX = (kMatrixWidth - 1) - x;
+        i = (y * kMatrixWidth) + reverseX;
+      } else {
+        // Even rows run forwards
+        i = (y * kMatrixWidth) + x;
+      }
+    } else { // vertical positioning
+      if ( x & 0x01) {
+        i = kMatrixHeight * (kMatrixWidth - (x+1))+y;
+      } else {
+        i = kMatrixHeight * (kMatrixWidth - x) - (y+1);
+      }
     }
   }
   

From 9a59e75b733bd4294d6c2ed01e6383c02cd772f1 Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Sun, 13 Sep 2020 22:31:33 -0400
Subject: [PATCH 186/204] Fix for WiFi interrupt problems (due to Brian
 Bulkowski): use two RMT memory blocks for each strip (a total of 128 bits),
 allowing the RMT to run longer before needing a refill, which makes it less
 sensitive to interrupts from other sources

---
 src/platforms/esp/32/clockless_rmt_esp32.cpp | 129 ++++++-------------
 src/platforms/esp/32/clockless_rmt_esp32.h   |  23 +++-
 2 files changed, 58 insertions(+), 94 deletions(-)

diff --git a/src/platforms/esp/32/clockless_rmt_esp32.cpp b/src/platforms/esp/32/clockless_rmt_esp32.cpp
index f974075e9a..31188474d7 100644
--- a/src/platforms/esp/32/clockless_rmt_esp32.cpp
+++ b/src/platforms/esp/32/clockless_rmt_esp32.cpp
@@ -32,13 +32,6 @@ CMinWait<50>   gWait;
 
 static bool gInitialized = false;
 
-// -- SZG: For debugging purposes
-#if FASTLED_ESP32_SHOWTIMING == 1
-static uint32_t gLastFill[8];
-static int gTooSlow[8];
-static uint32_t gTotalTime[8];
-#endif
-
 ESP32RMTController::ESP32RMTController(int DATA_PIN, int T1, int T2, int T3)
     : mPixelData(0), 
       mSize(0), 
@@ -103,7 +96,7 @@ void ESP32RMTController::init()
         rmt_tx.channel = rmt_channel_t(i);
         rmt_tx.rmt_mode = RMT_MODE_TX;
         rmt_tx.gpio_num = gpio_num_t(0);  // The particular pin will be assigned later
-        rmt_tx.mem_block_num = 1;
+        rmt_tx.mem_block_num = FASTLED_RMT_MEM_BLOCKS;
         rmt_tx.clk_div = DIVIDER;
         rmt_tx.tx_config.loop_en = false;
         rmt_tx.tx_config.carrier_level = RMT_CARRIER_LEVEL_LOW;
@@ -171,21 +164,14 @@ void ESP32RMTController::showPixels()
         int channel = 0;
         while (channel < FASTLED_RMT_MAX_CHANNELS && gNext < gNumControllers) {
             ESP32RMTController::startNext(channel);
-            channel++;
+            // -- Important: when we use more than one memory block, we need to
+            //    skip the channels that would otherwise overlap in memory.
+            channel += FASTLED_RMT_MEM_BLOCKS;
         }
 
         // -- Make sure it's been at least 50us since last show
         gWait.wait();
 
-        // -- Start them all
-        /* This turns out to be a bad idea. We don't want all of the interrupts
-           coming in at the same time.
-        for (int i = 0; i < channel; i++) {
-            ESP32RMTController * pController = gControllers[i];
-            pController->tx_start();
-        }
-        */
-
         // -- Wait here while the data is sent. The interrupt handler
         //    will keep refilling the RMT buffers until it is all
         //    done; then it gives the semaphore back.
@@ -205,21 +191,6 @@ void ESP32RMTController::showPixels()
         spi_flash_op_unlock();
 #endif
 
-#if FASTLED_ESP32_SHOWTIMING == 1
-        // uint32_t expected = (2080000L / (1000000000L/F_CPU));
-        for (int i = 0; i < gNumControllers; i++) {
-            if (gTooSlow[i] > 0) {
-                Serial.print("Channel ");
-                Serial.print(i);
-                Serial.print(" total time ");
-                Serial.print(gTotalTime[i]);
-                Serial.print(" too slow ");
-                Serial.print(gTooSlow[i]);
-                Serial.println();
-            }
-        }
-#endif
-
     }
 }
 
@@ -282,12 +253,6 @@ void ESP32RMTController::tx_start()
 {
     rmt_tx_start(mRMT_channel, true);
     mLastFill = __clock_cycles();
-
-#if FASTLED_ESP32_SHOWTIMING == 1
-    gLastFill[mRMT_channel] = __clock_cycles();
-    gTooSlow[mRMT_channel] = 0;
-    gTotalTime[mRMT_channel] = 0;
-#endif
 }
 
 // -- A controller is done 
@@ -331,10 +296,6 @@ void ESP32RMTController::doneOnChannel(rmt_channel_t channel, void * arg)
 //    next half of the RMT buffer with data.
 void IRAM_ATTR ESP32RMTController::interruptHandler(void *arg)
 {
-#if FASTLED_ESP32_SHOWTIMING == 1
-    uint32_t now = __clock_cycles();
-#endif
-
     // -- The basic structure of this code is borrowed from the
     //    interrupt handler in esp-idf/components/driver/rmt.c
     uint32_t intr_st = RMT.int_st.val;
@@ -351,23 +312,10 @@ void IRAM_ATTR ESP32RMTController::interruptHandler(void *arg)
                 // -- More to send on this channel
                 RMT.int_clr.val |= BIT(tx_next_bit);
                 pController->fillNext(true);
-
-#if FASTLED_ESP32_SHOWTIMING == 1
-                uint32_t delta = (now - gLastFill[channel]);
-                if (delta > C_NS(50500)) {
-                    gTooSlow[channel]++;
-                }
-                gTotalTime[channel] += delta;
-                gLastFill[channel] = now;
-#endif
             } else {
                 // -- Transmission is complete on this channel
                 if (intr_st & BIT(tx_done_bit)) {
                     RMT.int_clr.val |= BIT(tx_done_bit);
-#if FASTLED_ESP32_SHOWTIMING == 1
-                    uint32_t delta = (now - gLastFill[channel]);
-                    gTotalTime[channel] += delta;
-#endif
                     doneOnChannel(rmt_channel_t(channel), 0);
                 }
             }
@@ -386,52 +334,55 @@ void IRAM_ATTR ESP32RMTController::fillNext(bool check_time)
         if (now > mLastFill) {
             uint32_t delta = (now - mLastFill);
             if (delta > mMaxCyclesPerFill) {
-                //Serial.print(delta);
-                //Serial.print(" BAIL ");
-                //Serial.println(mCur);
+                Serial.print(delta);
+                Serial.print(" BAIL ");
+                Serial.println(mCur);
                 mCur = mSize;
-                //rmt_tx_stop(mRMT_channel);
+                rmt_tx_stop(mRMT_channel);
             }
         }
     }
     mLastFill = now;
 
-    if (mCur < mSize) {
-        // -- Get the zero and one values into local variables
-        register uint32_t one_val = mOne.val;
-        register uint32_t zero_val = mZero.val;
+    // -- Get the zero and one values into local variables
+    register uint32_t one_val = mOne.val;
+    register uint32_t zero_val = mZero.val;
 
-        // -- Use locals for speed
-        volatile register uint32_t * pItem =  mRMT_mem_ptr;
+    // -- Use locals for speed
+    volatile register uint32_t * pItem =  mRMT_mem_ptr;
 
-        // -- Get the next four bytes of pixel data
-        register uint32_t pixeldata = mPixelData[mCur];
-        mCur++;
-            
-        // Shift bits out, MSB first, setting RMTMEM.chan[n].data32[x] to the 
-        // rmt_item32_t value corresponding to the buffered bit value
-        for (register uint32_t j = 0; j < PULSES_PER_FILL; j++) {
-            *pItem++ = (pixeldata & 0x80000000L) ? one_val : zero_val;
-            // Replaces: RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = val;
+    for (register int i = 0; i < PULSES_PER_FILL/32; i++) {
+        if (mCur < mSize) {
 
-            pixeldata <<= 1;
-        }
+            // -- Get the next four bytes of pixel data
+            register uint32_t pixeldata = mPixelData[mCur];
+            mCur++;
+            
+            // Shift bits out, MSB first, setting RMTMEM.chan[n].data32[x] to the 
+            // rmt_item32_t value corresponding to the buffered bit value
+            for (register uint32_t j = 0; j < 32; j++) {
+                *pItem++ = (pixeldata & 0x80000000L) ? one_val : zero_val;
+                // Replaces: RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = val;
 
-        // -- Flip to the other half, resetting the pointer if necessary
-        mWhichHalf++;
-        if (mWhichHalf == 2) {
-            pItem = mRMT_mem_start;
-            mWhichHalf = 0;
+                pixeldata <<= 1;
+            }
+        } else {
+            // -- No more data; signal to the RMT we are done
+            for (uint32_t j = 0; j < 32; j++) {
+                * mRMT_mem_ptr++ = 0;
+            }
         }
+    }
 
-        // -- Store the new pointer back into the object
-        mRMT_mem_ptr = pItem;
-    } else {
-        // -- No more data; signal to the RMT we are done
-        for (uint32_t j = 0; j < PULSES_PER_FILL; j++) {
-            * mRMT_mem_ptr++ = 0;
-        }
+    // -- Flip to the other half, resetting the pointer if necessary
+    mWhichHalf++;
+    if (mWhichHalf == 2) {
+        pItem = mRMT_mem_start;
+        mWhichHalf = 0;
     }
+
+    // -- Store the new pointer back into the object
+    mRMT_mem_ptr = pItem;
 }
 
 // -- Init pulse buffer
diff --git a/src/platforms/esp/32/clockless_rmt_esp32.h b/src/platforms/esp/32/clockless_rmt_esp32.h
index f6842278d7..b4e7be2591 100644
--- a/src/platforms/esp/32/clockless_rmt_esp32.h
+++ b/src/platforms/esp/32/clockless_rmt_esp32.h
@@ -147,9 +147,20 @@ __attribute__ ((always_inline)) inline static uint32_t __clock_cycles() {
 //#endif
 
 // -- Configuration constants
-#define DIVIDER             2 /* 4, 8 still seem to work, but timings become marginal */
-#define MAX_PULSES         64 /* A channel has a 64 "pulse" buffer */
-#define PULSES_PER_FILL    32 /* Half of the channel buffer */
+#define DIVIDER       2 /* 4, 8 still seem to work, but timings become marginal */
+
+// -- RMT memory configuration
+//    By default we use two memory blocks for each RMT channel instead of 1. The
+//    reason is that one memory block is only 64 bits, which causes the refill
+//    interrupt to fire too often. When combined with WiFi, this leads to conflicts
+//    between interrupts and weird flashy effects on the LEDs. Special thanks to
+//    Brian Bulkowski for finding this problem and developing a fix.
+#ifndef FASTLED_RMT_MEM_BLOCKS
+#define FASTLED_RMT_MEM_BLOCKS 2
+#endif
+
+#define MAX_PULSES         (64 * FASTLED_RMT_MEM_BLOCKS) /* One block has a 64 "pulse" buffer */
+#define PULSES_PER_FILL    (MAX_PULSES / 2)              /* Half of the channel buffer */
 
 // -- Convert ESP32 CPU cycles to RMT device cycles, taking into account the divider
 #define F_CPU_RMT                   (  80000000L)
@@ -172,10 +183,10 @@ __attribute__ ((always_inline)) inline static uint32_t __clock_cycles() {
 #define FASTLED_RMT_MAX_CONTROLLERS 32
 #endif
 
-// -- Number of RMT channels to use (up to 8)
+// -- Number of RMT channels to use (up to 8, but 4 by default)
 //    Redefine this value to 1 to force serial output
 #ifndef FASTLED_RMT_MAX_CHANNELS
-#define FASTLED_RMT_MAX_CHANNELS 8
+#define FASTLED_RMT_MAX_CHANNELS (8/FASTLED_RMT_MEM_BLOCKS)
 #endif
 
 class ESP32RMTController
@@ -267,6 +278,8 @@ class ESP32RMTController
     //    Puts 32 bits of pixel data into the next 32 slots in the RMT memory
     //    Each data bit is represented by a 32-bit RMT item that specifies how
     //    long to hold the signal high, followed by how long to hold it low.
+    //    NOTE: Now the default is to use 128-bit buffers, so half a buffer is
+    //          is 64 bits. See FASTLED_RMT_MEM_BLOCKS
     void IRAM_ATTR fillNext(bool check_time);
 
     // -- Init pulse buffer

From aa7577446c6bcd8777e021c0c853180b8f24babb Mon Sep 17 00:00:00 2001
From: Skyler Fly-Wilson <magic@skylerfly.com>
Date: Sat, 26 Sep 2020 20:24:50 -0600
Subject: [PATCH 187/204] Update fastpin_apollo3.h

Add support for Artemis Dev Kit
Still only supporting the Artemis Arduino core v1.2.1
---
 platforms/apollo3/fastpin_apollo3.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/platforms/apollo3/fastpin_apollo3.h b/platforms/apollo3/fastpin_apollo3.h
index 28e5b9670d..6d0f1e60be 100644
--- a/platforms/apollo3/fastpin_apollo3.h
+++ b/platforms/apollo3/fastpin_apollo3.h
@@ -125,6 +125,21 @@ _FL_DEFPIN(45, 45); _FL_DEFPIN(47, 47); _FL_DEFPIN(48, 48); _FL_DEFPIN(49, 49);
 
 #define HAS_HARDWARE_PIN_SUPPORT 1
 
+#elif defined(ARDUINO_AM_AP3_SFE_ARTEMIS_DK)
+
+#define MAX_PIN 49
+_FL_DEFPIN(0, 0); _FL_DEFPIN(1, 1); _FL_DEFPIN(2, 2); _FL_DEFPIN(3, 3); _FL_DEFPIN(4, 4);
+_FL_DEFPIN(5, 5); _FL_DEFPIN(6, 6); _FL_DEFPIN(7, 7); _FL_DEFPIN(8, 8); _FL_DEFPIN(9, 9);
+_FL_DEFPIN(10, 10); _FL_DEFPIN(11, 11); _FL_DEFPIN(12, 12); _FL_DEFPIN(13, 13); _FL_DEFPIN(14, 14);
+_FL_DEFPIN(15, 15); _FL_DEFPIN(16, 16); _FL_DEFPIN(17, 17); _FL_DEFPIN(18, 18); _FL_DEFPIN(19, 19);
+_FL_DEFPIN(20, 20); _FL_DEFPIN(21, 21); _FL_DEFPIN(22, 22); _FL_DEFPIN(23, 23); _FL_DEFPIN(24, 24);
+_FL_DEFPIN(25, 25); _FL_DEFPIN(26, 26); _FL_DEFPIN(27, 27); _FL_DEFPIN(28, 28); _FL_DEFPIN(29, 29);
+_FL_DEFPIN(31, 31); _FL_DEFPIN(32, 32); _FL_DEFPIN(33, 33); _FL_DEFPIN(34, 34);
+_FL_DEFPIN(35, 35); _FL_DEFPIN(36, 36); _FL_DEFPIN(37, 37); _FL_DEFPIN(38, 38); _FL_DEFPIN(39, 39);
+_FL_DEFPIN(40, 40); _FL_DEFPIN(41, 41); _FL_DEFPIN(42, 42); _FL_DEFPIN(43, 43); _FL_DEFPIN(44, 44);
+_FL_DEFPIN(45, 45); _FL_DEFPIN(47, 47); _FL_DEFPIN(48, 48); _FL_DEFPIN(49, 49);
+#define HAS_HARDWARE_PIN_SUPPORT 1
+
 #else
 
 #error "Unrecognised APOLLO3 board!"

From 5619c993537c17acb4acf17ce5b0f93efeed9bef Mon Sep 17 00:00:00 2001
From: Wirehead <wh-github@wirewd.com>
Date: Sun, 27 Sep 2020 18:18:43 -0700
Subject: [PATCH 188/204] Add support for non-P ATMega1284 device

The only difference between an ATMega1284P and a regular
ATMega1284 is that the P is the picopower version that
uses less power, so all one needs to do is add one more
define and it works.
---
 platforms/avr/fastpin_avr.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/platforms/avr/fastpin_avr.h b/platforms/avr/fastpin_avr.h
index 956e00a9d5..22ee67920d 100644
--- a/platforms/avr/fastpin_avr.h
+++ b/platforms/avr/fastpin_avr.h
@@ -215,7 +215,7 @@ _FL_DEFPIN(16, 2, C); _FL_DEFPIN(17, 3, C); _FL_DEFPIN(18, 4, C); _FL_DEFPIN(19,
 #define SPI_UART0_CLOCK 4
 #endif
 
-#elif defined(__AVR_ATmega1284P__) || defined(__AVR_ATmega644P__) || defined(__AVR_ATmega32__) || defined(__AVR_ATmega16__)
+#elif defined(__AVR_ATmega1284__) || defined(__AVR_ATmega1284P__) || defined(__AVR_ATmega644P__) || defined(__AVR_ATmega32__) || defined(__AVR_ATmega16__)
 
 #define MAX_PIN 31
 _FL_DEFPIN(0, 0, B); _FL_DEFPIN(1, 1, B); _FL_DEFPIN(2, 2, B); _FL_DEFPIN(3, 3, B);

From f3bc5d077c5165b5036167f1f8938e77735f4d2b Mon Sep 17 00:00:00 2001
From: lady ada <limor@ladyada.net>
Date: Mon, 12 Oct 2020 13:11:25 -0400
Subject: [PATCH 189/204] add qt py support!

---
 platforms/arm/d21/fastpin_arm_d21.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/platforms/arm/d21/fastpin_arm_d21.h b/platforms/arm/d21/fastpin_arm_d21.h
index 9f9ef869b4..f3fa79cb60 100644
--- a/platforms/arm/d21/fastpin_arm_d21.h
+++ b/platforms/arm/d21/fastpin_arm_d21.h
@@ -215,6 +215,19 @@ _FL_DEFPIN( 3, 7, 0); _FL_DEFPIN( 4, 6, 0); _FL_DEFPIN( 7, 0, 0); _FL_DEFPIN( 8,
 
 #define HAS_HARDWARE_PIN_SUPPORT 1
 
+#elif defined(ADAFRUIT_QTPY_M0)
+
+#define MAX_PIN 10
+_FL_DEFPIN( 0, 2, 0); _FL_DEFPIN( 1, 3, 0); _FL_DEFPIN( 2, 4, 0); _FL_DEFPIN( 3, 5, 0);
+_FL_DEFPIN( 4, 16, 0); _FL_DEFPIN( 5, 17, 0); _FL_DEFPIN( 6, 6, 0); _FL_DEFPIN( 7, 7, 0);
+_FL_DEFPIN( 8, 11, 0); _FL_DEFPIN( 9, 9, 0); _FL_DEFPIN( 10, 10, 0);
+
+#define SPI_DATA  10
+#define SPI_CLOCK 8
+
+#define HAS_HARDWARE_PIN_SUPPORT 1
+
+
 #elif defined(ADAFRUIT_ITSYBITSY_M0)
 
 #define MAX_PIN 16

From 45845fd5ae74675ec785ad748f43f51784830d8b Mon Sep 17 00:00:00 2001
From: thecthebest <59971979+thecthebest@users.noreply.github.com>
Date: Sat, 17 Oct 2020 20:33:38 +0100
Subject: [PATCH 190/204] fixed the typo

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 0069329eb8..18039087aa 100644
--- a/README.md
+++ b/README.md
@@ -11,7 +11,7 @@ This is a library for easily & efficiently controlling a wide variety of LED chi
 sold by adafruit (Neopixel, DotStar, LPD8806), Sparkfun (WS2801), and aliexpress.  In addition to writing to the
 leds, this library also includes a number of functions for high-performing 8bit math for manipulating
 your RGB values, as well as low level classes for abstracting out access to pins and SPI hardware, while
-still keeping things as fast as possible.  Tested with Arduino up to 1.6.5 from arduino.cc.
+still keeping things as fast as possible. Tested with Arduino up to 1.6.5 from arduino.cc.
 
 Quick note for people installing from GitHub repo zips, rename the folder FastLED before copying it to your Arduino/libraries folder.  Github likes putting -branchname into the name of the folder, which unfortunately, makes Arduino cranky!
 

From 44c6f212e38ddf669b6f6e5ba4bda0db651064a3 Mon Sep 17 00:00:00 2001
From: root <akiraneko@gmail.com>
Date: Mon, 26 Oct 2020 10:13:02 +0900
Subject: [PATCH 191/204] Fix unused variable t1

---
 platforms/esp/32/clockless_rmt_esp32.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/platforms/esp/32/clockless_rmt_esp32.h b/platforms/esp/32/clockless_rmt_esp32.h
index 82a1b3b61a..1b83018f94 100644
--- a/platforms/esp/32/clockless_rmt_esp32.h
+++ b/platforms/esp/32/clockless_rmt_esp32.h
@@ -538,8 +538,6 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
     void IRAM_ATTR fillNext()
     {
         if (mPixels->has(1)) {
-            uint32_t t1 = __clock_cycles();
-            
             uint32_t one_val = mOne.val;
             uint32_t zero_val = mZero.val;
 

From 0ec93f9a7ffdcc4f1c92396e6e24b591d164284d Mon Sep 17 00:00:00 2001
From: arduino12 <arduino12@users.noreply.github.com>
Date: Mon, 26 Oct 2020 12:27:00 +0200
Subject: [PATCH 192/204] Fix FastPin GPIO16 MASK for ESP8266

The GPIO16 mask is 1, not 1<<16.
---
 platforms/esp/8266/fastpin_esp8266.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/platforms/esp/8266/fastpin_esp8266.h b/platforms/esp/8266/fastpin_esp8266.h
index 4d6cbaafc1..f42ec38a17 100644
--- a/platforms/esp/8266/fastpin_esp8266.h
+++ b/platforms/esp/8266/fastpin_esp8266.h
@@ -41,7 +41,7 @@ template<uint8_t PIN, uint32_t MASK> class _ESPPIN {
     inline static bool isset() __attribute__ ((always_inline)) { return (PIN < 16) ? (GPO & MASK) : (GP16O & MASK); }
 };
 
-#define _FL_DEFPIN(PIN, REAL_PIN) template<> class FastPin<PIN> : public _ESPPIN<REAL_PIN, (1<<(REAL_PIN & 0xFF))> {};
+#define _FL_DEFPIN(PIN, REAL_PIN) template<> class FastPin<PIN> : public _ESPPIN<REAL_PIN, (1<<(REAL_PIN & 0x0F))> {};
 
 
 #ifdef FASTLED_ESP8266_RAW_PIN_ORDER

From 438b5c867c585d87efaacc135d8a427a94c57519 Mon Sep 17 00:00:00 2001
From: arduino12 <arduino12@users.noreply.github.com>
Date: Mon, 26 Oct 2020 12:29:17 +0200
Subject: [PATCH 193/204] Improve FastPin GPIO16 speed for ESP8266

GP16O is too slow for clockless LEDs when using |= and &=,
so a direct write to the whole output register does the job!
---
 platforms/esp/8266/fastpin_esp8266.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/platforms/esp/8266/fastpin_esp8266.h b/platforms/esp/8266/fastpin_esp8266.h
index f42ec38a17..d64119f95a 100644
--- a/platforms/esp/8266/fastpin_esp8266.h
+++ b/platforms/esp/8266/fastpin_esp8266.h
@@ -19,8 +19,8 @@ template<uint8_t PIN, uint32_t MASK> class _ESPPIN {
     inline static void setOutput() { pinMode(PIN, OUTPUT); }
     inline static void setInput() { pinMode(PIN, INPUT); }
 
-    inline static void hi() __attribute__ ((always_inline)) { if(PIN < 16) { _GPB._GPOS = MASK; } else { GP16O |= MASK; } }
-    inline static void lo() __attribute__ ((always_inline)) { if(PIN < 16) { _GPB._GPOC = MASK; } else { GP16O &= ~MASK; } }
+    inline static void hi() __attribute__ ((always_inline)) { if(PIN < 16) { _GPB._GPOS = MASK; } else { GP16O = 1; } }
+    inline static void lo() __attribute__ ((always_inline)) { if(PIN < 16) { _GPB._GPOC = MASK; } else { GP16O = 0; } }
     inline static void set(register port_t val) __attribute__ ((always_inline)) { if(PIN < 16) { _GPB._GPO = val; } else { GP16O = val; }}
 
     inline static void strobe() __attribute__ ((always_inline)) { toggle(); toggle(); }

From 2877f927ec76825add987844a12990aba816df86 Mon Sep 17 00:00:00 2001
From: 5chmidti <44101708+5chmidti@users.noreply.github.com>
Date: Sat, 11 Apr 2020 02:37:58 +0200
Subject: [PATCH 194/204] use prefix notation for ++ and -- where possible

---
 FastLED.cpp                                   |   2 +-
 bitswap.h                                     |   4 +-
 colorutils.cpp                                | 106 +--
 colorutils.h                                  | 106 +--
 controller.h                                  |  16 +-
 examples/ColorPalette/ColorPalette.ino        |   4 +-
 hsv2rgb.cpp                                   |   6 +-
 lib8tion.cpp                                  |   4 +-
 lib8tion/trig8.h                              |   8 +-
 noise.cpp                                     |  36 +-
 pixeltypes.h                                  |  20 +-
 platforms.cpp                                 |   8 +-
 platforms/arm/common/m0clockless.h            | 608 +++++++-------
 platforms/arm/d51/clockless_arm_d51.h         |   2 +-
 platforms/arm/k20/clockless_arm_k20.h         |   2 +-
 platforms/arm/k20/clockless_block_arm_k20.h   |  10 +-
 platforms/arm/k20/fastspi_arm_k20.h           |   2 +-
 platforms/arm/k20/octows2811_controller.h     |  82 +-
 platforms/arm/k66/clockless_arm_k66.h         |   2 +-
 platforms/arm/k66/clockless_block_arm_k66.h   |  10 +-
 platforms/arm/k66/fastspi_arm_k66.h           |   2 +-
 .../mxrt1062/block_clockless_arm_mxrt1062.h   |  93 ++-
 .../arm/mxrt1062/clockless_arm_mxrt1062.h     |   2 +-
 platforms/arm/nrf52/clockless_arm_nrf52.h     |  82 +-
 platforms/arm/sam/clockless_arm_sam.h         |   2 +-
 platforms/arm/stm32/clockless_arm_stm32.h     |   2 +-
 platforms/esp/32/clockless_block_esp32.h      |  10 +-
 platforms/esp/32/clockless_esp32.h.orig       | 786 ++++++++++++++++++
 platforms/esp/32/clockless_i2s_esp32.h        |  46 +-
 platforms/esp/32/clockless_rmt_esp32.h        |  38 +-
 platforms/esp/8266/clockless_block_esp8266.h  |  10 +-
 platforms/esp/8266/clockless_esp8266.h        |  62 +-
 power_mgt.cpp                                 |   2 +-
 wiring.cpp                                    | 220 ++---
 34 files changed, 1593 insertions(+), 802 deletions(-)
 create mode 100644 platforms/esp/32/clockless_esp32.h.orig

diff --git a/FastLED.cpp b/FastLED.cpp
index b070e80847..255dcfa3e0 100644
--- a/FastLED.cpp
+++ b/FastLED.cpp
@@ -67,7 +67,7 @@ int CFastLED::count() {
     int x = 0;
 	CLEDController *pCur = CLEDController::head();
 	while( pCur) {
-        x++;
+        ++x;
 		pCur = pCur->next();
 	}
     return x;
diff --git a/bitswap.h b/bitswap.h
index 64fed49e3f..79eec540b2 100644
--- a/bitswap.h
+++ b/bitswap.h
@@ -126,7 +126,7 @@ __attribute__((always_inline)) inline void swapbits8(bitswap_type in, bitswap_ty
   // SWAPSB(b.c,1);
   // SWAPSB(b.d,0);
 
-  for(int i = 0; i < 8; i++) {
+  for(int i = 0; i < 8; ++i) {
     just8bits work;
     work.a3 = in.word[0] >> 31;
     work.a2 = in.word[0] >> 23;
@@ -145,7 +145,7 @@ __attribute__((always_inline)) inline void swapbits8(bitswap_type in, bitswap_ty
 /// Slow version of the 8 byte by 8 bit rotation
 __attribute__((always_inline)) inline void slowswap(unsigned char *A, unsigned char *B) {
 
-  for(int row = 0; row < 7; row++) {
+  for(int row = 0; row < 7; ++row) {
     uint8_t x = A[row];
 
     uint8_t bit = (1<<row);
diff --git a/colorutils.cpp b/colorutils.cpp
index 10d3592455..c40f486058 100644
--- a/colorutils.cpp
+++ b/colorutils.cpp
@@ -13,7 +13,7 @@ FASTLED_NAMESPACE_BEGIN
 void fill_solid( struct CRGB * leds, int numToFill,
                  const struct CRGB& color)
 {
-    for( int i = 0; i < numToFill; i++) {
+    for( int i = 0; i < numToFill; ++i) {
         leds[i] = color;
     }
 }
@@ -21,7 +21,7 @@ void fill_solid( struct CRGB * leds, int numToFill,
 void fill_solid( struct CHSV * targetArray, int numToFill,
                  const struct CHSV& hsvColor)
 {
-    for( int i = 0; i < numToFill; i++) {
+    for( int i = 0; i < numToFill; ++i) {
         targetArray[i] = hsvColor;
     }
 }
@@ -41,7 +41,7 @@ void fill_rainbow( struct CRGB * pFirstLED, int numToFill,
     hsv.hue = initialhue;
     hsv.val = 255;
     hsv.sat = 240;
-    for( int i = 0; i < numToFill; i++) {
+    for( int i = 0; i < numToFill; ++i) {
         pFirstLED[i] = hsv;
         hsv.hue += deltahue;
     }
@@ -55,7 +55,7 @@ void fill_rainbow( struct CHSV * targetArray, int numToFill,
     hsv.hue = initialhue;
     hsv.val = 255;
     hsv.sat = 240;
-    for( int i = 0; i < numToFill; i++) {
+    for( int i = 0; i < numToFill; ++i) {
         targetArray[i] = hsv;
         hsv.hue += deltahue;
     }
@@ -98,7 +98,7 @@ void fill_gradient_RGB( CRGB* leds,
     accum88 r88 = startcolor.r << 8;
     accum88 g88 = startcolor.g << 8;
     accum88 b88 = startcolor.b << 8;
-    for( uint16_t i = startpos; i <= endpos; i++) {
+    for( uint16_t i = startpos; i <= endpos; ++i) {
         leds[i] = CRGB( r88 >> 8, g88 >> 8, b88 >> 8);
         r88 += rdelta87;
         g88 += gdelta87;
@@ -171,7 +171,7 @@ void fill_gradient_RGB( CRGB* leds, uint16_t numLeds, const CRGB& c1, const CRGB
 
 void nscale8_video( CRGB* leds, uint16_t num_leds, uint8_t scale)
 {
-    for( uint16_t i = 0; i < num_leds; i++) {
+    for( uint16_t i = 0; i < num_leds; ++i) {
         leds[i].nscale8_video( scale);
     }
 }
@@ -204,7 +204,7 @@ void nscale8_raw( CRGB* leds, uint16_t num_leds, uint8_t scale)
 
 void nscale8( CRGB* leds, uint16_t num_leds, uint8_t scale)
 {
-    for( uint16_t i = 0; i < num_leds; i++) {
+    for( uint16_t i = 0; i < num_leds; ++i) {
         leds[i].nscale8( scale);
     }
 }
@@ -216,7 +216,7 @@ void fadeUsingColor( CRGB* leds, uint16_t numLeds, const CRGB& colormask)
     fg = colormask.g;
     fb = colormask.b;
 
-    for( uint16_t i = 0; i < numLeds; i++) {
+    for( uint16_t i = 0; i < numLeds; ++i) {
         leds[i].r = scale8_LEAVING_R1_DIRTY( leds[i].r, fr);
         leds[i].g = scale8_LEAVING_R1_DIRTY( leds[i].g, fg);
         leds[i].b = scale8                 ( leds[i].b, fb);
@@ -261,10 +261,10 @@ CRGB& nblend( CRGB& existing, const CRGB& overlay, fract8 amountOfOverlay )
 
 void nblend( CRGB* existing, CRGB* overlay, uint16_t count, fract8 amountOfOverlay)
 {
-    for( uint16_t i = count; i; i--) {
+    for( uint16_t i = count; i; --i) {
         nblend( *existing, *overlay, amountOfOverlay);
-        existing++;
-        overlay++;
+        ++existing;
+        ++overlay;
     }
 }
 
@@ -277,7 +277,7 @@ CRGB blend( const CRGB& p1, const CRGB& p2, fract8 amountOfP2 )
 
 CRGB* blend( const CRGB* src1, const CRGB* src2, CRGB* dest, uint16_t count, fract8 amountOfsrc2 )
 {
-    for( uint16_t i = 0; i < count; i++) {
+    for( uint16_t i = 0; i < count; ++i) {
         dest[i] = blend(src1[i], src2[i], amountOfsrc2);
     }
     return dest;
@@ -338,10 +338,10 @@ CHSV& nblend( CHSV& existing, const CHSV& overlay, fract8 amountOfOverlay, TGrad
 void nblend( CHSV* existing, CHSV* overlay, uint16_t count, fract8 amountOfOverlay, TGradientDirectionCode directionCode )
 {
     if(existing == overlay) return;
-    for( uint16_t i = count; i; i--) {
+    for( uint16_t i = count; i; --i) {
         nblend( *existing, *overlay, amountOfOverlay, directionCode);
-        existing++;
-        overlay++;
+        ++existing;
+        ++overlay;
     }
 }
 
@@ -354,7 +354,7 @@ CHSV blend( const CHSV& p1, const CHSV& p2, fract8 amountOfP2, TGradientDirectio
 
 CHSV* blend( const CHSV* src1, const CHSV* src2, CHSV* dest, uint16_t count, fract8 amountOfsrc2, TGradientDirectionCode directionCode )
 {
-    for( uint16_t i = 0; i < count; i++) {
+    for( uint16_t i = 0; i < count; ++i) {
         dest[i] = blend(src1[i], src2[i], amountOfsrc2, directionCode);
     }
     return dest;
@@ -385,7 +385,7 @@ void blur1d( CRGB* leds, uint16_t numLeds, fract8 blur_amount)
     uint8_t keep = 255 - blur_amount;
     uint8_t seep = blur_amount >> 1;
     CRGB carryover = CRGB::Black;
-    for( uint16_t i = 0; i < numLeds; i++) {
+    for( uint16_t i = 0; i < numLeds; ++i) {
         CRGB cur = leds[i];
         CRGB part = cur;
         part.nscale8( seep);
@@ -406,7 +406,7 @@ void blur2d( CRGB* leds, uint8_t width, uint8_t height, fract8 blur_amount)
 // blurRows: perform a blur1d on every row of a rectangular matrix
 void blurRows( CRGB* leds, uint8_t width, uint8_t height, fract8 blur_amount)
 {
-    for( uint8_t row = 0; row < height; row++) {
+    for( uint8_t row = 0; row < height; ++row) {
         CRGB* rowbase = leds + (row * width);
         blur1d( rowbase, width, blur_amount);
     }
@@ -418,9 +418,9 @@ void blurColumns(CRGB* leds, uint8_t width, uint8_t height, fract8 blur_amount)
     // blur columns
     uint8_t keep = 255 - blur_amount;
     uint8_t seep = blur_amount >> 1;
-    for( uint8_t col = 0; col < width; col++) {
+    for( uint8_t col = 0; col < width; ++col) {
         CRGB carryover = CRGB::Black;
-        for( uint8_t i = 0; i < height; i++) {
+        for( uint8_t i = 0; i < height; ++i) {
             CRGB cur = leds[XY(col,i)];
             CRGB part = cur;
             part.nscale8( seep);
@@ -529,7 +529,7 @@ CRGB ColorFromPalette( const CRGBPalette16& pal, uint8_t index, uint8_t brightne
         if( hi4 == 15 ) {
             entry = &(pal[0]);
         } else {
-            entry++;
+            ++entry;
         }
         
         uint8_t f2 = lo4 << 4;
@@ -556,25 +556,25 @@ CRGB ColorFromPalette( const CRGBPalette16& pal, uint8_t index, uint8_t brightne
     
     if( brightness != 255) {
         if( brightness ) {
-            brightness++; // adjust for rounding
+            ++brightness; // adjust for rounding
             // Now, since brightness is nonzero, we don't need the full scale8_video logic;
             // we can just to scale8 and then add one (unless scale8 fixed) to all nonzero inputs.
             if( red1 )   {
                 red1 = scale8_LEAVING_R1_DIRTY( red1, brightness);
 #if !(FASTLED_SCALE8_FIXED==1)
-                red1++;
+                ++red1;
 #endif
             }
             if( green1 ) {
                 green1 = scale8_LEAVING_R1_DIRTY( green1, brightness);
 #if !(FASTLED_SCALE8_FIXED==1)
-                green1++;
+                ++green1;
 #endif
             }
             if( blue1 )  {
                 blue1 = scale8_LEAVING_R1_DIRTY( blue1, brightness);
 #if !(FASTLED_SCALE8_FIXED==1)
-                blue1++;
+                ++blue1;
 #endif
             }
             cleanup_R1();
@@ -634,25 +634,25 @@ CRGB ColorFromPalette( const TProgmemRGBPalette16& pal, uint8_t index, uint8_t b
 
     if( brightness != 255) {
         if( brightness ) {
-            brightness++; // adjust for rounding
+            ++brightness; // adjust for rounding
             // Now, since brightness is nonzero, we don't need the full scale8_video logic;
             // we can just to scale8 and then add one (unless scale8 fixed) to all nonzero inputs.
             if( red1 )   {
                 red1 = scale8_LEAVING_R1_DIRTY( red1, brightness);
 #if !(FASTLED_SCALE8_FIXED==1)
-                red1++;
+                ++red1;
 #endif
             }
             if( green1 ) {
                 green1 = scale8_LEAVING_R1_DIRTY( green1, brightness);
 #if !(FASTLED_SCALE8_FIXED==1)
-                green1++;
+                ++green1;
 #endif
             }
             if( blue1 )  {
                 blue1 = scale8_LEAVING_R1_DIRTY( blue1, brightness);
 #if !(FASTLED_SCALE8_FIXED==1)
-                blue1++;
+                ++blue1;
 #endif
             }
             cleanup_R1();
@@ -698,7 +698,7 @@ CRGB ColorFromPalette( const CRGBPalette32& pal, uint8_t index, uint8_t brightne
         if( hi5 == 31 ) {
             entry = &(pal[0]);
         } else {
-            entry++;
+            ++entry;
         }
         
         uint8_t f2 = lo3 << 5;
@@ -725,25 +725,25 @@ CRGB ColorFromPalette( const CRGBPalette32& pal, uint8_t index, uint8_t brightne
     
     if( brightness != 255) {
         if( brightness ) {
-            brightness++; // adjust for rounding
+            ++brightness; // adjust for rounding
             // Now, since brightness is nonzero, we don't need the full scale8_video logic;
             // we can just to scale8 and then add one (unless scale8 fixed) to all nonzero inputs.
             if( red1 )   {
                 red1 = scale8_LEAVING_R1_DIRTY( red1, brightness);
 #if !(FASTLED_SCALE8_FIXED==1)
-                red1++;
+                ++red1;
 #endif
             }
             if( green1 ) {
                 green1 = scale8_LEAVING_R1_DIRTY( green1, brightness);
 #if !(FASTLED_SCALE8_FIXED==1)
-                green1++;
+                ++green1;
 #endif
             }
             if( blue1 )  {
                 blue1 = scale8_LEAVING_R1_DIRTY( blue1, brightness);
 #if !(FASTLED_SCALE8_FIXED==1)
-                blue1++;
+                ++blue1;
 #endif
             }
             cleanup_R1();
@@ -809,25 +809,25 @@ CRGB ColorFromPalette( const TProgmemRGBPalette32& pal, uint8_t index, uint8_t b
     
     if( brightness != 255) {
         if( brightness ) {
-            brightness++; // adjust for rounding
+            ++brightness; // adjust for rounding
             // Now, since brightness is nonzero, we don't need the full scale8_video logic;
             // we can just to scale8 and then add one (unless scale8 fixed) to all nonzero inputs.
             if( red1 )   {
                 red1 = scale8_LEAVING_R1_DIRTY( red1, brightness);
 #if !(FASTLED_SCALE8_FIXED==1)
-                red1++;
+                ++red1;
 #endif
             }
             if( green1 ) {
                 green1 = scale8_LEAVING_R1_DIRTY( green1, brightness);
 #if !(FASTLED_SCALE8_FIXED==1)
-                green1++;
+                ++green1;
 #endif
             }
             if( blue1 )  {
                 blue1 = scale8_LEAVING_R1_DIRTY( blue1, brightness);
 #if !(FASTLED_SCALE8_FIXED==1)
-                blue1++;
+                ++blue1;
 #endif
             }
             cleanup_R1();
@@ -852,7 +852,7 @@ CRGB ColorFromPalette( const CRGBPalette256& pal, uint8_t index, uint8_t brightn
     uint8_t blue  = entry->blue;
 
     if( brightness != 255) {
-        brightness++; // adjust for rounding
+        ++brightness; // adjust for rounding
         red   = scale8_video_LEAVING_R1_DIRTY( red,   brightness);
         green = scale8_video_LEAVING_R1_DIRTY( green, brightness);
         blue  = scale8_video_LEAVING_R1_DIRTY( blue,  brightness);
@@ -883,7 +883,7 @@ CHSV ColorFromPalette( const struct CHSVPalette16& pal, uint8_t index, uint8_t b
         if( hi4 == 15 ) {
             entry = &(pal[0]);
         } else {
-            entry++;
+            ++entry;
         }
 
         uint8_t f2 = lo4 << 4;
@@ -973,7 +973,7 @@ CHSV ColorFromPalette( const struct CHSVPalette32& pal, uint8_t index, uint8_t b
         if( hi5 == 31 ) {
             entry = &(pal[0]);
         } else {
-            entry++;
+            ++entry;
         }
         
         uint8_t f2 = lo3 << 5;
@@ -1050,14 +1050,14 @@ CHSV ColorFromPalette( const struct CHSVPalette256& pal, uint8_t index, uint8_t
 
 void UpscalePalette(const struct CRGBPalette16& srcpal16, struct CRGBPalette256& destpal256)
 {
-    for( int i = 0; i < 256; i++) {
+    for( int i = 0; i < 256; ++i) {
         destpal256[(uint8_t)(i)] = ColorFromPalette( srcpal16, i);
     }
 }
 
 void UpscalePalette(const struct CHSVPalette16& srcpal16, struct CHSVPalette256& destpal256)
 {
-    for( int i = 0; i < 256; i++) {
+    for( int i = 0; i < 256; ++i) {
         destpal256[(uint8_t)(i)] = ColorFromPalette( srcpal16, i);
     }
 }
@@ -1065,7 +1065,7 @@ void UpscalePalette(const struct CHSVPalette16& srcpal16, struct CHSVPalette256&
 
 void UpscalePalette(const struct CRGBPalette16& srcpal16, struct CRGBPalette32& destpal32)
 {
-    for( uint8_t i = 0; i < 16; i++) {
+    for( uint8_t i = 0; i < 16; ++i) {
         uint8_t j = i * 2;
         destpal32[j+0] = srcpal16[i];
         destpal32[j+1] = srcpal16[i];
@@ -1074,7 +1074,7 @@ void UpscalePalette(const struct CRGBPalette16& srcpal16, struct CRGBPalette32&
 
 void UpscalePalette(const struct CHSVPalette16& srcpal16, struct CHSVPalette32& destpal32)
 {
-    for( uint8_t i = 0; i < 16; i++) {
+    for( uint8_t i = 0; i < 16; ++i) {
         uint8_t j = i * 2;
         destpal32[j+0] = srcpal16[i];
         destpal32[j+1] = srcpal16[i];
@@ -1083,14 +1083,14 @@ void UpscalePalette(const struct CHSVPalette16& srcpal16, struct CHSVPalette32&
 
 void UpscalePalette(const struct CRGBPalette32& srcpal32, struct CRGBPalette256& destpal256)
 {
-    for( int i = 0; i < 256; i++) {
+    for( int i = 0; i < 256; ++i) {
         destpal256[(uint8_t)(i)] = ColorFromPalette( srcpal32, i);
     }
 }
 
 void UpscalePalette(const struct CHSVPalette32& srcpal32, struct CHSVPalette256& destpal256)
 {
-    for( int i = 0; i < 256; i++) {
+    for( int i = 0; i < 256; ++i) {
         destpal256[(uint8_t)(i)] = ColorFromPalette( srcpal32, i);
     }
 }
@@ -1117,18 +1117,18 @@ void nblendPaletteTowardPalette( CRGBPalette16& current, CRGBPalette16& target,
     p2 = (uint8_t*)target.entries;
 
     const uint8_t totalChannels = sizeof(CRGBPalette16);
-    for( uint8_t i = 0; i < totalChannels; i++) {
+    for( uint8_t i = 0; i < totalChannels; ++i) {
         // if the values are equal, no changes are needed
         if( p1[i] == p2[i] ) { continue; }
 
         // if the current value is less than the target, increase it by one
-        if( p1[i] < p2[i] ) { p1[i]++; changes++; }
+        if( p1[i] < p2[i] ) { ++p1[i]; ++changes; }
 
         // if the current value is greater than the target,
         // increase it by one (or two if it's still greater).
         if( p1[i] > p2[i] ) {
-            p1[i]--; changes++;
-            if( p1[i] > p2[i] ) { p1[i]--; }
+            --p1[i]; ++changes;
+            if( p1[i] > p2[i] ) { --p1[i]; }
         }
 
         // if we've hit the maximum number of changes, exit
@@ -1182,14 +1182,14 @@ CRGB& napplyGamma_video( CRGB& rgb, float gammaR, float gammaG, float gammaB)
 
 void napplyGamma_video( CRGB* rgbarray, uint16_t count, float gamma)
 {
-    for( uint16_t i = 0; i < count; i++) {
+    for( uint16_t i = 0; i < count; ++i) {
         rgbarray[i] = applyGamma_video( rgbarray[i], gamma);
     }
 }
 
 void napplyGamma_video( CRGB* rgbarray, uint16_t count, float gammaR, float gammaG, float gammaB)
 {
-    for( uint16_t i = 0; i < count; i++) {
+    for( uint16_t i = 0; i < count; ++i) {
         rgbarray[i] = applyGamma_video( rgbarray[i], gammaR, gammaG, gammaB);
     }
 }
diff --git a/colorutils.h b/colorutils.h
index 92868434a9..65231b89c6 100644
--- a/colorutils.h
+++ b/colorutils.h
@@ -168,7 +168,7 @@ void fill_gradient( T* targetArray,
     accum88 hue88 = startcolor.hue << 8;
     accum88 sat88 = startcolor.sat << 8;
     accum88 val88 = startcolor.val << 8;
-    for( uint16_t i = startpos; i <= endpos; i++) {
+    for( uint16_t i = startpos; i <= endpos; ++i) {
         targetArray[i] = CHSV( hue88 >> 8, sat88 >> 8, val88 >> 8);
         hue88 += huedelta87;
         sat88 += satdelta87;
@@ -462,7 +462,7 @@ class CHSVPalette16 {
 
     CHSVPalette16( const TProgmemHSVPalette16& rhs)
     {
-        for( uint8_t i = 0; i < 16; i++) {
+        for( uint8_t i = 0; i < 16; ++i) {
             CRGB xyz   =  FL_PGM_READ_DWORD_NEAR( rhs + i);
             entries[i].hue = xyz.red;
             entries[i].sat = xyz.green;
@@ -471,7 +471,7 @@ class CHSVPalette16 {
     }
     CHSVPalette16& operator=( const TProgmemHSVPalette16& rhs)
     {
-        for( uint8_t i = 0; i < 16; i++) {
+        for( uint8_t i = 0; i < 16; ++i) {
             CRGB xyz   =  FL_PGM_READ_DWORD_NEAR( rhs + i);
             entries[i].hue = xyz.red;
             entries[i].sat = xyz.green;
@@ -508,10 +508,10 @@ class CHSVPalette16 {
         const uint8_t* p = (const uint8_t*)(&(this->entries[0]));
         const uint8_t* q = (const uint8_t*)(&(rhs.entries[0]));
         if( p == q) return true;
-        for( uint8_t i = 0; i < (sizeof( entries)); i++) {
+        for( uint8_t i = 0; i < (sizeof( entries)); ++i) {
             if( *p != *q) return false;
-            p++;
-            q++;
+            ++p;
+            ++q;
         }
         return true;
     }
@@ -613,10 +613,10 @@ class CHSVPalette256 {
         const uint8_t* p = (const uint8_t*)(&(this->entries[0]));
         const uint8_t* q = (const uint8_t*)(&(rhs.entries[0]));
         if( p == q) return true;
-        for( uint16_t i = 0; i < (sizeof( entries)); i++) {
+        for( uint16_t i = 0; i < (sizeof( entries)); ++i) {
             if( *p != *q) return false;
-            p++;
-            q++;
+            ++p;
+            ++q;
         }
         return true;
     }
@@ -679,26 +679,26 @@ class CRGBPalette16 {
 
     CRGBPalette16( const CHSVPalette16& rhs)
     {
-        for( uint8_t i = 0; i < 16; i++) {
+        for( uint8_t i = 0; i < 16; ++i) {
     		entries[i] = rhs.entries[i]; // implicit HSV-to-RGB conversion
         }
     }
     CRGBPalette16( const CHSV rhs[16])
     {
-        for( uint8_t i = 0; i < 16; i++) {
+        for( uint8_t i = 0; i < 16; ++i) {
             entries[i] = rhs[i]; // implicit HSV-to-RGB conversion
         }
     }
     CRGBPalette16& operator=( const CHSVPalette16& rhs)
     {
-        for( uint8_t i = 0; i < 16; i++) {
+        for( uint8_t i = 0; i < 16; ++i) {
     		entries[i] = rhs.entries[i]; // implicit HSV-to-RGB conversion
         }
         return *this;
     }
     CRGBPalette16& operator=( const CHSV rhs[16])
     {
-        for( uint8_t i = 0; i < 16; i++) {
+        for( uint8_t i = 0; i < 16; ++i) {
             entries[i] = rhs[i]; // implicit HSV-to-RGB conversion
         }
         return *this;
@@ -706,13 +706,13 @@ class CRGBPalette16 {
 
     CRGBPalette16( const TProgmemRGBPalette16& rhs)
     {
-        for( uint8_t i = 0; i < 16; i++) {
+        for( uint8_t i = 0; i < 16; ++i) {
             entries[i] =  FL_PGM_READ_DWORD_NEAR( rhs + i);
         }
     }
     CRGBPalette16& operator=( const TProgmemRGBPalette16& rhs)
     {
-        for( uint8_t i = 0; i < 16; i++) {
+        for( uint8_t i = 0; i < 16; ++i) {
             entries[i] =  FL_PGM_READ_DWORD_NEAR( rhs + i);
         }
         return *this;
@@ -723,10 +723,10 @@ class CRGBPalette16 {
         const uint8_t* p = (const uint8_t*)(&(this->entries[0]));
         const uint8_t* q = (const uint8_t*)(&(rhs.entries[0]));
         if( p == q) return true;
-        for( uint8_t i = 0; i < (sizeof( entries)); i++) {
+        for( uint8_t i = 0; i < (sizeof( entries)); ++i) {
             if( *p != *q) return false;
-            p++;
-            q++;
+            ++p;
+            ++q;
         }
         return true;
     }
@@ -828,7 +828,7 @@ class CRGBPalette16 {
         uint16_t count = 0;
         do {
             u.dword = FL_PGM_READ_DWORD_NEAR(progent + count);
-            count++;;
+            ++count;;
         } while ( u.index != 255);
 
         int8_t lastSlotUsed = -1;
@@ -840,7 +840,7 @@ class CRGBPalette16 {
         uint8_t istart8 = 0;
         uint8_t iend8 = 0;
         while( indexstart < 255) {
-            progent++;
+            ++progent;
             u.dword = FL_PGM_READ_DWORD_NEAR( progent);
             int indexend  = u.index;
             CRGB rgbend( u.r, u.g, u.b);
@@ -870,7 +870,7 @@ class CRGBPalette16 {
         uint16_t count = 0;
         do {
             u = *(ent + count);
-            count++;;
+            ++count;;
         } while ( u.index != 255);
 
         int8_t lastSlotUsed = -1;
@@ -883,7 +883,7 @@ class CRGBPalette16 {
         uint8_t istart8 = 0;
         uint8_t iend8 = 0;
         while( indexstart < 255) {
-            ent++;
+            ++ent;
             u = *ent;
             int indexend  = u.index;
             CRGB rgbend( u.r, u.g, u.b);
@@ -918,7 +918,7 @@ class CHSVPalette32 {
                   const CHSV& c08,const CHSV& c09,const CHSV& c10,const CHSV& c11,
                   const CHSV& c12,const CHSV& c13,const CHSV& c14,const CHSV& c15 )
     {
-        for( uint8_t i = 0; i < 2; i++) {
+        for( uint8_t i = 0; i < 2; ++i) {
             entries[0+i]=c00; entries[2+i]=c01; entries[4+i]=c02; entries[6+i]=c03;
             entries[8+i]=c04; entries[10+i]=c05; entries[12+i]=c06; entries[14+i]=c07;
             entries[16+i]=c08; entries[18+i]=c09; entries[20+i]=c10; entries[22+i]=c11;
@@ -938,7 +938,7 @@ class CHSVPalette32 {
     
     CHSVPalette32( const TProgmemHSVPalette32& rhs)
     {
-        for( uint8_t i = 0; i < 32; i++) {
+        for( uint8_t i = 0; i < 32; ++i) {
             CRGB xyz   =  FL_PGM_READ_DWORD_NEAR( rhs + i);
             entries[i].hue = xyz.red;
             entries[i].sat = xyz.green;
@@ -947,7 +947,7 @@ class CHSVPalette32 {
     }
     CHSVPalette32& operator=( const TProgmemHSVPalette32& rhs)
     {
-        for( uint8_t i = 0; i < 32; i++) {
+        for( uint8_t i = 0; i < 32; ++i) {
             CRGB xyz   =  FL_PGM_READ_DWORD_NEAR( rhs + i);
             entries[i].hue = xyz.red;
             entries[i].sat = xyz.green;
@@ -984,10 +984,10 @@ class CHSVPalette32 {
         const uint8_t* p = (const uint8_t*)(&(this->entries[0]));
         const uint8_t* q = (const uint8_t*)(&(rhs.entries[0]));
         if( p == q) return true;
-        for( uint8_t i = 0; i < (sizeof( entries)); i++) {
+        for( uint8_t i = 0; i < (sizeof( entries)); ++i) {
             if( *p != *q) return false;
-            p++;
-            q++;
+            ++p;
+            ++q;
         }
         return true;
     }
@@ -1024,7 +1024,7 @@ class CRGBPalette32 {
                   const CRGB& c08,const CRGB& c09,const CRGB& c10,const CRGB& c11,
                   const CRGB& c12,const CRGB& c13,const CRGB& c14,const CRGB& c15 )
     {
-        for( uint8_t i = 0; i < 2; i++) {
+        for( uint8_t i = 0; i < 2; ++i) {
             entries[0+i]=c00; entries[2+i]=c01; entries[4+i]=c02; entries[6+i]=c03;
             entries[8+i]=c04; entries[10+i]=c05; entries[12+i]=c06; entries[14+i]=c07;
             entries[16+i]=c08; entries[18+i]=c09; entries[20+i]=c10; entries[22+i]=c11;
@@ -1053,26 +1053,26 @@ class CRGBPalette32 {
     
     CRGBPalette32( const CHSVPalette32& rhs)
     {
-        for( uint8_t i = 0; i < 32; i++) {
+        for( uint8_t i = 0; i < 32; ++i) {
             entries[i] = rhs.entries[i]; // implicit HSV-to-RGB conversion
         }
     }
     CRGBPalette32( const CHSV rhs[32])
     {
-        for( uint8_t i = 0; i < 32; i++) {
+        for( uint8_t i = 0; i < 32; ++i) {
             entries[i] = rhs[i]; // implicit HSV-to-RGB conversion
         }
     }
     CRGBPalette32& operator=( const CHSVPalette32& rhs)
     {
-        for( uint8_t i = 0; i < 32; i++) {
+        for( uint8_t i = 0; i < 32; ++i) {
             entries[i] = rhs.entries[i]; // implicit HSV-to-RGB conversion
         }
         return *this;
     }
     CRGBPalette32& operator=( const CHSV rhs[32])
     {
-        for( uint8_t i = 0; i < 32; i++) {
+        for( uint8_t i = 0; i < 32; ++i) {
             entries[i] = rhs[i]; // implicit HSV-to-RGB conversion
         }
         return *this;
@@ -1080,13 +1080,13 @@ class CRGBPalette32 {
     
     CRGBPalette32( const TProgmemRGBPalette32& rhs)
     {
-        for( uint8_t i = 0; i < 32; i++) {
+        for( uint8_t i = 0; i < 32; ++i) {
             entries[i] =  FL_PGM_READ_DWORD_NEAR( rhs + i);
         }
     }
     CRGBPalette32& operator=( const TProgmemRGBPalette32& rhs)
     {
-        for( uint8_t i = 0; i < 32; i++) {
+        for( uint8_t i = 0; i < 32; ++i) {
             entries[i] =  FL_PGM_READ_DWORD_NEAR( rhs + i);
         }
         return *this;
@@ -1097,10 +1097,10 @@ class CRGBPalette32 {
         const uint8_t* p = (const uint8_t*)(&(this->entries[0]));
         const uint8_t* q = (const uint8_t*)(&(rhs.entries[0]));
         if( p == q) return true;
-        for( uint8_t i = 0; i < (sizeof( entries)); i++) {
+        for( uint8_t i = 0; i < (sizeof( entries)); ++i) {
             if( *p != *q) return false;
-            p++;
-            q++;
+            ++p;
+            ++q;
         }
         return true;
     }
@@ -1225,7 +1225,7 @@ class CRGBPalette32 {
         uint16_t count = 0;
         do {
             u.dword = FL_PGM_READ_DWORD_NEAR(progent + count);
-            count++;;
+            ++count;;
         } while ( u.index != 255);
         
         int8_t lastSlotUsed = -1;
@@ -1237,7 +1237,7 @@ class CRGBPalette32 {
         uint8_t istart8 = 0;
         uint8_t iend8 = 0;
         while( indexstart < 255) {
-            progent++;
+            ++progent;
             u.dword = FL_PGM_READ_DWORD_NEAR( progent);
             int indexend  = u.index;
             CRGB rgbend( u.r, u.g, u.b);
@@ -1267,7 +1267,7 @@ class CRGBPalette32 {
         uint16_t count = 0;
         do {
             u = *(ent + count);
-            count++;;
+            ++count;;
         } while ( u.index != 255);
         
         int8_t lastSlotUsed = -1;
@@ -1280,7 +1280,7 @@ class CRGBPalette32 {
         uint8_t istart8 = 0;
         uint8_t iend8 = 0;
         while( indexstart < 255) {
-            ent++;
+            ++ent;
             u = *ent;
             int indexend  = u.index;
             CRGB rgbend( u.r, u.g, u.b);
@@ -1341,26 +1341,26 @@ class CRGBPalette256 {
 
     CRGBPalette256( const CHSVPalette256& rhs)
     {
-    	for( int i = 0; i < 256; i++) {
+    	for( int i = 0; i < 256; ++i) {
 	    	entries[i] = rhs.entries[i]; // implicit HSV-to-RGB conversion
     	}
     }
     CRGBPalette256( const CHSV rhs[256])
     {
-        for( int i = 0; i < 256; i++) {
+        for( int i = 0; i < 256; ++i) {
             entries[i] = rhs[i]; // implicit HSV-to-RGB conversion
         }
     }
     CRGBPalette256& operator=( const CHSVPalette256& rhs)
     {
-    	for( int i = 0; i < 256; i++) {
+    	for( int i = 0; i < 256; ++i) {
 	    	entries[i] = rhs.entries[i]; // implicit HSV-to-RGB conversion
     	}
         return *this;
     }
     CRGBPalette256& operator=( const CHSV rhs[256])
     {
-        for( int i = 0; i < 256; i++) {
+        for( int i = 0; i < 256; ++i) {
             entries[i] = rhs[i]; // implicit HSV-to-RGB conversion
         }
         return *this;
@@ -1393,10 +1393,10 @@ class CRGBPalette256 {
         const uint8_t* p = (const uint8_t*)(&(this->entries[0]));
         const uint8_t* q = (const uint8_t*)(&(rhs.entries[0]));
         if( p == q) return true;
-        for( uint16_t i = 0; i < (sizeof( entries)); i++) {
+        for( uint16_t i = 0; i < (sizeof( entries)); ++i) {
             if( *p != *q) return false;
-            p++;
-            q++;
+            ++p;
+            ++q;
         }
         return true;
     }
@@ -1475,7 +1475,7 @@ class CRGBPalette256 {
 
         int indexstart = 0;
         while( indexstart < 255) {
-            progent++;
+            ++progent;
             u.dword = FL_PGM_READ_DWORD_NEAR( progent);
             int indexend  = u.index;
             CRGB rgbend( u.r, u.g, u.b);
@@ -1494,7 +1494,7 @@ class CRGBPalette256 {
 
         int indexstart = 0;
         while( indexstart < 255) {
-            ent++;
+            ++ent;
             u = *ent;
             int indexend  = u.index;
             CRGB rgbend( u.r, u.g, u.b);
@@ -1557,7 +1557,7 @@ void fill_palette(CRGB* L, uint16_t N, uint8_t startIndex, uint8_t incIndex,
                   const PALETTE& pal, uint8_t brightness, TBlendType blendType)
 {
     uint8_t colorIndex = startIndex;
-    for( uint16_t i = 0; i < N; i++) {
+    for( uint16_t i = 0; i < N; ++i) {
         L[i] = ColorFromPalette( pal, colorIndex, brightness, blendType);
         colorIndex += incIndex;
     }
@@ -1572,7 +1572,7 @@ void map_data_into_colors_through_palette(
 	uint8_t opacity=255,
 	TBlendType blendType=LINEARBLEND)
 {
-	for( uint16_t i = 0; i < dataCount; i++) {
+	for( uint16_t i = 0; i < dataCount; ++i) {
 		uint8_t d = dataArray[i];
 		CRGB rgb = ColorFromPalette( pal, d, brightness, blendType);
 		if( opacity == 255 ) {
diff --git a/controller.h b/controller.h
index 951a8a0dfc..fe32d70d32 100644
--- a/controller.h
+++ b/controller.h
@@ -154,7 +154,7 @@ class CLEDController {
               CRGB adj(0,0,0);
 
               if(scale > 0) {
-                  for(uint8_t i = 0; i < 3; i++) {
+                  for(uint8_t i = 0; i < 3; ++i) {
                       uint8_t cc = colorCorrection.raw[i];
                       uint8_t ct = colorTemperature.raw[i];
                       if(cc > 0 && ct > 0) {
@@ -195,13 +195,13 @@ struct PixelController {
             mScale = other.mScale;
             mAdvance = other.mAdvance;
             mLenRemaining = mLen = other.mLen;
-            for(int i = 0; i < LANES; i++) { mOffsets[i] = other.mOffsets[i]; }
+            for(int i = 0; i < LANES; ++i) { mOffsets[i] = other.mOffsets[i]; }
 
         }
 
         void initOffsets(int len) {
           int nOffset = 0;
-          for(int i = 0; i < LANES; i++) {
+          for(int i = 0; i < LANES; ++i) {
             mOffsets[i] = nOffset;
             if((1<<i) & MASK) { nOffset += (len * mAdvance); }
           }
@@ -256,7 +256,7 @@ struct PixelController {
 
             // R is the digther signal 'counter'.
             static uint8_t R = 0;
-            R++;
+            ++R;
 
             // R is wrapped around at 2^ditherBits,
             // so if ditherBits is 2, R will cycle through (0,1,2,3)
@@ -293,14 +293,14 @@ struct PixelController {
             // actual dithering.
 
             // Setup the initial D and E values
-            for(int i = 0; i < 3; i++) {
+            for(int i = 0; i < 3; ++i) {
                     uint8_t s = mScale.raw[i];
                     e[i] = s ? (256/s) + 1 : 0;
                     d[i] = scale8(Q, e[i]);
 #if (FASTLED_SCALE8_FIXED == 1)
-                    if(d[i]) (d[i]--);
+                    if(d[i]) (--d[i]);
 #endif
-                    if(e[i]) e[i]--;
+                    if(e[i]) --e[i];
             }
 #endif
         }
@@ -324,7 +324,7 @@ struct PixelController {
         __attribute__((always_inline)) inline int advanceBy() { return mAdvance; }
 
         // advance the data pointer forward, adjust position counter
-         __attribute__((always_inline)) inline void advanceData() { mData += mAdvance; mLenRemaining--;}
+         __attribute__((always_inline)) inline void advanceData() { mData += mAdvance; --mLenRemaining;}
 
         // step the dithering forward
          __attribute__((always_inline)) inline void stepDithering() {
diff --git a/examples/ColorPalette/ColorPalette.ino b/examples/ColorPalette/ColorPalette.ino
index 4d64efb385..6ccd5c1b64 100644
--- a/examples/ColorPalette/ColorPalette.ino
+++ b/examples/ColorPalette/ColorPalette.ino
@@ -62,7 +62,7 @@ void FillLEDsFromPaletteColors( uint8_t colorIndex)
 {
     uint8_t brightness = 255;
     
-    for( int i = 0; i < NUM_LEDS; i++) {
+    for( int i = 0; i < NUM_LEDS; ++i) {
         leds[i] = ColorFromPalette( currentPalette, colorIndex, brightness, currentBlending);
         colorIndex += 3;
     }
@@ -101,7 +101,7 @@ void ChangePalettePeriodically()
 // This function fills the palette with totally random colors.
 void SetupTotallyRandomPalette()
 {
-    for( int i = 0; i < 16; i++) {
+    for( int i = 0; i < 16; ++i) {
         currentPalette[i] = CHSV( random8(), 255, random8());
     }
 }
diff --git a/hsv2rgb.cpp b/hsv2rgb.cpp
index cdb576bcc6..1fb8d56b77 100644
--- a/hsv2rgb.cpp
+++ b/hsv2rgb.cpp
@@ -496,19 +496,19 @@ void hsv2rgb_rainbow( const CHSV& hsv, CRGB& rgb)
 
 
 void hsv2rgb_raw(const struct CHSV * phsv, struct CRGB * prgb, int numLeds) {
-    for(int i = 0; i < numLeds; i++) {
+    for(int i = 0; i < numLeds; ++i) {
         hsv2rgb_raw(phsv[i], prgb[i]);
     }
 }
 
 void hsv2rgb_rainbow( const struct CHSV* phsv, struct CRGB * prgb, int numLeds) {
-    for(int i = 0; i < numLeds; i++) {
+    for(int i = 0; i < numLeds; ++i) {
         hsv2rgb_rainbow(phsv[i], prgb[i]);
     }
 }
 
 void hsv2rgb_spectrum( const struct CHSV* phsv, struct CRGB * prgb, int numLeds) {
-    for(int i = 0; i < numLeds; i++) {
+    for(int i = 0; i < numLeds; ++i) {
         hsv2rgb_spectrum(phsv[i], prgb[i]);
     }
 }
diff --git a/lib8tion.cpp b/lib8tion.cpp
index 1306e5c0cb..ecb051d80a 100644
--- a/lib8tion.cpp
+++ b/lib8tion.cpp
@@ -142,7 +142,7 @@ void test1abs( int8_t i)
 void testabs()
 {
     delay(5000);
-    for( int8_t q = -128; q != 127; q++) {
+    for( int8_t q = -128; q != 127; ++q) {
         test1abs(q);
     }
     for(;;){};
@@ -225,7 +225,7 @@ void testnscale8x3()
 {
     delay(5000);
     byte r, g, b, sc;
-    for( byte z = 0; z < 10; z++) {
+    for( byte z = 0; z < 10; ++z) {
         r = random8(); g = random8(); b = random8(); sc = random8();
 
         Serial.print("nscale8x3_video( ");
diff --git a/lib8tion/trig8.h b/lib8tion/trig8.h
index 6e08407549..c5896ef8e9 100644
--- a/lib8tion/trig8.h
+++ b/lib8tion/trig8.h
@@ -169,7 +169,7 @@ LIB8STATIC uint8_t  sin8_avr( uint8_t theta)
     offset &= 0x3F; // 0..63
 
     uint8_t secoffset  = offset & 0x0F; // 0..15
-    if( theta & 0x40) secoffset++;
+    if( theta & 0x40) ++secoffset;
 
     uint8_t m16; uint8_t b;
 
@@ -179,7 +179,7 @@ LIB8STATIC uint8_t  sin8_avr( uint8_t theta)
     const uint8_t* p = b_m16_interleave;
     p += s2;
     b   = *p;
-    p++;
+    ++p;
     m16 = *p;
 
     uint8_t mx;
@@ -223,14 +223,14 @@ LIB8STATIC uint8_t sin8_C( uint8_t theta)
     offset &= 0x3F; // 0..63
 
     uint8_t secoffset  = offset & 0x0F; // 0..15
-    if( theta & 0x40) secoffset++;
+    if( theta & 0x40) ++secoffset;
 
     uint8_t section = offset >> 4; // 0..3
     uint8_t s2 = section * 2;
     const uint8_t* p = b_m16_interleave;
     p += s2;
     uint8_t b   =  *p;
-    p++;
+    ++p;
     uint8_t m16 =  *p;
 
     uint8_t mx = (m16 * secoffset) >> 4;
diff --git a/noise.cpp b/noise.cpp
index 2963c4a413..3a40c476f3 100644
--- a/noise.cpp
+++ b/noise.cpp
@@ -565,8 +565,8 @@ uint8_t inoise8(uint16_t x) {
 void fill_raw_noise8(uint8_t *pData, uint8_t num_points, uint8_t octaves, uint16_t x, int scale, uint16_t time) {
   uint32_t _xx = x;
   uint32_t scx = scale;
-  for(int o = 0; o < octaves; o++) {
-    for(int i = 0,xx=_xx; i < num_points; i++, xx+=scx) {
+  for(int o = 0; o < octaves; ++o) {
+    for(int i = 0,xx=_xx; i < num_points; ++i, xx+=scx) {
           pData[i] = qadd8(pData[i],inoise8(xx,time)>>o);
     }
 
@@ -578,8 +578,8 @@ void fill_raw_noise8(uint8_t *pData, uint8_t num_points, uint8_t octaves, uint16
 void fill_raw_noise16into8(uint8_t *pData, uint8_t num_points, uint8_t octaves, uint32_t x, int scale, uint32_t time) {
   uint32_t _xx = x;
   uint32_t scx = scale;
-  for(int o = 0; o < octaves; o++) {
-    for(int i = 0,xx=_xx; i < num_points; i++, xx+=scx) {
+  for(int o = 0; o < octaves; ++o) {
+    for(int i = 0,xx=_xx; i < num_points; ++i, xx+=scx) {
       uint32_t accum = (inoise16(xx,time))>>o;
       accum += (pData[i]<<8);
       if(accum > 65535) { accum = 65535; }
@@ -604,19 +604,19 @@ void fill_raw_2dnoise8(uint8_t *pData, int width, int height, uint8_t octaves, q
 
   fract8 invamp = 255-amplitude;
   uint16_t xx = x;
-  for(int i = 0; i < height; i++, y+=scaley) {
+  for(int i = 0; i < height; ++i, y+=scaley) {
     uint8_t *pRow = pData + (i*width);
     xx = x;
-    for(int j = 0; j < width; j++, xx+=scalex) {
+    for(int j = 0; j < width; ++j, xx+=scalex) {
       uint8_t noise_base = inoise8(xx,y,time);
       noise_base = (0x80 & noise_base) ? (noise_base - 127) : (127 - noise_base);
       noise_base = scale8(noise_base<<1,amplitude);
       if(skip == 1) {
         pRow[j] = scale8(pRow[j],invamp) + noise_base;
       } else {
-        for(int ii = i; ii<(i+skip) && ii<height; ii++) {
+        for(int ii = i; ii<(i+skip) && ii<height; ++ii) {
           uint8_t *pRow = pData + (ii*width);
-          for(int jj=j; jj<(j+skip) && jj<width; jj++) {
+          for(int jj=j; jj<(j+skip) && jj<width; ++jj) {
             pRow[jj] = scale8(pRow[jj],invamp) + noise_base;
           }
         }
@@ -649,9 +649,9 @@ void fill_raw_2dnoise16(uint16_t *pData, int width, int height, uint8_t octaves,
       if(skip==1) {
         pRow[j] = scale16(pRow[j],invamp) + noise_base;
       } else {
-        for(int ii = i; ii<(i+skip) && ii<height; ii++) {
+        for(int ii = i; ii<(i+skip) && ii<height; ++ii) {
           uint16_t *pRow = pData + (ii*width);
-          for(int jj=j; jj<(j+skip) && jj<width; jj++) {
+          for(int jj=j; jj<(j+skip) && jj<width; ++jj) {
             pRow[jj] = scale16(pRow[jj],invamp) + noise_base;
           }
         }
@@ -685,9 +685,9 @@ void fill_raw_2dnoise16into8(uint8_t *pData, int width, int height, uint8_t octa
       if(skip==1) {
         pRow[j] = qadd8(scale8(pRow[j],invamp),noise_base);
       } else {
-        for(int ii = i; ii<(i+skip) && ii<height; ii++) {
+        for(int ii = i; ii<(i+skip) && ii<height; ++ii) {
           uint8_t *pRow = pData + (ii*width);
-          for(int jj=j; jj<(j+skip) && jj<width; jj++) {
+          for(int jj=j; jj<(j+skip) && jj<width; ++jj) {
             pRow[jj] = scale8(pRow[jj],invamp) + noise_base;
           }
         }
@@ -713,7 +713,7 @@ void fill_noise8(CRGB *leds, int num_leds,
   fill_raw_noise8(V,num_leds,octaves,x,scale,time);
   fill_raw_noise8(H,num_leds,hue_octaves,hue_x,hue_scale,time);
 
-  for(int i = 0; i < num_leds; i++) {
+  for(int i = 0; i < num_leds; ++i) {
     leds[i] = CHSV(H[i],255,V[i]);
   }
 }
@@ -731,7 +731,7 @@ void fill_noise16(CRGB *leds, int num_leds,
   fill_raw_noise16into8(V,num_leds,octaves,x,scale,time);
   fill_raw_noise8(H,num_leds,hue_octaves,hue_x,hue_scale,time);
 
-  for(int i = 0; i < num_leds; i++) {
+  for(int i = 0; i < num_leds; ++i) {
     leds[i] = CHSV(H[i] + hue_shift,255,V[i]);
   }
 }
@@ -750,9 +750,9 @@ void fill_2dnoise8(CRGB *leds, int width, int height, bool serpentine,
 
   int w1 = width-1;
   int h1 = height-1;
-  for(int i = 0; i < height; i++) {
+  for(int i = 0; i < height; ++i) {
     int wb = i*width;
-    for(int j = 0; j < width; j++) {
+    for(int j = 0; j < width; ++j) {
       CRGB led(CHSV(H[h1-i][w1-j],255,V[i][j]));
 
       int pos = j;
@@ -788,9 +788,9 @@ void fill_2dnoise16(CRGB *leds, int width, int height, bool serpentine,
   int h1 = height-1;
   hue_shift >>= 8;
 
-  for(int i = 0; i < height; i++) {
+  for(int i = 0; i < height; ++i) {
     int wb = i*width;
-    for(int j = 0; j < width; j++) {
+    for(int j = 0; j < width; ++j) {
       CRGB led(CHSV(hue_shift + (H[h1-i][w1-j]),196,V[i][j]));
 
       int pos = j;
diff --git a/pixeltypes.h b/pixeltypes.h
index 5781e1e4cb..6e91723df0 100644
--- a/pixeltypes.h
+++ b/pixeltypes.h
@@ -538,14 +538,14 @@ struct CRGB {
             // going 'up'
             if( (b > 0) && (b < 255)) {
                 if( r == g && g == b) {
-                    r++;
-                    g++;
+                    ++r;
+                    ++g;
                 }
-                b++;
+                ++b;
             } else if( (r > 0) && (r < 255)) {
-                r++;
+                ++r;
             } else if( (g > 0) && (g < 255)) {
-                g++;
+                ++g;
             } else {
                 if( r == g && g == b) {
                     r ^= 0x01;
@@ -557,14 +557,14 @@ struct CRGB {
             // going 'down'
             if( b > 1) {
                 if( r == g && g == b) {
-                    r--;
-                    g--;
+                    --r;
+                    --g;
                 }
-                b--;
+                --b;
             } else if( g > 1) {
-                g--;
+                --g;
             } else if( r > 1) {
-                r--;
+                --r;
             } else {
                 if( r == g && g == b) {
                     r ^= 0x01;
diff --git a/platforms.cpp b/platforms.cpp
index 47a0088314..5b6847ad67 100644
--- a/platforms.cpp
+++ b/platforms.cpp
@@ -17,16 +17,16 @@
     #endif
             // NOTE: Update platforms.cpp in root of FastLED library if this changes        
             #if defined(FASTLED_NRF52_ENABLE_PWM_INSTANCE0)
-                void PWM0_IRQHandler(void) { isrCount++; PWM_Arbiter<0>::isr_handler(); }
+                void PWM0_IRQHandler(void) { ++isrCount; PWM_Arbiter<0>::isr_handler(); }
             #endif
             #if defined(FASTLED_NRF52_ENABLE_PWM_INSTANCE1)
-                void PWM1_IRQHandler(void) { isrCount++; PWM_Arbiter<1>::isr_handler(); }
+                void PWM1_IRQHandler(void) { ++isrCount; PWM_Arbiter<1>::isr_handler(); }
             #endif
             #if defined(FASTLED_NRF52_ENABLE_PWM_INSTANCE2)
-                void PWM2_IRQHandler(void) { isrCount++; PWM_Arbiter<2>::isr_handler(); }
+                void PWM2_IRQHandler(void) { ++isrCount; PWM_Arbiter<2>::isr_handler(); }
             #endif
             #if defined(FASTLED_NRF52_ENABLE_PWM_INSTANCE3)
-                void PWM3_IRQHandler(void) { isrCount++; PWM_Arbiter<3>::isr_handler(); }
+                void PWM3_IRQHandler(void) { ++isrCount; PWM_Arbiter<3>::isr_handler(); }
             #endif
     #ifdef __cplusplus
         }
diff --git a/platforms/arm/common/m0clockless.h b/platforms/arm/common/m0clockless.h
index b9ed2ba5ca..6fd865954d 100644
--- a/platforms/arm/common/m0clockless.h
+++ b/platforms/arm/common/m0clockless.h
@@ -2,182 +2,182 @@
 #define __INC_M0_CLOCKLESS_H
 
 struct M0ClocklessData {
-    uint8_t d[3];
-    uint8_t e[3];
-    uint8_t adj;
-    uint8_t pad;
-    uint32_t s[3];
+  uint8_t d[3];
+  uint8_t e[3];
+  uint8_t adj;
+  uint8_t pad;
+  uint32_t s[3];
 };
 
 
 template<int HI_OFFSET, int LO_OFFSET, int T1, int T2, int T3, EOrder RGB_ORDER, int WAIT_TIME>int
 showLedData(volatile uint32_t *_port, uint32_t _bitmask, const uint8_t *_leds, uint32_t num_leds, struct M0ClocklessData *pData) {
-    // Lo register variables
-    register uint32_t scratch=0;
-    register struct M0ClocklessData *base = pData;
-    register volatile uint32_t *port = _port;
-    register uint32_t d=0;
-    register uint32_t counter=num_leds;
-    register uint32_t bn=0;
-    register uint32_t b=0;
-    register uint32_t bitmask = _bitmask;
-
-    // high register variable
-    register const uint8_t *leds = _leds;
+  // Lo register variables
+  register uint32_t scratch=0;
+  register struct M0ClocklessData *base = pData;
+  register volatile uint32_t *port = _port;
+  register uint32_t d=0;
+  register uint32_t counter=num_leds;
+  register uint32_t bn=0;
+  register uint32_t b=0;
+  register uint32_t bitmask = _bitmask;
+
+  // high register variable
+  register const uint8_t *leds = _leds;
 #if (FASTLED_SCALE8_FIXED == 1)
-    pData->s[0]++;
-    pData->s[1]++;
-    pData->s[2]++;
+  ++pData->s[0];
+  ++pData->s[1];
+  ++pData->s[2];
 #endif
-    asm __volatile__ (
-        ///////////////////////////////////////////////////////////////////////////
-        //
-        // asm macro definitions - used to assemble the clockless output
-        //
-        ".ifnotdef fl_delay_def;"
+  asm __volatile__ (
+    ///////////////////////////////////////////////////////////////////////////
+    //
+    // asm macro definitions - used to assemble the clockless output
+    //
+    ".ifnotdef fl_delay_def;"
 #ifdef FASTLED_ARM_M0_PLUS
-        "  .set fl_is_m0p, 1;"
-        "  .macro m0pad;"
-        "    nop;"
-        "  .endm;"
+    "  .set fl_is_m0p, 1;"
+    "  .macro m0pad;"
+    "    nop;"
+    "  .endm;"
 #else
-        "  .set fl_is_m0p, 0;"
-        "  .macro m0pad;"
-        "  .endm;"
+    "  .set fl_is_m0p, 0;"
+    "  .macro m0pad;"
+    "  .endm;"
 #endif
-        "  .set fl_delay_def, 1;"
-        "  .set fl_delay_mod, 4;"
-        "  .if fl_is_m0p == 1;"
-        "    .set fl_delay_mod, 3;"
-        "  .endif;"
-        "  .macro fl_delay dtime, reg=r0;"
-        "    .if (\\dtime > 0);"
-        "      .set dcycle, (\\dtime / fl_delay_mod);"
-        "      .set dwork, (dcycle * fl_delay_mod);"
-        "      .set drem, (\\dtime - dwork);"
-        "      .rept (drem);"
-        "        nop;"
-        "      .endr;"
-        "      .if dcycle > 0;"
-        "        mov \\reg, #dcycle;"
-        "        delayloop_\\@:;"
-        "        sub \\reg, #1;"
-        "        bne delayloop_\\@;"
-        "	     .if fl_is_m0p == 0;"
-        "          nop;"
-        "        .endif;"
-        "      .endif;"
-        "    .endif;"
-        "  .endm;"
-
-        "  .macro mod_delay dtime,b1,b2,reg;"
-        "    .set adj, (\\b1 + \\b2);"
-        "    .if adj < \\dtime;"
-        "      .set dtime2, (\\dtime - adj);"
-        "      fl_delay dtime2, \\reg;"
-        "    .endif;"
-        "  .endm;"
-
-        // check the bit and drop the line low if it isn't set
-        "  .macro qlo4 b,bitmask,port,loff	;"
-        "    lsl \\b, #1			;"
-        "    bcs skip_\\@			;"
-        "    str \\bitmask, [\\port, \\loff]	;"
-        "    skip_\\@:			;"
-        "    m0pad;"
-        "  .endm				;"
-
-        // set the pin hi or low (determined by the offset passed in )
-        "  .macro qset2 bitmask,port,loff;"
-        "    str \\bitmask, [\\port, \\loff];"
-        "    m0pad;"
-        "  .endm;"
-
-        // Load up the next led byte to work with, put it in bn
-        "  .macro loadleds3 leds, bn, rled, scratch;"
-        "    mov \\scratch, \\leds;"
-        "    ldrb \\bn, [\\scratch, \\rled];"
-        "  .endm;"
-
-        // check whether or not we should dither
-        "  .macro loaddither7 bn,d,base,rdither;"
-        "    ldrb \\d, [\\base, \\rdither];"
-        "    lsl \\d, #24;"  //; shift high for the qadd w/bn
-        "    lsl \\bn, #24;" //; shift high for the qadd w/d
-        "    bne chkskip_\\@;" //; if bn==0, clear d;"
-        "    eor \\d, \\d;" //; clear d;"
-        "    m0pad;"
-        "    chkskip_\\@:;"
-        "  .endm;"
-
-        // Do the qadd8 for dithering -- there's two versions of this.  The m0 version
-        // takes advantage of the 3 cycle branch to do two things after the branch,
-        // while keeping timing constant.  The m0+, however, branches in 2 cycles, so
-        // we have to work around that a bit more.  This is one of the few times
-        // where the m0 will actually be _more_ efficient than the m0+
-        "  .macro dither5 bn,d;"
-        "  .syntax unified;"
-        "    .if fl_is_m0p == 0;"
-        "      adds \\bn, \\d;"         // do the add
-        "      bcc dither5_1_\\@;"
-        "      mvns \\bn, \\bn;"        // set the low 24bits ot 1's
-        "      lsls \\bn, \\bn, #24;"   // move low 8 bits to the high bits
-        "      dither5_1_\\@:;"
-        "      nop;"                    // nop to keep timing in line
-        "    .else;"
-        "      adds \\bn, \\d;"         // do the add"
-        "      bcc dither5_2_\\@;"
-        "      mvns \\bn, \\bn;"        // set the low 24bits ot 1's
-        "      dither5_2_\\@:;"
-        "      bcc dither5_3_\\@;"
-        "      lsls \\bn, \\bn, #24;"   // move low 8 bits to the high bits
-        "      dither5_3_\\@:;"
-        "    .endif;"
-        "  .syntax divided;"
-        "  .endm;"
-
-        // Do our scaling
-        "  .macro scale4 bn, base, scale, scratch;"
-        "    ldr \\scratch, [\\base, \\scale];"
-        "    lsr \\bn, \\bn, #24;"                  // bring bn back down to its low 8 bits
-        "    mul \\bn, \\scratch;"                  // do the multiply
-        "  .endm;"
-
-        // swap bn into b
-        "  .macro swapbbn1 b,bn;"
-        "    lsl \\b, \\bn, #16;"  // put the 8 bits we want for output high
-        "  .endm;"
-
-        // adjust the dithering value for the next time around (load e from memory
-        // to do the math)
-        "  .macro adjdither7 base,d,rled,eoffset,scratch;"
-        "    ldrb \\d, [\\base, \\rled];"
-        "    ldrb \\scratch,[\\base,\\eoffset];"          // load e
-        "    .syntax unified;"
-        "    subs \\d, \\scratch, \\d;"                   // d=e-d
-        "    .syntax divided;"
-        "    strb \\d, [\\base, \\rled];"                 // save d
-        "  .endm;"
-
-        // increment the led pointer (base+6 has what we're incrementing by)
-        "  .macro incleds3   leds, base, scratch;"
-        "    ldrb \\scratch, [\\base, #6];"               // load incremen
-        "    add \\leds, \\leds, \\scratch;"              // update leds pointer
-        "  .endm;"
-
-        // compare and loop
-        "  .macro cmploop5 counter,label;"
-        "    .syntax unified;"
-        "    subs \\counter, #1;"
-        "    .syntax divided;"
-        "    beq done_\\@;"
-        "    m0pad;"
-        "    b \\label;"
-        "    done_\\@:;"
-        "  .endm;"
-
-        " .endif;"
-    );
+    "  .set fl_delay_def, 1;"
+    "  .set fl_delay_mod, 4;"
+    "  .if fl_is_m0p == 1;"
+    "    .set fl_delay_mod, 3;"
+    "  .endif;"
+    "  .macro fl_delay dtime, reg=r0;"
+    "    .if (\\dtime > 0);"
+    "      .set dcycle, (\\dtime / fl_delay_mod);"
+    "      .set dwork, (dcycle * fl_delay_mod);"
+    "      .set drem, (\\dtime - dwork);"
+    "      .rept (drem);"
+    "        nop;"
+    "      .endr;"
+    "      .if dcycle > 0;"
+    "        mov \\reg, #dcycle;"
+    "        delayloop_\\@:;"
+    "        sub \\reg, #1;"
+    "        bne delayloop_\\@;"
+    "	     .if fl_is_m0p == 0;"
+    "          nop;"
+    "        .endif;"
+    "      .endif;"
+    "    .endif;"
+    "  .endm;"
+
+    "  .macro mod_delay dtime,b1,b2,reg;"
+    "    .set adj, (\\b1 + \\b2);"
+    "    .if adj < \\dtime;"
+    "      .set dtime2, (\\dtime - adj);"
+    "      fl_delay dtime2, \\reg;"
+    "    .endif;"
+    "  .endm;"
+
+    // check the bit and drop the line low if it isn't set
+    "  .macro qlo4 b,bitmask,port,loff	;"
+    "    lsl \\b, #1			;"
+    "    bcs skip_\\@			;"
+    "    str \\bitmask, [\\port, \\loff]	;"
+    "    skip_\\@:			;"
+    "    m0pad;"
+    "  .endm				;"
+
+    // set the pin hi or low (determined by the offset passed in )
+    "  .macro qset2 bitmask,port,loff;"
+    "    str \\bitmask, [\\port, \\loff];"
+    "    m0pad;"
+    "  .endm;"
+
+    // Load up the next led byte to work with, put it in bn
+    "  .macro loadleds3 leds, bn, rled, scratch;"
+    "    mov \\scratch, \\leds;"
+    "    ldrb \\bn, [\\scratch, \\rled];"
+    "  .endm;"
+
+    // check whether or not we should dither
+    "  .macro loaddither7 bn,d,base,rdither;"
+    "    ldrb \\d, [\\base, \\rdither];"
+    "    lsl \\d, #24;"  //; shift high for the qadd w/bn
+    "    lsl \\bn, #24;" //; shift high for the qadd w/d
+    "    bne chkskip_\\@;" //; if bn==0, clear d;"
+    "    eor \\d, \\d;" //; clear d;"
+    "    m0pad;"
+    "    chkskip_\\@:;"
+    "  .endm;"
+
+    // Do the qadd8 for dithering -- there's two versions of this.  The m0 version
+    // takes advantage of the 3 cycle branch to do two things after the branch,
+    // while keeping timing constant.  The m0+, however, branches in 2 cycles, so
+    // we have to work around that a bit more.  This is one of the few times
+    // where the m0 will actually be _more_ efficient than the m0+
+    "  .macro dither5 bn,d;"
+    "  .syntax unified;"
+    "    .if fl_is_m0p == 0;"
+    "      adds \\bn, \\d;"         // do the add
+    "      bcc dither5_1_\\@;"
+    "      mvns \\bn, \\bn;"        // set the low 24bits ot 1's
+    "      lsls \\bn, \\bn, #24;"   // move low 8 bits to the high bits
+    "      dither5_1_\\@:;"
+    "      nop;"                    // nop to keep timing in line
+    "    .else;"
+    "      adds \\bn, \\d;"         // do the add"
+    "      bcc dither5_2_\\@;"
+    "      mvns \\bn, \\bn;"        // set the low 24bits ot 1's
+    "      dither5_2_\\@:;"
+    "      bcc dither5_3_\\@;"
+    "      lsls \\bn, \\bn, #24;"   // move low 8 bits to the high bits
+    "      dither5_3_\\@:;"
+    "    .endif;"
+    "  .syntax divided;"
+    "  .endm;"
+
+    // Do our scaling
+    "  .macro scale4 bn, base, scale, scratch;"
+    "    ldr \\scratch, [\\base, \\scale];"
+    "    lsr \\bn, \\bn, #24;"                  // bring bn back down to its low 8 bits
+    "    mul \\bn, \\scratch;"                  // do the multiply
+    "  .endm;"
+
+    // swap bn into b
+    "  .macro swapbbn1 b,bn;"
+    "    lsl \\b, \\bn, #16;"  // put the 8 bits we want for output high
+    "  .endm;"
+
+    // adjust the dithering value for the next time around (load e from memory
+    // to do the math)
+    "  .macro adjdither7 base,d,rled,eoffset,scratch;"
+    "    ldrb \\d, [\\base, \\rled];"
+    "    ldrb \\scratch,[\\base,\\eoffset];"          // load e
+    "    .syntax unified;"
+    "    subs \\d, \\scratch, \\d;"                   // d=e-d
+    "    .syntax divided;"
+    "    strb \\d, [\\base, \\rled];"                 // save d
+    "  .endm;"
+
+    // increment the led pointer (base+6 has what we're incrementing by)
+    "  .macro incleds3   leds, base, scratch;"
+    "    ldrb \\scratch, [\\base, #6];"               // load incremen
+    "    add \\leds, \\leds, \\scratch;"              // update leds pointer
+    "  .endm;"
+
+    // compare and loop
+    "  .macro cmploop5 counter,label;"
+    "    .syntax unified;"
+    "    subs \\counter, #1;"
+    "    .syntax divided;"
+    "    beq done_\\@;"
+    "    m0pad;"
+    "    b \\label;"
+    "    done_\\@:;"
+    "  .endm;"
+
+    " .endif;"
+  );
 
 #define M0_ASM_ARGS     :             \
       [leds] "+h" (leds),             \
@@ -198,9 +198,9 @@ showLedData(volatile uint32_t *_port, uint32_t _bitmask, const uint8_t *_leds, u
       [e0] "I" (3+RO(0)),             \
       [e1] "I" (3+RO(1)),             \
       [e2] "I" (3+RO(2)),             \
-      [scale0] "I" (4*(2+RO(0))),     \
-      [scale1] "I" (4*(2+RO(1))),     \
-      [scale2] "I" (4*(2+RO(2))),     \
+      [scale0] "I" (4*(2+RO(0))),         \
+      [scale1] "I" (4*(2+RO(1))),         \
+      [scale2] "I" (4*(2+RO(2))),         \
       [T1] "I" (T1),                  \
       [T2] "I" (T2),                  \
       [T3] "I" (T3)                   \
@@ -230,157 +230,157 @@ showLedData(volatile uint32_t *_port, uint32_t _bitmask, const uint8_t *_leds, u
     // track the loop outside the asm code, to allow inserting the interrupt
     // overrun checks.
     asm __volatile__ (
-        // pre-load byte 0
-        LOADLEDS3(0) LOADDITHER7(0) DITHER5 SCALE4(0) ADJDITHER7(0) SWAPBBN1
-        M0_ASM_ARGS);
+      // pre-load byte 0
+      LOADLEDS3(0) LOADDITHER7(0) DITHER5 SCALE4(0) ADJDITHER7(0) SWAPBBN1
+      M0_ASM_ARGS);
 
     do {
-        asm __volatile__ (
-            // Write out byte 0, prepping byte 1
-            HI2 _D1 QLO4 NOTHING         _D2(0) LO2 _D3(0)
-            HI2 _D1 QLO4 LOADLEDS3(1)    _D2(3) LO2 _D3(0)
-            HI2 _D1 QLO4 LOADDITHER7(1)  _D2(7) LO2 _D3(0)
-            HI2 _D1 QLO4 DITHER5         _D2(5) LO2 _D3(0)
-            HI2 _D1 QLO4 SCALE4(1)       _D2(4) LO2 _D3(0)
-            HI2 _D1 QLO4 ADJDITHER7(1)   _D2(7) LO2 _D3(0)
-            HI2 _D1 QLO4 NOTHING         _D2(0) LO2 _D3(0)
-            HI2 _D1 QLO4 SWAPBBN1        _D2(1) LO2 _D3(0)
-
-            // Write out byte 1, prepping byte 2
-            HI2 _D1 QLO4 NOTHING         _D2(0) LO2 _D3(0)
-            HI2 _D1 QLO4 LOADLEDS3(2)    _D2(3) LO2 _D3(0)
-            HI2 _D1 QLO4 LOADDITHER7(2)  _D2(7) LO2 _D3(0)
-            HI2 _D1 QLO4 DITHER5         _D2(5) LO2 _D3(0)
-            HI2 _D1 QLO4 SCALE4(2)       _D2(4) LO2 _D3(0)
-            HI2 _D1 QLO4 ADJDITHER7(2)   _D2(7) LO2 _D3(0)
-            HI2 _D1 QLO4 NOTHING         _D2(0) LO2 _D3(0)
-            HI2 _D1 QLO4 SWAPBBN1        _D2(1) LO2 _D3(0)
-
-            // Write out byte 2, prepping byte 0
-            HI2 _D1 QLO4 INCLEDS3        _D2(3) LO2 _D3(0)
-            HI2 _D1 QLO4 LOADLEDS3(0)    _D2(3) LO2 _D3(0)
-            HI2 _D1 QLO4 LOADDITHER7(0)  _D2(7) LO2 _D3(0)
-            HI2 _D1 QLO4 DITHER5         _D2(5) LO2 _D3(0)
-            HI2 _D1 QLO4 SCALE4(0)       _D2(4) LO2 _D3(0)
-            HI2 _D1 QLO4 ADJDITHER7(0)   _D2(7) LO2 _D3(0)
-            HI2 _D1 QLO4 NOTHING         _D2(0) LO2 _D3(0)
-            HI2 _D1 QLO4 SWAPBBN1        _D2(1) LO2 _D3(5)
-
-            M0_ASM_ARGS
-        );
-        SEI_CHK; INNER_SEI; --counter; CLI_CHK;
+      asm __volatile__ (
+      // Write out byte 0, prepping byte 1
+      HI2 _D1 QLO4 NOTHING         _D2(0) LO2 _D3(0)
+      HI2 _D1 QLO4 LOADLEDS3(1)    _D2(3) LO2 _D3(0)
+      HI2 _D1 QLO4 LOADDITHER7(1)  _D2(7) LO2 _D3(0)
+      HI2 _D1 QLO4 DITHER5         _D2(5) LO2 _D3(0)
+      HI2 _D1 QLO4 SCALE4(1)       _D2(4) LO2 _D3(0)
+      HI2 _D1 QLO4 ADJDITHER7(1)   _D2(7) LO2 _D3(0)
+      HI2 _D1 QLO4 NOTHING         _D2(0) LO2 _D3(0)
+      HI2 _D1 QLO4 SWAPBBN1        _D2(1) LO2 _D3(0)
+
+      // Write out byte 1, prepping byte 2
+      HI2 _D1 QLO4 NOTHING         _D2(0) LO2 _D3(0)
+      HI2 _D1 QLO4 LOADLEDS3(2)    _D2(3) LO2 _D3(0)
+      HI2 _D1 QLO4 LOADDITHER7(2)  _D2(7) LO2 _D3(0)
+      HI2 _D1 QLO4 DITHER5         _D2(5) LO2 _D3(0)
+      HI2 _D1 QLO4 SCALE4(2)       _D2(4) LO2 _D3(0)
+      HI2 _D1 QLO4 ADJDITHER7(2)   _D2(7) LO2 _D3(0)
+      HI2 _D1 QLO4 NOTHING         _D2(0) LO2 _D3(0)
+      HI2 _D1 QLO4 SWAPBBN1        _D2(1) LO2 _D3(0)
+
+      // Write out byte 2, prepping byte 0
+      HI2 _D1 QLO4 INCLEDS3        _D2(3) LO2 _D3(0)
+      HI2 _D1 QLO4 LOADLEDS3(0)    _D2(3) LO2 _D3(0)
+      HI2 _D1 QLO4 LOADDITHER7(0)  _D2(7) LO2 _D3(0)
+      HI2 _D1 QLO4 DITHER5         _D2(5) LO2 _D3(0)
+      HI2 _D1 QLO4 SCALE4(0)       _D2(4) LO2 _D3(0)
+      HI2 _D1 QLO4 ADJDITHER7(0)   _D2(7) LO2 _D3(0)
+      HI2 _D1 QLO4 NOTHING         _D2(0) LO2 _D3(0)
+      HI2 _D1 QLO4 SWAPBBN1        _D2(1) LO2 _D3(5)
+
+      M0_ASM_ARGS
+      );
+      SEI_CHK; INNER_SEI; --counter; CLI_CHK;
     } while(counter);
 #elif (FASTLED_ALLOW_INTERRUPTS == 1)
     // We're allowing interrupts - track the loop outside the asm code, and
     // re-enable interrupts in between each iteration.
     asm __volatile__ (
-        // pre-load byte 0
-        LOADLEDS3(0) LOADDITHER7(0) DITHER5 SCALE4(0) ADJDITHER7(0) SWAPBBN1
-        M0_ASM_ARGS);
+      // pre-load byte 0
+      LOADLEDS3(0) LOADDITHER7(0) DITHER5 SCALE4(0) ADJDITHER7(0) SWAPBBN1
+      M0_ASM_ARGS);
 
     do {
-        asm __volatile__ (
-            // Write out byte 0, prepping byte 1
-            HI2 _D1 QLO4 NOTHING         _D2(0) LO2 _D3(0)
-            HI2 _D1 QLO4 LOADLEDS3(1)    _D2(3) LO2 _D3(0)
-            HI2 _D1 QLO4 LOADDITHER7(1)  _D2(7) LO2 _D3(0)
-            HI2 _D1 QLO4 DITHER5         _D2(5) LO2 _D3(0)
-            HI2 _D1 QLO4 SCALE4(1)       _D2(4) LO2 _D3(0)
-            HI2 _D1 QLO4 ADJDITHER7(1)   _D2(7) LO2 _D3(0)
-            HI2 _D1 QLO4 NOTHING         _D2(0) LO2 _D3(0)
-            HI2 _D1 QLO4 SWAPBBN1        _D2(1) LO2 _D3(0)
-
-            // Write out byte 1, prepping byte 2
-            HI2 _D1 QLO4 NOTHING         _D2(0) LO2 _D3(0)
-            HI2 _D1 QLO4 LOADLEDS3(2)    _D2(3) LO2 _D3(0)
-            HI2 _D1 QLO4 LOADDITHER7(2)  _D2(7) LO2 _D3(0)
-            HI2 _D1 QLO4 DITHER5         _D2(5) LO2 _D3(0)
-            HI2 _D1 QLO4 SCALE4(2)       _D2(4) LO2 _D3(0)
-            HI2 _D1 QLO4 ADJDITHER7(2)   _D2(7) LO2 _D3(0)
-            HI2 _D1 QLO4 INCLEDS3        _D2(3) LO2 _D3(0)
-            HI2 _D1 QLO4 SWAPBBN1        _D2(1) LO2 _D3(0)
-
-            // Write out byte 2, prepping byte 0
-            HI2 _D1 QLO4 NOTHING         _D2(0) LO2 _D3(0)
-            HI2 _D1 QLO4 LOADLEDS3(0)    _D2(3) LO2 _D3(0)
-            HI2 _D1 QLO4 LOADDITHER7(0)  _D2(7) LO2 _D3(0)
-            HI2 _D1 QLO4 DITHER5         _D2(5) LO2 _D3(0)
-            HI2 _D1 QLO4 SCALE4(0)       _D2(4) LO2 _D3(0)
-            HI2 _D1 QLO4 ADJDITHER7(0)   _D2(7) LO2 _D3(0)
-            HI2 _D1 QLO4 NOTHING         _D2(0) LO2 _D3(0)
-            HI2 _D1 QLO4 SWAPBBN1        _D2(1) LO2 _D3(5)
-
-            M0_ASM_ARGS
-        );
-
-        uint32_t ticksBeforeInterrupts = SysTick->VAL;
-        sei();
-        --counter;
-        cli();
-
-        // If more than 45 uSecs have elapsed, give up on this frame and start over.
-        // Note: this isn't completely correct. It's possible that more than one
-        // millisecond will elapse, and so SysTick->VAL will lap
-        // ticksBeforeInterrupts.
-        // Note: ticksBeforeInterrupts DECREASES
-        const uint32_t kTicksPerMs = VARIANT_MCK / 1000;
-        const uint32_t kTicksPerUs = kTicksPerMs / 1000;
-        const uint32_t kTicksIn45us = kTicksPerUs * 45;
-
-        const uint32_t currentTicks = SysTick->VAL;
-
-        if (ticksBeforeInterrupts < currentTicks) {
-            // Timer started over
-            if ((ticksBeforeInterrupts + (kTicksPerMs - currentTicks)) > kTicksIn45us) {
-                return 0;
-            }
-        } else {
-            if ((ticksBeforeInterrupts - currentTicks) > kTicksIn45us) {
-                return 0;
-            }
+      asm __volatile__ (
+      // Write out byte 0, prepping byte 1
+      HI2 _D1 QLO4 NOTHING         _D2(0) LO2 _D3(0)
+      HI2 _D1 QLO4 LOADLEDS3(1)    _D2(3) LO2 _D3(0)
+      HI2 _D1 QLO4 LOADDITHER7(1)  _D2(7) LO2 _D3(0)
+      HI2 _D1 QLO4 DITHER5         _D2(5) LO2 _D3(0)
+      HI2 _D1 QLO4 SCALE4(1)       _D2(4) LO2 _D3(0)
+      HI2 _D1 QLO4 ADJDITHER7(1)   _D2(7) LO2 _D3(0)
+      HI2 _D1 QLO4 NOTHING         _D2(0) LO2 _D3(0)
+      HI2 _D1 QLO4 SWAPBBN1        _D2(1) LO2 _D3(0)
+
+      // Write out byte 1, prepping byte 2
+      HI2 _D1 QLO4 NOTHING         _D2(0) LO2 _D3(0)
+      HI2 _D1 QLO4 LOADLEDS3(2)    _D2(3) LO2 _D3(0)
+      HI2 _D1 QLO4 LOADDITHER7(2)  _D2(7) LO2 _D3(0)
+      HI2 _D1 QLO4 DITHER5         _D2(5) LO2 _D3(0)
+      HI2 _D1 QLO4 SCALE4(2)       _D2(4) LO2 _D3(0)
+      HI2 _D1 QLO4 ADJDITHER7(2)   _D2(7) LO2 _D3(0)
+      HI2 _D1 QLO4 INCLEDS3        _D2(3) LO2 _D3(0)
+      HI2 _D1 QLO4 SWAPBBN1        _D2(1) LO2 _D3(0)
+
+      // Write out byte 2, prepping byte 0
+      HI2 _D1 QLO4 NOTHING         _D2(0) LO2 _D3(0)
+      HI2 _D1 QLO4 LOADLEDS3(0)    _D2(3) LO2 _D3(0)
+      HI2 _D1 QLO4 LOADDITHER7(0)  _D2(7) LO2 _D3(0)
+      HI2 _D1 QLO4 DITHER5         _D2(5) LO2 _D3(0)
+      HI2 _D1 QLO4 SCALE4(0)       _D2(4) LO2 _D3(0)
+      HI2 _D1 QLO4 ADJDITHER7(0)   _D2(7) LO2 _D3(0)
+      HI2 _D1 QLO4 NOTHING         _D2(0) LO2 _D3(0)
+      HI2 _D1 QLO4 SWAPBBN1        _D2(1) LO2 _D3(5)
+
+      M0_ASM_ARGS
+      );
+
+      uint32_t ticksBeforeInterrupts = SysTick->VAL;
+      sei();
+      --counter;
+      cli();
+
+      // If more than 45 uSecs have elapsed, give up on this frame and start over.
+      // Note: this isn't completely correct. It's possible that more than one
+      // millisecond will elapse, and so SysTick->VAL will lap
+      // ticksBeforeInterrupts.
+      // Note: ticksBeforeInterrupts DECREASES
+      const uint32_t kTicksPerMs = VARIANT_MCK / 1000;
+      const uint32_t kTicksPerUs = kTicksPerMs / 1000;
+      const uint32_t kTicksIn45us = kTicksPerUs * 45;
+
+      const uint32_t currentTicks = SysTick->VAL;
+
+      if (ticksBeforeInterrupts < currentTicks) {
+        // Timer started over
+        if ((ticksBeforeInterrupts + (kTicksPerMs - currentTicks)) > kTicksIn45us) {
+          return 0;
+        }
+      } else {
+        if ((ticksBeforeInterrupts - currentTicks) > kTicksIn45us) {
+          return 0;
         }
+      }
     } while(counter);
 #else
     // We're not allowing interrupts - run the entire loop in asm to keep things
     // as tight as possible.  In an ideal world, we should be pushing out ws281x
     // leds (or other 3-wire leds) with zero gaps between pixels.
     asm __volatile__ (
-        // pre-load byte 0
-        LOADLEDS3(0) LOADDITHER7(0) DITHER5 SCALE4(0) ADJDITHER7(0) SWAPBBN1
-
-        // loop over writing out the data
-        LOOP
-            // Write out byte 0, prepping byte 1
-            HI2 _D1 QLO4 NOTHING         _D2(0) LO2 _D3(0)
-            HI2 _D1 QLO4 LOADLEDS3(1)    _D2(3) LO2 _D3(0)
-            HI2 _D1 QLO4 LOADDITHER7(1)  _D2(7) LO2 _D3(0)
-            HI2 _D1 QLO4 DITHER5         _D2(5) LO2 _D3(0)
-            HI2 _D1 QLO4 SCALE4(1)       _D2(4) LO2 _D3(0)
-            HI2 _D1 QLO4 ADJDITHER7(1)   _D2(7) LO2 _D3(0)
-            HI2 _D1 QLO4 NOTHING         _D2(0) LO2 _D3(0)
-            HI2 _D1 QLO4 SWAPBBN1        _D2(1) LO2 _D3(0)
-
-            // Write out byte 1, prepping byte 2
-            HI2 _D1 QLO4 NOTHING         _D2(0) LO2 _D3(0)
-            HI2 _D1 QLO4 LOADLEDS3(2)    _D2(3) LO2 _D3(0)
-            HI2 _D1 QLO4 LOADDITHER7(2)  _D2(7) LO2 _D3(0)
-            HI2 _D1 QLO4 DITHER5         _D2(5) LO2 _D3(0)
-            HI2 _D1 QLO4 SCALE4(2)       _D2(4) LO2 _D3(0)
-            HI2 _D1 QLO4 ADJDITHER7(2)   _D2(7) LO2 _D3(0)
-            HI2 _D1 QLO4 INCLEDS3        _D2(3) LO2 _D3(0)
-            HI2 _D1 QLO4 SWAPBBN1        _D2(1) LO2 _D3(0)
-
-            // Write out byte 2, prepping byte 0
-            HI2 _D1 QLO4 NOTHING         _D2(0) LO2 _D3(0)
-            HI2 _D1 QLO4 LOADLEDS3(0)    _D2(3) LO2 _D3(0)
-            HI2 _D1 QLO4 LOADDITHER7(0)  _D2(7) LO2 _D3(0)
-            HI2 _D1 QLO4 DITHER5         _D2(5) LO2 _D3(0)
-            HI2 _D1 QLO4 SCALE4(0)       _D2(4) LO2 _D3(0)
-            HI2 _D1 QLO4 ADJDITHER7(0)   _D2(7) LO2 _D3(0)
-            HI2 _D1 QLO4 NOTHING         _D2(0) LO2 _D3(0)
-            HI2 _D1 QLO4 SWAPBBN1        _D2(1) LO2 _D3(5) CMPLOOP5
-
-            M0_ASM_ARGS
+      // pre-load byte 0
+    LOADLEDS3(0) LOADDITHER7(0) DITHER5 SCALE4(0) ADJDITHER7(0) SWAPBBN1
+
+    // loop over writing out the data
+    LOOP
+      // Write out byte 0, prepping byte 1
+      HI2 _D1 QLO4 NOTHING         _D2(0) LO2 _D3(0)
+      HI2 _D1 QLO4 LOADLEDS3(1)    _D2(3) LO2 _D3(0)
+      HI2 _D1 QLO4 LOADDITHER7(1)  _D2(7) LO2 _D3(0)
+      HI2 _D1 QLO4 DITHER5         _D2(5) LO2 _D3(0)
+      HI2 _D1 QLO4 SCALE4(1)       _D2(4) LO2 _D3(0)
+      HI2 _D1 QLO4 ADJDITHER7(1)   _D2(7) LO2 _D3(0)
+      HI2 _D1 QLO4 NOTHING         _D2(0) LO2 _D3(0)
+      HI2 _D1 QLO4 SWAPBBN1        _D2(1) LO2 _D3(0)
+
+      // Write out byte 1, prepping byte 2
+      HI2 _D1 QLO4 NOTHING         _D2(0) LO2 _D3(0)
+      HI2 _D1 QLO4 LOADLEDS3(2)    _D2(3) LO2 _D3(0)
+      HI2 _D1 QLO4 LOADDITHER7(2)  _D2(7) LO2 _D3(0)
+      HI2 _D1 QLO4 DITHER5         _D2(5) LO2 _D3(0)
+      HI2 _D1 QLO4 SCALE4(2)       _D2(4) LO2 _D3(0)
+      HI2 _D1 QLO4 ADJDITHER7(2)   _D2(7) LO2 _D3(0)
+      HI2 _D1 QLO4 INCLEDS3        _D2(3) LO2 _D3(0)
+      HI2 _D1 QLO4 SWAPBBN1        _D2(1) LO2 _D3(0)
+
+      // Write out byte 2, prepping byte 0
+      HI2 _D1 QLO4 NOTHING         _D2(0) LO2 _D3(0)
+      HI2 _D1 QLO4 LOADLEDS3(0)    _D2(3) LO2 _D3(0)
+      HI2 _D1 QLO4 LOADDITHER7(0)  _D2(7) LO2 _D3(0)
+      HI2 _D1 QLO4 DITHER5         _D2(5) LO2 _D3(0)
+      HI2 _D1 QLO4 SCALE4(0)       _D2(4) LO2 _D3(0)
+      HI2 _D1 QLO4 ADJDITHER7(0)   _D2(7) LO2 _D3(0)
+      HI2 _D1 QLO4 NOTHING         _D2(0) LO2 _D3(0)
+      HI2 _D1 QLO4 SWAPBBN1        _D2(1) LO2 _D3(5) CMPLOOP5
+
+      M0_ASM_ARGS
     );
 #endif
     return num_leds;
diff --git a/platforms/arm/d51/clockless_arm_d51.h b/platforms/arm/d51/clockless_arm_d51.h
index a5c7f68023..7bb48062b8 100644
--- a/platforms/arm/d51/clockless_arm_d51.h
+++ b/platforms/arm/d51/clockless_arm_d51.h
@@ -43,7 +43,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 	}
 
 	template<int BITS> __attribute__ ((always_inline)) inline static void writeBits(register uint32_t & next_mark, register data_ptr_t port, register data_t hi, register data_t lo, register uint8_t & b)  {
-		for(register uint32_t i = BITS-1; i > 0; i--) {
+		for(register uint32_t i = BITS-1; i > 0; --i) {
 			while(ARM_DWT_CYCCNT < next_mark);
 			next_mark = ARM_DWT_CYCCNT + (T1+T2+T3);
 			FastPin<DATA_PIN>::fastset(port, hi);
diff --git a/platforms/arm/k20/clockless_arm_k20.h b/platforms/arm/k20/clockless_arm_k20.h
index 87e8634573..c38b5c2933 100644
--- a/platforms/arm/k20/clockless_arm_k20.h
+++ b/platforms/arm/k20/clockless_arm_k20.h
@@ -38,7 +38,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 	}
 
 	template<int BITS> __attribute__ ((always_inline)) inline static void writeBits(register uint32_t & next_mark, register data_ptr_t port, register data_t hi, register data_t lo, register uint8_t & b)  {
-		for(register uint32_t i = BITS-1; i > 0; i--) {
+		for(register uint32_t i = BITS-1; i > 0; --i) {
 			while(ARM_DWT_CYCCNT < next_mark);
 			next_mark = ARM_DWT_CYCCNT + (T1+T2+T3);
 			FastPin<DATA_PIN>::fastset(port, hi);
diff --git a/platforms/arm/k20/clockless_block_arm_k20.h b/platforms/arm/k20/clockless_block_arm_k20.h
index c0d838d278..9beeb9fa9f 100644
--- a/platforms/arm/k20/clockless_block_arm_k20.h
+++ b/platforms/arm/k20/clockless_block_arm_k20.h
@@ -94,7 +94,7 @@ class InlineBlockClocklessController : public CPixelLEDController<RGB_ORDER, LAN
 		register uint8_t d = pixels.template getd<PX>(pixels);
 		register uint8_t scale = pixels.template getscale<PX>(pixels);
 
-		for(register uint32_t i = 0; i < (USED_LANES/2); i++) {
+		for(register uint32_t i = 0; i < (USED_LANES/2); ++i) {
 			while(ARM_DWT_CYCCNT < next_mark);
 			next_mark = ARM_DWT_CYCCNT + (T1+T2+T3)-3;
 			*FastPin<FIRST_PIN>::sport() = PORT_MASK;
@@ -118,7 +118,7 @@ class InlineBlockClocklessController : public CPixelLEDController<RGB_ORDER, LAN
 			b.bytes[USED_LANES-1] = pixels.template loadAndScale<PX>(pixels,USED_LANES-1,d,scale);
 		}
 
-		for(register uint32_t i = USED_LANES/2; i < 8; i++) {
+		for(register uint32_t i = USED_LANES/2; i < 8; ++i) {
 			while(ARM_DWT_CYCCNT < next_mark);
 			next_mark = ARM_DWT_CYCCNT + (T1+T2+T3)-3;
 			*FastPin<FIRST_PIN>::sport() = PORT_MASK;
@@ -151,7 +151,7 @@ class InlineBlockClocklessController : public CPixelLEDController<RGB_ORDER, LAN
 		register Lines b0;
 
 		allpixels.preStepFirstByteDithering();
-		for(int i = 0; i < USED_LANES; i++) {
+		for(int i = 0; i < USED_LANES; ++i) {
 			b0.bytes[i] = allpixels.loadAndScale0(i);
 		}
 
@@ -252,7 +252,7 @@ class SixteenWayInlineBlockClocklessController : public CPixelLEDController<RGB_
 		register uint8_t d = pixels.template getd<PX>(pixels);
 		register uint8_t scale = pixels.template getscale<PX>(pixels);
 
-		for(register uint32_t i = 0; (i < LANES) && (i < 8); i++) {
+		for(register uint32_t i = 0; (i < LANES) && (i < 8); ++i) {
 			while(ARM_DWT_CYCCNT < next_mark);
 			next_mark = ARM_DWT_CYCCNT + (T1+T2+T3)-3;
 			*FastPin<PORTD_FIRST_PIN>::sport() = PMASK_LO;
@@ -288,7 +288,7 @@ class SixteenWayInlineBlockClocklessController : public CPixelLEDController<RGB_
 		register Lines b0;
 
 		allpixels.preStepFirstByteDithering();
-		for(int i = 0; i < LANES; i++) {
+		for(int i = 0; i < LANES; ++i) {
 			b0.bytes[i] = allpixels.loadAndScale0(i);
 		}
 
diff --git a/platforms/arm/k20/fastspi_arm_k20.h b/platforms/arm/k20/fastspi_arm_k20.h
index cbb72a9f6b..3d492558dc 100644
--- a/platforms/arm/k20/fastspi_arm_k20.h
+++ b/platforms/arm/k20/fastspi_arm_k20.h
@@ -87,7 +87,7 @@ template <int VAL> void getScalars(uint32_t & preScalar, uint32_t & scalar, uint
 
 			dbl = 0;
 			if(scalar == 0) { dbl = 1; }
-			else if(scalar < 3) { scalar--; }
+			else if(scalar < 3) { --scalar; }
 		}
 	}
 	return;
diff --git a/platforms/arm/k20/octows2811_controller.h b/platforms/arm/k20/octows2811_controller.h
index 749e18e1a6..f365e61f8d 100644
--- a/platforms/arm/k20/octows2811_controller.h
+++ b/platforms/arm/k20/octows2811_controller.h
@@ -9,54 +9,54 @@ FASTLED_NAMESPACE_BEGIN
 
 template<EOrder RGB_ORDER = GRB, uint8_t CHIP = WS2811_800kHz>
 class COctoWS2811Controller : public CPixelLEDController<RGB_ORDER, 8, 0xFF> {
-    OctoWS2811  *pocto;
-    uint8_t *drawbuffer,*framebuffer;
+  OctoWS2811  *pocto;
+  uint8_t *drawbuffer,*framebuffer;
 
-    void _init(int nLeds) {
-        if(pocto == NULL) {
-            drawbuffer = (uint8_t*)malloc(nLeds * 8 * 3);
-            framebuffer = (uint8_t*)malloc(nLeds * 8 * 3);
+  void _init(int nLeds) {
+    if(pocto == NULL) {
+      drawbuffer = (uint8_t*)malloc(nLeds * 8 * 3);
+      framebuffer = (uint8_t*)malloc(nLeds * 8 * 3);
 
-            // byte ordering is handled in show by the pixel controller
-            int config = WS2811_RGB;
-            config |= CHIP;
+      // byte ordering is handled in show by the pixel controller
+      int config = WS2811_RGB;
+      config |= CHIP;
 
-            pocto = new OctoWS2811(nLeds, framebuffer, drawbuffer, config);
+      pocto = new OctoWS2811(nLeds, framebuffer, drawbuffer, config);
 
-            pocto->begin();
-        }
+      pocto->begin();
     }
-
+  }
 public:
-    COctoWS2811Controller() { pocto = NULL; }
-    virtual int size() { return CLEDController::size() * 8; }
-
-    virtual void init() { /* do nothing yet */ }
-
-    typedef union {
-        uint8_t bytes[8];
-        uint32_t raw[2];
-    } Lines;
-
-    virtual void showPixels(PixelController<RGB_ORDER, 8, 0xFF> & pixels) {
-        _init(pixels.size());
-
-        uint8_t *pData = drawbuffer;
-        while(pixels.has(1)) {
-            Lines b;
-
-            for(int i = 0; i < 8; i++) { b.bytes[i] = pixels.loadAndScale0(i); }
-            transpose8x1_MSB(b.bytes,pData); pData += 8;
-            for(int i = 0; i < 8; i++) { b.bytes[i] = pixels.loadAndScale1(i); }
-            transpose8x1_MSB(b.bytes,pData); pData += 8;
-            for(int i = 0; i < 8; i++) { b.bytes[i] = pixels.loadAndScale2(i); }
-            transpose8x1_MSB(b.bytes,pData); pData += 8;
-            pixels.stepDithering();
-            pixels.advanceData();
-        }
-
-        pocto->show();
+  COctoWS2811Controller() { pocto = NULL; }
+  virtual int size() { return CLEDController::size() * 8; }
+
+  virtual void init() { /* do nothing yet */ }
+
+  typedef union {
+    uint8_t bytes[8];
+    uint32_t raw[2];
+  } Lines;
+
+  virtual void showPixels(PixelController<RGB_ORDER, 8, 0xFF> & pixels) {
+    _init(pixels.size());
+
+    uint8_t *pData = drawbuffer;
+    while(pixels.has(1)) {
+      Lines b;
+
+      for(int i = 0; i < 8; ++i) { b.bytes[i] = pixels.loadAndScale0(i); }
+      transpose8x1_MSB(b.bytes,pData); pData += 8;
+      for(int i = 0; i < 8; ++i) { b.bytes[i] = pixels.loadAndScale1(i); }
+      transpose8x1_MSB(b.bytes,pData); pData += 8;
+      for(int i = 0; i < 8; ++i) { b.bytes[i] = pixels.loadAndScale2(i); }
+      transpose8x1_MSB(b.bytes,pData); pData += 8;
+      pixels.stepDithering();
+      pixels.advanceData();
     }
+
+    pocto->show();
+  }
+
 };
 
 FASTLED_NAMESPACE_END
diff --git a/platforms/arm/k66/clockless_arm_k66.h b/platforms/arm/k66/clockless_arm_k66.h
index ec4241f701..df1b5007ee 100644
--- a/platforms/arm/k66/clockless_arm_k66.h
+++ b/platforms/arm/k66/clockless_arm_k66.h
@@ -38,7 +38,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 	}
 
 	template<int BITS> __attribute__ ((always_inline)) inline static void writeBits(register uint32_t & next_mark, register data_ptr_t port, register data_t hi, register data_t lo, register uint8_t & b)  {
-		for(register uint32_t i = BITS-1; i > 0; i--) {
+		for(register uint32_t i = BITS-1; i > 0; --i) {
 			while(ARM_DWT_CYCCNT < next_mark);
 			next_mark = ARM_DWT_CYCCNT + (T1+T2+T3);
 			FastPin<DATA_PIN>::fastset(port, hi);
diff --git a/platforms/arm/k66/clockless_block_arm_k66.h b/platforms/arm/k66/clockless_block_arm_k66.h
index c7eb99251f..70f8c7a590 100644
--- a/platforms/arm/k66/clockless_block_arm_k66.h
+++ b/platforms/arm/k66/clockless_block_arm_k66.h
@@ -108,7 +108,7 @@ class InlineBlockClocklessController : public CPixelLEDController<RGB_ORDER, LAN
 		register uint8_t d = pixels.template getd<PX>(pixels);
 		register uint8_t scale = pixels.template getscale<PX>(pixels);
 
-		for(register uint32_t i = 0; i < (USED_LANES/2); i++) {
+		for(register uint32_t i = 0; i < (USED_LANES/2); ++i) {
 			while(ARM_DWT_CYCCNT < next_mark);
 			next_mark = ARM_DWT_CYCCNT + (T1+T2+T3)-3;
 			*FastPin<FIRST_PIN>::sport() = PORT_MASK;
@@ -132,7 +132,7 @@ class InlineBlockClocklessController : public CPixelLEDController<RGB_ORDER, LAN
 			b.bytes[USED_LANES-1] = pixels.template loadAndScale<PX>(pixels,USED_LANES-1,d,scale);
 		}
 
-		for(register uint32_t i = USED_LANES/2; i < 8; i++) {
+		for(register uint32_t i = USED_LANES/2; i < 8; ++i) {
 			while(ARM_DWT_CYCCNT < next_mark);
 			next_mark = ARM_DWT_CYCCNT + (T1+T2+T3)-3;
 			*FastPin<FIRST_PIN>::sport() = PORT_MASK;
@@ -165,7 +165,7 @@ class InlineBlockClocklessController : public CPixelLEDController<RGB_ORDER, LAN
 		register Lines b0;
 
 		allpixels.preStepFirstByteDithering();
-		for(int i = 0; i < USED_LANES; i++) {
+		for(int i = 0; i < USED_LANES; ++i) {
 			b0.bytes[i] = allpixels.loadAndScale0(i);
 		}
 
@@ -266,7 +266,7 @@ class SixteenWayInlineBlockClocklessController : public CPixelLEDController<RGB_
 		register uint8_t d = pixels.template getd<PX>(pixels);
 		register uint8_t scale = pixels.template getscale<PX>(pixels);
 
-		for(register uint32_t i = 0; (i < LANES) && (i < 8); i++) {
+		for(register uint32_t i = 0; (i < LANES) && (i < 8); ++i) {
 			while(ARM_DWT_CYCCNT < next_mark);
 			next_mark = ARM_DWT_CYCCNT + (T1+T2+T3)-3;
 			*FastPin<PORTD_FIRST_PIN>::sport() = PMASK_LO;
@@ -301,7 +301,7 @@ class SixteenWayInlineBlockClocklessController : public CPixelLEDController<RGB_
 		register Lines b0;
 
 		allpixels.preStepFirstByteDithering();
-		for(int i = 0; i < LANES; i++) {
+		for(int i = 0; i < LANES; ++i) {
 			b0.bytes[i] = allpixels.loadAndScale0(i);
 		}
 
diff --git a/platforms/arm/k66/fastspi_arm_k66.h b/platforms/arm/k66/fastspi_arm_k66.h
index e0683fa2ee..f990741b4f 100644
--- a/platforms/arm/k66/fastspi_arm_k66.h
+++ b/platforms/arm/k66/fastspi_arm_k66.h
@@ -95,7 +95,7 @@ template <int VAL> void getScalars(uint32_t & preScalar, uint32_t & scalar, uint
 
 			dbl = 0;
 			if(scalar == 0) { dbl = 1; }
-			else if(scalar < 3) { scalar--; }
+			else if(scalar < 3) { --scalar; }
 		}
 	}
 	return;
diff --git a/platforms/arm/mxrt1062/block_clockless_arm_mxrt1062.h b/platforms/arm/mxrt1062/block_clockless_arm_mxrt1062.h
index 5c878c728b..c30823b610 100644
--- a/platforms/arm/mxrt1062/block_clockless_arm_mxrt1062.h
+++ b/platforms/arm/mxrt1062/block_clockless_arm_mxrt1062.h
@@ -82,7 +82,7 @@ class FlexibleInlineBlockClocklessController : public CPixelLEDController<RGB_OR
             _BLOCK_PIN(30);
         }
 
-        for(int i = 0; i < m_nActualLanes; i++) {
+        for(int i = 0; i < m_nActualLanes; ++i) {
             if(m_bitOffsets[i] < m_nLowBit) { m_nLowBit = m_bitOffsets[i]; }
             if(m_bitOffsets[i] > m_nHighBit) { m_nHighBit = m_bitOffsets[i]; }
         }
@@ -106,49 +106,48 @@ class FlexibleInlineBlockClocklessController : public CPixelLEDController<RGB_OR
 		mWait.mark();
 	}
 
-    typedef union {
-        uint8_t bytes[32];
-        uint8_t bg[4][8];
-        uint16_t shorts[16];
-        uint32_t raw[8];
-    } _outlines;
-
-    template<int BITS,int PX> __attribute__ ((always_inline)) inline void writeBits(register uint32_t & next_mark, register _outlines & b, PixelController<RGB_ORDER, LANES, __FL_T4_MASK> &pixels) {
-        _outlines b2;
-        transpose8x1(b.bg[3], b2.bg[3]);
-        transpose8x1(b.bg[2], b2.bg[2]);
-        transpose8x1(b.bg[1], b2.bg[1]);
-        transpose8x1(b.bg[0], b2.bg[0]);
-
-        register uint8_t d = pixels.template getd<PX>(pixels);
-        register uint8_t scale = pixels.template getscale<PX>(pixels);
-
-        int x = 0;
-        for(uint32_t i = 8; i > 0;) {
-            i--;
-            while(ARM_DWT_CYCCNT < next_mark);
-            *FastPin<FIRST_PIN>::sport() = m_nWriteMask;
-            next_mark = ARM_DWT_CYCCNT + m_offsets[0];
-
-            uint32_t out = (b2.bg[3][i] << 24) | (b2.bg[2][i] << 16) | (b2.bg[1][i] << 8) | b2.bg[0][i];
-
-            out = ((~out) & m_nWriteMask);
-            while((next_mark - ARM_DWT_CYCCNT) > m_offsets[1]);
-            *FastPin<FIRST_PIN>::cport() = out;
-
-            out = m_nWriteMask;
-            while((next_mark - ARM_DWT_CYCCNT) > m_offsets[2]);
-            *FastPin<FIRST_PIN>::cport() = out;
-
-            // Read and store up to two bytes
-            if (x < m_nActualLanes) {
-                b.bytes[m_bitOffsets[x]] = pixels.template loadAndScale<PX>(pixels,x,d,scale);
-                x++;
-                if (x < m_nActualLanes) {
-                    b.bytes[m_bitOffsets[x]] = pixels.template loadAndScale<PX>(pixels,x,d,scale);
-                    x++;
-                }
-            }
+  typedef union {
+    uint8_t bytes[32];
+    uint8_t bg[4][8];
+    uint16_t shorts[16];
+    uint32_t raw[8];
+  } _outlines;
+
+
+  template<int BITS,int PX> __attribute__ ((always_inline)) inline void writeBits(register uint32_t & next_mark, register _outlines & b, PixelController<RGB_ORDER, LANES, __FL_T4_MASK> &pixels) {
+    _outlines b2;
+    transpose8x1(b.bg[3], b2.bg[3]);
+    transpose8x1(b.bg[2], b2.bg[2]);
+    transpose8x1(b.bg[1], b2.bg[1]);
+    transpose8x1(b.bg[0], b2.bg[0]);
+
+    register uint8_t d = pixels.template getd<PX>(pixels);
+    register uint8_t scale = pixels.template getscale<PX>(pixels);
+
+    int x = 0;
+    for(uint32_t i = 8; i > 0;) {
+      --i;
+      while(ARM_DWT_CYCCNT < next_mark);
+      *FastPin<FIRST_PIN>::sport() = m_nWriteMask;
+      next_mark = ARM_DWT_CYCCNT + m_offsets[0];
+
+      uint32_t out = (b2.bg[3][i] << 24) | (b2.bg[2][i] << 16) | (b2.bg[1][i] << 8) | b2.bg[0][i];
+
+      out = ((~out) & m_nWriteMask);
+      while((next_mark - ARM_DWT_CYCCNT) > m_offsets[1]);
+      *FastPin<FIRST_PIN>::cport() = out;
+
+      out = m_nWriteMask;
+      while((next_mark - ARM_DWT_CYCCNT) > m_offsets[2]);
+      *FastPin<FIRST_PIN>::cport() = out;
+
+      // Read and store up to two bytes
+      if (x < m_nActualLanes) {
+        b.bytes[m_bitOffsets[x]] = pixels.template loadAndScale<PX>(pixels,x,d,scale);
+        ++x;
+        if (x < m_nActualLanes) {
+          b.bytes[m_bitOffsets[x]] = pixels.template loadAndScale<PX>(pixels,x,d,scale);
+          ++x;
         }
     }
 
@@ -157,9 +156,15 @@ class FlexibleInlineBlockClocklessController : public CPixelLEDController<RGB_OR
         _outlines b0;
         uint32_t start = ARM_DWT_CYCCNT;
 
+<<<<<<< HEAD
         for(int i = 0; i < m_nActualLanes; i++) {
             b0.bytes[m_bitOffsets[i]] = allpixels.loadAndScale0(i);
         }
+=======
+    for(int i = 0; i < m_nActualLanes; ++i) {
+      b0.bytes[m_bitOffsets[i]] = allpixels.loadAndScale0(i);
+    }
+>>>>>>> use prefix notation for ++ and -- where possible
 
         cli();
 
diff --git a/platforms/arm/mxrt1062/clockless_arm_mxrt1062.h b/platforms/arm/mxrt1062/clockless_arm_mxrt1062.h
index dfb772aead..ed3be816c9 100644
--- a/platforms/arm/mxrt1062/clockless_arm_mxrt1062.h
+++ b/platforms/arm/mxrt1062/clockless_arm_mxrt1062.h
@@ -51,7 +51,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
   	}
 
 	template<int BITS> __attribute__ ((always_inline)) inline void writeBits(register uint32_t & next_mark, register uint32_t & b)  {
-		for(register uint32_t i = BITS-1; i > 0; i--) {
+		for(register uint32_t i = BITS-1; i > 0; --i) {
 			while(ARM_DWT_CYCCNT < next_mark);
 			next_mark = ARM_DWT_CYCCNT + off[0];
 			FastPin<DATA_PIN>::hi();
diff --git a/platforms/arm/nrf52/clockless_arm_nrf52.h b/platforms/arm/nrf52/clockless_arm_nrf52.h
index 613ff2824a..1dd3cd94d6 100644
--- a/platforms/arm/nrf52/clockless_arm_nrf52.h
+++ b/platforms/arm/nrf52/clockless_arm_nrf52.h
@@ -206,45 +206,45 @@ class ClocklessController : public CPixelLEDController<_RGB_ORDER> {
 
         while (pixels.has(1) && (remainingSequenceElements >= _BITS_PER_PIXEL)) {
             uint8_t b0 = pixels.loadAndScale0();
-            WriteBitToSequence<7>(b0, e); e++;
-            WriteBitToSequence<6>(b0, e); e++;
-            WriteBitToSequence<5>(b0, e); e++;
-            WriteBitToSequence<4>(b0, e); e++;
-            WriteBitToSequence<3>(b0, e); e++;
-            WriteBitToSequence<2>(b0, e); e++;
-            WriteBitToSequence<1>(b0, e); e++;
-            WriteBitToSequence<0>(b0, e); e++;
+            WriteBitToSequence<7>(b0, e); ++e;
+            WriteBitToSequence<6>(b0, e); ++e;
+            WriteBitToSequence<5>(b0, e); ++e;
+            WriteBitToSequence<4>(b0, e); ++e;
+            WriteBitToSequence<3>(b0, e); ++e;
+            WriteBitToSequence<2>(b0, e); ++e;
+            WriteBitToSequence<1>(b0, e); ++e;
+            WriteBitToSequence<0>(b0, e); ++e;
             if (_XTRA0 > 0) {
-                for (int i = 0; i < _XTRA0; i++) {
-                    WriteBitToSequence<0>(0,e); e++;
+                for (int i = 0; i < _XTRA0; ++i) {
+                    WriteBitToSequence<0>(0,e); ++e;
                 }
             }
             uint8_t b1 = pixels.loadAndScale1();
-            WriteBitToSequence<7>(b1, e); e++;
-            WriteBitToSequence<6>(b1, e); e++;
-            WriteBitToSequence<5>(b1, e); e++;
-            WriteBitToSequence<4>(b1, e); e++;
-            WriteBitToSequence<3>(b1, e); e++;
-            WriteBitToSequence<2>(b1, e); e++;
-            WriteBitToSequence<1>(b1, e); e++;
-            WriteBitToSequence<0>(b1, e); e++;
+            WriteBitToSequence<7>(b1, e); ++e;
+            WriteBitToSequence<6>(b1, e); ++e;
+            WriteBitToSequence<5>(b1, e); ++e;
+            WriteBitToSequence<4>(b1, e); ++e;
+            WriteBitToSequence<3>(b1, e); ++e;
+            WriteBitToSequence<2>(b1, e); ++e;
+            WriteBitToSequence<1>(b1, e); ++e;
+            WriteBitToSequence<0>(b1, e); ++e;
             if (_XTRA0 > 0) {
-                for (int i = 0; i < _XTRA0; i++) {
-                    WriteBitToSequence<0>(0,e); e++;
+                for (int i = 0; i < _XTRA0; ++i) {
+                    WriteBitToSequence<0>(0,e); ++e;
                 }
             }
             uint8_t b2 = pixels.loadAndScale2();
-            WriteBitToSequence<7>(b2, e); e++;
-            WriteBitToSequence<6>(b2, e); e++;
-            WriteBitToSequence<5>(b2, e); e++;
-            WriteBitToSequence<4>(b2, e); e++;
-            WriteBitToSequence<3>(b2, e); e++;
-            WriteBitToSequence<2>(b2, e); e++;
-            WriteBitToSequence<1>(b2, e); e++;
-            WriteBitToSequence<0>(b2, e); e++;
+            WriteBitToSequence<7>(b2, e); ++e;
+            WriteBitToSequence<6>(b2, e); ++e;
+            WriteBitToSequence<5>(b2, e); ++e;
+            WriteBitToSequence<4>(b2, e); ++e;
+            WriteBitToSequence<3>(b2, e); ++e;
+            WriteBitToSequence<2>(b2, e); ++e;
+            WriteBitToSequence<1>(b2, e); ++e;
+            WriteBitToSequence<0>(b2, e); ++e;
             if (_XTRA0 > 0) {
-                for (int i = 0; i < _XTRA0; i++) {
-                    WriteBitToSequence<0>(0,e); e++;
+                for (int i = 0; i < _XTRA0; ++i) {
+                    WriteBitToSequence<0>(0,e); ++e;
                 }
             }
 
@@ -291,22 +291,22 @@ class ClocklessController : public CPixelLEDController<_RGB_ORDER> {
         uint8_t  * nextByte    = arrayOfBytes;
         for (uint16_t bytesRemain = bytesToSend;
             (remainingSequenceElements >= 8) && (bytesRemain > 0);
-            bytesRemain--,
+            --bytesRemain,
             remainingSequenceElements     -= 8,
             s_SequenceBufferValidElements += 8
             ) {
             uint8_t b = *nextByte;
-            WriteBitToSequence<7,false>(b, e); e++;
-            WriteBitToSequence<6,false>(b, e); e++;
-            WriteBitToSequence<5,false>(b, e); e++;
-            WriteBitToSequence<4,false>(b, e); e++;
-            WriteBitToSequence<3,false>(b, e); e++;
-            WriteBitToSequence<2,false>(b, e); e++;
-            WriteBitToSequence<1,false>(b, e); e++;
-            WriteBitToSequence<0,false>(b, e); e++;
+            WriteBitToSequence<7,false>(b, e); ++e;
+            WriteBitToSequence<6,false>(b, e); ++e;
+            WriteBitToSequence<5,false>(b, e); ++e;
+            WriteBitToSequence<4,false>(b, e); ++e;
+            WriteBitToSequence<3,false>(b, e); ++e;
+            WriteBitToSequence<2,false>(b, e); ++e;
+            WriteBitToSequence<1,false>(b, e); ++e;
+            WriteBitToSequence<0,false>(b, e); ++e;
             if (_XTRA0 > 0) {
-                for (int i = 0; i < _XTRA0; i++) {
-                    WriteBitToSequence<0,_FLIP>(0,e); e++;
+                for (int i = 0; i < _XTRA0; ++i) {
+                    WriteBitToSequence<0,_FLIP>(0,e); ++e;
                 }
             }
         }
diff --git a/platforms/arm/sam/clockless_arm_sam.h b/platforms/arm/sam/clockless_arm_sam.h
index 737a4555c2..d7c57940b6 100644
--- a/platforms/arm/sam/clockless_arm_sam.h
+++ b/platforms/arm/sam/clockless_arm_sam.h
@@ -48,7 +48,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 		// while(VAL < (TOTAL*10)) { bShift=true; }
 		// if(bShift) { next_mark = (VAL-TOTAL); };
 
-		for(register uint32_t i = BITS; i > 0; i--) {
+		for(register uint32_t i = BITS; i > 0; --i) {
 			// wait to start the bit, then set the pin high
 			while(DUE_TIMER_VAL < next_mark);
 			next_mark = (DUE_TIMER_VAL+TOTAL);
diff --git a/platforms/arm/stm32/clockless_arm_stm32.h b/platforms/arm/stm32/clockless_arm_stm32.h
index 1cc1f66785..0ac8a5d436 100644
--- a/platforms/arm/stm32/clockless_arm_stm32.h
+++ b/platforms/arm/stm32/clockless_arm_stm32.h
@@ -38,7 +38,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 #define _CYCCNT (*(volatile uint32_t*)(0xE0001004UL))
 
     template<int BITS> __attribute__ ((always_inline)) inline static void writeBits(register uint32_t & next_mark, register data_ptr_t port, register data_t hi, register data_t lo, register uint8_t & b)  {
-        for(register uint32_t i = BITS-1; i > 0; i--) {
+        for(register uint32_t i = BITS-1; i > 0; --i) {
             while(_CYCCNT < (T1+T2+T3-20));
             FastPin<DATA_PIN>::fastset(port, hi);
             _CYCCNT = 4;
diff --git a/platforms/esp/32/clockless_block_esp32.h b/platforms/esp/32/clockless_block_esp32.h
index 41f44be2b6..45b7671cf8 100644
--- a/platforms/esp/32/clockless_block_esp32.h
+++ b/platforms/esp/32/clockless_block_esp32.h
@@ -35,7 +35,7 @@ class InlineBlockClocklessController : public CPixelLEDController<RGB_ORDER, LAN
 	while(!showRGBInternal(pixels) && cnt--) {
 	    ets_intr_unlock();
 #ifdef FASTLED_DEBUG_COUNT_FRAME_RETRIES
-	    _retry_cnt++;
+	    ++_retry_cnt;
 #endif
 	    delayMicroseconds(WAIT_TIME * 10);
 	    ets_intr_lock();
@@ -86,7 +86,7 @@ class InlineBlockClocklessController : public CPixelLEDController<RGB_ORDER, LAN
 	register uint8_t d = pixels.template getd<PX>(pixels);
 	register uint8_t scale = pixels.template getscale<PX>(pixels);
 	
-	for(register uint32_t i = 0; i < USED_LANES; i++) {
+	for(register uint32_t i = 0; i < USED_LANES; ++i) {
 	    while((__clock_cycles() - last_mark) < (T1+T2+T3));
 	    last_mark = __clock_cycles();
 	    *FastPin<FIRST_PIN>::sport() = PORT_MASK << REAL_FIRST_PIN;
@@ -101,7 +101,7 @@ class InlineBlockClocklessController : public CPixelLEDController<RGB_ORDER, LAN
 	    b.bytes[i] = pixels.template loadAndScale<PX>(pixels,i,d,scale);
 	}
 
-	for(register uint32_t i = USED_LANES; i < 8; i++) {
+	for(register uint32_t i = USED_LANES; i < 8; ++i) {
 	    while((__clock_cycles() - last_mark) < (T1+T2+T3));
 	    last_mark = __clock_cycles();
 	    *FastPin<FIRST_PIN>::sport() = PORT_MASK << REAL_FIRST_PIN;
@@ -122,7 +122,7 @@ class InlineBlockClocklessController : public CPixelLEDController<RGB_ORDER, LAN
 	// Setup the pixel controller and load/scale the first byte
 	Lines b0;
 	
-	for(int i = 0; i < USED_LANES; i++) {
+	for(int i = 0; i < USED_LANES; ++i) {
 	    b0.bytes[i] = allpixels.loadAndScale0(i);
 	}
 	allpixels.preStepFirstByteDithering();
@@ -159,7 +159,7 @@ class InlineBlockClocklessController : public CPixelLEDController<RGB_ORDER, LAN
 	
 	ets_intr_unlock();
 #ifdef FASTLED_DEBUG_COUNT_FRAME_RETRIES
-	_frame_cnt++;
+	++_frame_cnt;
 #endif
 	return __clock_cycles() - _start;
     }
diff --git a/platforms/esp/32/clockless_esp32.h.orig b/platforms/esp/32/clockless_esp32.h.orig
new file mode 100644
index 0000000000..bdc0bd7ab9
--- /dev/null
+++ b/platforms/esp/32/clockless_esp32.h.orig
@@ -0,0 +1,786 @@
+/*
+ * Integration into FastLED ClocklessController 2017 Thomas Basler
+ *
+ * Modifications Copyright (c) 2017 Martin F. Falatic
+ *
+ * Modifications Copyright (c) 2018 Samuel Z. Guyer
+ *
+ * ESP32 support is provided using the RMT peripheral device -- a unit
+ * on the chip designed specifically for generating (and receiving)
+ * precisely-timed digital signals. Nominally for use in infrared
+ * remote controls, we use it to generate the signals for clockless
+ * LED strips. The main advantage of using the RMT device is that,
+ * once programmed, it generates the signal asynchronously, allowing
+ * the CPU to continue executing other code. It is also not vulnerable
+ * to interrupts or other timing problems that could disrupt the signal.
+ *
+ * The implementation strategy is borrowed from previous work and from
+ * the RMT support built into the ESP32 IDF. The RMT device has 8
+ * channels, which can be programmed independently to send sequences
+ * of high/low bits. Memory for each channel is limited, however, so
+ * in order to send a long sequence of bits, we need to continuously
+ * refill the buffer until all the data is sent. To do this, we fill
+ * half the buffer and then set an interrupt to go off when that half
+ * is sent. Then we refill that half while the second half is being
+ * sent. This strategy effectively overlaps computation (by the CPU)
+ * and communication (by the RMT).
+ *
+ * Since the RMT device only has 8 channels, we need a strategy to
+ * allow more than 8 LED controllers. Our driver assigns controllers
+ * to channels on the fly, queuing up controllers as necessary until a
+ * channel is free. The main showPixels routine just fires off the
+ * first 8 controllers; the interrupt handler starts new controllers
+ * asynchronously as previous ones finish. So, for example, it can
+ * send the data for 8 controllers simultaneously, but 16 controllers
+ * would take approximately twice as much time.
+ *
+ * There is a #define that allows a program to control the total
+ * number of channels that the driver is allowed to use. It defaults
+ * to 8 -- use all the channels. Setting it to 1, for example, results
+ * in fully serial output:
+ *
+ *     #define FASTLED_RMT_MAX_CHANNELS 1
+ *
+ * OTHER RMT APPLICATIONS
+ *
+ * The default FastLED driver takes over control of the RMT interrupt
+ * handler, making it hard to use the RMT device for other
+ * (non-FastLED) purposes. You can change its behavior to use the ESP
+ * core driver instead, allowing other RMT applications to
+ * co-exist. To switch to this mode, add the following directive
+ * before you include FastLED.h:
+ *
+ *      #define FASTLED_RMT_BUILTIN_DRIVER
+ *
+ * There may be a performance penalty for using this mode. We need to
+ * compute the RMT signal for the entire LED strip ahead of time,
+ * rather than overlapping it with communication. We also need a large
+ * buffer to hold the signal specification. Each bit of pixel data is
+ * represented by a 32-bit pulse specification, so it is a 32X blow-up
+ * in memory use.
+ *
+ *
+ * Based on public domain code created 19 Nov 2016 by Chris Osborn <fozztexx@fozztexx.com>
+ * http://insentricity.com *
+ *
+ */
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#pragma once
+
+FASTLED_NAMESPACE_BEGIN
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "esp32-hal.h"
+#include "esp_intr.h"
+#include "driver/gpio.h"
+#include "driver/rmt.h"
+#include "driver/periph_ctrl.h"
+#include "freertos/semphr.h"
+#include "soc/rmt_struct.h"
+
+#include "esp_log.h"
+
+#ifdef __cplusplus
+}
+#endif
+
+__attribute__ ((always_inline)) inline static uint32_t __clock_cycles() { // Returns the current value of the CPU "ccount" special register.
+  uint32_t cyc; // written by the inline asm below
+  __asm__ __volatile__ ("rsr %0,ccount":"=a" (cyc)); // "rsr" copies ccount into cyc; NOTE(review): presumably the free-running CPU cycle counter -- confirm against the Xtensa ISA
+  return cyc;
+}
+
+#define FASTLED_HAS_CLOCKLESS 1
+
+// -- Configuration constants
+#define DIVIDER             2 /* 4, 8 still seem to work, but timings become marginal */
+#define MAX_PULSES         32 /* A channel has a 64 "pulse" buffer - we use half per pass */
+
+// -- Convert ESP32 cycles back into nanoseconds
+#define ESPCLKS_TO_NS(_CLKS) (((long)(_CLKS) * 1000L) / F_CPU_MHZ)
+
+// -- Convert nanoseconds into RMT cycles
+#define F_CPU_RMT       (  80000000L)
+#define NS_PER_SEC      (1000000000L)
+#define CYCLES_PER_SEC  (F_CPU_RMT/DIVIDER)
+#define NS_PER_CYCLE    ( NS_PER_SEC / CYCLES_PER_SEC )
+#define NS_TO_CYCLES(n) ( (n) / NS_PER_CYCLE )
+
+// -- Convert ESP32 cycles to RMT cycles
+#define TO_RMT_CYCLES(_CLKS) NS_TO_CYCLES(ESPCLKS_TO_NS(_CLKS))    
+
+// -- Number of cycles to signal the strip to latch
+#define RMT_RESET_DURATION NS_TO_CYCLES(50000)
+
+// -- Core or custom driver
+#ifndef FASTLED_RMT_BUILTIN_DRIVER
+#define FASTLED_RMT_BUILTIN_DRIVER false
+#endif
+
+// -- Max number of controllers we can support
+#ifndef FASTLED_RMT_MAX_CONTROLLERS
+#define FASTLED_RMT_MAX_CONTROLLERS 32
+#endif
+
+// -- Number of RMT channels to use (up to 8)
+//    Redefine this value to 1 to force serial output
+#ifndef FASTLED_RMT_MAX_CHANNELS
+#define FASTLED_RMT_MAX_CHANNELS 8
+#endif
+
+// -- Array of all controllers
+static CLEDController * gControllers[FASTLED_RMT_MAX_CONTROLLERS];
+
+// -- Current set of active controllers, indexed by the RMT
+//    channel assigned to them.
+static CLEDController * gOnChannel[FASTLED_RMT_MAX_CHANNELS];
+
+static int gNumControllers = 0;
+static int gNumStarted = 0;
+static int gNumDone = 0;
+static int gNext = 0;
+
+static intr_handle_t gRMT_intr_handle = NULL;
+
+// -- Global semaphore for the whole show process
+//    Semaphore is not given until all data has been sent
+static xSemaphoreHandle gTX_sem = NULL;
+
+static bool gInitialized = false;
+
+template <int DATA_PIN, int T1, int T2, int T3, EOrder RGB_ORDER = RGB, int XTRA0 = 0, bool FLIP = false, int WAIT_TIME = 5>
+class ClocklessController : public CPixelLEDController<RGB_ORDER>
+{
+    // -- RMT has 8 channels, numbered 0 to 7
+    rmt_channel_t  mRMT_channel;
+
+    // -- Store the GPIO pin
+    gpio_num_t     mPin;
+<<<<<<< HEAD
+
+    // -- This instantiation forces a check on the pin choice
+    FastPin<DATA_PIN> mFastPin;
+
+    // -- Timing values for zero and one bits, derived from T1, T2, and T3
+    rmt_item32_t   mZero;
+    rmt_item32_t   mOne;
+
+=======
+
+    // -- Timing values for zero and one bits, derived from T1, T2, and T3
+    rmt_item32_t   mZero;
+    rmt_item32_t   mOne;
+
+>>>>>>> upstream/master
+    // -- State information for keeping track of where we are in the pixel data
+    PixelController<RGB_ORDER> * mPixels = NULL;
+    void *         mPixelSpace = NULL;
+    uint8_t        mRGB_channel;
+    uint16_t       mCurPulse;
+
+    // -- Buffer to hold all of the pulses. For the version that uses
+    //    the RMT driver built into the ESP core.
+    rmt_item32_t * mBuffer;
+    uint16_t       mBufferSize;
+
+public:
+
+    virtual void init()
+    {
+        // -- Precompute rmt items corresponding to a zero bit and a one bit
+        //    according to the timing values given in the template instantiation
+        // T1H
+        mOne.level0 = 1;
+        mOne.duration0 = TO_RMT_CYCLES(T1+T2);
+        // T1L
+        mOne.level1 = 0;
+        mOne.duration1 = TO_RMT_CYCLES(T3);
+
+        // T0H
+        mZero.level0 = 1;
+        mZero.duration0 = TO_RMT_CYCLES(T1);
+        // T0L
+        mZero.level1 = 0;
+        mZero.duration1 = TO_RMT_CYCLES(T2 + T3);
+
+<<<<<<< HEAD
+        gControllers[gNumControllers] = this;
+        ++gNumControllers;
+
+        mPin = gpio_num_t(DATA_PIN);
+=======
+	gControllers[gNumControllers] = this;
+        ++gNumControllers;
+
+	mPin = gpio_num_t(DATA_PIN);
+>>>>>>> upstream/master
+    }
+
+    virtual uint16_t getMaxRefreshRate() const { return 400; }
+
+protected:
+
+    void initRMT()
+    {
+<<<<<<< HEAD
+        // -- Only need to do this once
+        if (gInitialized) return;
+
+        for (int i = 0; i < FASTLED_RMT_MAX_CHANNELS; ++i) {
+            gOnChannel[i] = NULL;
+
+            // -- RMT configuration for transmission
+            rmt_config_t rmt_tx;
+            rmt_tx.channel = rmt_channel_t(i);
+            rmt_tx.rmt_mode = RMT_MODE_TX;
+            rmt_tx.gpio_num = mPin;  // The particular pin will be assigned later
+            rmt_tx.mem_block_num = 1;
+            rmt_tx.clk_div = DIVIDER;
+            rmt_tx.tx_config.loop_en = false;
+            rmt_tx.tx_config.carrier_level = RMT_CARRIER_LEVEL_LOW;
+            rmt_tx.tx_config.carrier_en = false;
+            rmt_tx.tx_config.idle_level = RMT_IDLE_LEVEL_LOW;
+            rmt_tx.tx_config.idle_output_en = true;
+                
+            // -- Apply the configuration
+            rmt_config(&rmt_tx);
+
+            if (FASTLED_RMT_BUILTIN_DRIVER) {
+                rmt_driver_install(rmt_channel_t(i), 0, 0);
+            } else {
+                // -- Set up the RMT to send 1/2 of the pulse buffer and then
+                //    generate an interrupt. When we get this interrupt we
+                //    fill the other half in preparation (kind of like double-buffering)
+                rmt_set_tx_thr_intr_en(rmt_channel_t(i), true, MAX_PULSES);
+            }
+        }
+
+        // -- Create a semaphore to block execution until all the controllers are done
+        if (gTX_sem == NULL) {
+            gTX_sem = xSemaphoreCreateBinary();
+            xSemaphoreGive(gTX_sem);
+        }
+                
+        if ( ! FASTLED_RMT_BUILTIN_DRIVER) {
+            // -- Allocate the interrupt if we have not done so yet. This
+            //    interrupt handler must work for all different kinds of
+            //    strips, so it delegates to the refill function for each
+            //    specific instantiation of ClocklessController.
+            if (gRMT_intr_handle == NULL)
+                esp_intr_alloc(ETS_RMT_INTR_SOURCE, 0, interruptHandler, 0, &gRMT_intr_handle);
+        }
+
+        gInitialized = true;
+    }
+
+    virtual void showPixels(PixelController<RGB_ORDER> & pixels)
+    {
+        if (gNumStarted == 0) {
+            // -- First controller: make sure everything is set up
+            initRMT();
+            xSemaphoreTake(gTX_sem, portMAX_DELAY);
+        }
+
+        // -- Initialize the local state, save a pointer to the pixel
+        //    data. We need to make a copy because pixels is a local
+        //    variable in the calling function, and this data structure
+        //    needs to outlive this call to showPixels.
+
+        if (mPixels != NULL) delete mPixels;
+        mPixels = new PixelController<RGB_ORDER>(pixels);
+        
+        // -- Keep track of the number of strips we've seen
+        ++gNumStarted;
+
+        // -- The last call to showPixels is the one responsible for doing
+        //    all of the actual work
+        if (gNumStarted == gNumControllers) {
+            gNext = 0;
+
+            // -- First, fill all the available channels
+            int channel = 0;
+            while (channel < FASTLED_RMT_MAX_CHANNELS && gNext < gNumControllers) {
+                startNext(channel);
+                ++channel;
+            }
+
+            // -- Wait here while the rest of the data is sent. The interrupt handler
+            //    will keep refilling the RMT buffers until it is all sent; then it
+            //    gives the semaphore back.
+            xSemaphoreTake(gTX_sem, portMAX_DELAY);
+            xSemaphoreGive(gTX_sem);
+
+            // -- Reset the counters
+            gNumStarted = 0;
+            gNumDone = 0;
+            gNext = 0;
+        }
+    }
+
+    // -- Start up the next controller
+    //    This method is static so that it can dispatch to the appropriate
+    //    startOnChannel method of the given controller.
+    static void startNext(int channel)
+    {
+        if (gNext < gNumControllers) {
+            ClocklessController * pController = static_cast<ClocklessController*>(gControllers[gNext]);
+            pController->startOnChannel(channel);
+            ++gNext;
+        }
+    }
+
+    virtual void startOnChannel(int channel)
+    {
+        // -- Assign this channel and configure the RMT
+        mRMT_channel = rmt_channel_t(channel);
+
+        // -- Store a reference to this controller, so we can get it
+        //    inside the interrupt handler
+        gOnChannel[channel] = this;
+
+        // -- Assign the pin to this channel
+        rmt_set_pin(mRMT_channel, RMT_MODE_TX, mPin);
+
+        if (FASTLED_RMT_BUILTIN_DRIVER) {
+            // -- Use the built-in RMT driver to send all the data in one shot
+            rmt_register_tx_end_callback(doneOnChannel, 0);
+            writeAllRMTItems();
+        } else {
+            // -- Use our custom driver to send the data incrementally
+
+            // -- Turn on the interrupts
+            rmt_set_tx_intr_en(mRMT_channel, true);
+        
+            // -- Initialize the counters that keep track of where we are in
+            //    the pixel data.
+            mCurPulse = 0;
+            mRGB_channel = 0;
+
+            // -- Fill both halves of the buffer
+            fillHalfRMTBuffer();
+            fillHalfRMTBuffer();
+
+            // -- Turn on the interrupts
+            rmt_set_tx_intr_en(mRMT_channel, true);
+            
+            // -- Start the RMT TX operation
+            rmt_tx_start(mRMT_channel, true);
+        }
+    }
+
+    static void doneOnChannel(rmt_channel_t channel, void * arg)
+    {
+        ClocklessController * controller = static_cast<ClocklessController*>(gOnChannel[channel]);
+        portBASE_TYPE HPTaskAwoken = 0;
+
+        // -- Turn off output on the pin
+        gpio_matrix_out(controller->mPin, 0x100, 0, 0);
+
+        gOnChannel[channel] = NULL;
+        ++gNumDone;
+
+        if (gNumDone == gNumControllers) {
+            // -- If this is the last controller, signal that we are all done
+            xSemaphoreGiveFromISR(gTX_sem, &HPTaskAwoken);
+            if(HPTaskAwoken == pdTRUE) portYIELD_FROM_ISR();
+        } else {
+            // -- Otherwise, if there are still controllers waiting, then
+            //    start the next one on this channel
+            if (gNext < gNumControllers)
+                startNext(channel);
+        }
+=======
+	// -- Only need to do this once
+	if (gInitialized) return;
+
+	for (int i = 0; i < FASTLED_RMT_MAX_CHANNELS; ++i) {
+	    gOnChannel[i] = NULL;
+
+	    // -- RMT configuration for transmission
+	    rmt_config_t rmt_tx;
+	    rmt_tx.channel = rmt_channel_t(i);
+	    rmt_tx.rmt_mode = RMT_MODE_TX;
+	    rmt_tx.gpio_num = mPin;  // The particular pin will be assigned later
+	    rmt_tx.mem_block_num = 1;
+	    rmt_tx.clk_div = DIVIDER;
+	    rmt_tx.tx_config.loop_en = false;
+	    rmt_tx.tx_config.carrier_level = RMT_CARRIER_LEVEL_LOW;
+	    rmt_tx.tx_config.carrier_en = false;
+	    rmt_tx.tx_config.idle_level = RMT_IDLE_LEVEL_LOW;
+	    rmt_tx.tx_config.idle_output_en = true;
+		
+	    // -- Apply the configuration
+	    rmt_config(&rmt_tx);
+
+	    if (FASTLED_RMT_BUILTIN_DRIVER) {
+		rmt_driver_install(rmt_channel_t(i), 0, 0);
+	    } else {
+		// -- Set up the RMT to send 1/2 of the pulse buffer and then
+		//    generate an interrupt. When we get this interrupt we
+		//    fill the other half in preparation (kind of like double-buffering)
+		rmt_set_tx_thr_intr_en(rmt_channel_t(i), true, MAX_PULSES);
+	    }
+	}
+
+	// -- Create a semaphore to block execution until all the controllers are done
+	if (gTX_sem == NULL) {
+	    gTX_sem = xSemaphoreCreateBinary();
+	    xSemaphoreGive(gTX_sem);
+	}
+		
+	if ( ! FASTLED_RMT_BUILTIN_DRIVER) {
+	    // -- Allocate the interrupt if we have not done so yet. This
+	    //    interrupt handler must work for all different kinds of
+	    //    strips, so it delegates to the refill function for each
+	    //    specific instantiation of ClocklessController.
+	    if (gRMT_intr_handle == NULL)
+		esp_intr_alloc(ETS_RMT_INTR_SOURCE, 0, interruptHandler, 0, &gRMT_intr_handle);
+	}
+
+	gInitialized = true;
+    }
+
+    virtual void showPixels(PixelController<RGB_ORDER> & pixels)
+    {
+	if (gNumStarted == 0) {
+	    // -- First controller: make sure everything is set up
+	    initRMT();
+	    xSemaphoreTake(gTX_sem, portMAX_DELAY);
+	}
+
+	// -- Initialize the local state, save a pointer to the pixel
+	//    data. We need to make a copy because pixels is a local
+	//    variable in the calling function, and this data structure
+	//    needs to outlive this call to showPixels.
+
+	if (mPixels != NULL) delete mPixels;
+	mPixels = new PixelController<RGB_ORDER>(pixels);
+	
+	// -- Keep track of the number of strips we've seen
+	++gNumStarted;
+
+	// -- The last call to showPixels is the one responsible for doing
+	//    all of the actual work
+	if (gNumStarted == gNumControllers) {
+	    gNext = 0;
+
+	    // -- First, fill all the available channels
+	    int channel = 0;
+	    while (channel < FASTLED_RMT_MAX_CHANNELS && gNext < gNumControllers) {
+		startNext(channel);
+		++channel;
+	    }
+
+	    // -- Wait here while the rest of the data is sent. The interrupt handler
+	    //    will keep refilling the RMT buffers until it is all sent; then it
+	    //    gives the semaphore back.
+	    xSemaphoreTake(gTX_sem, portMAX_DELAY);
+	    xSemaphoreGive(gTX_sem);
+
+	    // -- Reset the counters
+	    gNumStarted = 0;
+	    gNumDone = 0;
+	    gNext = 0;
+	}
+    }
+
+    // -- Start up the next controller
+    //    This method is static so that it can dispatch to the appropriate
+    //    startOnChannel method of the given controller.
+    static void startNext(int channel)
+    {
+	if (gNext < gNumControllers) {
+	    ClocklessController * pController = static_cast<ClocklessController*>(gControllers[gNext]);
+	    pController->startOnChannel(channel);
+	    ++gNext;
+	}
+    }
+
+    virtual void startOnChannel(int channel)
+    {
+	// -- Assign this channel and configure the RMT
+	mRMT_channel = rmt_channel_t(channel);
+
+	// -- Store a reference to this controller, so we can get it
+	//    inside the interrupt handler
+	gOnChannel[channel] = this;
+
+	// -- Assign the pin to this channel
+	rmt_set_pin(mRMT_channel, RMT_MODE_TX, mPin);
+
+	if (FASTLED_RMT_BUILTIN_DRIVER) {
+	    // -- Use the built-in RMT driver to send all the data in one shot
+	    rmt_register_tx_end_callback(doneOnChannel, 0);
+	    writeAllRMTItems();
+	} else {
+	    // -- Use our custom driver to send the data incrementally
+
+	    // -- Turn on the interrupts
+	    rmt_set_tx_intr_en(mRMT_channel, true);
+	
+	    // -- Initialize the counters that keep track of where we are in
+	    //    the pixel data.
+	    mCurPulse = 0;
+	    mRGB_channel = 0;
+
+	    // -- Fill both halves of the buffer
+	    fillHalfRMTBuffer();
+	    fillHalfRMTBuffer();
+
+	    // -- Turn on the interrupts
+	    rmt_set_tx_intr_en(mRMT_channel, true);
+	    
+	    // -- Start the RMT TX operation
+	    rmt_tx_start(mRMT_channel, true);
+	}
+    }
+
+    static void doneOnChannel(rmt_channel_t channel, void * arg)
+    {
+	ClocklessController * controller = static_cast<ClocklessController*>(gOnChannel[channel]);
+        portBASE_TYPE HPTaskAwoken = 0;
+
+	// -- Turn off output on the pin
+	gpio_matrix_out(controller->mPin, 0x100, 0, 0);
+
+	gOnChannel[channel] = NULL;
+	++gNumDone;
+
+	if (gNumDone == gNumControllers) {
+	    // -- If this is the last controller, signal that we are all done
+	    xSemaphoreGiveFromISR(gTX_sem, &HPTaskAwoken);
+	    if(HPTaskAwoken == pdTRUE) portYIELD_FROM_ISR();
+	} else {
+	    // -- Otherwise, if there are still controllers waiting, then
+	    //    start the next one on this channel
+	    if (gNext < gNumControllers)
+		startNext(channel);
+	}
+>>>>>>> upstream/master
+    }
+    
+    static IRAM_ATTR void interruptHandler(void *arg)
+    {
+        // -- The basic structure of this code is borrowed from the
+        //    interrupt handler in esp-idf/components/driver/rmt.c
+        uint32_t intr_st = RMT.int_st.val;
+        uint8_t channel;
+
+        for (channel = 0; channel < FASTLED_RMT_MAX_CHANNELS; ++channel) {
+            int tx_done_bit = channel * 3;
+            int tx_next_bit = channel + 24;
+
+            if (gOnChannel[channel] != NULL) {
+
+<<<<<<< HEAD
+                ClocklessController * controller = static_cast<ClocklessController*>(gOnChannel[channel]);
+
+                // -- More to send on this channel
+                if (intr_st & BIT(tx_next_bit)) {
+                    RMT.int_clr.val |= BIT(tx_next_bit);
+
+                    // -- Refill the half of the buffer that we just finished,
+                    //    allowing the other half to proceed.
+                    controller->fillHalfRMTBuffer();
+                }
+
+                // -- Transmission is complete on this channel
+                if (intr_st & BIT(tx_done_bit)) {
+                    RMT.int_clr.val |= BIT(tx_done_bit);
+                    doneOnChannel(rmt_channel_t(channel), 0);
+=======
+		ClocklessController * controller = static_cast<ClocklessController*>(gOnChannel[channel]);
+
+		// -- More to send on this channel
+                if (intr_st & BIT(tx_next_bit)) {
+		    RMT.int_clr.val |= BIT(tx_next_bit);
+
+                    // -- Refill the half of the buffer that we just finished,
+                    //    allowing the other half to proceed.
+		    controller->fillHalfRMTBuffer();
+                }
+
+		// -- Transmission is complete on this channel
+                if (intr_st & BIT(tx_done_bit)) {
+                    RMT.int_clr.val |= BIT(tx_done_bit);
+		    doneOnChannel(rmt_channel_t(channel), 0);
+>>>>>>> upstream/master
+                }
+            }
+        }
+    }
+
+    virtual void fillHalfRMTBuffer()
+    {
+        // -- Fill half of the RMT pulse buffer
+
+        //    The buffer holds 64 total pulse items, so this loop converts
+        //    as many pixels as can fit in half of the buffer (MAX_PULSES =
+        //    32 items). In our case, each pixel consists of three bytes,
+        //    each bit turns into one pulse item -- 24 items per pixel. So,
+        //    each half of the buffer can hold 1 and 1/3 of a pixel.
+
+        //    The member variable mCurPulse keeps track of which of the 64
+        //    items we are writing. During the first call to this method it
+        //    fills 0-31; in the second call it fills 32-63, and then wraps
+        //    back around to zero.
+
+        //    When we run out of pixel data, just fill the remaining items
+        //    with zero pulses.
+
+        uint16_t pulse_count = 0; // Ranges from 0-31 (half a buffer)
+        uint32_t byteval = 0;
+        uint32_t one_val = mOne.val;
+        uint32_t zero_val = mZero.val;
+        bool done_strip = false;
+
+        while (pulse_count < MAX_PULSES) {
+            if (! mPixels->has(1)) {
+<<<<<<< HEAD
+                if (mCurPulse > 0) {
+                    // -- Extend the last pulse to force the strip to latch. Honestly, I'm not
+                    //    sure if this is really necessary.
+                    // RMTMEM.chan[mRMT_channel].data32[mCurPulse-1].duration1 = RMT_RESET_DURATION;
+                }
+=======
+>>>>>>> upstream/master
+                done_strip = true;
+                break;
+            }
+
+            // -- Cycle through the R,G, and B values in the right order
+            switch (mRGB_channel) {
+            case 0:
+                byteval = mPixels->loadAndScale0();
+                mRGB_channel = 1;
+                break;
+            case 1:
+                byteval = mPixels->loadAndScale1();
+                mRGB_channel = 2;
+                break;
+            case 2:
+                byteval = mPixels->loadAndScale2();
+                mPixels->advanceData();
+                mPixels->stepDithering();
+                mRGB_channel = 0;
+                break;
+            default:
+                break;
+            }
+
+            byteval <<= 24;
+            // Shift bits out, MSB first, setting RMTMEM.chan[n].data32[x] to the 
+            // rmt_item32_t value corresponding to the buffered bit value
+            for (register uint32_t j = 0; j < 8; ++j) {
+                uint32_t val = (byteval & 0x80000000L) ? one_val : zero_val;
+                RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = val;
+                byteval <<= 1;
+                ++mCurPulse;
+                ++pulse_count;
+            }
+<<<<<<< HEAD
+=======
+
+	    if (done_strip)
+		RMTMEM.chan[mRMT_channel].data32[mCurPulse-1].duration1 = RMT_RESET_DURATION;
+>>>>>>> upstream/master
+        }
+        
+        if (done_strip) {
+            // -- And fill the remaining items with zero pulses. The zero values trigger
+            //    the tx_done interrupt.
+            while (pulse_count < MAX_PULSES) {
+                RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = 0;
+                ++mCurPulse;
+                ++pulse_count;
+            }
+        }
+
+        // -- When we have filled the back half of the buffer, reset the position to the first half
+        if (mCurPulse >= MAX_PULSES*2)
+            mCurPulse = 0;
+    }
+
+    virtual void writeAllRMTItems()
+    {
+        // -- Compute the pulse values for the whole strip at once.
+        //    Requires a large buffer
+<<<<<<< HEAD
+        mBufferSize = mPixels->size() * 3 * 8;
+=======
+	mBufferSize = mPixels->size() * 3 * 8;
+>>>>>>> upstream/master
+
+        // TODO: need a specific number here
+        if (mBuffer == NULL) {
+            mBuffer = (rmt_item32_t *) calloc( mBufferSize, sizeof(rmt_item32_t));
+        }
+
+        mCurPulse = 0;
+        mRGB_channel = 0;
+        uint32_t byteval = 0;
+        while (mPixels->has(1)) {
+            // -- Cycle through the R,G, and B values in the right order
+            switch (mRGB_channel) {
+            case 0:
+                byteval = mPixels->loadAndScale0();
+                mRGB_channel = 1;
+                break;
+            case 1:
+                byteval = mPixels->loadAndScale1();
+                mRGB_channel = 2;
+                break;
+            case 2:
+                byteval = mPixels->loadAndScale2();
+                mPixels->advanceData();
+                mPixels->stepDithering();
+                mRGB_channel = 0;
+                break;
+            default:
+                break;
+            }
+
+            byteval <<= 24;
+            // Shift bits out, MSB first, storing into mBuffer[mCurPulse] the
+            // rmt_item32_t (mOne or mZero) corresponding to each bit value
+            for (register uint32_t j = 0; j < 8; ++j) {
+                mBuffer[mCurPulse] = (byteval & 0x80000000L) ? mOne : mZero;
+                byteval <<= 1;
+                ++mCurPulse;
+            }
+        }
+
+        mBuffer[mCurPulse-1].duration1 = RMT_RESET_DURATION;
+        assert(mCurPulse == mBufferSize);
+
+<<<<<<< HEAD
+        rmt_write_items(mRMT_channel, mBuffer, mBufferSize, false);
+=======
+	rmt_write_items(mRMT_channel, mBuffer, mBufferSize, false);
+>>>>>>> upstream/master
+    }
+};
+
+FASTLED_NAMESPACE_END
diff --git a/platforms/esp/32/clockless_i2s_esp32.h b/platforms/esp/32/clockless_i2s_esp32.h
index a82e43a6cf..d7af459d82 100644
--- a/platforms/esp/32/clockless_i2s_esp32.h
+++ b/platforms/esp/32/clockless_i2s_esp32.h
@@ -213,7 +213,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         
         gControllers[gNumControllers] = this;
         int my_index = gNumControllers;
-        gNumControllers++;
+        ++gNumControllers;
         
         // -- Set up the pin We have to do two things: configure the
         //    actual GPIO pin, and route the output from the default
@@ -235,7 +235,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
    static int pgcd(int smallest,int precision,int a,int b,int c)
     {
         int pgc_=1;
-        for( int i=smallest;i>0;i--)
+        for( int i=smallest;i>0;--i)
         {
             
             if( a%i<=precision && b%i<=precision && c%i<=precision)
@@ -293,7 +293,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         //Serial.printf("%f\n",I2S_MAX_CLK/(1000000000L*freq));
         while(pgc_==1 ||  (T1/pgc_ +T2/pgc_ +T3/pgc_)>I2S_MAX_PULSE_PER_BIT) //while(pgc_==1 ||  (T1/pgc_ +T2/pgc_ +T3/pgc_)>I2S_MAX_CLK/(1000000000L*freq))
         {
-            precision++;
+            ++precision;
             pgc_=pgcd(smallest,precision,T1,T2,T3);
             //Serial.printf("%d %d\n",pgc_,(a+b+c)/pgc_);
         }
@@ -327,9 +327,9 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         int b=0;
         CLOCK_DIVIDER_A=1;
         CLOCK_DIVIDER_B=0;
-        for(a=1;a<64;a++)
+        for(a=1;a<64;++a)
         {
-            for(b=0;b<a;b++)
+            for(b=0;b<a;++b)
             {
                 //printf("%d %d %f %f %f\n",b,a,v,(double)v*(double)a,fabsf(v-(double)b/a));
                 if(fabsf(v-(double)b/a) <= prec/2)
@@ -356,7 +356,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         {
             CLOCK_DIVIDER_A=1;
             CLOCK_DIVIDER_B=0;
-            CLOCK_DIVIDER_N++;
+            ++CLOCK_DIVIDER_N;
         }
         
         //printf("%d %d %f %f %d\n",CLOCK_DIVIDER_B,CLOCK_DIVIDER_A,(double)CLOCK_DIVIDER_B/CLOCK_DIVIDER_A,v,CLOCK_DIVIDER_N);
@@ -382,11 +382,11 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         int i = 0;
         while ( i < ones_for_one ) {
             gOneBit[i] = 0xFFFFFF00;
-            i++;
+            ++i;
         }
         while ( i < gPulsesPerBit ) {
             gOneBit[i] = 0x00000000;
-            i++;
+            ++i;
         }
         
         //int ones_for_zero = ((T1ns - 1)/FASTLED_I2S_NS_PER_PULSE) + 1;
@@ -399,11 +399,11 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         i = 0;
         while ( i < ones_for_zero ) {
             gZeroBit[i] = 0xFFFFFF00;
-            i++;
+            ++i;
         }
         while ( i < gPulsesPerBit ) {
             gZeroBit[i] = 0x00000000;
-            i++;
+            ++i;
         }
         
         memset(gPixelRow, 0, NUM_COLOR_CHANNELS * 32);
@@ -531,13 +531,13 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
      */
     static void empty( uint32_t *buf)
     {
-        for(int i=0;i<8*NUM_COLOR_CHANNELS;i++)
+        for(int i=0;i<8*NUM_COLOR_CHANNELS;++i)
         {
             int offset=gPulsesPerBit*i;
-            for(int j=0;j<ones_for_zero;j++)
+            for(int j=0;j<ones_for_zero;++j)
                 buf[offset+j]=0xffffffff;
             
-            for(int j=ones_for_one;j<gPulsesPerBit;j++)
+            for(int j=ones_for_one;j<gPulsesPerBit;++j)
                 buf[offset+j]=0;
         }
     }
@@ -558,7 +558,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         (*mPixels) = pixels;
         
         // -- Keep track of the number of strips we've seen
-        gNumStarted++;
+        ++gNumStarted;
 
         // Serial.print("Show pixels ");
         // Serial.println(gNumStarted);
@@ -626,7 +626,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         // -- Get the requested pixel from each controller. Store the
         //    data for each color channel in a separate array.
         uint32_t has_data_mask = 0;
-        for (int i = 0; i < gNumControllers; i++) {
+        for (int i = 0; i < gNumControllers; ++i) {
             // -- Store the pixels in reverse controller order starting at index 23
             //    This causes the bits to come out in the right position after we
             //    transpose them.
@@ -652,24 +652,24 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         
         // -- Transpose and encode the pixel data for the DMA buffer
         // int buf_index = 0;
-        for (int channel = 0; channel < NUM_COLOR_CHANNELS; channel++) {
+        for (int channel = 0; channel < NUM_COLOR_CHANNELS; ++channel) {
             
             // -- Tranpose each array: all the bit 7's, then all the bit 6's, ...
             transpose32(gPixelRow[channel], gPixelBits[channel][0] );
             
             //Serial.print("Channel: "); Serial.print(channel); Serial.print(" ");
-            for (int bitnum = 0; bitnum < 8; bitnum++) {
+            for (int bitnum = 0; bitnum < 8; ++bitnum) {
                 uint8_t * row = (uint8_t *) (gPixelBits[channel][bitnum]);
                 uint32_t bit = (row[0] << 24) | (row[1] << 16) | (row[2] << 8) | row[3];
                 
-                /* SZG: More general, but too slow:
-                for (int pulse_num = 0; pulse_num < gPulsesPerBit; pulse_num++) {
-                    buf[buf_index++] = has_data_mask & ( (bit & gOneBit[pulse_num]) | (~bit & gZeroBit[pulse_num]) );
-                }
-                */
+               /* SZG: More general, but too slow:
+                    for (int pulse_num = 0; pulse_num < gPulsesPerBit; ++pulse_num) {
+                        buf[buf_index++] = has_data_mask & ( (bit & gOneBit[pulse_num]) | (~bit & gZeroBit[pulse_num]) );
+                     }
+               */
 
                 // -- Only fill in the pulses that are different between the "0" and "1" encodings
-                for(int pulse_num = ones_for_zero; pulse_num < ones_for_one; pulse_num++) {
+                for(int pulse_num = ones_for_zero; pulse_num < ones_for_one; ++pulse_num) {
                     buf[bitnum*gPulsesPerBit+channel*8*gPulsesPerBit+pulse_num] = has_data_mask & bit;
                 }
             }
diff --git a/platforms/esp/32/clockless_rmt_esp32.h b/platforms/esp/32/clockless_rmt_esp32.h
index 1b83018f94..a53ff5f6a9 100644
--- a/platforms/esp/32/clockless_rmt_esp32.h
+++ b/platforms/esp/32/clockless_rmt_esp32.h
@@ -237,7 +237,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         mZero.duration1 = ESP_TO_RMT_CYCLES(T2+T3); // TO_RMT_CYCLES(T2 + T3);
 
         gControllers[gNumControllers] = this;
-        gNumControllers++;
+        ++gNumControllers;
 
         mPin = gpio_num_t(DATA_PIN);
     }
@@ -247,7 +247,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
 protected:
     void initRMT()
     {
-        for (int i = 0; i < FASTLED_RMT_MAX_CHANNELS; i++) {
+        for (int i = 0; i < FASTLED_RMT_MAX_CHANNELS; ++i) {
             gOnChannel[i] = NULL;
 
             // -- RMT configuration for transmission
@@ -323,7 +323,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         }
 
         // -- Keep track of the number of strips we've seen
-        gNumStarted++;
+        ++gNumStarted;
 
         // -- The last call to showPixels is the one responsible for doing
         //    all of the actual worl
@@ -334,14 +334,14 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
             int channel = 0;
             while (channel < FASTLED_RMT_MAX_CHANNELS && gNext < gNumControllers) {
                 startNext(channel);
-                channel++;
+                ++channel;
             }
 
             // -- Make sure it's been at least 50ms since last show
             mWait.wait();
 
             // -- Start them all
-            for (int i = 0; i < channel; i++) {
+            for (int i = 0; i < channel; ++i) {
                 ClocklessController * pController = static_cast<ClocklessController*>(gControllers[i]);
                 rmt_tx_start(pController->mRMT_channel, true);
             }
@@ -404,10 +404,10 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
     {
         // -- Write one byte's worth of RMT pulses to the big buffer
         byteval <<= 24;
-        for (register uint32_t j = 0; j < 8; j++) {
+        for (register uint32_t j = 0; j < 8; ++j) {
             mBuffer[mCurPulse] = (byteval & 0x80000000L) ? mOne : mZero;
             byteval <<= 1;
-            mCurPulse++;
+            ++mCurPulse;
         }
     }
 
@@ -419,7 +419,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         if (gNext < gNumControllers) {
             ClocklessController * pController = static_cast<ClocklessController*>(gControllers[gNext]);
             pController->startOnChannel(channel);
-            gNext++;
+            ++gNext;
         }
     }
 
@@ -475,7 +475,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         gpio_matrix_out(controller->mPin, 0x100, 0, 0);
 
         gOnChannel[channel] = NULL;
-        gNumDone++;
+        ++gNumDone;
 
         if (gNumDone == gNumControllers) {
             // -- If this is the last controller, signal that we are all done
@@ -508,7 +508,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
         uint32_t intr_st = RMT.int_st.val;
         uint8_t channel;
 
-        for (channel = 0; channel < FASTLED_RMT_MAX_CHANNELS; channel++) {
+        for (channel = 0; channel < FASTLED_RMT_MAX_CHANNELS; ++channel) {
             int tx_done_bit = channel * 3;
             int tx_next_bit = channel + 24;
 
@@ -557,13 +557,13 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
             
             // Shift bits out, MSB first, setting RMTMEM.chan[n].data32[x] to the 
             // rmt_item32_t value corresponding to the buffered bit value
-            for (register uint32_t j = 0; j < 24; j++) {
+            for (register uint32_t j = 0; j < 24; ++j) {
                 uint32_t val = (pixel & 0x80000000L) ? one_val : zero_val;
                 *pItem++ = val;
                 // Replaces: RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = val;
 
                 pixel <<= 1;
-                curPulse++;
+                ++curPulse;
 
                 if (curPulse == MAX_PULSES) {
                     pItem = & (RMTMEM.chan[mRMT_channel].data32[0].val);
@@ -576,7 +576,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
             mRMT_mem_ptr = pItem;
         } else {
             // -- No more data; signal to the RMT we are done
-            for (uint32_t j = 0; j < 8; j++) {
+            for (uint32_t j = 0; j < 8; ++j) {
                 * mRMT_mem_ptr++ = 0;
             }
         }   
@@ -605,7 +605,7 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
             byte = 0;
         }
 
-        mCurColor++;
+        ++mCurColor;
         if (mCurColor == NUM_COLOR_CHANNELS) mCurColor = 0;
 
         return byte;
@@ -647,19 +647,19 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
                 byteval = 0;
             }
 
-            mCurColor++;
+            ++mCurColor;
             if (mCurColor == NUM_COLOR_CHANNELS) mCurColor = 0;
         
             // byteval = getNextByte();
             byteval <<= 24;
             // Shift bits out, MSB first, setting RMTMEM.chan[n].data32[x] to the 
             // rmt_item32_t value corresponding to the buffered bit value
-            for (register uint32_t j = 0; j < 8; j++) {
+            for (register uint32_t j = 0; j < 8; ++j) {
                 uint32_t val = (byteval & 0x80000000L) ? one_val : zero_val;
                 * mRMT_mem_ptr++ = val;
                 // Replaces: RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = val;
                 byteval <<= 1;
-                mCurPulse++;
+                ++mCurPulse;
             }
             pulses += 8;
         }
@@ -670,8 +670,8 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER>
             while (pulses < 32) {
                 * mRMT_mem_ptr++ = 0;
                 // Replaces: RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = 0;
-                mCurPulse++;
-                pulses++;
+                ++mCurPulse;
+                ++pulses;
             }
         }
         
diff --git a/platforms/esp/8266/clockless_block_esp8266.h b/platforms/esp/8266/clockless_block_esp8266.h
index d3b1cf9595..3eccbe1e81 100644
--- a/platforms/esp/8266/clockless_block_esp8266.h
+++ b/platforms/esp/8266/clockless_block_esp8266.h
@@ -34,7 +34,7 @@ class InlineBlockClocklessController : public CPixelLEDController<RGB_ORDER, LAN
 		while(!showRGBInternal(pixels) && cnt--) {
       		os_intr_unlock();
 			#ifdef FASTLED_DEBUG_COUNT_FRAME_RETRIES
-			_retry_cnt++;
+			++_retry_cnt;
 			#endif
 			delayMicroseconds(WAIT_TIME * 10);
 			os_intr_lock();
@@ -77,7 +77,7 @@ class InlineBlockClocklessController : public CPixelLEDController<RGB_ORDER, LAN
 		register uint8_t d = pixels.template getd<PX>(pixels);
 		register uint8_t scale = pixels.template getscale<PX>(pixels);
 
-		for(register uint32_t i = 0; i < USED_LANES; i++) {
+		for(register uint32_t i = 0; i < USED_LANES; ++i) {
 			while((__clock_cycles() - last_mark) < (T1+T2+T3));
 			last_mark = __clock_cycles();
 			*FastPin<FIRST_PIN>::sport() = PIN_MASK;
@@ -92,7 +92,7 @@ class InlineBlockClocklessController : public CPixelLEDController<RGB_ORDER, LAN
 			b.bytes[i] = pixels.template loadAndScale<PX>(pixels,i,d,scale);
 		}
 
-		for(register uint32_t i = USED_LANES; i < 8; i++) {
+		for(register uint32_t i = USED_LANES; i < 8; ++i) {
 			while((__clock_cycles() - last_mark) < (T1+T2+T3));
 			last_mark = __clock_cycles();
 			*FastPin<FIRST_PIN>::sport() = PIN_MASK;
@@ -113,7 +113,7 @@ class InlineBlockClocklessController : public CPixelLEDController<RGB_ORDER, LAN
 		// Setup the pixel controller and load/scale the first byte
 		Lines b0;
 
-		for(int i = 0; i < USED_LANES; i++) {
+		for(int i = 0; i < USED_LANES; ++i) {
 			b0.bytes[i] = allpixels.loadAndScale0(i);
 		}
 		allpixels.preStepFirstByteDithering();
@@ -150,7 +150,7 @@ class InlineBlockClocklessController : public CPixelLEDController<RGB_ORDER, LAN
 
 		os_intr_unlock();
 		#ifdef FASTLED_DEBUG_COUNT_FRAME_RETRIES
-		_frame_cnt++;
+		++_frame_cnt;
 		#endif
 		return __clock_cycles() - _start;
 	}
diff --git a/platforms/esp/8266/clockless_esp8266.h b/platforms/esp/8266/clockless_esp8266.h
index 504b9f963a..131f24671b 100644
--- a/platforms/esp/8266/clockless_esp8266.h
+++ b/platforms/esp/8266/clockless_esp8266.h
@@ -9,9 +9,9 @@ extern uint32_t _retry_cnt;
 
 // Info on reading cycle counter from https://github.com/kbeckmann/nodemcu-firmware/blob/ws2812-dual/app/modules/ws2812.c
 __attribute__ ((always_inline)) inline static uint32_t __clock_cycles() {
-	uint32_t cyc;
-	__asm__ __volatile__ ("rsr %0,ccount":"=a" (cyc));
-	return cyc;
+  uint32_t cyc;
+  __asm__ __volatile__ ("rsr %0,ccount":"=a" (cyc));
+  return cyc;
 }
 
 #define FASTLED_HAS_CLOCKLESS 1
@@ -24,7 +24,6 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 	data_t mPinMask;
 	data_ptr_t mPort;
 	CMinWait<WAIT_TIME> mWait;
-
 public:
 	virtual void init() {
 		FastPin<DATA_PIN>::setOutput();
@@ -35,36 +34,37 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 	virtual uint16_t getMaxRefreshRate() const { return 400; }
 
 protected:
+
 	virtual void showPixels(PixelController<RGB_ORDER> & pixels) {
-	// mWait.wait();
-	int cnt = FASTLED_INTERRUPT_RETRY_COUNT;
-	while((showRGBInternal(pixels)==0) && cnt--) {
-		#ifdef FASTLED_DEBUG_COUNT_FRAME_RETRIES
-		_retry_cnt++;
-		#endif
-		os_intr_unlock();
-		delayMicroseconds(WAIT_TIME);
-		os_intr_lock();
-	}
-	// mWait.mark();
+    // mWait.wait();
+		int cnt = FASTLED_INTERRUPT_RETRY_COUNT;
+    while((showRGBInternal(pixels)==0) && cnt--) {
+      #ifdef FASTLED_DEBUG_COUNT_FRAME_RETRIES
+      ++_retry_cnt;
+      #endif
+      os_intr_unlock();
+      delayMicroseconds(WAIT_TIME);
+      os_intr_lock();
+    }
+    // mWait.mark();
   }
 
 #define _ESP_ADJ (0)
 #define _ESP_ADJ2 (0)
 
 	template<int BITS> __attribute__ ((always_inline)) inline static void writeBits(register uint32_t & last_mark, register uint32_t b)  {
-		b <<= 24; b = ~b;
-		for(register uint32_t i = BITS; i > 0; i--) {
-			while((__clock_cycles() - last_mark) < (T1+T2+T3));
+    b <<= 24; b = ~b;
+    for(register uint32_t i = BITS; i > 0; --i) {
+      while((__clock_cycles() - last_mark) < (T1+T2+T3));
 			last_mark = __clock_cycles();
-			FastPin<DATA_PIN>::hi();
+      FastPin<DATA_PIN>::hi();
 
-			while((__clock_cycles() - last_mark) < T1);
-			if(b & 0x80000000L) { FastPin<DATA_PIN>::lo(); }
-			b <<= 1;
+      while((__clock_cycles() - last_mark) < T1);
+      if(b & 0x80000000L) { FastPin<DATA_PIN>::lo(); }
+      b <<= 1;
 
-			while((__clock_cycles() - last_mark) < (T1+T2));
-			FastPin<DATA_PIN>::lo();
+      while((__clock_cycles() - last_mark) < (T1+T2));
+      FastPin<DATA_PIN>::lo();
 		}
 	}
 
@@ -74,9 +74,9 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 		// Setup the pixel controller and load/scale the first byte
 		pixels.preStepFirstByteDithering();
 		register uint32_t b = pixels.loadAndScale0();
-    	pixels.preStepFirstByteDithering();
+    pixels.preStepFirstByteDithering();
 		os_intr_lock();
-    	uint32_t start = __clock_cycles();
+    uint32_t start = __clock_cycles();
 		uint32_t last_mark = start;
 		while(pixels.has(1)) {
 			// Write first byte, read next byte
@@ -89,13 +89,13 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 
 			// Write third byte, read 1st byte of next pixel
 			writeBits<8+XTRA0>(last_mark, b);
-      		b = pixels.advanceAndLoadAndScale0();
+      b = pixels.advanceAndLoadAndScale0();
 
 			#if (FASTLED_ALLOW_INTERRUPTS == 1)
 			os_intr_unlock();
 			#endif
 
-      		pixels.stepDithering();
+      pixels.stepDithering();
 
 			#if (FASTLED_ALLOW_INTERRUPTS == 1)
 			os_intr_lock();
@@ -107,9 +107,9 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 		};
 
 		os_intr_unlock();
-		#ifdef FASTLED_DEBUG_COUNT_FRAME_RETRIES
-		_frame_cnt++;
-		#endif
+    #ifdef FASTLED_DEBUG_COUNT_FRAME_RETRIES
+    ++_frame_cnt;
+    #endif
 		return __clock_cycles() - start;
 	}
 };
diff --git a/power_mgt.cpp b/power_mgt.cpp
index 33b4cece1c..e15fa709b5 100644
--- a/power_mgt.cpp
+++ b/power_mgt.cpp
@@ -60,7 +60,7 @@ uint32_t calculate_unscaled_power_mW( const CRGB* ledbuffer, uint16_t numLeds )
         red32   += *p++;
         green32 += *p++;
         blue32  += *p++;
-        count--;
+        --count;
     }
 
     red32   *= gRed_mW;
diff --git a/wiring.cpp b/wiring.cpp
index e366c64c09..744373a172 100644
--- a/wiring.cpp
+++ b/wiring.cpp
@@ -17,16 +17,16 @@ volatile unsigned long FastLED_timer0_overflow_count=0;
 volatile unsigned long FastLED_timer0_millis = 0;
 
 LIB8STATIC void  __attribute__((always_inline)) fastinc32 (volatile uint32_t & _long) {
-    uint8_t b = ++((tBytesForLong&)_long).raw[0];
+  uint8_t b = ++((tBytesForLong&)_long).raw[0];
+  if(!b) {
+    b = ++((tBytesForLong&)_long).raw[1];
     if(!b) {
-        b = ++((tBytesForLong&)_long).raw[1];
-        if(!b) {
-            b = ++((tBytesForLong&)_long).raw[2];
-            if(!b) {
-                ++((tBytesForLong&)_long).raw[3];
-            }
-        }
+      b = ++((tBytesForLong&)_long).raw[2];
+      if(!b) {
+        ++((tBytesForLong&)_long).raw[3];
+      }
     }
+  }
 }
 
 #if defined(__AVR_ATtiny24__) || defined(__AVR_ATtiny44__) || defined(__AVR_ATtiny84__)
@@ -35,200 +35,200 @@ ISR(TIM0_OVF_vect)
 ISR(TIMER0_OVF_vect)
 #endif
 {
-    fastinc32(FastLED_timer0_overflow_count);
-    // FastLED_timer0_overflow_count++;
+  fastinc32(FastLED_timer0_overflow_count);
+  // FastLED_timer0_overflow_count++;
 }
 
 // there are 1024 microseconds per overflow counter tick.
 unsigned long millis()
 {
-    unsigned long m;
-    uint8_t oldSREG = SREG;
+        unsigned long m;
+        uint8_t oldSREG = SREG;
 
-    // disable interrupts while we read FastLED_timer0_millis or we might get an
-    // inconsistent value (e.g. in the middle of a write to FastLED_timer0_millis)
-    cli();
-    m = FastLED_timer0_overflow_count;  //._long;
-    SREG = oldSREG;
+        // disable interrupts while we read FastLED_timer0_overflow_count or we might get an
+        // inconsistent value (e.g. in the middle of a write to FastLED_timer0_overflow_count)
+        cli();
+        m = FastLED_timer0_overflow_count;  //._long;
+        SREG = oldSREG;
 
-    return (m*(MICROSECONDS_PER_TIMER0_OVERFLOW/8))/(1000/8);
+        return (m*(MICROSECONDS_PER_TIMER0_OVERFLOW/8))/(1000/8);
 }
 
 unsigned long micros() {
-    unsigned long m;
-    uint8_t oldSREG = SREG, t;
+        unsigned long m;
+        uint8_t oldSREG = SREG, t;
 
-    cli();
-    m = FastLED_timer0_overflow_count; // ._long;
+        cli();
+        m = FastLED_timer0_overflow_count; // ._long;
 #if defined(TCNT0)
-    t = TCNT0;
+        t = TCNT0;
 #elif defined(TCNT0L)
-    t = TCNT0L;
+        t = TCNT0L;
 #else
-#error TIMER 0 not defined
+        #error TIMER 0 not defined
 #endif
 
 
 #ifdef TIFR0
-    if ((TIFR0 & _BV(TOV0)) && (t < 255))
-        m++;
+        if ((TIFR0 & _BV(TOV0)) && (t < 255))
+                ++m;
 #else
-    if ((TIFR & _BV(TOV0)) && (t < 255))
-        m++;
+        if ((TIFR & _BV(TOV0)) && (t < 255))
+                ++m;
 #endif
 
-    SREG = oldSREG;
+        SREG = oldSREG;
 
-    return ((m << 8) + t) * (64 / clockCyclesPerMicrosecond());
+        return ((m << 8) + t) * (64 / clockCyclesPerMicrosecond());
 }
 
 void delay(unsigned long ms)
 {
-    uint16_t start = (uint16_t)micros();
+        uint16_t start = (uint16_t)micros();
 
-    while (ms > 0) {
-        if (((uint16_t)micros() - start) >= 1000) {
-            ms--;
-            start += 1000;
+        while (ms > 0) {
+                if (((uint16_t)micros() - start) >= 1000) {
+                        --ms;
+                        start += 1000;
+                }
         }
-    }
 }
 
 #define sbi(sfr, bit) (_SFR_BYTE(sfr) |= _BV(bit))
 void init()
 {
-    // this needs to be called before setup() or some functions won't
-    // work there
-    sei();
+  // this needs to be called before setup() or some functions won't
+  // work there
+  sei();
 
-    // on the ATmega168, timer 0 is also used for fast hardware pwm
-    // (using phase-correct PWM would mean that timer 0 overflowed half as often
-    // resulting in different millis() behavior on the ATmega8 and ATmega168)
+  // on the ATmega168, timer 0 is also used for fast hardware pwm
+  // (using phase-correct PWM would mean that timer 0 overflowed half as often
+  // resulting in different millis() behavior on the ATmega8 and ATmega168)
 #if defined(TCCR0A) && defined(WGM01)
-    sbi(TCCR0A, WGM01);
-    sbi(TCCR0A, WGM00);
+  sbi(TCCR0A, WGM01);
+  sbi(TCCR0A, WGM00);
 #endif
 
-    // set timer 0 prescale factor to 64
+  // set timer 0 prescale factor to 64
 #if defined(__AVR_ATmega128__)
-    // CPU specific: different values for the ATmega128
-    sbi(TCCR0, CS02);
+  // CPU specific: different values for the ATmega128
+  sbi(TCCR0, CS02);
 #elif defined(TCCR0) && defined(CS01) && defined(CS00)
-    // this combination is for the standard atmega8
-    sbi(TCCR0, CS01);
-    sbi(TCCR0, CS00);
+  // this combination is for the standard atmega8
+  sbi(TCCR0, CS01);
+  sbi(TCCR0, CS00);
 #elif defined(TCCR0B) && defined(CS01) && defined(CS00)
-    // this combination is for the standard 168/328/1280/2560
-    sbi(TCCR0B, CS01);
-    sbi(TCCR0B, CS00);
+  // this combination is for the standard 168/328/1280/2560
+  sbi(TCCR0B, CS01);
+  sbi(TCCR0B, CS00);
 #elif defined(TCCR0A) && defined(CS01) && defined(CS00)
-    // this combination is for the __AVR_ATmega645__ series
-    sbi(TCCR0A, CS01);
-    sbi(TCCR0A, CS00);
+  // this combination is for the __AVR_ATmega645__ series
+  sbi(TCCR0A, CS01);
+  sbi(TCCR0A, CS00);
 #else
-#error Timer 0 prescale factor 64 not set correctly
+  #error Timer 0 prescale factor 64 not set correctly
 #endif
 
-    // enable timer 0 overflow interrupt
+  // enable timer 0 overflow interrupt
 #if defined(TIMSK) && defined(TOIE0)
-    sbi(TIMSK, TOIE0);
+  sbi(TIMSK, TOIE0);
 #elif defined(TIMSK0) && defined(TOIE0)
-    sbi(TIMSK0, TOIE0);
+  sbi(TIMSK0, TOIE0);
 #else
-#error	Timer 0 overflow interrupt not set correctly
+  #error	Timer 0 overflow interrupt not set correctly
 #endif
 
-    // timers 1 and 2 are used for phase-correct hardware pwm
-    // this is better for motors as it ensures an even waveform
-    // note, however, that fast pwm mode can achieve a frequency of up
-    // 8 MHz (with a 16 MHz clock) at 50% duty cycle
+  // timers 1 and 2 are used for phase-correct hardware pwm
+  // this is better for motors as it ensures an even waveform
+  // note, however, that fast pwm mode can achieve a frequency of up to
+  // 8 MHz (with a 16 MHz clock) at 50% duty cycle
 
 #if defined(TCCR1B) && defined(CS11) && defined(CS10)
-    TCCR1B = 0;
+  TCCR1B = 0;
 
-    // set timer 1 prescale factor to 64
-    sbi(TCCR1B, CS11);
+  // set timer 1 prescale factor to 64
+  sbi(TCCR1B, CS11);
 #if F_CPU >= 8000000L
-    sbi(TCCR1B, CS10);
+  sbi(TCCR1B, CS10);
 #endif
 #elif defined(TCCR1) && defined(CS11) && defined(CS10)
-    sbi(TCCR1, CS11);
+  sbi(TCCR1, CS11);
 #if F_CPU >= 8000000L
-    sbi(TCCR1, CS10);
+  sbi(TCCR1, CS10);
 #endif
 #endif
-    // put timer 1 in 8-bit phase correct pwm mode
+  // put timer 1 in 8-bit phase correct pwm mode
 #if defined(TCCR1A) && defined(WGM10)
-    sbi(TCCR1A, WGM10);
+  sbi(TCCR1A, WGM10);
 #elif defined(TCCR1)
-#warning this needs to be finished
+  #warning this needs to be finished
 #endif
 
   // set timer 2 prescale factor to 64
 #if defined(TCCR2) && defined(CS22)
-    sbi(TCCR2, CS22);
+  sbi(TCCR2, CS22);
 #elif defined(TCCR2B) && defined(CS22)
-    sbi(TCCR2B, CS22);
+  sbi(TCCR2B, CS22);
 #else
-#warning Timer 2 not finished (may not be present on this CPU)
+  #warning Timer 2 not finished (may not be present on this CPU)
 #endif
 
   // configure timer 2 for phase correct pwm (8-bit)
 #if defined(TCCR2) && defined(WGM20)
-    sbi(TCCR2, WGM20);
+  sbi(TCCR2, WGM20);
 #elif defined(TCCR2A) && defined(WGM20)
-    sbi(TCCR2A, WGM20);
+  sbi(TCCR2A, WGM20);
 #else
-#warning Timer 2 not finished (may not be present on this CPU)
+  #warning Timer 2 not finished (may not be present on this CPU)
 #endif
 
 #if defined(TCCR3B) && defined(CS31) && defined(WGM30)
-    sbi(TCCR3B, CS31);		// set timer 3 prescale factor to 64
-    sbi(TCCR3B, CS30);
-    sbi(TCCR3A, WGM30);		// put timer 3 in 8-bit phase correct pwm mode
+  sbi(TCCR3B, CS31);		// set timer 3 prescale factor to 64
+  sbi(TCCR3B, CS30);
+  sbi(TCCR3A, WGM30);		// put timer 3 in 8-bit phase correct pwm mode
 #endif
 
 #if defined(TCCR4A) && defined(TCCR4B) && defined(TCCR4D) /* beginning of timer4 block for 32U4 and similar */
-    sbi(TCCR4B, CS42);		// set timer4 prescale factor to 64
-    sbi(TCCR4B, CS41);
-    sbi(TCCR4B, CS40);
-    sbi(TCCR4D, WGM40);		// put timer 4 in phase- and frequency-correct PWM mode
-    sbi(TCCR4A, PWM4A);		// enable PWM mode for comparator OCR4A
-    sbi(TCCR4C, PWM4D);		// enable PWM mode for comparator OCR4D
+  sbi(TCCR4B, CS42);		// set timer4 prescale factor to 64
+  sbi(TCCR4B, CS41);
+  sbi(TCCR4B, CS40);
+  sbi(TCCR4D, WGM40);		// put timer 4 in phase- and frequency-correct PWM mode
+  sbi(TCCR4A, PWM4A);		// enable PWM mode for comparator OCR4A
+  sbi(TCCR4C, PWM4D);		// enable PWM mode for comparator OCR4D
 #else /* beginning of timer4 block for ATMEGA1280 and ATMEGA2560 */
 #if defined(TCCR4B) && defined(CS41) && defined(WGM40)
-    sbi(TCCR4B, CS41);		// set timer 4 prescale factor to 64
-    sbi(TCCR4B, CS40);
-    sbi(TCCR4A, WGM40);		// put timer 4 in 8-bit phase correct pwm mode
+  sbi(TCCR4B, CS41);		// set timer 4 prescale factor to 64
+  sbi(TCCR4B, CS40);
+  sbi(TCCR4A, WGM40);		// put timer 4 in 8-bit phase correct pwm mode
 #endif
 #endif /* end timer4 block for ATMEGA1280/2560 and similar */
 
 #if defined(TCCR5B) && defined(CS51) && defined(WGM50)
-    sbi(TCCR5B, CS51);		// set timer 5 prescale factor to 64
-    sbi(TCCR5B, CS50);
-    sbi(TCCR5A, WGM50);		// put timer 5 in 8-bit phase correct pwm mode
+  sbi(TCCR5B, CS51);		// set timer 5 prescale factor to 64
+  sbi(TCCR5B, CS50);
+  sbi(TCCR5A, WGM50);		// put timer 5 in 8-bit phase correct pwm mode
 #endif
 
 #if defined(ADCSRA)
-    // set a2d prescale factor to 128
-    // 16 MHz / 128 = 125 KHz, inside the desired 50-200 KHz range.
-    // XXX: this will not work properly for other clock speeds, and
-    // this code should use F_CPU to determine the prescale factor.
-    sbi(ADCSRA, ADPS2);
-    sbi(ADCSRA, ADPS1);
-    sbi(ADCSRA, ADPS0);
+  // set a2d prescale factor to 128
+  // 16 MHz / 128 = 125 KHz, inside the desired 50-200 KHz range.
+  // XXX: this will not work properly for other clock speeds, and
+  // this code should use F_CPU to determine the prescale factor.
+  sbi(ADCSRA, ADPS2);
+  sbi(ADCSRA, ADPS1);
+  sbi(ADCSRA, ADPS0);
 
-    // enable a2d conversions
-    sbi(ADCSRA, ADEN);
+  // enable a2d conversions
+  sbi(ADCSRA, ADEN);
 #endif
 
-    // the bootloader connects pins 0 and 1 to the USART; disconnect them
-    // here so they can be used as normal digital i/o; they will be
-    // reconnected in Serial.begin()
+  // the bootloader connects pins 0 and 1 to the USART; disconnect them
+  // here so they can be used as normal digital i/o; they will be
+  // reconnected in Serial.begin()
 #if defined(UCSRB)
-    UCSRB = 0;
+  UCSRB = 0;
 #elif defined(UCSR0B)
-    UCSR0B = 0;
+  UCSR0B = 0;
 #endif
 }
 };

From fccbed9fdf63e31d90d96f4794fb7457b95cccc6 Mon Sep 17 00:00:00 2001
From: 5chmidti <44101708+5chmidti@users.noreply.github.com>
Date: Fri, 17 Apr 2020 22:20:18 +0200
Subject: [PATCH 195/204] format double ';' to single

---
 colorutils.h                          | 8 ++++----
 platforms/arm/d51/clockless_arm_d51.h | 4 ++--
 platforms/arm/k20/clockless_arm_k20.h | 4 ++--
 platforms/arm/k66/clockless_arm_k66.h | 4 ++--
 4 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/colorutils.h b/colorutils.h
index 65231b89c6..f09d525fe7 100644
--- a/colorutils.h
+++ b/colorutils.h
@@ -828,7 +828,7 @@ class CRGBPalette16 {
         uint16_t count = 0;
         do {
             u.dword = FL_PGM_READ_DWORD_NEAR(progent + count);
-            ++count;;
+            ++count;
         } while ( u.index != 255);
 
         int8_t lastSlotUsed = -1;
@@ -870,7 +870,7 @@ class CRGBPalette16 {
         uint16_t count = 0;
         do {
             u = *(ent + count);
-            ++count;;
+            ++count;
         } while ( u.index != 255);
 
         int8_t lastSlotUsed = -1;
@@ -1225,7 +1225,7 @@ class CRGBPalette32 {
         uint16_t count = 0;
         do {
             u.dword = FL_PGM_READ_DWORD_NEAR(progent + count);
-            ++count;;
+            ++count;
         } while ( u.index != 255);
         
         int8_t lastSlotUsed = -1;
@@ -1267,7 +1267,7 @@ class CRGBPalette32 {
         uint16_t count = 0;
         do {
             u = *(ent + count);
-            ++count;;
+            ++count;
         } while ( u.index != 255);
         
         int8_t lastSlotUsed = -1;
diff --git a/platforms/arm/d51/clockless_arm_d51.h b/platforms/arm/d51/clockless_arm_d51.h
index 7bb48062b8..2bf00d2728 100644
--- a/platforms/arm/d51/clockless_arm_d51.h
+++ b/platforms/arm/d51/clockless_arm_d51.h
@@ -79,8 +79,8 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 		ARM_DWT_CYCCNT = 0;
 
 		register data_ptr_t port = FastPin<DATA_PIN>::port();
-		register data_t hi = *port | FastPin<DATA_PIN>::mask();;
-		register data_t lo = *port & ~FastPin<DATA_PIN>::mask();;
+		register data_t hi = *port | FastPin<DATA_PIN>::mask();
+		register data_t lo = *port & ~FastPin<DATA_PIN>::mask();
 		*port = lo;
 
 		// Setup the pixel controller and load/scale the first byte
diff --git a/platforms/arm/k20/clockless_arm_k20.h b/platforms/arm/k20/clockless_arm_k20.h
index c38b5c2933..0a7f7b94b7 100644
--- a/platforms/arm/k20/clockless_arm_k20.h
+++ b/platforms/arm/k20/clockless_arm_k20.h
@@ -74,8 +74,8 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 		ARM_DWT_CYCCNT = 0;
 
 		register data_ptr_t port = FastPin<DATA_PIN>::port();
-		register data_t hi = *port | FastPin<DATA_PIN>::mask();;
-		register data_t lo = *port & ~FastPin<DATA_PIN>::mask();;
+		register data_t hi = *port | FastPin<DATA_PIN>::mask();
+		register data_t lo = *port & ~FastPin<DATA_PIN>::mask();
 		*port = lo;
 
 		// Setup the pixel controller and load/scale the first byte
diff --git a/platforms/arm/k66/clockless_arm_k66.h b/platforms/arm/k66/clockless_arm_k66.h
index df1b5007ee..e9dcc0cd39 100644
--- a/platforms/arm/k66/clockless_arm_k66.h
+++ b/platforms/arm/k66/clockless_arm_k66.h
@@ -74,8 +74,8 @@ class ClocklessController : public CPixelLEDController<RGB_ORDER> {
 		ARM_DWT_CYCCNT = 0;
 
 		register data_ptr_t port = FastPin<DATA_PIN>::port();
-		register data_t hi = *port | FastPin<DATA_PIN>::mask();;
-		register data_t lo = *port & ~FastPin<DATA_PIN>::mask();;
+		register data_t hi = *port | FastPin<DATA_PIN>::mask();
+		register data_t lo = *port & ~FastPin<DATA_PIN>::mask();
 		*port = lo;
 
 		// Setup the pixel controller and load/scale the first byte

From b2b242aba88b033343bfe2e3418fd84a7937851e Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Mon, 2 Nov 2020 11:40:06 -0500
Subject: [PATCH 196/204] A few small tweaks

---
 src/platforms/esp/32/clockless_rmt_esp32.cpp | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/src/platforms/esp/32/clockless_rmt_esp32.cpp b/src/platforms/esp/32/clockless_rmt_esp32.cpp
index 31188474d7..a00f5681fb 100644
--- a/src/platforms/esp/32/clockless_rmt_esp32.cpp
+++ b/src/platforms/esp/32/clockless_rmt_esp32.cpp
@@ -160,6 +160,9 @@ void ESP32RMTController::showPixels()
         // -- This Take always succeeds immediately
         xSemaphoreTake(gTX_sem, portMAX_DELAY);
 
+        // -- Make sure it's been at least 50us since last show
+        gWait.wait();
+
         // -- First, fill all the available channels
         int channel = 0;
         while (channel < FASTLED_RMT_MAX_CHANNELS && gNext < gNumControllers) {
@@ -169,9 +172,6 @@ void ESP32RMTController::showPixels()
             channel += FASTLED_RMT_MEM_BLOCKS;
         }
 
-        // -- Make sure it's been at least 50us since last show
-        gWait.wait();
-
         // -- Wait here while the data is sent. The interrupt handler
         //    will keep refilling the RMT buffers until it is all
         //    done; then it gives the semaphore back.
@@ -234,6 +234,7 @@ void ESP32RMTController::startOnChannel(int channel)
         mRMT_mem_ptr = mRMT_mem_start;
         mCur = 0;
         mWhichHalf = 0;
+        mLastFill = 0;
 
         // -- Fill both halves of the RMT buffer (a totaly of 64 bits of pixel data)
         fillNext(false);
@@ -267,7 +268,10 @@ void ESP32RMTController::doneOnChannel(rmt_channel_t channel, void * arg)
 
     // -- Turn off output on the pin
     // SZG: Do I really need to do this?
-    // gpio_matrix_out(pController->mPin, 0x100, 0, 0);
+    gpio_matrix_out(pController->mPin, 0x100, 0, 0);
+
+    // -- Turn off the interrupts
+    rmt_set_tx_intr_en(channel, false);
 
     gOnChannel[channel] = NULL;
     gNumDone++;
@@ -310,8 +314,8 @@ void IRAM_ATTR ESP32RMTController::interruptHandler(void *arg)
         if (pController != NULL) {
             if (intr_st & BIT(tx_next_bit)) {
                 // -- More to send on this channel
-                RMT.int_clr.val |= BIT(tx_next_bit);
                 pController->fillNext(true);
+                RMT.int_clr.val |= BIT(tx_next_bit);
             } else {
                 // -- Transmission is complete on this channel
                 if (intr_st & BIT(tx_done_bit)) {
@@ -331,7 +335,7 @@ void IRAM_ATTR ESP32RMTController::fillNext(bool check_time)
 {
     uint32_t now = __clock_cycles();
     if (check_time) {
-        if (now > mLastFill) {
+        if (mLastFill != 0 and now > mLastFill) {
             uint32_t delta = (now - mLastFill);
             if (delta > mMaxCyclesPerFill) {
                 Serial.print(delta);

From ece280877b8ea6ce279e7e71d39a9461d71c616e Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Mon, 2 Nov 2020 13:33:14 -0500
Subject: [PATCH 197/204] Small compile error

---
 src/platforms/esp/32/clockless_rmt_esp32.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/platforms/esp/32/clockless_rmt_esp32.cpp b/src/platforms/esp/32/clockless_rmt_esp32.cpp
index 3a25468341..987f1bfc66 100644
--- a/src/platforms/esp/32/clockless_rmt_esp32.cpp
+++ b/src/platforms/esp/32/clockless_rmt_esp32.cpp
@@ -397,8 +397,7 @@ void ESP32RMTController::initPulseBuffer(int size_in_bytes)
 {
     if (mBuffer == 0) {
         // -- Each byte has 8 bits, each bit needs a 32-bit RMT item
-        mBufferSize size = size_in_bytes * 8 * 4;
-
+        mBufferSize = size_in_bytes * 8 * 4;
         mBuffer = (rmt_item32_t *) calloc( mBufferSize, sizeof(rmt_item32_t));
     }
     mCurPulse = 0;

From 57735647939bdacf2020374157ab670836d70353 Mon Sep 17 00:00:00 2001
From: Sam Guyer <sam.guyer@gmail.com>
Date: Thu, 12 Nov 2020 21:16:34 -0500
Subject: [PATCH 198/204] Removed a file that shouldn't be there

---
 .../esp/32/clockless_rmt_esp32.cpp-old        | 419 ------------------
 1 file changed, 419 deletions(-)
 delete mode 100644 src/platforms/esp/32/clockless_rmt_esp32.cpp-old

diff --git a/src/platforms/esp/32/clockless_rmt_esp32.cpp-old b/src/platforms/esp/32/clockless_rmt_esp32.cpp-old
deleted file mode 100644
index 267a26b2d4..0000000000
--- a/src/platforms/esp/32/clockless_rmt_esp32.cpp-old
+++ /dev/null
@@ -1,419 +0,0 @@
-
-#ifdef ESP32
-
-#define FASTLED_INTERNAL
-#include "FastLED.h"
-
-// -- Forward reference
-class ESP32RMTController;
-
-// -- Array of all controllers
-//    This array is filled at the time controllers are registered 
-//    (Usually when the sketch calls addLeds)
-static ESP32RMTController * gControllers[FASTLED_RMT_MAX_CONTROLLERS];
-
-// -- Current set of active controllers, indexed by the RMT
-//    channel assigned to them.
-static ESP32RMTController * gOnChannel[FASTLED_RMT_MAX_CHANNELS];
-
-static int gNumControllers = 0;
-static int gNumStarted = 0;
-static int gNumDone = 0;
-static int gNext = 0;
-
-static intr_handle_t gRMT_intr_handle = NULL;
-
-// -- Global semaphore for the whole show process
-//    Semaphore is not given until all data has been sent
-static xSemaphoreHandle gTX_sem = NULL;
-
-// -- Make sure we can't call show() too quickly
-CMinWait<50>   gWait;
-
-static bool gInitialized = false;
-
-ESP32RMTController::ESP32RMTController(int DATA_PIN, int T1, int T2, int T3)
-    : mPixelData(0), 
-      mSize(0), 
-      mCur(0), 
-      mWhichHalf(0),
-      mBuffer(0),
-      mBufferSize(0),
-      mCurPulse(0)
-{
-    // -- Precompute rmt items corresponding to a zero bit and a one bit
-    //    according to the timing values given in the template instantiation
-    // T1H
-    mOne.level0 = 1;
-    mOne.duration0 = ESP_TO_RMT_CYCLES(T1+T2); // TO_RMT_CYCLES(T1+T2);
-    // T1L
-    mOne.level1 = 0;
-    mOne.duration1 = ESP_TO_RMT_CYCLES(T3); // TO_RMT_CYCLES(T3);
-
-    // T0H
-    mZero.level0 = 1;
-    mZero.duration0 = ESP_TO_RMT_CYCLES(T1); // TO_RMT_CYCLES(T1);
-    // T0L
-    mZero.level1 = 0;
-    mZero.duration1 = ESP_TO_RMT_CYCLES(T2+T3); // TO_RMT_CYCLES(T2 + T3);
-
-    gControllers[gNumControllers] = this;
-    gNumControllers++;
-
-    // -- Expected number of CPU cycles between buffer fills
-    mCyclesPerFill = (T1 + T2 + T3) * PULSES_PER_FILL;
-
-    // -- If there is ever an interval greater than 1.5 times
-    //    the expected time, then bail out.
-    mMaxCyclesPerFill = mCyclesPerFill + mCyclesPerFill/2;
-
-    mPin = gpio_num_t(DATA_PIN);
-}
-
-// -- Get or create the buffer for the pixel data
-//    We can't allocate it ahead of time because we don't have
-//    the PixelController object until show is called.
-uint32_t * ESP32RMTController::getPixelBuffer(int size_in_bytes)
-{
-    if (mPixelData == 0) {
-        mSize = ((size_in_bytes-1) / sizeof(uint32_t)) + 1;
-        mPixelData = (uint32_t *) calloc( mSize, sizeof(uint32_t));
-    }
-    return mPixelData;
-}
-
-// -- Initialize RMT subsystem
-//    This only needs to be done once
-void ESP32RMTController::init()
-{
-    if (gInitialized) return;
-
-    for (int i = 0; i < FASTLED_RMT_MAX_CHANNELS; i++) {
-        gOnChannel[i] = NULL;
-
-        // -- RMT configuration for transmission
-        rmt_config_t rmt_tx;
-        rmt_tx.channel = rmt_channel_t(i);
-        rmt_tx.rmt_mode = RMT_MODE_TX;
-        rmt_tx.gpio_num = gpio_num_t(0);  // The particular pin will be assigned later
-        rmt_tx.mem_block_num = FASTLED_RMT_MEM_BLOCKS;
-        rmt_tx.clk_div = DIVIDER;
-        rmt_tx.tx_config.loop_en = false;
-        rmt_tx.tx_config.carrier_level = RMT_CARRIER_LEVEL_LOW;
-        rmt_tx.tx_config.carrier_en = false;
-        rmt_tx.tx_config.idle_level = RMT_IDLE_LEVEL_LOW;
-        rmt_tx.tx_config.idle_output_en = true;
-
-        // -- Apply the configuration
-        rmt_config(&rmt_tx);
-
-        if (FASTLED_RMT_BUILTIN_DRIVER) {
-            rmt_driver_install(rmt_channel_t(i), 0, 0);
-        } else {
-            // -- Set up the RMT to send 32 bits of the pulse buffer and then
-            //    generate an interrupt. When we get this interrupt we
-            //    fill the other part in preparation (like double-buffering)
-            rmt_set_tx_thr_intr_en(rmt_channel_t(i), true, PULSES_PER_FILL);
-        }
-    }
-
-    // -- Create a semaphore to block execution until all the controllers are done
-    if (gTX_sem == NULL) {
-        gTX_sem = xSemaphoreCreateBinary();
-        xSemaphoreGive(gTX_sem);
-    }
-                
-    if ( ! FASTLED_RMT_BUILTIN_DRIVER) {
-        // -- Allocate the interrupt if we have not done so yet. This
-        //    interrupt handler must work for all different kinds of
-        //    strips, so it delegates to the refill function for each
-        //    specific instantiation of ClocklessController.
-        if (gRMT_intr_handle == NULL)
-            esp_intr_alloc(ETS_RMT_INTR_SOURCE, ESP_INTR_FLAG_IRAM | ESP_INTR_FLAG_LEVEL3, interruptHandler, 0, &gRMT_intr_handle);
-    }
-
-    gInitialized = true;
-}
-
-// -- Show this string of pixels
-//    This is the main entry point for the pixel controller
-void ESP32RMTController::showPixels()
-{
-    if (gNumStarted == 0) {
-        // -- First controller: make sure everything is set up
-        ESP32RMTController::init();
-
-#if FASTLED_ESP32_FLASH_LOCK == 1
-        // -- Make sure no flash operations happen right now
-        spi_flash_op_lock();
-#endif
-    }
-
-    // -- Keep track of the number of strips we've seen
-    gNumStarted++;
-
-    // -- The last call to showPixels is the one responsible for doing
-    //    all of the actual worl
-    if (gNumStarted == gNumControllers) {
-        gNext = 0;
-
-        // -- This Take always succeeds immediately
-        xSemaphoreTake(gTX_sem, portMAX_DELAY);
-
-        // -- Make sure it's been at least 50us since last show
-        gWait.wait();
-
-        // -- First, fill all the available channels
-        int channel = 0;
-        while (channel < FASTLED_RMT_MAX_CHANNELS && gNext < gNumControllers) {
-            ESP32RMTController::startNext(channel);
-            // -- Important: when we use more than one memory block, we need to
-            //    skip the channels that would otherwise overlap in memory.
-            channel += FASTLED_RMT_MEM_BLOCKS;
-        }
-
-        // -- Wait here while the data is sent. The interrupt handler
-        //    will keep refilling the RMT buffers until it is all
-        //    done; then it gives the semaphore back.
-        xSemaphoreTake(gTX_sem, portMAX_DELAY);
-        xSemaphoreGive(gTX_sem);
-
-        // -- Make sure we don't call showPixels too quickly
-        gWait.mark();
-
-        // -- Reset the counters
-        gNumStarted = 0;
-        gNumDone = 0;
-        gNext = 0;
-
-#if FASTLED_ESP32_FLASH_LOCK == 1
-        // -- Release the lock on flash operations
-        spi_flash_op_unlock();
-#endif
-
-    }
-}
-
-// -- Start up the next controller
-//    This method is static so that it can dispatch to the
-//    appropriate startOnChannel method of the given controller.
-void ESP32RMTController::startNext(int channel)
-{
-    if (gNext < gNumControllers) {
-        ESP32RMTController * pController = gControllers[gNext];
-        pController->startOnChannel(channel);
-        gNext++;
-    }
-}
-
-// -- Start this controller on the given channel
-//    This function just initiates the RMT write; it does not wait
-//    for it to finish.
-void ESP32RMTController::startOnChannel(int channel)
-{
-    // -- Assign this channel and configure the RMT
-    mRMT_channel = rmt_channel_t(channel);
-
-    // -- Store a reference to this controller, so we can get it
-    //    inside the interrupt handler
-    gOnChannel[channel] = this;
-
-    // -- Assign the pin to this channel
-    rmt_set_pin(mRMT_channel, RMT_MODE_TX, mPin);
-
-    if (FASTLED_RMT_BUILTIN_DRIVER) {
-        // -- Use the built-in RMT driver to send all the data in one shot
-        rmt_register_tx_end_callback(doneOnChannel, 0);
-        rmt_write_items(mRMT_channel, mBuffer, mBufferSize, false);
-    } else {
-        // -- Use our custom driver to send the data incrementally
-
-        // -- Initialize the counters that keep track of where we are in
-        //    the pixel data and the RMT buffer
-        mRMT_mem_start = & (RMTMEM.chan[mRMT_channel].data32[0].val);
-        mRMT_mem_ptr = mRMT_mem_start;
-        mCur = 0;
-        mWhichHalf = 0;
-        mLastFill = 0;
-
-        // -- Fill both halves of the RMT buffer (a totaly of 64 bits of pixel data)
-        fillNext(false);
-        fillNext(false);
-
-        // -- Turn on the interrupts
-        rmt_set_tx_intr_en(mRMT_channel, true);
-
-        // -- Kick off the transmission
-        tx_start();
-    }
-}
-
-// -- Start RMT transmission
-//    Setting this RMT flag is what actually kicks off the peripheral
-void ESP32RMTController::tx_start()
-{
-    rmt_tx_start(mRMT_channel, true);
-    mLastFill = __clock_cycles();
-}
-
-// -- A controller is done 
-//    This function is called when a controller finishes writing
-//    its data. It is called either by the custom interrupt
-//    handler (below), or as a callback from the built-in
-//    interrupt handler. It is static because we don't know which
-//    controller is done until we look it up.
-void ESP32RMTController::doneOnChannel(rmt_channel_t channel, void * arg)
-{
-    ESP32RMTController * pController = gOnChannel[channel];
-
-    // -- Turn off output on the pin
-    // SZG: Do I really need to do this?
-    gpio_matrix_out(pController->mPin, 0x100, 0, 0);
-
-    // -- Turn off the interrupts
-    rmt_set_tx_intr_en(channel, false);
-
-    gOnChannel[channel] = NULL;
-    gNumDone++;
-
-    if (gNumDone == gNumControllers) {
-        // -- If this is the last controller, signal that we are all done
-        if (FASTLED_RMT_BUILTIN_DRIVER) {
-            xSemaphoreGive(gTX_sem);
-        } else {
-            portBASE_TYPE HPTaskAwoken = 0;
-            xSemaphoreGiveFromISR(gTX_sem, &HPTaskAwoken);
-            if (HPTaskAwoken == pdTRUE) portYIELD_FROM_ISR();
-        }
-    } else {
-        // -- Otherwise, if there are still controllers waiting, then
-        //    start the next one on this channel
-        if (gNext < gNumControllers) {
-            startNext(channel);
-        }
-    }
-}
-    
-// -- Custom interrupt handler
-//    This interrupt handler handles two cases: a controller is
-//    done writing its data, or a controller needs to fill the
-//    next half of the RMT buffer with data.
-void IRAM_ATTR ESP32RMTController::interruptHandler(void *arg)
-{
-    // -- The basic structure of this code is borrowed from the
-    //    interrupt handler in esp-idf/components/driver/rmt.c
-    uint32_t intr_st = RMT.int_st.val;
-    uint8_t channel;
-
-    bool stuff_to_do = false;
-    for (channel = 0; channel < FASTLED_RMT_MAX_CHANNELS; channel++) {
-        int tx_done_bit = channel * 3;
-        int tx_next_bit = channel + 24;
-
-        ESP32RMTController * pController = gOnChannel[channel];
-        if (pController != NULL) {
-            if (intr_st & BIT(tx_next_bit)) {
-                // -- More to send on this channel
-                pController->fillNext(true);
-                RMT.int_clr.val |= BIT(tx_next_bit);
-            } else {
-                // -- Transmission is complete on this channel
-                if (intr_st & BIT(tx_done_bit)) {
-                    RMT.int_clr.val |= BIT(tx_done_bit);
-                    doneOnChannel(rmt_channel_t(channel), 0);
-                }
-            }
-        }
-    }
-}
-
-// -- Fill RMT buffer
-//    Puts 32 bits of pixel data into the next 32 slots in the RMT memory
-//    Each data bit is represented by a 32-bit RMT item that specifies how
-//    long to hold the signal high, followed by how long to hold it low.
-void IRAM_ATTR ESP32RMTController::fillNext(bool check_time)
-{
-    uint32_t now = __clock_cycles();
-    if (check_time) {
-        if (mLastFill != 0 and now > mLastFill) {
-            uint32_t delta = (now - mLastFill);
-            if (delta > mMaxCyclesPerFill) {
-                Serial.print(delta);
-                Serial.print(" BAIL ");
-                Serial.println(mCur);
-                mCur = mSize;
-                rmt_tx_stop(mRMT_channel);
-            }
-        }
-    }
-    mLastFill = now;
-
-    // -- Get the zero and one values into local variables
-    register uint32_t one_val = mOne.val;
-    register uint32_t zero_val = mZero.val;
-
-    // -- Use locals for speed
-    volatile register uint32_t * pItem =  mRMT_mem_ptr;
-
-    for (register int i = 0; i < PULSES_PER_FILL/32; i++) {
-        if (mCur < mSize) {
-
-            // -- Get the next four bytes of pixel data
-            register uint32_t pixeldata = mPixelData[mCur];
-            mCur++;
-            
-            // Shift bits out, MSB first, setting RMTMEM.chan[n].data32[x] to the 
-            // rmt_item32_t value corresponding to the buffered bit value
-            for (register uint32_t j = 0; j < 32; j++) {
-                *pItem++ = (pixeldata & 0x80000000L) ? one_val : zero_val;
-                // Replaces: RMTMEM.chan[mRMT_channel].data32[mCurPulse].val = val;
-
-                pixeldata <<= 1;
-            }
-        } else {
-            // -- No more data; signal to the RMT we are done
-            for (uint32_t j = 0; j < 32; j++) {
-                * mRMT_mem_ptr++ = 0;
-            }
-        }
-    }
-
-    // -- Flip to the other half, resetting the pointer if necessary
-    mWhichHalf++;
-    if (mWhichHalf == 2) {
-        pItem = mRMT_mem_start;
-        mWhichHalf = 0;
-    }
-
-    // -- Store the new pointer back into the object
-    mRMT_mem_ptr = pItem;
-}
-
-// -- Init pulse buffer
-//    Set up the buffer that will hold all of the pulse items for this
-//    controller. 
-//    This function is only used when the built-in RMT driver is chosen
-void ESP32RMTController::initPulseBuffer(int size_in_bytes)
-{
-    if (mBuffer == 0) {
-        // -- Each byte has 8 bits, each bit needs a 32-bit RMT item
-        int mBufferSize = size_in_bytes * 8 * 4;
-        mBuffer = (rmt_item32_t *) calloc( mBufferSize, sizeof(rmt_item32_t));
-    }
-    mCurPulse = 0;
-}
-
-// -- Convert a byte into RMT pulses
-//    This function is only used when the built-in RMT driver is chosen
-void ESP32RMTController::convertByte(uint32_t byteval)
-{
-    // -- Write one byte's worth of RMT pulses to the big buffer
-    byteval <<= 24;
-    for (register uint32_t j = 0; j < 8; j++) {
-        mBuffer[mCurPulse] = (byteval & 0x80000000L) ? mOne : mZero;
-        byteval <<= 1;
-        mCurPulse++;
-    }
-}
-
-#endif

From 715e4d557eb58f4d6f451fb5279227fd03107f0b Mon Sep 17 00:00:00 2001
From: Geoff Evans <gbeevans@me.com>
Date: Fri, 13 Nov 2020 14:48:41 +1300
Subject: [PATCH 199/204] Add Pin Support For ODYSSEY - X86J4105

Add pin support for the Seeed Studio ODYSSEY - X86J4105.
Pin numbering taken from
https://github.com/Seeed-Studio/ArduinoCore-samd/blob/master/variants/arduino_zero/variant.cpp
---
 src/platforms/arm/d21/fastpin_arm_d21.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/src/platforms/arm/d21/fastpin_arm_d21.h b/src/platforms/arm/d21/fastpin_arm_d21.h
index f3fa79cb60..93dc43c657 100644
--- a/src/platforms/arm/d21/fastpin_arm_d21.h
+++ b/src/platforms/arm/d21/fastpin_arm_d21.h
@@ -109,6 +109,24 @@ _FL_DEFPIN( 8, 7,0); _FL_DEFPIN( 9, 5,0); _FL_DEFPIN(10, 6,0);
 
 #define HAS_HARDWARE_PIN_SUPPORT 1
 
+#elif defined(ARDUINO_SEEED_ZERO)
+
+#define MAX_PIN 24
+
+_FL_DEFPIN( 0,11,0); _FL_DEFPIN( 1,10,0); _FL_DEFPIN( 2,14,0); _FL_DEFPIN( 3,9,0);
+_FL_DEFPIN( 4,8,0); _FL_DEFPIN( 5,15,0); _FL_DEFPIN( 6,20,0); _FL_DEFPIN( 7,21,0);
+_FL_DEFPIN( 8,6,0); _FL_DEFPIN( 9,7,0); _FL_DEFPIN( 10,18,0); _FL_DEFPIN( 11,16,0);
+_FL_DEFPIN( 12,19,0); _FL_DEFPIN( 13,17,0); _FL_DEFPIN( 14,2,0); _FL_DEFPIN( 15,8,1);
+_FL_DEFPIN( 16,9,1); _FL_DEFPIN( 17,4,0); _FL_DEFPIN( 18,5,0); _FL_DEFPIN( 19,2,1);
+_FL_DEFPIN( 20,22,0); _FL_DEFPIN( 21,23,0); _FL_DEFPIN( 22,12,0);
+_FL_DEFPIN( 23,10,1);//MOSI
+_FL_DEFPIN( 24,11,1);//SCK
+
+#define SPI_DATA  23
+#define SPI_CLOCK 24
+
+#define HAS_HARDWARE_PIN_SUPPORT 1
+
 #elif defined(ARDUINO_SAMD_ZERO)
 
 #define MAX_PIN 42

From 7147b7539d1cefc0da09ae993add9dfa42c410fd Mon Sep 17 00:00:00 2001
From: 5chmidti <44101708+5chmidti@users.noreply.github.com>
Date: Sat, 21 Nov 2020 14:28:36 +0100
Subject: [PATCH 200/204] fix rebase artifact

---
 src/platforms/arm/mxrt1062/block_clockless_arm_mxrt1062.h | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/src/platforms/arm/mxrt1062/block_clockless_arm_mxrt1062.h b/src/platforms/arm/mxrt1062/block_clockless_arm_mxrt1062.h
index c30823b610..bdcff272ca 100644
--- a/src/platforms/arm/mxrt1062/block_clockless_arm_mxrt1062.h
+++ b/src/platforms/arm/mxrt1062/block_clockless_arm_mxrt1062.h
@@ -156,15 +156,9 @@ class FlexibleInlineBlockClocklessController : public CPixelLEDController<RGB_OR
         _outlines b0;
         uint32_t start = ARM_DWT_CYCCNT;
 
-<<<<<<< HEAD
-        for(int i = 0; i < m_nActualLanes; i++) {
+        for(int i = 0; i < m_nActualLanes; ++i) {
             b0.bytes[m_bitOffsets[i]] = allpixels.loadAndScale0(i);
         }
-=======
-    for(int i = 0; i < m_nActualLanes; ++i) {
-      b0.bytes[m_bitOffsets[i]] = allpixels.loadAndScale0(i);
-    }
->>>>>>> use prefix notation for ++ and -- where possible
 
         cli();
 

From b1e19e3ceb7f89d1b7a6e4c1f1583dba719afb5d Mon Sep 17 00:00:00 2001
From: Beaudry Chase <beaudrychase@utexas.edu>
Date: Sat, 21 Nov 2020 21:51:36 -0600
Subject: [PATCH 201/204] Update README.md

Fixed a typo in the LPD8806 entry ("chpiset" -> "chipset").
---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 18039087aa..a945c5db87 100644
--- a/README.md
+++ b/README.md
@@ -48,7 +48,7 @@ Here's a list of all the LED chipsets are supported.  More details on the led ch
 * TM1803 - 3 wire chipset, sold by radio shack
 * UCS1903 - another 3 wire led chipset, cheap
 * GW6205 - another 3 wire led chipset
-* LPD8806 - SPI based chpiset, very high speed
+* LPD8806 - SPI based chipset, very high speed
 * WS2801 - SPI based chipset, cheap and widely available
 * SM16716 - SPI based chipset
 * APA102 - SPI based chipset

From 2d3777e340bbadbdc37f740a0e41f8ed9139028a Mon Sep 17 00:00:00 2001
From: Elliott Kember <elliott.kember@gmail.com>
Date: Mon, 23 Nov 2020 19:20:35 -0800
Subject: [PATCH 202/204] Update component.mk to work with esp-idf

---
 component.mk | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/component.mk b/component.mk
index 27ad11a7e5..f4082014b9 100644
--- a/component.mk
+++ b/component.mk
@@ -1 +1,3 @@
-COMPONENT_ADD_INCLUDEDIRS := .
+COMPONENT_ADD_INCLUDEDIRS := ./src src/platforms/esp/32
+COMPONENT_SRCDIRS := ./src src/platforms/esp/32
+

From 599edc996171d2f52798aae6b14717fcc4992fb4 Mon Sep 17 00:00:00 2001
From: Elliott Kember <elliott.kember@gmail.com>
Date: Mon, 23 Nov 2020 19:24:25 -0800
Subject: [PATCH 203/204] Remove newline

---
 component.mk | 1 -
 1 file changed, 1 deletion(-)

diff --git a/component.mk b/component.mk
index f4082014b9..874ca9b0ac 100644
--- a/component.mk
+++ b/component.mk
@@ -1,3 +1,2 @@
 COMPONENT_ADD_INCLUDEDIRS := ./src src/platforms/esp/32
 COMPONENT_SRCDIRS := ./src src/platforms/esp/32
-

From 52e52b8eb56176e88113ebda789ee37f825d6b70 Mon Sep 17 00:00:00 2001
From: 5chmidti <44101708+5chmidti@users.noreply.github.com>
Date: Wed, 25 Nov 2020 20:36:52 +0100
Subject: [PATCH 204/204] fix missing rbraces

---
 .../mxrt1062/block_clockless_arm_mxrt1062.h   | 68 ++++++++++---------
 1 file changed, 35 insertions(+), 33 deletions(-)

diff --git a/src/platforms/arm/mxrt1062/block_clockless_arm_mxrt1062.h b/src/platforms/arm/mxrt1062/block_clockless_arm_mxrt1062.h
index bdcff272ca..a7bcddf040 100644
--- a/src/platforms/arm/mxrt1062/block_clockless_arm_mxrt1062.h
+++ b/src/platforms/arm/mxrt1062/block_clockless_arm_mxrt1062.h
@@ -115,39 +115,41 @@ class FlexibleInlineBlockClocklessController : public CPixelLEDController<RGB_OR
 
 
   template<int BITS,int PX> __attribute__ ((always_inline)) inline void writeBits(register uint32_t & next_mark, register _outlines & b, PixelController<RGB_ORDER, LANES, __FL_T4_MASK> &pixels) {
-    _outlines b2;
-    transpose8x1(b.bg[3], b2.bg[3]);
-    transpose8x1(b.bg[2], b2.bg[2]);
-    transpose8x1(b.bg[1], b2.bg[1]);
-    transpose8x1(b.bg[0], b2.bg[0]);
-
-    register uint8_t d = pixels.template getd<PX>(pixels);
-    register uint8_t scale = pixels.template getscale<PX>(pixels);
-
-    int x = 0;
-    for(uint32_t i = 8; i > 0;) {
-      --i;
-      while(ARM_DWT_CYCCNT < next_mark);
-      *FastPin<FIRST_PIN>::sport() = m_nWriteMask;
-      next_mark = ARM_DWT_CYCCNT + m_offsets[0];
-
-      uint32_t out = (b2.bg[3][i] << 24) | (b2.bg[2][i] << 16) | (b2.bg[1][i] << 8) | b2.bg[0][i];
-
-      out = ((~out) & m_nWriteMask);
-      while((next_mark - ARM_DWT_CYCCNT) > m_offsets[1]);
-      *FastPin<FIRST_PIN>::cport() = out;
-
-      out = m_nWriteMask;
-      while((next_mark - ARM_DWT_CYCCNT) > m_offsets[2]);
-      *FastPin<FIRST_PIN>::cport() = out;
-
-      // Read and store up to two bytes
-      if (x < m_nActualLanes) {
-        b.bytes[m_bitOffsets[x]] = pixels.template loadAndScale<PX>(pixels,x,d,scale);
-        ++x;
-        if (x < m_nActualLanes) {
-          b.bytes[m_bitOffsets[x]] = pixels.template loadAndScale<PX>(pixels,x,d,scale);
-          ++x;
+        _outlines b2;
+        transpose8x1(b.bg[3], b2.bg[3]);
+        transpose8x1(b.bg[2], b2.bg[2]);
+        transpose8x1(b.bg[1], b2.bg[1]);
+        transpose8x1(b.bg[0], b2.bg[0]);
+
+        register uint8_t d = pixels.template getd<PX>(pixels);
+        register uint8_t scale = pixels.template getscale<PX>(pixels);
+
+        int x = 0;
+        for(uint32_t i = 8; i > 0;) {
+            --i;
+            while(ARM_DWT_CYCCNT < next_mark);
+            *FastPin<FIRST_PIN>::sport() = m_nWriteMask;
+            next_mark = ARM_DWT_CYCCNT + m_offsets[0];
+
+            uint32_t out = (b2.bg[3][i] << 24) | (b2.bg[2][i] << 16) | (b2.bg[1][i] << 8) | b2.bg[0][i];
+
+            out = ((~out) & m_nWriteMask);
+            while((next_mark - ARM_DWT_CYCCNT) > m_offsets[1]);
+            *FastPin<FIRST_PIN>::cport() = out;
+
+            out = m_nWriteMask;
+            while((next_mark - ARM_DWT_CYCCNT) > m_offsets[2]);
+            *FastPin<FIRST_PIN>::cport() = out;
+
+            // Read and store up to two bytes
+            if (x < m_nActualLanes) {
+                b.bytes[m_bitOffsets[x]] = pixels.template loadAndScale<PX>(pixels, x, d, scale);
+                ++x;
+                if (x < m_nActualLanes) {
+                    b.bytes[m_bitOffsets[x]] = pixels.template loadAndScale<PX>(pixels, x, d, scale);
+                    ++x;
+                }
+            }
         }
     }