New approach.

Use opus-1.2.1. Use libopusenc-0.1. Use latest (17/09/2017) esp-idf. Fixed point.
commarmi76 · Sep 16, 2017 · 97714fe · 97714fe
1 parent 7b63a1f
commit 97714fe
Show file tree

Hide file tree

Showing 251 changed files with 11,733 additions and 5,290 deletions.
diff --git a/README.md b/README.md
@@ -3,37 +3,11 @@ Experiments to port libopus encode/decoder to esp32
 
 Original sources by the authors of Opus: http://opus-codec.org
 
-=================================================================================
-
-First benchmark:
-Encode Sample16kHz.raw (little-endian) data, 16 bit, stereo 16000 Hz 10 seconds
-It's still missing the ogg part !!!!
-
-1. Without CFLAGS:
-
-9746146 microseconds
-
-2. Without CFLAGS and FIXED_POINT:
-
-8185269 microseconds
-
-3. Without CFLAGS and release mode:
-
-8970423 microseconds
-
-So, the best option is (2): 1,22 realtime. 
+Use opus-1.2.1 and libopusenc-0.1
 
 =================================================================================
 
-Second benchmark:
-Encode Sample16kHz.raw (little-endian) data, 16 bit, stereo 16000 Hz 10 seconds, resampled 48KHz (mandatory for opus ????)
-Using libopusenc from xiph and last idf framework (24/08/2017).
-
-1. Without CFLAGS:
-
-Total time in microseconds: 23622539 microseconds
-
-So, big performance regression. :-(
+Benchmark: 13200580 microseconds to encode Sample16kHz.raw (little-endian data, 16 bit, stereo, 16000 Hz, 10 seconds).
 
 =================================================================================
 

diff --git a/components/libopus/component.mk b/components/libopus/component.mk
@@ -1,8 +1,8 @@
 #
 # Component Makefile
 #
-COMPONENT_ADD_INCLUDEDIRS := include include/silk include/silk/celt include/silk/fixed include/silk/float include/celt 
+COMPONENT_ADD_INCLUDEDIRS := include include/silk include/silk/celt include/silk/fixed include/celt 
 
-COMPONENT_SRCDIRS := library library/celt library/silk library/silk/fixed library/silk/float
+COMPONENT_SRCDIRS := library library/celt library/silk library/silk/fixed 
 
-CFLAGS += -Wno-unused-function -DHAVE_CONFIG_H 
+CFLAGS += -Wno-unused-function -DHAVE_CONFIG_H -Os -DSMALL_FOOTPRINT -funroll-loops -ffast-math
diff --git a/components/libopus/include/celt/_kiss_fft_guts.h b/components/libopus/include/celt/_kiss_fft_guts.h
@@ -58,12 +58,12 @@
 #   define S_MUL(a,b) MULT16_32_Q15(b, a)
 
 #   define C_MUL(m,a,b) \
-      do{ (m).r = SUB32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \
-          (m).i = ADD32(S_MUL((a).r,(b).i) , S_MUL((a).i,(b).r)); }while(0)
+      do{ (m).r = SUB32_ovflw(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \
+          (m).i = ADD32_ovflw(S_MUL((a).r,(b).i) , S_MUL((a).i,(b).r)); }while(0)
 
 #   define C_MULC(m,a,b) \
-      do{ (m).r = ADD32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \
-          (m).i = SUB32(S_MUL((a).i,(b).r) , S_MUL((a).r,(b).i)); }while(0)
+      do{ (m).r = ADD32_ovflw(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \
+          (m).i = SUB32_ovflw(S_MUL((a).i,(b).r) , S_MUL((a).r,(b).i)); }while(0)
 
 #   define C_MULBYSCALAR( c, s ) \
       do{ (c).r =  S_MUL( (c).r , s ) ;\
@@ -77,17 +77,17 @@
                 DIVSCALAR( (c).i  , div); }while (0)
 
 #define  C_ADD( res, a,b)\
-    do {(res).r=ADD32((a).r,(b).r);  (res).i=ADD32((a).i,(b).i); \
+    do {(res).r=ADD32_ovflw((a).r,(b).r);  (res).i=ADD32_ovflw((a).i,(b).i); \
     }while(0)
 #define  C_SUB( res, a,b)\
-    do {(res).r=SUB32((a).r,(b).r);  (res).i=SUB32((a).i,(b).i); \
+    do {(res).r=SUB32_ovflw((a).r,(b).r);  (res).i=SUB32_ovflw((a).i,(b).i); \
     }while(0)
 #define C_ADDTO( res , a)\
-    do {(res).r = ADD32((res).r, (a).r);  (res).i = ADD32((res).i,(a).i);\
+    do {(res).r = ADD32_ovflw((res).r, (a).r);  (res).i = ADD32_ovflw((res).i,(a).i);\
     }while(0)
 
 #define C_SUBFROM( res , a)\
-    do {(res).r = ADD32((res).r,(a).r);  (res).i = SUB32((res).i,(a).i); \
+    do { /* res -= a on both parts, wrapping on overflow */ (res).r = SUB32_ovflw((res).r,(a).r);  (res).i = SUB32_ovflw((res).i,(a).i); \
     }while(0)
 
 #if defined(OPUS_ARM_INLINE_ASM)

diff --git a/components/libopus/include/celt/arch.h b/components/libopus/include/celt/arch.h
@@ -46,6 +46,14 @@
 #  endif
 # endif
 
+#if OPUS_GNUC_PREREQ(3, 0)
+#define opus_likely(x)       (__builtin_expect(!!(x), 1))
+#define opus_unlikely(x)     (__builtin_expect(!!(x), 0))
+#else
+#define opus_likely(x)       (!!(x))
+#define opus_unlikely(x)     (!!(x))
+#endif
+
 #define CELT_SIG_SCALE 32768.f
 
 #define celt_fatal(str) _celt_fatal(str, __FILE__, __LINE__);
@@ -93,6 +101,7 @@ static OPUS_INLINE void _celt_fatal(const char *str, const char *file, int line)
 
 typedef opus_int16 opus_val16;
 typedef opus_int32 opus_val32;
+typedef opus_int64 opus_val64;
 
 typedef opus_val32 celt_sig;
 typedef opus_val16 celt_norm;
@@ -101,6 +110,9 @@ typedef opus_val32 celt_ener;
 #define Q15ONE 32767
 
 #define SIG_SHIFT 12
+/* Safe saturation value for 32-bit signals. Should be less than
+   2^31*(1-0.85) to avoid blowing up on DC at deemphasis.*/
+#define SIG_SAT (300000000)
 
 #define NORM_SCALING 16384
 
@@ -147,6 +159,7 @@ static OPUS_INLINE opus_int16 SAT16(opus_int32 x) {
 
 typedef float opus_val16;
 typedef float opus_val32;
+typedef float opus_val64;
 
 typedef float celt_sig;
 typedef float celt_norm;
@@ -186,6 +199,7 @@ static OPUS_INLINE int celt_isnan(float x)
 
 #define NEG16(x) (-(x))
 #define NEG32(x) (-(x))
+#define NEG32_ovflw(x) (-(x))
 #define EXTRACT16(x) (x)
 #define EXTEND32(x) (x)
 #define SHR16(a,shift) (a)
@@ -202,13 +216,16 @@ static OPUS_INLINE int celt_isnan(float x)
 #define SATURATE16(x)   (x)
 
 #define ROUND16(a,shift)  (a)
+#define SROUND16(a,shift) (a)
 #define HALF16(x)       (.5f*(x))
 #define HALF32(x)       (.5f*(x))
 
 #define ADD16(a,b) ((a)+(b))
 #define SUB16(a,b) ((a)-(b))
 #define ADD32(a,b) ((a)+(b))
 #define SUB32(a,b) ((a)-(b))
+#define ADD32_ovflw(a,b) ((a)+(b))
+#define SUB32_ovflw(a,b) ((a)-(b))
 #define MULT16_16_16(a,b)     ((a)*(b))
 #define MULT16_16(a,b)     ((opus_val32)(a)*(opus_val32)(b))
 #define MAC16_16(c,a,b)     ((c)+(opus_val32)(a)*(opus_val32)(b))
@@ -243,9 +260,9 @@ static OPUS_INLINE int celt_isnan(float x)
 
 #ifndef GLOBAL_STACK_SIZE
 #ifdef FIXED_POINT
-#define GLOBAL_STACK_SIZE 100000
+#define GLOBAL_STACK_SIZE 120000
 #else
-#define GLOBAL_STACK_SIZE 100000
+#define GLOBAL_STACK_SIZE 120000
 #endif
 #endif
 

diff --git a/components/libopus/include/celt/bands.h b/components/libopus/include/celt/bands.h
@@ -31,17 +31,20 @@
 #define BANDS_H
 
 #include "arch.h"
-#include "entdec.h"
 #include "modes.h"
 #include "entenc.h"
+#include "entdec.h"
 #include "rate.h"
 
+opus_int16 bitexact_cos(opus_int16 x);
+int bitexact_log2tan(int isin,int icos);
+
 /** Compute the amplitude (sqrt energy) in each of the bands
  * @param m Mode data
  * @param X Spectrum
  * @param bandE Square root of the energy for each band (returned)
  */
-void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM);
+void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM, int arch);
 
 /*void compute_noise_energies(const CELTMode *m, const celt_sig *X, const opus_val16 *tonality, celt_ener *bandE);*/
 
@@ -105,7 +108,7 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
       const celt_ener *bandE, int *pulses, int shortBlocks, int spread,
       int dual_stereo, int intensity, int *tf_res, opus_int32 total_bits,
       opus_int32 balance, ec_ctx *ec, int M, int codedBands, opus_uint32 *seed,
-      int arch);
+      int complexity, int arch, int disable_inv);
 
 void anti_collapse(const CELTMode *m, celt_norm *X_,
       unsigned char *collapse_masks, int LM, int C, int size, int start,

diff --git a/components/libopus/include/celt/celt.h b/components/libopus/include/celt/celt.h
@@ -39,8 +39,8 @@
 #include "opus_defines.h"
 #include "opus_custom.h"
 #include "entenc.h"
-#include "arch.h"
 #include "entdec.h"
+#include "arch.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -50,24 +50,34 @@ extern "C" {
 #define CELTDecoder OpusCustomDecoder
 #define CELTMode OpusCustomMode
 
+#define LEAK_BANDS 19
+
 typedef struct {
    int valid;
    float tonality;
    float tonality_slope;
    float noisiness;
    float activity;
    float music_prob;
-   int        bandwidth;
-}AnalysisInfo;
+   float vad_prob;
+   int   bandwidth;
+   float activity_probability;
+   /* Store as Q6 char to save space. */
+   unsigned char leak_boost[LEAK_BANDS];
+} AnalysisInfo;
+
+typedef struct {
+   int signalType;
+   int offset;
+} SILKInfo;
 
 #define __celt_check_mode_ptr_ptr(ptr) ((ptr) + ((ptr) - (const CELTMode**)(ptr)))
 
 #define __celt_check_analysis_ptr(ptr) ((ptr) + ((ptr) - (const AnalysisInfo*)(ptr)))
 
-/* Encoder/decoder Requests */
+#define __celt_check_silkinfo_ptr(ptr) ((ptr) + ((ptr) - (const SILKInfo*)(ptr)))
 
-/* Expose this option again when variable framesize actually works */
-#define OPUS_FRAMESIZE_VARIABLE              5010 /**< Optimize the frame size dynamically */
+/* Encoder/decoder Requests */
 
 
 #define CELT_SET_PREDICTION_REQUEST    10002
@@ -116,6 +126,9 @@ typedef struct {
 #define OPUS_SET_ENERGY_MASK_REQUEST    10026
 #define OPUS_SET_ENERGY_MASK(x) OPUS_SET_ENERGY_MASK_REQUEST, __opus_check_val16_ptr(x)
 
+#define CELT_SET_SILK_INFO_REQUEST    10028
+#define CELT_SET_SILK_INFO(x) CELT_SET_SILK_INFO_REQUEST, __celt_check_silkinfo_ptr(x)
+
 /* Encoder stuff */
 
 int celt_encoder_get_size(int channels);

diff --git a/components/libopus/include/celt/celt_lpc.h b/components/libopus/include/celt/celt_lpc.h
@@ -45,12 +45,11 @@ void celt_fir_c(
          opus_val16 *y,
          int N,
          int ord,
-         opus_val16 *mem,
          int arch);
 
 #if !defined(OVERRIDE_CELT_FIR)
-#define celt_fir(x, num, y, N, ord, mem, arch) \
-    (celt_fir_c(x, num, y, N, ord, mem, arch))
+#define celt_fir(x, num, y, N, ord, arch) \
+    (celt_fir_c(x, num, y, N, ord, arch))
 #endif
 
 void celt_iir(const opus_val32 *x,

diff --git a/components/libopus/include/celt/cwrs.h b/components/libopus/include/celt/cwrs.h
@@ -31,9 +31,9 @@
 #define CWRS_H
 
 #include "arch.h"
-#include "entdec.h"
 #include "stack_alloc.h"
 #include "entenc.h"
+#include "entdec.h"
 
 #ifdef CUSTOM_MODES
 int log2_frac(opus_uint32 val, int frac);

diff --git a/components/libopus/include/celt/fixed_debug.h b/components/libopus/include/celt/fixed_debug.h
@@ -59,6 +59,14 @@ extern opus_int64 celt_mips;
 #define SHR(a,b) SHR32(a,b)
 #define PSHR(a,b) PSHR32(a,b)
 
+/** Add two 32-bit values, ignore any overflows */
+#define ADD32_ovflw(a,b) (celt_mips+=2,(opus_val32)((opus_uint32)(a)+(opus_uint32)(b)))
+/** Subtract two 32-bit values, ignore any overflows */
+#define SUB32_ovflw(a,b) (celt_mips+=2,(opus_val32)((opus_uint32)(a)-(opus_uint32)(b)))
+/* Avoid MSVC warning C4146: unary minus operator applied to unsigned type */
+/** Negate 32-bit value, ignore any overflows */
+#define NEG32_ovflw(a) (celt_mips+=2,(opus_val32)(0-(opus_uint32)(a)))
+
 static OPUS_INLINE short NEG16(int x)
 {
    int res;
@@ -227,12 +235,11 @@ static OPUS_INLINE int SHL32_(opus_int64 a, int shift, char *file, int line)
 #define VSHR32(a, shift) (((shift)>0) ? SHR32(a, shift) : SHL32(a, -(shift)))
 
 #define ROUND16(x,a) (celt_mips--,EXTRACT16(PSHR32((x),(a))))
+#define SROUND16(x,a) (celt_mips--,EXTRACT16(SATURATE(PSHR32(x,a), 32767))) /* saturating ROUND16; no trailing ';' so it stays a plain expression */
+
 #define HALF16(x)  (SHR16(x,1))
 #define HALF32(x)  (SHR32(x,1))
 
-//#define SHR(a,shift) ((a) >> (shift))
-//#define SHL(a,shift) ((a) << (shift))
-
 #define ADD16(a, b) ADD16_(a, b, __FILE__, __LINE__)
 static OPUS_INLINE short ADD16_(int a, int b, char *file, int line)
 {

diff --git a/components/libopus/include/celt/fixed_generic.h b/components/libopus/include/celt/fixed_generic.h
@@ -104,6 +104,9 @@
 
 /** Shift by a and round-to-neareast 32-bit value. Result is a 16-bit value */
 #define ROUND16(x,a) (EXTRACT16(PSHR32((x),(a))))
+/** Shift x right by a with round-to-nearest, then saturate to +/-32767. Result is a 16-bit value (expression, no trailing ';') */
+#define SROUND16(x,a) (EXTRACT16(SATURATE(PSHR32((x),(a)), 32767)))
+
 /** Divide by two */
 #define HALF16(x)  (SHR16(x,1))
 #define HALF32(x)  (SHR32(x,1))
@@ -117,6 +120,14 @@
 /** Subtract two 32-bit values */
 #define SUB32(a,b) ((opus_val32)(a)-(opus_val32)(b))
 
+/** Add two 32-bit values, ignore any overflows */
+#define ADD32_ovflw(a,b) ((opus_val32)((opus_uint32)(a)+(opus_uint32)(b)))
+/** Subtract two 32-bit values, ignore any overflows */
+#define SUB32_ovflw(a,b) ((opus_val32)((opus_uint32)(a)-(opus_uint32)(b)))
+/* Avoid MSVC warning C4146: unary minus operator applied to unsigned type */
+/** Negate 32-bit value, ignore any overflows */
+#define NEG32_ovflw(a) ((opus_val32)(0-(opus_uint32)(a)))
+
 /** 16x16 multiplication where the result fits in 16 bits */
 #define MULT16_16_16(a,b)     ((((opus_val16)(a))*((opus_val16)(b))))
 

diff --git a/components/libopus/include/celt/float_cast.h b/components/libopus/include/celt/float_cast.h
@@ -61,7 +61,13 @@
 **      the config.h file.
 */
 
-#if (HAVE_LRINTF)
+/* With GCC, when SSE is available, the fastest conversion is cvtss2si. */
+#if defined(__GNUC__) && defined(__SSE__)
+
+#include <xmmintrin.h>
+static OPUS_INLINE opus_int32 float2int(float x) {return _mm_cvt_ss2si(_mm_set_ss(x));}
+
+#elif defined(HAVE_LRINTF)
 
 /*      These defines enable functionality introduced with the 1999 ISO C
 **      standard. They must be defined before the inclusion of math.h to

diff --git a/components/libopus/include/celt/laplace.h b/components/libopus/include/celt/laplace.h
@@ -26,8 +26,8 @@
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
-#include "entdec.h"
 #include "entenc.h"
+#include "entdec.h"
 
 /** Encode a value that is assumed to be the realisation of a
     Laplace-distributed random process