Skip to content

Commit

Permalink
Merge remote-tracking branch 'mpg123/master' into master-with-github-ci
Browse files Browse the repository at this point in the history
  • Loading branch information
mpg123 GitHub bot committed Dec 29, 2023
2 parents c223935 + 6425108 commit bc38cec
Show file tree
Hide file tree
Showing 6 changed files with 130 additions and 69 deletions.
3 changes: 3 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@
to build individual modules using --disable-components logic.
- out123:
-- added --libversion
- libmpg123:
-- Avoid indirect branches into the assembly routines by using C wrappers,
relieving us of the need to care for bti / endbr instructions.

1.32.3
------
Expand Down
17 changes: 7 additions & 10 deletions src/libmpg123/decode.h
Original file line number Diff line number Diff line change
Expand Up @@ -203,16 +203,6 @@ void INT123_dct64_i386 (real *,real *,real *);
void INT123_dct64_altivec(real *,real *,real *);
void INT123_dct64_i486(int*, int* , real*); /* Yeah, of no use outside of synth_i486.c .*/

/* This is used by the layer 3 decoder, one generic function and 3DNow variants. */
void INT123_dct36 (real *,real *,real *,const real *,real *);
void INT123_dct36_3dnow (real *,real *,real *,const real *,real *);
void INT123_dct36_3dnowext(real *,real *,real *,const real *,real *);
void INT123_dct36_x86_64 (real *,real *,real *,const real *,real *);
void INT123_dct36_sse (real *,real *,real *,const real *,real *);
void INT123_dct36_avx (real *,real *,real *,const real *,real *);
void INT123_dct36_neon (real *,real *,real *,const real *,real *);
void INT123_dct36_neon64 (real *,real *,real *,const real *,real *);

/* Tools for NtoM resampling synth, defined in ntom.c . */
int INT123_synth_ntom_set_step(mpg123_handle *fr); /* prepare ntom decoding */
unsigned long INT123_ntom_val(mpg123_handle *fr, int64_t frame); /* compute INT123_ntom_val for frame offset */
Expand All @@ -232,6 +222,13 @@ int64_t INT123_ntom_frameoff(mpg123_handle *fr, int64_t soff);
/* Initialization of any static data that may be needed at runtime.
Make sure you call these once before it is too late. */
#ifndef NO_LAYER3

#ifdef OPT_THE_DCT36
// Runtime selection of the dct36 routine stored in fr->cpu_opts.the_dct36.
// The candidate functions are static to layer3.c, so code elsewhere cannot
// name them and has to go through these two helpers.
//
// INT123_dct36_choose: set fr->cpu_opts.the_dct36 according to the decoder
// type in fr->cpu_opts.type, falling back to the generic C routine.
void INT123_dct36_choose(mpg123_handle *fr);
// INT123_dct36_match: return 1 if the currently set dct36 routine is the one
// belonging to decoder type t, 0 otherwise. Only sse, dreidnowext_vintage and
// dreidnow_vintage are checked; any other t yields 0.
int INT123_dct36_match(mpg123_handle *fr, enum optdec t);
#endif

#ifdef RUNTIME_TABLES
void INT123_init_layer3(void);
#endif
Expand Down
4 changes: 1 addition & 3 deletions src/libmpg123/frame.h
Original file line number Diff line number Diff line change
Expand Up @@ -167,11 +167,9 @@ struct mpg123_handle_struct
{
#ifdef OPT_MULTI

#ifndef NO_LAYER3
#if (defined OPT_3DNOW_VINTAGE || defined OPT_3DNOWEXT_VINTAGE || defined OPT_SSE || defined OPT_X86_64 || defined OPT_AVX || defined OPT_NEON || defined OPT_NEON64)
#ifdef OPT_THE_DCT36
void (*the_dct36)(real *,real *,real *,const real *,real *);
#endif
#endif

#endif
enum optdec type;
Expand Down
112 changes: 108 additions & 4 deletions src/libmpg123/layer3.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,14 @@
#include "../common/debug.h"


/*
	Prototypes of the dct36 assembly routines.
	They are declared here instead of in a header because they are only
	meant to be called from the C wrappers in this file.
	All variants share one signature.
*/
void INT123_dct36_3dnow(real *inbuf, real *o1, real *o2, const real *wintab, real *tsbuf);
void INT123_dct36_3dnowext(real *inbuf, real *o1, real *o2, const real *wintab, real *tsbuf);
void INT123_dct36_x86_64(real *inbuf, real *o1, real *o2, const real *wintab, real *tsbuf);
void INT123_dct36_sse(real *inbuf, real *o1, real *o2, const real *wintab, real *tsbuf);
void INT123_dct36_avx(real *inbuf, real *o1, real *o2, const real *wintab, real *tsbuf);
void INT123_dct36_neon(real *inbuf, real *o1, real *o2, const real *wintab, real *tsbuf);
void INT123_dct36_neon64(real *inbuf, real *o1, real *o2, const real *wintab, real *tsbuf);

/* define CUT_SFB21 if you want to cut-off the frequency above 16kHz */
#if 0
Expand Down Expand Up @@ -1256,10 +1264,7 @@ static void III_antialias(real xr[SBLIMIT][SSLIMIT],struct gr_info_s *gr_info)
Mathematics of Computation, Volume 32, Number 141, January 1978,
Pages 175-199
*/

/* Calculation of the inverse MDCT
used to be static without 3dnow - does that really matter? */
void INT123_dct36(real *inbuf,real *o1,real *o2,const real *wintab,real *tsbuf)
static void INT123_dct36(real *inbuf,real *o1,real *o2,const real *wintab,real *tsbuf)
{
real tmp[18];

Expand Down Expand Up @@ -1449,6 +1454,105 @@ void INT123_dct36(real *inbuf,real *o1,real *o2,const real *wintab,real *tsbuf)
}
}

// Wrap the assembly routine calls into C functions that serve as jump targets to satisfy
// indirect branch protection if the toolchain enables that. Otherwise, we'd need to anticipate
// that in the assembly (and ensure assemblers support endbr64 and friends).
// Loss of efficiency: at most one additional direct call per dct36 invocation.

// With a single static optimization choice there is no function pointer, so we do not have that problem.

#ifdef OPT_THE_DCT36

/*
	Generate a static C function named <asmfunc>_wrap that forwards all of its
	arguments to the assembly routine asmfunc via a direct call.
	The wrapper has the same signature as the the_dct36 function pointer and
	is intended to be used as its target instead of the assembly routine.
	Note: The wrapper returns void, so the call must not be prefixed with
	'return'; returning an expression from a void function is not valid ISO C.
*/
#define DCT36_WRAP(asmfunc) \
static void asmfunc ## _wrap(real *inbuf,real *o1,real *o2,const real *wintab,real *tsbuf) \
{ \
	asmfunc(inbuf, o1, o2, wintab, tsbuf); \
}

/* One wrapper for each assembly dct36 variant enabled in this build. */
#ifdef OPT_SSE
DCT36_WRAP(INT123_dct36_sse)
#endif
#ifdef OPT_3DNOWEXT_VINTAGE
DCT36_WRAP(INT123_dct36_3dnowext)
#endif
#ifdef OPT_3DNOW_VINTAGE
DCT36_WRAP(INT123_dct36_3dnow)
#endif
#ifdef OPT_X86_64
DCT36_WRAP(INT123_dct36_x86_64)
#endif
#ifdef OPT_AVX
DCT36_WRAP(INT123_dct36_avx)
#endif
#ifdef OPT_NEON
DCT36_WRAP(INT123_dct36_neon)
#endif
#ifdef OPT_NEON64
DCT36_WRAP(INT123_dct36_neon64)
#endif

/*
	Tell whether the dct36 routine currently stored in the handle is the
	wrapper that belongs to the given decoder type.
	fr: handle whose cpu_opts.the_dct36 is inspected
	t:  decoder type to compare against; only sse, dreidnowext_vintage and
	    dreidnow_vintage are known here (each only if enabled in the build)
	Returns 1 on a match, 0 otherwise (including for any other type).
*/
int INT123_dct36_match(mpg123_handle *fr, enum optdec t)
{
	int matched = 0;

	switch(t)
	{
#ifdef OPT_SSE
		case sse:
			matched = (fr->cpu_opts.the_dct36 == INT123_dct36_sse_wrap);
		break;
#endif
#ifdef OPT_3DNOWEXT_VINTAGE
		case dreidnowext_vintage:
			matched = (fr->cpu_opts.the_dct36 == INT123_dct36_3dnowext_wrap);
		break;
#endif
#ifdef OPT_3DNOW_VINTAGE
		case dreidnow_vintage:
			matched = (fr->cpu_opts.the_dct36 == INT123_dct36_3dnow_wrap);
		break;
#endif
		default:
			/* Types without a check here never match. */
		break;
	}
	return matched;
}

/*
	Set fr->cpu_opts.the_dct36 according to the decoder type in
	fr->cpu_opts.type.
	For every decoder type with an assembly dct36 enabled in this build, the
	matching C wrapper (<routine>_wrap) is stored, never the assembly routine
	itself, so that the indirect call through the pointer always lands in
	compiler-generated code. All other types get the generic C INT123_dct36.
	fr: handle to update; cpu_opts.type must have been set before the call
*/
void INT123_dct36_choose(mpg123_handle *fr)
{
	switch(fr->cpu_opts.type)
	{
#ifdef OPT_SSE
		case sse:
			fr->cpu_opts.the_dct36 = INT123_dct36_sse_wrap;
		break;
#endif
#ifdef OPT_3DNOWEXT_VINTAGE
		case dreidnowext_vintage:
			fr->cpu_opts.the_dct36 = INT123_dct36_3dnowext_wrap;
		break;
#endif
#ifdef OPT_3DNOW_VINTAGE
		case dreidnow_vintage:
			fr->cpu_opts.the_dct36 = INT123_dct36_3dnow_wrap;
		break;
#endif
#ifdef OPT_AVX
		case avx:
			fr->cpu_opts.the_dct36 = INT123_dct36_avx_wrap;
		break;
#endif
#ifdef OPT_X86_64
		case x86_64:
			fr->cpu_opts.the_dct36 = INT123_dct36_x86_64_wrap;
		break;
#endif
#ifdef OPT_NEON
		case neon:
			fr->cpu_opts.the_dct36 = INT123_dct36_neon_wrap;
		break;
#endif
#ifdef OPT_NEON64
		/* Must be neon64, not neon: a distinct decoder type with its own routine. */
		case neon64:
			fr->cpu_opts.the_dct36 = INT123_dct36_neon64_wrap;
		break;
#endif
		default:
			/* Generic C implementation, itself a valid indirect call target. */
			fr->cpu_opts.the_dct36 = INT123_dct36;
		break;
	}
}

#endif

/* new DCT12 */
static void dct12(real *in,real *rawout1,real *rawout2,register const real *wi,register real *ts)
Expand Down
54 changes: 8 additions & 46 deletions src/libmpg123/optimize.c
Original file line number Diff line number Diff line change
Expand Up @@ -160,8 +160,8 @@ static enum optdec sse_or_vintage(mpg123_handle *fr)
enum optdec type;
type = sse_vintage;
# ifdef OPT_SSE
# ifdef OPT_MULTI
if(fr->cpu_opts.the_dct36 == INT123_dct36_sse)
# ifdef OPT_THE_DCT36
if(INT123_dct36_match(fr, sse))
# endif
type = sse;
# endif
Expand Down Expand Up @@ -192,7 +192,7 @@ static int find_dectype(mpg123_handle *fr)
type = dreidnowext;
# ifdef OPT_3DNOWEXT_VINTAGE
# ifdef OPT_MULTI
if(fr->cpu_opts.the_dct36 == INT123_dct36_3dnowext)
if(INT123_dct36_match(fr, dreidnowext_vintage))
# endif
type = dreidnowext_vintage;
# endif
Expand All @@ -210,7 +210,7 @@ static int find_dectype(mpg123_handle *fr)
type = dreidnow;
# ifdef OPT_3DNOW_VINTAGE
# ifdef OPT_MULTI
if(fr->cpu_opts.the_dct36 == INT123_dct36_3dnow)
if(INT123_dct36_match(fr, dreidnow_vintage))
# endif
type = dreidnow_vintage;
# endif
Expand Down Expand Up @@ -503,13 +503,6 @@ int INT123_frame_cpu_opt(mpg123_handle *fr, const char* cpu)
#endif

fr->cpu_opts.type = nodec;
#ifdef OPT_MULTI
#ifndef NO_LAYER3
#if (defined OPT_3DNOW_VINTAGE || defined OPT_3DNOWEXT_VINTAGE || defined OPT_SSE || defined OPT_X86_64 || defined OPT_AVX || defined OPT_NEON || defined OPT_NEON64)
fr->cpu_opts.the_dct36 = INT123_dct36;
#endif
#endif
#endif
/* covers any i386+ cpu; they actually differ only in the INT123_synth_1to1 function, mostly... */
#ifdef OPT_X86
if(cpu_i586(fr->cpu_flags))
Expand All @@ -523,11 +516,6 @@ int INT123_frame_cpu_opt(mpg123_handle *fr, const char* cpu)
{
chosen = dn_sse;
fr->cpu_opts.type = sse;
#ifdef OPT_MULTI
# ifndef NO_LAYER3
/* if(cpu_fast_sse(fr->cpu_flags)) */ fr->cpu_opts.the_dct36 = INT123_dct36_sse;
# endif
#endif
# ifndef NO_16BIT
fr->synths.plain[r_1to1][f_16] = INT123_synth_1to1_sse;
# ifdef ACCURATE_ROUNDING
Expand Down Expand Up @@ -590,11 +578,6 @@ int INT123_frame_cpu_opt(mpg123_handle *fr, const char* cpu)
{
chosen = dn_dreidnowext_vintage;
fr->cpu_opts.type = dreidnowext_vintage;
#ifdef OPT_MULTI
# ifndef NO_LAYER3
fr->cpu_opts.the_dct36 = INT123_dct36_3dnowext;
# endif
#endif
# ifndef NO_16BIT
fr->synths.plain[r_1to1][f_16] = INT123_synth_1to1_3dnowext;
# endif
Expand All @@ -619,11 +602,6 @@ int INT123_frame_cpu_opt(mpg123_handle *fr, const char* cpu)
{
chosen = dn_dreidnow_vintage;
fr->cpu_opts.type = dreidnow_vintage;
#ifdef OPT_MULTI
# ifndef NO_LAYER3
fr->cpu_opts.the_dct36 = INT123_dct36_3dnow;
# endif
#endif
# ifndef NO_16BIT
fr->synths.plain[r_1to1][f_16] = INT123_synth_1to1_3dnow;
# endif
Expand Down Expand Up @@ -723,11 +701,6 @@ int INT123_frame_cpu_opt(mpg123_handle *fr, const char* cpu)
{
chosen = "x86-64 (AVX)";
fr->cpu_opts.type = avx;
#ifdef OPT_MULTI
# ifndef NO_LAYER3
fr->cpu_opts.the_dct36 = INT123_dct36_avx;
# endif
#endif
# ifndef NO_16BIT
fr->synths.plain[r_1to1][f_16] = INT123_synth_1to1_avx;
fr->synths.stereo[r_1to1][f_16] = INT123_synth_1to1_stereo_avx;
Expand All @@ -749,11 +722,6 @@ int INT123_frame_cpu_opt(mpg123_handle *fr, const char* cpu)
{
chosen = "x86-64 (SSE)";
fr->cpu_opts.type = x86_64;
#ifdef OPT_MULTI
# ifndef NO_LAYER3
fr->cpu_opts.the_dct36 = INT123_dct36_x86_64;
# endif
#endif
# ifndef NO_16BIT
fr->synths.plain[r_1to1][f_16] = INT123_synth_1to1_x86_64;
fr->synths.stereo[r_1to1][f_16] = INT123_synth_1to1_stereo_x86_64;
Expand Down Expand Up @@ -796,11 +764,6 @@ int INT123_frame_cpu_opt(mpg123_handle *fr, const char* cpu)
{
chosen = dn_neon;
fr->cpu_opts.type = neon;
#ifdef OPT_MULTI
# ifndef NO_LAYER3
fr->cpu_opts.the_dct36 = INT123_dct36_neon;
# endif
#endif
# ifndef NO_16BIT
fr->synths.plain[r_1to1][f_16] = INT123_synth_1to1_neon;
fr->synths.stereo[r_1to1][f_16] = INT123_synth_1to1_stereo_neon;
Expand Down Expand Up @@ -834,11 +797,6 @@ int INT123_frame_cpu_opt(mpg123_handle *fr, const char* cpu)
{
chosen = dn_neon64;
fr->cpu_opts.type = neon64;
#ifdef OPT_MULTI
# ifndef NO_LAYER3
fr->cpu_opts.the_dct36 = INT123_dct36_neon64;
# endif
#endif
# ifndef NO_16BIT
fr->synths.plain[r_1to1][f_16] = INT123_synth_1to1_neon64;
fr->synths.stereo[r_1to1][f_16] = INT123_synth_1to1_stereo_neon64;
Expand Down Expand Up @@ -897,6 +855,10 @@ int INT123_frame_cpu_opt(mpg123_handle *fr, const char* cpu)
# endif
# endif

#ifdef OPT_THE_DCT36
INT123_dct36_choose(fr);
#endif

#ifdef OPT_DITHER
if(done && dithered)
{
Expand Down
9 changes: 3 additions & 6 deletions src/libmpg123/optimize.h
Original file line number Diff line number Diff line change
Expand Up @@ -364,19 +364,16 @@ extern const int INT123_costab_mmxsse[];
#endif
#endif

/*
Now come two blocks of standard definitions for multi-decoder mode and single-decoder mode.
Most stuff is so automatic that it's indeed generated by some inline shell script.
Remember to use these scripts when possible, instead of direct repetitive hacking.
*/

#ifdef OPT_MULTI

# define defopt nodec

# ifndef NO_LAYER3
# if (defined OPT_3DNOW_VINTAGE || defined OPT_3DNOWEXT_VINTAGE || defined OPT_SSE || defined OPT_X86_64 || defined OPT_AVX || defined OPT_NEON || defined OPT_NEON64)
# define OPT_THE_DCT36
# define opt_dct36(fr) ((fr)->cpu_opts.the_dct36)
# endif
# endif

#endif /* OPT_MULTI else */

Expand Down

0 comments on commit bc38cec

Please sign in to comment.