aboutsummaryrefslogtreecommitdiff
path: root/Src/external_dependencies/openmpt-trunk/include/r8brain
diff options
context:
space:
mode:
authorJef <jef@targetspot.com>2024-09-24 08:54:57 -0400
committerJef <jef@targetspot.com>2024-09-24 08:54:57 -0400
commit20d28e80a5c861a9d5f449ea911ab75b4f37ad0d (patch)
tree12f17f78986871dd2cfb0a56e5e93b545c1ae0d0 /Src/external_dependencies/openmpt-trunk/include/r8brain
parent537bcbc86291b32fc04ae4133ce4d7cac8ebe9a7 (diff)
downloadwinamp-20d28e80a5c861a9d5f449ea911ab75b4f37ad0d.tar.gz
Initial community commit
Diffstat (limited to 'Src/external_dependencies/openmpt-trunk/include/r8brain')
-rw-r--r--Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPBlockConvolver.h642
-rw-r--r--Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPFIRFilter.h719
-rw-r--r--Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPFracInterpolator.h1180
-rw-r--r--Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPHBDownsampler.h401
-rw-r--r--Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPHBUpsampler.h804
-rw-r--r--Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPHBUpsampler.inc711
-rw-r--r--Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPProcessor.h103
-rw-r--r--Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPRealFFT.h820
-rw-r--r--Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPResampler.h769
-rw-r--r--Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPSincFilterGen.h687
-rw-r--r--Src/external_dependencies/openmpt-trunk/include/r8brain/LICENSE21
-rw-r--r--Src/external_dependencies/openmpt-trunk/include/r8brain/OpenMPT.txt6
-rw-r--r--Src/external_dependencies/openmpt-trunk/include/r8brain/README.md408
-rw-r--r--Src/external_dependencies/openmpt-trunk/include/r8brain/fft4g.h1316
-rw-r--r--Src/external_dependencies/openmpt-trunk/include/r8brain/r8bbase.cpp38
-rw-r--r--Src/external_dependencies/openmpt-trunk/include/r8brain/r8bbase.h1206
-rw-r--r--Src/external_dependencies/openmpt-trunk/include/r8brain/r8bconf.h176
-rw-r--r--Src/external_dependencies/openmpt-trunk/include/r8brain/r8butil.h306
18 files changed, 10313 insertions, 0 deletions
diff --git a/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPBlockConvolver.h b/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPBlockConvolver.h
new file mode 100644
index 00000000..98297b65
--- /dev/null
+++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPBlockConvolver.h
@@ -0,0 +1,642 @@
+//$ nobt
+//$ nocpp
+
+/**
+ * @file CDSPBlockConvolver.h
+ *
+ * @brief Single-block overlap-save convolution processor class.
+ *
+ * This file includes single-block overlap-save convolution processor class.
+ *
+ * r8brain-free-src Copyright (c) 2013-2022 Aleksey Vaneev
+ * See the "LICENSE" file for license.
+ */
+
+#ifndef R8B_CDSPBLOCKCONVOLVER_INCLUDED
+#define R8B_CDSPBLOCKCONVOLVER_INCLUDED
+
+#include "CDSPFIRFilter.h"
+#include "CDSPProcessor.h"
+
+namespace r8b {
+
+/**
+ * @brief Single-block overlap-save convolution processing class.
+ *
+ * Class that implements single-block overlap-save convolution processing. The
+ * length of a single FFT block used depends on the length of the filter
+ * kernel.
+ *
+ * The rationale behind "single-block" processing is that increasing the FFT
+ * block length by 2 is more efficient than performing convolution at the same
+ * FFT block length but using two blocks.
+ *
+ * This class also implements a built-in resampling by any whole-number
+ * factor, which simplifies the overall resampling objects topology.
+ */
+
+class CDSPBlockConvolver : public CDSPProcessor
+{
+public:
+ /**
+ * Constructor initializes internal variables and constants of *this
+ * object.
+ *
+ * @param aFilter Pre-calculated filter data. Reference to this object is
+ * inhertied by *this object, and the object will be released when *this
+ * object is destroyed. If upsampling is used, filter's gain should be
+ * equal to the upsampling factor.
+ * @param aUpFactor The upsampling factor, positive value. E.g. value of 2
+ * means 2x upsampling should be performed over the input data.
+ * @param aDownFactor The downsampling factor, positive value. E.g. value
+ * of 2 means 2x downsampling should be performed over the output data.
+ * @param PrevLatency Latency, in samples (any value >=0), which was left
+ * in the output signal by a previous process. This value is usually
+ * non-zero if the minimum-phase filters are in use. This value is always
+ * zero if the linear-phase filters are in use.
+ * @param aDoConsumeLatency "True" if the output latency should be
+ * consumed. Does not apply to the fractional part of the latency (if such
+ * part is available).
+ */
+
+ CDSPBlockConvolver( CDSPFIRFilter& aFilter, const int aUpFactor,
+ const int aDownFactor, const double PrevLatency = 0.0,
+ const bool aDoConsumeLatency = true )
+ : Filter( &aFilter )
+ , UpFactor( aUpFactor )
+ , DownFactor( aDownFactor )
+ , DoConsumeLatency( aDoConsumeLatency )
+ , BlockLen2( 2 << Filter -> getBlockLenBits() )
+ {
+ R8BASSERT( UpFactor > 0 );
+ R8BASSERT( DownFactor > 0 );
+ R8BASSERT( PrevLatency >= 0.0 );
+
+ int fftinBits;
+ UpShift = getBitOccupancy( UpFactor ) - 1;
+
+ if(( 1 << UpShift ) == UpFactor )
+ {
+ fftinBits = Filter -> getBlockLenBits() + 1 - UpShift;
+ PrevInputLen = ( Filter -> getKernelLen() - 1 + UpFactor - 1 ) /
+ UpFactor;
+
+ InputLen = BlockLen2 - PrevInputLen * UpFactor;
+ }
+ else
+ {
+ UpShift = -1;
+ fftinBits = Filter -> getBlockLenBits() + 1;
+ PrevInputLen = Filter -> getKernelLen() - 1;
+ InputLen = BlockLen2 - PrevInputLen;
+ }
+
+ OutOffset = ( Filter -> isZeroPhase() ? Filter -> getLatency() : 0 );
+ LatencyFrac = Filter -> getLatencyFrac() + PrevLatency * UpFactor;
+ Latency = (int) LatencyFrac;
+ const int InLatency = Latency + Filter -> getLatency() - OutOffset;
+ LatencyFrac -= Latency;
+ LatencyFrac /= DownFactor;
+
+ Latency += InputLen + Filter -> getLatency();
+
+ int fftoutBits;
+ InputDelay = 0;
+ DownSkipInit = 0;
+ DownShift = getBitOccupancy( DownFactor ) - 1;
+
+ if(( 1 << DownShift ) == DownFactor )
+ {
+ fftoutBits = Filter -> getBlockLenBits() + 1 - DownShift;
+
+ if( DownFactor > 1 )
+ {
+ if( UpShift > 0 )
+ {
+ // This case never happens in practice due to mutual
+ // exclusion of "power of 2" DownFactor and UpFactor
+ // values.
+
+ R8BASSERT( UpShift == 0 );
+ }
+ else
+ {
+ // Make sure InputLen is divisible by DownFactor.
+
+ const int ilc = InputLen & ( DownFactor - 1 );
+ PrevInputLen += ilc;
+ InputLen -= ilc;
+ Latency -= ilc;
+
+ // Correct InputDelay for input and filter's latency.
+
+ const int lc = InLatency & ( DownFactor - 1 );
+
+ if( lc > 0 )
+ {
+ InputDelay = DownFactor - lc;
+ }
+
+ if( !DoConsumeLatency )
+ {
+ Latency /= DownFactor;
+ }
+ }
+ }
+ }
+ else
+ {
+ fftoutBits = Filter -> getBlockLenBits() + 1;
+ DownShift = -1;
+
+ if( !DoConsumeLatency && DownFactor > 1 )
+ {
+ DownSkipInit = Latency % DownFactor;
+ Latency /= DownFactor;
+ }
+ }
+
+ R8BASSERT( Latency >= 0 );
+
+ fftin = new CDSPRealFFTKeeper( fftinBits );
+
+ if( fftoutBits == fftinBits )
+ {
+ fftout = fftin;
+ }
+ else
+ {
+ ffto2 = new CDSPRealFFTKeeper( fftoutBits );
+ fftout = ffto2;
+ }
+
+ WorkBlocks.alloc( BlockLen2 * 2 + PrevInputLen );
+ CurInput = &WorkBlocks[ 0 ];
+ CurOutput = &WorkBlocks[ BlockLen2 ]; // CurInput and
+ // CurOutput are address-aligned.
+ PrevInput = &WorkBlocks[ BlockLen2 * 2 ];
+
+ clear();
+
+ R8BCONSOLE( "CDSPBlockConvolver: flt_len=%i in_len=%i io=%i/%i "
+ "fft=%i/%i latency=%i\n", Filter -> getKernelLen(), InputLen,
+ UpFactor, DownFactor, (*fftin) -> getLen(), (*fftout) -> getLen(),
+ getLatency() );
+ }
+
+ virtual ~CDSPBlockConvolver()
+ {
+ Filter -> unref();
+ }
+
+ virtual int getLatency() const
+ {
+ return( DoConsumeLatency ? 0 : Latency );
+ }
+
+ virtual double getLatencyFrac() const
+ {
+ return( LatencyFrac );
+ }
+
+ virtual int getMaxOutLen( const int MaxInLen ) const
+ {
+ R8BASSERT( MaxInLen >= 0 );
+
+ return(( MaxInLen * UpFactor + DownFactor - 1 ) / DownFactor );
+ }
+
+ virtual void clear()
+ {
+ memset( &PrevInput[ 0 ], 0, PrevInputLen * sizeof( PrevInput[ 0 ]));
+
+ if( DoConsumeLatency )
+ {
+ LatencyLeft = Latency;
+ }
+ else
+ {
+ LatencyLeft = 0;
+
+ if( DownShift > 0 )
+ {
+ memset( &CurOutput[ 0 ], 0, ( BlockLen2 >> DownShift ) *
+ sizeof( CurOutput[ 0 ]));
+ }
+ else
+ {
+ memset( &CurOutput[ BlockLen2 - OutOffset ], 0, OutOffset *
+ sizeof( CurOutput[ 0 ]));
+
+ memset( &CurOutput[ 0 ], 0, ( InputLen - OutOffset ) *
+ sizeof( CurOutput[ 0 ]));
+ }
+ }
+
+ memset( CurInput, 0, InputDelay * sizeof( CurInput[ 0 ]));
+
+ InDataLeft = InputLen - InputDelay;
+ UpSkip = 0;
+ DownSkip = DownSkipInit;
+ }
+
+ virtual int process( double* ip, int l0, double*& op0 )
+ {
+ R8BASSERT( l0 >= 0 );
+ R8BASSERT( UpFactor / DownFactor <= 1 || ip != op0 || l0 == 0 );
+
+ double* op = op0;
+ int l = l0 * UpFactor;
+ l0 = 0;
+
+ while( l > 0 )
+ {
+ const int Offs = InputLen - InDataLeft;
+
+ if( l < InDataLeft )
+ {
+ InDataLeft -= l;
+
+ if( UpShift >= 0 )
+ {
+ memcpy( &CurInput[ Offs >> UpShift ], ip,
+ ( l >> UpShift ) * sizeof( CurInput[ 0 ]));
+ }
+ else
+ {
+ copyUpsample( ip, &CurInput[ Offs ], l );
+ }
+
+ copyToOutput( Offs - OutOffset, op, l, l0 );
+ break;
+ }
+
+ const int b = InDataLeft;
+ l -= b;
+ InDataLeft = InputLen;
+ int ilu;
+
+ if( UpShift >= 0 )
+ {
+ const int bu = b >> UpShift;
+ memcpy( &CurInput[ Offs >> UpShift ], ip,
+ bu * sizeof( CurInput[ 0 ]));
+
+ ip += bu;
+ ilu = InputLen >> UpShift;
+ }
+ else
+ {
+ copyUpsample( ip, &CurInput[ Offs ], b );
+ ilu = InputLen;
+ }
+
+ const size_t pil = PrevInputLen * sizeof( CurInput[ 0 ]);
+ memcpy( &CurInput[ ilu ], PrevInput, pil );
+ memcpy( PrevInput, &CurInput[ ilu - PrevInputLen ], pil );
+
+ (*fftin) -> forward( CurInput );
+
+ if( UpShift > 0 )
+ {
+ #if R8B_FLOATFFT
+ mirrorInputSpectrum( (float*) CurInput );
+ #else // R8B_FLOATFFT
+ mirrorInputSpectrum( CurInput );
+ #endif // R8B_FLOATFFT
+ }
+
+ if( Filter -> isZeroPhase() )
+ {
+ (*fftout) -> multiplyBlocksZP( Filter -> getKernelBlock(),
+ CurInput );
+ }
+ else
+ {
+ (*fftout) -> multiplyBlocks( Filter -> getKernelBlock(),
+ CurInput );
+ }
+
+ if( DownShift > 0 )
+ {
+ const int z = BlockLen2 >> DownShift;
+
+ #if R8B_FLOATFFT
+ float* const kb = (float*) Filter -> getKernelBlock();
+ float* const p = (float*) CurInput;
+ #else // R8B_FLOATFFT
+ const double* const kb = Filter -> getKernelBlock();
+ double* const p = CurInput;
+ #endif // R8B_FLOATFFT
+
+ p[ 1 ] = kb[ z ] * p[ z ] - kb[ z + 1 ] * p[ z + 1 ];
+ }
+
+ (*fftout) -> inverse( CurInput );
+
+ copyToOutput( Offs - OutOffset, op, b, l0 );
+
+ double* const tmp = CurInput;
+ CurInput = CurOutput;
+ CurOutput = tmp;
+ }
+
+ return( l0 );
+ }
+
+private:
+ CDSPFIRFilter* Filter; ///< Filter in use.
+ ///<
+ CPtrKeeper< CDSPRealFFTKeeper* > fftin; ///< FFT object 1, used to produce
+ ///< the input spectrum (can embed the "power of 2" upsampling).
+ ///<
+ CPtrKeeper< CDSPRealFFTKeeper* > ffto2; ///< FFT object 2 (can be NULL).
+ ///<
+ CDSPRealFFTKeeper* fftout; ///< FFT object used to produce the output
+ ///< signal (can embed the "power of 2" downsampling), may point to
+ ///< either "fftin" or "ffto2".
+ ///<
+ int UpFactor; ///< Upsampling factor.
+ ///<
+ int DownFactor; ///< Downsampling factor.
+ ///<
+ bool DoConsumeLatency; ///< "True" if the output latency should be
+ ///< consumed. Does not apply to the fractional part of the latency
+ ///< (if such part is available).
+ ///<
+ int BlockLen2; ///< Equals block length * 2.
+ ///<
+ int OutOffset; ///< Output offset, depends on filter's introduced latency.
+ ///<
+ int PrevInputLen; ///< The length of previous input data saved, used for
+ ///< overlap.
+ ///<
+ int InputLen; ///< The number of input samples that should be accumulated
+ ///< before the input block is processed.
+ ///<
+ int Latency; ///< Processing latency, in samples.
+ ///<
+ double LatencyFrac; ///< Fractional latency, in samples, that is left in
+ ///< the output signal.
+ ///<
+ int UpShift; ///< "Power of 2" upsampling shift. Equals -1 if UpFactor is
+ ///< not a "power of 2" value. Equals 0 if UpFactor equals 1.
+ ///<
+ int DownShift; ///< "Power of 2" downsampling shift. Equals -1 if
+ ///< DownFactor is not a "power of 2". Equals 0 if DownFactor equals
+ ///< 1.
+ ///<
+ int InputDelay; ///< Additional input delay, in samples. Used to make the
+ ///< output delay divisible by DownShift. Used only if UpShift <= 0
+ ///< and DownShift > 0.
+ ///<
+ CFixedBuffer< double > WorkBlocks; ///< Previous input data, input and
+ ///< output data blocks, overall capacity = BlockLen2 * 2 +
+ ///< PrevInputLen. Used in the flip-flop manner.
+ ///<
+ double* PrevInput; ///< Previous input data buffer, capacity = BlockLen.
+ ///<
+ double* CurInput; ///< Input data buffer, capacity = BlockLen2.
+ ///<
+ double* CurOutput; ///< Output data buffer, capacity = BlockLen2.
+ ///<
+ int InDataLeft; ///< Samples left before processing input and output FFT
+ ///< blocks. Initialized to InputLen on clear.
+ ///<
+ int LatencyLeft; ///< Latency in samples left to skip.
+ ///<
+ int UpSkip; ///< The current upsampling sample skip (value in the range
+ ///< 0 to UpFactor - 1).
+ ///<
+ int DownSkip; ///< The current downsampling sample skip (value in the
+ ///< range 0 to DownFactor - 1). Not used if DownShift > 0.
+ ///<
+ int DownSkipInit; ///< The initial DownSkip value after clear().
+ ///<
+
+ /**
+ * Function copies samples from the input buffer to the output buffer
+ * while inserting zeros inbetween them to perform the whole-numbered
+ * upsampling.
+ *
+ * @param[in,out] ip0 Input buffer. Will be advanced on function's return.
+ * @param[out] op Output buffer.
+ * @param l0 The number of samples to fill in the output buffer, including
+ * both input samples and interpolation (zero) samples.
+ */
+
+ void copyUpsample( double*& ip0, double* op, int l0 )
+ {
+ int b = min( UpSkip, l0 );
+
+ if( b != 0 )
+ {
+ UpSkip -= b;
+ l0 -= b;
+
+ *op = 0.0;
+ op++;
+
+ while( --b != 0 )
+ {
+ *op = 0.0;
+ op++;
+ }
+ }
+
+ double* ip = ip0;
+ const int upf = UpFactor;
+ int l = l0 / upf;
+ int lz = l0 - l * upf;
+
+ if( upf == 3 )
+ {
+ while( l != 0 )
+ {
+ op[ 0 ] = *ip;
+ op[ 1 ] = 0.0;
+ op[ 2 ] = 0.0;
+ ip++;
+ op += upf;
+ l--;
+ }
+ }
+ else
+ if( upf == 5 )
+ {
+ while( l != 0 )
+ {
+ op[ 0 ] = *ip;
+ op[ 1 ] = 0.0;
+ op[ 2 ] = 0.0;
+ op[ 3 ] = 0.0;
+ op[ 4 ] = 0.0;
+ ip++;
+ op += upf;
+ l--;
+ }
+ }
+ else
+ {
+ const size_t zc = ( upf - 1 ) * sizeof( op[ 0 ]);
+
+ while( l != 0 )
+ {
+ *op = *ip;
+ ip++;
+
+ memset( op + 1, 0, zc );
+ op += upf;
+ l--;
+ }
+ }
+
+ if( lz != 0 )
+ {
+ *op = *ip;
+ ip++;
+ op++;
+
+ UpSkip = upf - lz;
+
+ while( --lz != 0 )
+ {
+ *op = 0.0;
+ op++;
+ }
+ }
+
+ ip0 = ip;
+ }
+
+ /**
+ * Function copies sample data from the CurOutput buffer to the specified
+ * output buffer and advances its position. If necessary, this function
+ * "consumes" latency and performs downsampling.
+ *
+ * @param Offs CurOutput buffer offset, can be negative.
+ * @param[out] op0 Output buffer pointer, will be advanced.
+ * @param b The number of output samples available, including those which
+ * are discarded during whole-number downsampling.
+ * @param l0 The overall output sample count, will be increased.
+ */
+
+ void copyToOutput( int Offs, double*& op0, int b, int& l0 )
+ {
+ if( Offs < 0 )
+ {
+ if( Offs + b <= 0 )
+ {
+ Offs += BlockLen2;
+ }
+ else
+ {
+ copyToOutput( Offs + BlockLen2, op0, -Offs, l0 );
+ b += Offs;
+ Offs = 0;
+ }
+ }
+
+ if( LatencyLeft != 0 )
+ {
+ if( LatencyLeft >= b )
+ {
+ LatencyLeft -= b;
+ return;
+ }
+
+ Offs += LatencyLeft;
+ b -= LatencyLeft;
+ LatencyLeft = 0;
+ }
+
+ const int df = DownFactor;
+
+ if( DownShift > 0 )
+ {
+ int Skip = Offs & ( df - 1 );
+
+ if( Skip > 0 )
+ {
+ Skip = df - Skip;
+ b -= Skip;
+ Offs += Skip;
+ }
+
+ if( b > 0 )
+ {
+ b = ( b + df - 1 ) >> DownShift;
+ memcpy( op0, &CurOutput[ Offs >> DownShift ],
+ b * sizeof( op0[ 0 ]));
+
+ op0 += b;
+ l0 += b;
+ }
+ }
+ else
+ {
+ if( df > 1 )
+ {
+ const double* ip = &CurOutput[ Offs + DownSkip ];
+ int l = ( b + df - 1 - DownSkip ) / df;
+ DownSkip += l * df - b;
+
+ double* op = op0;
+ l0 += l;
+ op0 += l;
+
+ while( l > 0 )
+ {
+ *op = *ip;
+ ip += df;
+ op++;
+ l--;
+ }
+ }
+ else
+ {
+ memcpy( op0, &CurOutput[ Offs ], b * sizeof( op0[ 0 ]));
+ op0 += b;
+ l0 += b;
+ }
+ }
+ }
+
+ /**
+ * Function performs input spectrum mirroring which is used to perform a
+ * fast "power of 2" upsampling. Such mirroring is equivalent to insertion
+ * of zeros into the input signal.
+ *
+ * @param p Spectrum data block to mirror.
+ * @tparam T Buffer's element type.
+ */
+
+ template< typename T >
+ void mirrorInputSpectrum( T* const p )
+ {
+ const int bl1 = BlockLen2 >> UpShift;
+ const int bl2 = bl1 + bl1;
+ int i;
+
+ for( i = bl1 + 2; i < bl2; i += 2 )
+ {
+ p[ i ] = p[ bl2 - i ];
+ p[ i + 1 ] = -p[ bl2 - i + 1 ];
+ }
+
+ p[ bl1 ] = p[ 1 ];
+ p[ bl1 + 1 ] = 0.0;
+ p[ 1 ] = p[ 0 ];
+
+ for( i = 1; i < UpShift; i++ )
+ {
+ const int z = bl1 << i;
+ memcpy( &p[ z ], p, z * sizeof( p[ 0 ]));
+ p[ z + 1 ] = 0.0;
+ }
+ }
+};
+
+} // namespace r8b
+
+#endif // R8B_CDSPBLOCKCONVOLVER_INCLUDED
diff --git a/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPFIRFilter.h b/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPFIRFilter.h
new file mode 100644
index 00000000..e2a23768
--- /dev/null
+++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPFIRFilter.h
@@ -0,0 +1,719 @@
+//$ nobt
+//$ nocpp
+
+/**
+ * @file CDSPFIRFilter.h
+ *
+ * @brief FIR filter generator and filter cache classes.
+ *
+ * This file includes low-pass FIR filter generator and filter cache.
+ *
+ * r8brain-free-src Copyright (c) 2013-2022 Aleksey Vaneev
+ * See the "LICENSE" file for license.
+ */
+
+#ifndef R8B_CDSPFIRFILTER_INCLUDED
+#define R8B_CDSPFIRFILTER_INCLUDED
+
+#include "CDSPSincFilterGen.h"
+#include "CDSPRealFFT.h"
+
+namespace r8b {
+
+/**
+ * Enumeration of filter's phase responses.
+ */
+
+enum EDSPFilterPhaseResponse
+{
+ fprLinearPhase = 0, ///< Linear-phase response. Features a linear-phase
+ ///< high-latency response, with the latency expressed as integer
+ ///< value.
+ ///<
+ fprMinPhase ///< Minimum-phase response. Features a minimal latency
+ ///< response, but the response's phase is non-linear. The latency is
+ ///< usually expressed as non-integer value, and usually is small, but
+ ///< is never equal to zero. The minimum-phase filter is transformed
+ ///< from the linear-phase filter. The transformation has precision
+ ///< limits which may skew both the -3 dB point and attenuation of the
+ ///< filter being transformed: as it was measured, the skew happens
+ ///< purely at random, and in most cases it is within tolerable range.
+ ///< In a small (1%) random subset of cases the skew is bigger and
+ ///< cannot be predicted. Minimum-phase transform requires 64-bit
+ ///< floating point FFT precision, results with 32-bit float FFT are
+ ///< far from optimal.
+ ///<
+};
+
+/**
+ * @brief Calculation and storage class for FIR filters.
+ *
+ * Class that implements calculation and storing of a FIR filter (currently
+ * contains low-pass filter calculation routine designed for sample rate
+ * conversion). Objects of this class cannot be created directly, but can be
+ * obtained via the CDSPFilterCache::getLPFilter() static function.
+ */
+
+class CDSPFIRFilter : public R8B_BASECLASS
+{
+ R8BNOCTOR( CDSPFIRFilter );
+
+ friend class CDSPFIRFilterCache;
+
+public:
+ ~CDSPFIRFilter()
+ {
+ R8BASSERT( RefCount == 0 );
+
+ delete Next;
+ }
+
+ /**
+ * @return The minimal allowed low-pass filter's transition band, in
+ * percent.
+ */
+
+ static double getLPMinTransBand()
+ {
+ return( 0.5 );
+ }
+
+ /**
+ * @return The maximal allowed low-pass filter's transition band, in
+ * percent.
+ */
+
+ static double getLPMaxTransBand()
+ {
+ return( 45.0 );
+ }
+
+ /**
+ * @return The minimal allowed low-pass filter's stop-band attenuation, in
+ * decibel.
+ */
+
+ static double getLPMinAtten()
+ {
+ return( 49.0 );
+ }
+
+ /**
+ * @return The maximal allowed low-pass filter's stop-band attenuation, in
+ * decibel.
+ */
+
+ static double getLPMaxAtten()
+ {
+ return( 218.0 );
+ }
+
+ /**
+ * @return "True" if kernel block of *this filter has zero-phase response.
+ */
+
+ bool isZeroPhase() const
+ {
+ return( IsZeroPhase );
+ }
+
+ /**
+ * @return Filter's latency, in samples (integer part).
+ */
+
+ int getLatency() const
+ {
+ return( Latency );
+ }
+
+ /**
+ * @return Filter's latency, in samples (fractional part). Always zero for
+ * linear-phase filters.
+ */
+
+ double getLatencyFrac() const
+ {
+ return( LatencyFrac );
+ }
+
+ /**
+ * @return Filter kernel length, in samples. Not to be confused with the
+ * block length.
+ */
+
+ int getKernelLen() const
+ {
+ return( KernelLen );
+ }
+
+ /**
+ * @return Filter's block length, espressed as Nth power of 2. The actual
+ * length is twice as large due to zero-padding.
+ */
+
+ int getBlockLenBits() const
+ {
+ return( BlockLenBits );
+ }
+
+ /**
+ * @return Filter's kernel block, in complex-numbered form obtained via
+ * the CDSPRealFFT::forward() function call, zero-padded, gain-adjusted
+ * with the CDSPRealFFT::getInvMulConst() * ReqGain constant, immediately
+ * suitable for convolution. Kernel block may have "zero-phase" response,
+ * depending on the isZeroPhase() function's result.
+ */
+
+ const double* getKernelBlock() const
+ {
+ return( KernelBlock );
+ }
+
+ /**
+ * This function should be called when the filter obtained via the
+ * filter cache is no longer needed.
+ */
+
+ void unref();
+
+private:
+ double ReqNormFreq; ///< Required normalized frequency, 0 to 1 inclusive.
+ ///<
+ double ReqTransBand; ///< Required transition band in percent, as passed
+ ///< by the user.
+ ///<
+ double ReqAtten; ///< Required stop-band attenuation in decibel, as passed
+ ///< by the user (positive value).
+ ///<
+ EDSPFilterPhaseResponse ReqPhase; ///< Required filter's phase response.
+ ///<
+ double ReqGain; ///< Required overall filter's gain.
+ ///<
+ CDSPFIRFilter* Next; ///< Next FIR filter in cache's list.
+ ///<
+ int RefCount; ///< The number of references made to *this FIR filter.
+ ///<
+ bool IsZeroPhase; ///< "True" if kernel block of *this filter has
+ ///< zero-phase response.
+ ///<
+ int Latency; ///< Filter's latency in samples (integer part).
+ ///<
+ double LatencyFrac; ///< Filter's latency in samples (fractional part).
+ ///<
+ int KernelLen; ///< Filter kernel length, in samples.
+ ///<
+ int BlockLenBits; ///< Block length used to store *this FIR filter,
+ ///< expressed as Nth power of 2. This value is used directly by the
+ ///< convolver.
+ ///<
+ CFixedBuffer< double > KernelBlock; ///< FIR filter buffer, capacity
+ ///< equals to 1 << ( BlockLenBits + 1 ). Second part of the buffer
+ ///< contains zero-padding to allow alias-free convolution.
+ ///< Address-aligned.
+ ///<
+
+ CDSPFIRFilter()
+ : RefCount( 1 )
+ {
+ }
+
+ /**
+ * Function builds filter kernel based on the "Req" parameters.
+ *
+ * @param ExtAttenCorrs External attentuation correction table, for
+ * internal use.
+ */
+
+ void buildLPFilter( const double* const ExtAttenCorrs )
+ {
+ const double tb = ReqTransBand * 0.01;
+ double pwr;
+ double fo1;
+ double hl;
+ double atten = -ReqAtten;
+
+ if( tb >= 0.25 )
+ {
+ if( ReqAtten >= 117.0 )
+ {
+ atten -= 1.60;
+ }
+ else
+ if( ReqAtten >= 60.0 )
+ {
+ atten -= 1.91;
+ }
+ else
+ {
+ atten -= 2.25;
+ }
+ }
+ else
+ if( tb >= 0.10 )
+ {
+ if( ReqAtten >= 117.0 )
+ {
+ atten -= 0.69;
+ }
+ else
+ if( ReqAtten >= 60.0 )
+ {
+ atten -= 0.73;
+ }
+ else
+ {
+ atten -= 1.13;
+ }
+ }
+ else
+ {
+ if( ReqAtten >= 117.0 )
+ {
+ atten -= 0.21;
+ }
+ else
+ if( ReqAtten >= 60.0 )
+ {
+ atten -= 0.25;
+ }
+ else
+ {
+ atten -= 0.36;
+ }
+ }
+
+ static const int AttenCorrCount = 264;
+ static const double AttenCorrMin = 49.0;
+ static const double AttenCorrDiff = 176.25;
+ int AttenCorr = (int) floor(( -atten - AttenCorrMin ) *
+ AttenCorrCount / AttenCorrDiff + 0.5 );
+
+ AttenCorr = min( AttenCorrCount, max( 0, AttenCorr ));
+
+ if( ExtAttenCorrs != NULL )
+ {
+ atten -= ExtAttenCorrs[ AttenCorr ];
+ }
+ else
+ if( tb >= 0.25 )
+ {
+ static const double AttenCorrScale = 101.0;
+ static const signed char AttenCorrs[] = {
+ -127, -127, -125, -125, -122, -119, -115, -110, -104, -97,
+ -91, -82, -75, -24, -16, -6, 4, 14, 24, 29, 30, 32, 37, 44,
+ 51, 57, 63, 67, 65, 50, 53, 56, 58, 60, 63, 64, 66, 68, 74,
+ 77, 78, 78, 78, 79, 79, 60, 60, 60, 61, 59, 52, 47, 41, 36,
+ 30, 24, 17, 9, 0, -8, -10, -11, -14, -13, -18, -25, -31, -38,
+ -44, -50, -57, -63, -68, -74, -81, -89, -96, -101, -104, -107,
+ -109, -110, -86, -84, -85, -82, -80, -77, -73, -67, -62, -55,
+ -48, -42, -35, -30, -20, -11, -2, 5, 6, 6, 7, 11, 16, 21, 26,
+ 34, 41, 46, 49, 52, 55, 56, 48, 49, 51, 51, 52, 52, 52, 52,
+ 52, 51, 51, 50, 47, 47, 50, 48, 46, 42, 38, 35, 31, 27, 24,
+ 20, 16, 12, 11, 12, 10, 8, 4, -1, -6, -11, -16, -19, -17, -21,
+ -24, -27, -32, -34, -37, -38, -40, -41, -40, -40, -42, -41,
+ -44, -45, -43, -41, -34, -31, -28, -24, -21, -18, -14, -10,
+ -5, -1, 2, 5, 8, 7, 4, 3, 2, 2, 4, 6, 8, 9, 9, 10, 10, 10, 10,
+ 9, 8, 9, 11, 14, 13, 12, 11, 10, 8, 7, 6, 5, 3, 2, 2, -1, -1,
+ -3, -3, -4, -4, -5, -4, -6, -7, -9, -5, -1, -1, 0, 1, 0, -2,
+ -3, -4, -5, -5, -8, -13, -13, -13, -12, -13, -12, -11, -11,
+ -9, -8, -7, -5, -3, -1, 2, 4, 6, 9, 10, 11, 14, 18, 21, 24,
+ 27, 30, 34, 37, 37, 39, 40 };
+
+ atten -= AttenCorrs[ AttenCorr ] / AttenCorrScale;
+ }
+ else
+ if( tb >= 0.10 )
+ {
+ static const double AttenCorrScale = 210.0;
+ static const signed char AttenCorrs[] = {
+ -113, -118, -122, -125, -126, -97, -95, -92, -92, -89, -82,
+ -75, -69, -48, -42, -36, -30, -22, -14, -5, -2, 1, 6, 13, 22,
+ 28, 35, 41, 48, 55, 56, 56, 61, 65, 71, 77, 81, 83, 85, 85,
+ 74, 74, 73, 72, 71, 70, 68, 64, 59, 56, 49, 52, 46, 42, 36,
+ 32, 26, 20, 13, 7, -2, -6, -10, -15, -20, -27, -33, -38, -44,
+ -43, -48, -53, -57, -63, -69, -73, -75, -79, -81, -74, -76,
+ -77, -77, -78, -81, -80, -80, -78, -76, -65, -62, -59, -56,
+ -51, -48, -44, -38, -33, -25, -19, -13, -5, -1, 2, 7, 13, 17,
+ 21, 25, 30, 35, 40, 45, 50, 53, 56, 57, 55, 58, 59, 62, 64,
+ 67, 67, 68, 68, 62, 61, 61, 59, 59, 57, 57, 55, 52, 48, 42,
+ 38, 35, 31, 26, 20, 15, 13, 10, 7, 3, -2, -8, -13, -17, -23,
+ -28, -34, -37, -40, -41, -45, -48, -50, -53, -57, -59, -62,
+ -63, -63, -57, -57, -56, -56, -54, -54, -53, -49, -48, -41,
+ -38, -33, -31, -26, -23, -18, -12, -9, -7, -7, -3, 0, 5, 9,
+ 14, 16, 20, 22, 21, 23, 25, 27, 28, 29, 34, 33, 35, 33, 31,
+ 30, 29, 29, 26, 26, 25, 24, 20, 19, 15, 10, 8, 4, 1, -2, -6,
+ -10, -16, -19, -23, -26, -27, -30, -34, -39, -43, -47, -51,
+ -52, -54, -56, -58, -59, -62, -63, -66, -65, -65, -64, -59,
+ -57, -54, -52, -48, -44, -42, -37, -32, -22, -17, -10, -3, 5,
+ 13, 22, 30, 40, 50, 60, 72 };
+
+ atten -= AttenCorrs[ AttenCorr ] / AttenCorrScale;
+ }
+ else
+ {
+ static const double AttenCorrScale = 196.0;
+ static const signed char AttenCorrs[] = {
+ -15, -17, -20, -20, -20, -21, -20, -16, -17, -18, -17, -13,
+ -12, -11, -9, -7, -5, -4, -1, 1, 3, 4, 5, 6, 7, 9, 9, 10, 10,
+ 10, 11, 11, 11, 12, 12, 12, 10, 11, 10, 10, 8, 10, 11, 10, 11,
+ 11, 13, 14, 15, 19, 27, 26, 23, 18, 14, 8, 4, -2, -6, -12,
+ -17, -23, -28, -33, -37, -42, -46, -49, -53, -57, -60, -61,
+ -64, -65, -67, -66, -66, -66, -65, -64, -61, -59, -56, -52,
+ -48, -42, -38, -31, -27, -19, -13, -7, -1, 8, 14, 22, 29, 37,
+ 45, 52, 59, 66, 73, 80, 86, 91, 96, 100, 104, 108, 111, 114,
+ 115, 117, 118, 120, 120, 118, 117, 114, 113, 111, 107, 103,
+ 99, 95, 89, 84, 78, 72, 66, 60, 52, 44, 37, 30, 21, 14, 6, -3,
+ -11, -18, -26, -34, -43, -51, -58, -65, -73, -78, -85, -90,
+ -97, -102, -107, -113, -115, -118, -121, -125, -125, -126,
+ -126, -126, -125, -124, -121, -119, -115, -111, -109, -101,
+ -102, -95, -88, -81, -73, -67, -63, -54, -47, -40, -33, -26,
+ -18, -11, -5, 2, 8, 14, 19, 25, 31, 36, 37, 43, 47, 49, 51,
+ 52, 57, 57, 56, 57, 58, 58, 58, 57, 56, 52, 52, 50, 48, 44,
+ 41, 39, 37, 33, 31, 26, 24, 21, 18, 14, 11, 8, 4, 2, -2, -5,
+ -7, -9, -11, -13, -15, -16, -18, -19, -20, -23, -24, -24, -25,
+ -27, -26, -27, -29, -30, -31, -32, -35, -36, -39, -40, -44,
+ -46, -51, -54, -59, -63, -69, -76, -83, -91, -98 };
+
+ atten -= AttenCorrs[ AttenCorr ] / AttenCorrScale;
+ }
+
+ pwr = 7.43932822146293e-8 * sqr( atten ) + 0.000102747434588003 *
+ cos( 0.00785021930010397 * atten ) * cos( 0.633854318781239 +
+ 0.103208573657699 * atten ) - 0.00798132247867036 -
+ 0.000903555213543865 * atten - 0.0969365532127236 * exp(
+ 0.0779275237937911 * atten ) - 1.37304948662012e-5 * atten * cos(
+ 0.00785021930010397 * atten );
+
+ if( pwr <= 0.067665322581 )
+ {
+ if( tb >= 0.25 )
+ {
+ hl = 2.6778150875894 / tb + 300.547590563091 * atan( atan(
+ 2.68959772209918 * pwr )) / ( 5.5099277187035 * tb - tb *
+ tanh( cos( asinh( atten ))));
+
+ fo1 = 0.987205355829873 * tb + 1.00011788929851 * atan2(
+ -0.321432067051302 - 6.19131357321578 * sqrt( pwr ),
+ hl + -1.14861472207245 / ( hl - 14.1821147585957 ) + pow(
+ 0.9521145021664, pow( atan2( 1.12018764830637, tb ),
+ 2.10988901686912 * hl - 20.9691278378345 )));
+ }
+ else
+ if( tb >= 0.10 )
+ {
+ hl = ( 1.56688617018066 + 142.064321294568 * pwr +
+ 0.00419441117131136 * cos( 243.633511747297 * pwr ) -
+ 0.022953443903576 * atten - 0.026629568860284 * cos(
+ 127.715550622571 * pwr )) / tb;
+
+ fo1 = 0.982299356642411 * tb + 0.999441744774215 * asinh((
+ -0.361783054039583 - 5.80540593623676 * sqrt( pwr )) /
+ hl );
+ }
+ else
+ {
+ hl = ( 2.45739657014937 + 269.183679500541 * pwr * cos(
+ 5.73225668178813 + atan2( cosh( 0.988861169868941 -
+ 17.2201556280744 * pwr ), 1.08340138240431 * pwr ))) / tb;
+
+ fo1 = 2.291956939 * tb + 0.01942450693 * sqr( tb ) * hl -
+ 4.67538973161837 * pwr * tb - 1.668433124 * tb *
+ pow( pwr, pwr );
+ }
+ }
+ else
+ {
+ if( tb >= 0.25 )
+ {
+ hl = ( 1.50258368698213 + 158.556968859477 * asinh( pwr ) *
+ tanh( 57.9466246871383 * tanh( pwr )) -
+ 0.0105440479814834 * atten ) / tb;
+
+ fo1 = 0.994024401639321 * tb + ( -0.236282717577215 -
+ 6.8724924545387 * sqrt( sin( pwr ))) / hl;
+ }
+ else
+ if( tb >= 0.10 )
+ {
+ hl = ( 1.50277377248945 + 158.222625721046 * asinh( pwr ) *
+ tanh( 1.02875299001715 + 42.072277322604 * pwr ) -
+ 0.0108380943845632 * atten ) / tb;
+
+ fo1 = 0.992539376734551 * tb + ( -0.251747813037178 -
+ 6.74159892452584 * sqrt( tanh( tanh( tan( pwr ))))) / hl;
+ }
+ else
+ {
+ hl = ( 1.15990238966306 * pwr - 5.02124037125213 * sqr(
+ pwr ) - 0.158676856669827 * atten * cos( 1.1609073390614 *
+ pwr - 6.33932586197475 * pwr * sqr( pwr ))) / tb;
+
+ fo1 = 0.867344453126885 * tb + 0.052693817907757 * tb * log(
+ pwr ) + 0.0895511178735932 * tb * atan( 59.7538527741309 *
+ pwr ) - 0.0745653568081453 * pwr * tb;
+ }
+ }
+
+ double WinParams[ 2 ];
+ WinParams[ 0 ] = 125.0;
+ WinParams[ 1 ] = pwr;
+
+ CDSPSincFilterGen sinc;
+ sinc.Len2 = 0.25 * hl / ReqNormFreq;
+ sinc.Freq1 = 0.0;
+ sinc.Freq2 = R8B_PI * ( 1.0 - fo1 ) * ReqNormFreq;
+ sinc.initBand( CDSPSincFilterGen :: wftKaiser, WinParams, true );
+
+ KernelLen = sinc.KernelLen;
+ BlockLenBits = getBitOccupancy( KernelLen - 1 ) + R8B_EXTFFT;
+ const int BlockLen = 1 << BlockLenBits;
+
+ KernelBlock.alloc( BlockLen * 2 );
+ sinc.generateBand( &KernelBlock[ 0 ],
+ &CDSPSincFilterGen :: calcWindowKaiser );
+
+ if( ReqPhase == fprLinearPhase )
+ {
+ IsZeroPhase = true;
+ Latency = sinc.fl2;
+ LatencyFrac = 0.0;
+ }
+ else
+ {
+ IsZeroPhase = false;
+ double DCGroupDelay;
+
+ calcMinPhaseTransform( &KernelBlock[ 0 ], KernelLen, 16, false,
+ &DCGroupDelay );
+
+ Latency = (int) DCGroupDelay;
+ LatencyFrac = DCGroupDelay - Latency;
+ }
+
+ CDSPRealFFTKeeper ffto( BlockLenBits + 1 );
+
+ if( IsZeroPhase )
+ {
+ // Calculate DC gain.
+
+ double s = 0.0;
+ int i;
+
+ for( i = 0; i < KernelLen; i++ )
+ {
+ s += KernelBlock[ i ];
+ }
+
+ s = ffto -> getInvMulConst() * ReqGain / s;
+
+ // Time-shift the filter so that zero-phase response is produced.
+ // Simultaneously multiply by "s".
+
+ for( i = 0; i <= sinc.fl2; i++ )
+ {
+ KernelBlock[ i ] = KernelBlock[ sinc.fl2 + i ] * s;
+ }
+
+ for( i = 1; i <= sinc.fl2; i++ )
+ {
+ KernelBlock[ BlockLen * 2 - i ] = KernelBlock[ i ];
+ }
+
+ memset( &KernelBlock[ sinc.fl2 + 1 ], 0,
+ ( BlockLen * 2 - KernelLen ) * sizeof( KernelBlock[ 0 ]));
+
+ ffto -> forward( KernelBlock );
+ ffto -> convertToZP( KernelBlock );
+ }
+ else
+ {
+ normalizeFIRFilter( &KernelBlock[ 0 ], KernelLen,
+ ffto -> getInvMulConst() * ReqGain );
+
+ memset( &KernelBlock[ KernelLen ], 0,
+ ( BlockLen * 2 - KernelLen ) * sizeof( KernelBlock[ 0 ]));
+
+ ffto -> forward( KernelBlock );
+ }
+
+ R8BCONSOLE( "CDSPFIRFilter: flt_len=%i latency=%i nfreq=%.4f "
+ "tb=%.1f att=%.1f gain=%.3f\n", KernelLen, Latency,
+ ReqNormFreq, ReqTransBand, ReqAtten, ReqGain );
+ }
+};
+
+/**
+ * @brief FIR filter cache class.
+ *
+ * Class that implements cache for calculated FIR filters. The required FIR
+ * filter should be obtained via the getLPFilter() static function.
+ */
+
+class CDSPFIRFilterCache : public R8B_BASECLASS
+{
+ R8BNOCTOR( CDSPFIRFilterCache );
+
+ friend class CDSPFIRFilter;
+
+public:
+ /**
+ * @return The number of filters present in the cache now. This value can
+ * be monitored for debugging "forgotten" filters.
+ */
+
+ static int getObjCount()
+ {
+ R8BSYNC( StateSync );
+
+ return( ObjCount );
+ }
+
+ /**
+ * Function calculates or returns reference to a previously calculated
+ * (cached) low-pass FIR filter. Note that the real transition band and
+ * attenuation achieved by the filter varies with the magnitude of the
+ * required attenuation, and are never 100% exact.
+ *
+ * @param ReqNormFreq Required normalized frequency, in the range 0 to 1,
+ * inclusive. This is the point after which the stop-band spans.
+ * @param ReqTransBand Required transition band, in percent of the
+ * 0 to ReqNormFreq spectral bandwidth, in the range
+ * CDSPFIRFilter::getLPMinTransBand() to
+ * CDSPFIRFilter::getLPMaxTransBand(), inclusive. The transition band
+ * specifies the part of the spectrum between the -3 dB and ReqNormFreq
+ * points. The real resulting -3 dB point varies in the range from -3.00
+ * to -3.05 dB, but is generally very close to -3 dB.
+ * @param ReqAtten Required stop-band attenuation in decibel, in the range
+ * CDSPFIRFilter::getLPMinAtten() to CDSPFIRFilter::getLPMaxAtten(),
+ * inclusive. Note that the actual stop-band attenuation of the resulting
+ * filter may be 0.40-4.46 dB higher.
+ * @param ReqPhase Required filter's phase response.
+ * @param ReqGain Required overall filter's gain (1.0 for unity gain).
+ * @param AttenCorrs Attentuation correction table, to pass to the filter
+ * generation function. For internal use.
+ * @return A reference to a new or a previously calculated low-pass FIR
+ * filter object with the required characteristics. A reference count is
+ * incremented in the returned filter object which should be released
+ * after use via the CDSPFIRFilter::unref() function.
+ */
+
+ static CDSPFIRFilter& getLPFilter( const double ReqNormFreq,
+ const double ReqTransBand, const double ReqAtten,
+ const EDSPFilterPhaseResponse ReqPhase, const double ReqGain,
+ const double* const AttenCorrs = NULL )
+ {
+ R8BASSERT( ReqNormFreq > 0.0 && ReqNormFreq <= 1.0 );
+ R8BASSERT( ReqTransBand >= CDSPFIRFilter :: getLPMinTransBand() );
+ R8BASSERT( ReqTransBand <= CDSPFIRFilter :: getLPMaxTransBand() );
+ R8BASSERT( ReqAtten >= CDSPFIRFilter :: getLPMinAtten() );
+ R8BASSERT( ReqAtten <= CDSPFIRFilter :: getLPMaxAtten() );
+ R8BASSERT( ReqGain > 0.0 );
+
+ R8BSYNC( StateSync );
+
+ CDSPFIRFilter* PrevObj = NULL;
+ CDSPFIRFilter* CurObj = Objects;
+
+ while( CurObj != NULL )
+ {
+ if( CurObj -> ReqNormFreq == ReqNormFreq &&
+ CurObj -> ReqTransBand == ReqTransBand &&
+ CurObj -> ReqAtten == ReqAtten &&
+ CurObj -> ReqPhase == ReqPhase &&
+ CurObj -> ReqGain == ReqGain )
+ {
+ break;
+ }
+
+ if( CurObj -> Next == NULL && ObjCount >= R8B_FILTER_CACHE_MAX )
+ {
+ if( CurObj -> RefCount == 0 )
+ {
+ // Delete the last filter which is not used.
+
+ PrevObj -> Next = NULL;
+ delete CurObj;
+ ObjCount--;
+ }
+ else
+ {
+ // Move the last filter to the top of the list since it
+ // seems to be in use for a long time.
+
+ PrevObj -> Next = NULL;
+ CurObj -> Next = Objects.unkeep();
+ Objects = CurObj;
+ }
+
+ CurObj = NULL;
+ break;
+ }
+
+ PrevObj = CurObj;
+ CurObj = CurObj -> Next;
+ }
+
+ if( CurObj != NULL )
+ {
+ CurObj -> RefCount++;
+
+ if( PrevObj == NULL )
+ {
+ return( *CurObj );
+ }
+
+ // Remove the filter from the list temporarily.
+
+ PrevObj -> Next = CurObj -> Next;
+ }
+ else
+ {
+ // Create a new filter object (with RefCount == 1) and build the
+ // filter kernel.
+
+ CurObj = new CDSPFIRFilter();
+ CurObj -> ReqNormFreq = ReqNormFreq;
+ CurObj -> ReqTransBand = ReqTransBand;
+ CurObj -> ReqAtten = ReqAtten;
+ CurObj -> ReqPhase = ReqPhase;
+ CurObj -> ReqGain = ReqGain;
+ ObjCount++;
+
+ CurObj -> buildLPFilter( AttenCorrs );
+ }
+
+ // Insert the filter at the start of the list.
+
+ CurObj -> Next = Objects.unkeep();
+ Objects = CurObj;
+
+ return( *CurObj );
+ }
+
+private:
+ static CSyncObject StateSync; ///< Cache state synchronizer.
+ ///<
+ static CPtrKeeper< CDSPFIRFilter* > Objects; ///< The chain of cached
+ ///< objects.
+ ///<
+ static int ObjCount; ///< The number of objects currently preset in the
+ ///< cache.
+ ///<
+};
+
+// ---------------------------------------------------------------------------
+// CDSPFIRFilter PUBLIC
+// ---------------------------------------------------------------------------
+
+inline void CDSPFIRFilter :: unref()
+{
+ R8BSYNC( CDSPFIRFilterCache :: StateSync );
+
+ RefCount--;
+}
+
+// ---------------------------------------------------------------------------
+
+} // namespace r8b
+
+#endif // R8B_CDSPFIRFILTER_INCLUDED
diff --git a/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPFracInterpolator.h b/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPFracInterpolator.h
new file mode 100644
index 00000000..9fb770c1
--- /dev/null
+++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPFracInterpolator.h
@@ -0,0 +1,1180 @@
+//$ nobt
+//$ nocpp
+
+/**
+ * @file CDSPFracInterpolator.h
+ *
+ * @brief Fractional delay interpolator and filter bank classes.
+ *
+ * This file includes fractional delay interpolator class.
+ *
+ * r8brain-free-src Copyright (c) 2013-2022 Aleksey Vaneev
+ * See the "LICENSE" file for license.
+ */
+
+#ifndef R8B_CDSPFRACINTERPOLATOR_INCLUDED
+#define R8B_CDSPFRACINTERPOLATOR_INCLUDED
+
+#include "CDSPSincFilterGen.h"
+#include "CDSPProcessor.h"
+
+namespace r8b {
+
+#if R8B_FLTTEST
+ extern int InterpFilterFracs; ///< Force this number of fractional filter
+ ///< positions. -1 - use default.
+ ///<
+#endif // R8B_FLTTEST
+
+/**
+ * @brief Sinc function-based fractional delay filter bank class.
+ *
+ * Class implements storage and initialization of a bank of sinc-based
+ * fractional delay filters, expressed as 0th, 1st, 2nd or 3rd order
+ * polynomial interpolation coefficients. The filters are windowed by the
+ * "Kaiser" power-raised window function.
+ */
+
+class CDSPFracDelayFilterBank : public R8B_BASECLASS
+{
+ R8BNOCTOR( CDSPFracDelayFilterBank );
+
+ friend class CDSPFracDelayFilterBankCache;
+
+public:
+ /**
+ * Constructor.
+ *
+ * @param aFilterFracs The number of fractional delay positions to sample,
+ * -1 - use default.
+ * @param aElementSize The size of each filter's tap, in "double" values.
+ * This parameter corresponds to the complexity of interpolation. 4 should
+ * be set for 3rd order, 3 for 2nd order, 2 for linear interpolation, 1
+ * for whole-numbered stepping.
+ * @param aInterpPoints The number of points the interpolation is based
+ * on. This value should not be confused with the ElementSize. Set to 2
+ * for linear or no interpolation.
+ * @param aReqAtten Required filter attentuation.
+ * @param aIsThird "True" if one-third filter is required.
+ */
+
+ CDSPFracDelayFilterBank( const int aFilterFracs, const int aElementSize,
+ const int aInterpPoints, const double aReqAtten, const bool aIsThird )
+ : InitFilterFracs( aFilterFracs )
+ , ElementSize( aElementSize )
+ , InterpPoints( aInterpPoints )
+ , ReqAtten( aReqAtten )
+ , IsThird( aIsThird )
+ , Next( NULL )
+ , RefCount( 1 )
+ {
+ R8BASSERT( ElementSize >= 1 && ElementSize <= 4 );
+
+ // Kaiser window function Params, for half and third-band.
+
+ const double* const Params = getWinParams( ReqAtten, IsThird,
+ FilterLen );
+
+ FilterSize = FilterLen * ElementSize;
+
+ if( InitFilterFracs == -1 )
+ {
+ FilterFracs = (int) ceil( pow( 6.4, ReqAtten / 50.0 ));
+
+ #if R8B_FLTTEST
+
+ if( InterpFilterFracs != -1 )
+ {
+ FilterFracs = InterpFilterFracs;
+ }
+
+ #endif // R8B_FLTTEST
+ }
+ else
+ {
+ FilterFracs = InitFilterFracs;
+ }
+
+ Table.alloc( FilterSize * ( FilterFracs + InterpPoints ));
+
+ CDSPSincFilterGen sinc;
+ sinc.Len2 = FilterLen / 2;
+
+ double* p = Table;
+ const int pc2 = InterpPoints / 2;
+ int i;
+
+ for( i = -pc2 + 1; i <= FilterFracs + pc2; i++ )
+ {
+ sinc.FracDelay = (double) ( FilterFracs - i ) / FilterFracs;
+ sinc.initFrac( CDSPSincFilterGen :: wftKaiser, Params, true );
+ sinc.generateFrac( p, &CDSPSincFilterGen :: calcWindowKaiser,
+ ElementSize );
+
+ normalizeFIRFilter( p, FilterLen, 1.0, ElementSize );
+ p += FilterSize;
+ }
+
+ const int TablePos2 = FilterSize;
+ const int TablePos3 = FilterSize * 2;
+ const int TablePos4 = FilterSize * 3;
+ const int TablePos5 = FilterSize * 4;
+ const int TablePos6 = FilterSize * 5;
+ const int TablePos7 = FilterSize * 6;
+ const int TablePos8 = FilterSize * 7;
+ double* const TableEnd = Table + ( FilterFracs + 1 ) * FilterSize;
+ p = Table;
+
+ if( InterpPoints == 8 )
+ {
+ if( ElementSize == 3 )
+ {
+ // Calculate 2nd order spline (polynomial) interpolation
+ // coefficients using 8 points.
+
+ while( p < TableEnd )
+ {
+ calcSpline2p8Coeffs( p, p[ 0 ], p[ TablePos2 ],
+ p[ TablePos3 ], p[ TablePos4 ], p[ TablePos5 ],
+ p[ TablePos6 ], p[ TablePos7 ], p[ TablePos8 ]);
+
+ p += ElementSize;
+ }
+
+ #if defined( R8B_SIMD_ISH )
+ shuffle2_3( Table, TableEnd );
+ #endif // SIMD
+ }
+ else
+ if( ElementSize == 4 )
+ {
+ // Calculate 3rd order spline (polynomial) interpolation
+ // coefficients using 8 points.
+
+ while( p < TableEnd )
+ {
+ calcSpline3p8Coeffs( p, p[ 0 ], p[ TablePos2 ],
+ p[ TablePos3 ], p[ TablePos4 ], p[ TablePos5 ],
+ p[ TablePos6 ], p[ TablePos7 ], p[ TablePos8 ]);
+
+ p += ElementSize;
+ }
+
+ #if defined( R8B_SIMD_ISH )
+ shuffle2_4( Table, TableEnd );
+ #endif // SIMD
+ }
+ }
+ else
+ {
+ if( ElementSize == 2 )
+ {
+ // Calculate linear interpolation coefficients.
+
+ while( p < TableEnd )
+ {
+ p[ 1 ] = p[ TablePos2 ] - p[ 0 ];
+ p += ElementSize;
+ }
+
+ #if defined( R8B_SIMD_ISH )
+ shuffle2_2( Table, TableEnd );
+ #endif // SIMD
+ }
+ }
+
+ R8BCONSOLE( "CDSPFracDelayFilterBank: fracs=%i order=%i taps=%i "
+ "att=%.1f third=%i\n", FilterFracs, ElementSize - 1, FilterLen,
+ ReqAtten, (int) IsThird );
+ }
+
+ ~CDSPFracDelayFilterBank()
+ {
+ delete Next;
+ }
+
+ /**
+ * Function "rounds" the specified attenuation to the nearest effective
+ * value.
+ *
+ * @param[in,out] att Required filter attentuation. Will be rounded to the
+ * nearest value.
+ * @param aIsThird "True" if one-third filter is required.
+ */
+
+ static void roundReqAtten( double& att, const bool aIsThird )
+ {
+ int tmp;
+ getWinParams( att, aIsThird, tmp );
+ }
+
+ /**
+ * Function returns the length of the filter. Always an even number, not
+ * less than 6.
+ */
+
+ int getFilterLen() const
+ {
+ return( FilterLen );
+ }
+
+ /**
+ * Function returns the number of fractional positions sampled by the
+ * bank.
+ */
+
+ int getFilterFracs() const
+ {
+ return( FilterFracs );
+ }
+
+ /**
+ * @param i Filter index, in the range 0 to FilterFracs, inclusive.
+ * @return Reference to the filter.
+ */
+
+ const double& operator []( const int i ) const
+ {
+ R8BASSERT( i >= 0 && i <= FilterFracs );
+
+ return( Table[ i * FilterSize ]);
+ }
+
+ /**
+ * This function should be called when the filter bank obtained via the
+ * filter bank cache is no longer needed.
+ */
+
+ void unref();
+
+private:
+ int FilterLen; ///< Filter length. Always an even number, not less than 6.
+ ///<
+ int FilterFracs; ///< Fractional position count.
+ ///<
+ int InitFilterFracs; ///< Fractional position count as supplied to the
+ ///< constructor, may equal -1.
+ ///<
+ int ElementSize; ///< Filter element size.
+ ///<
+ int InterpPoints; ///< Interpolation points to use.
+ ///<
+ double ReqAtten; ///< Filter's attentuation.
+ ///<
+ bool IsThird; ///< "True" if one-third filter is in use.
+ ///<
+ int FilterSize; ///< This constant specifies the "size" of a single filter
+ ///< in "double" elements.
+ ///<
+ CFixedBuffer< double > Table; ///< The table of fractional delay filters
+ ///< for all discrete fractional x = 0..1 sample positions, and
+ ///< interpolation coefficients.
+ ///<
+ CDSPFracDelayFilterBank* Next; ///< Next filter bank in cache's list.
+ ///<
+ int RefCount; ///< The number of references made to *this filter bank.
+ ///< Not considered for "static" filter bank objects.
+ ///<
+
+ /**
+ * Function returns windowing function parameters for the specified
+ * attenuation and filter type.
+ *
+ * @param[in,out] att Required filter attentuation. Will be rounded to the
+ * nearest value.
+ * @param aIsThird "True" if one-third filter is required.
+ * @param[out] fltlen Resulting filter length.
+ */
+
+ static const double* getWinParams( double& att, const bool aIsThird,
+ int& fltlen )
+ {
+ static const int Coeffs2Base = 8;
+ static const int Coeffs2Count = 12;
+ static const double Coeffs2[ Coeffs2Count ][ 3 ] = {
+ { 4.1308468534586913, 1.1752580009977263, 55.5446 }, // 0.0256
+ { 4.4241520324148826, 1.8004881791443044, 81.4191 }, // 0.0886
+ { 5.2615232289173663, 1.8133318236025469, 96.3392 }, // 0.0481
+ { 5.9433751227216174, 1.8730186391986436, 111.1315 }, // 0.0264
+ { 6.8308658290513815, 1.8549555110340281, 125.4653 }, // 0.0146
+ { 7.6648458290312904, 1.8565766090828464, 139.7379 }, // 0.0081
+ { 8.2038728664307605, 1.9269521820570166, 154.0532 }, // 0.0045
+ { 8.7865150946655142, 1.9775307667441668, 168.2101 }, // 0.0025
+ { 9.5945017884101773, 1.9718456992078597, 182.1076 }, // 0.0014
+ { 10.5163141145985240, 1.9504067820201083, 195.5668 }, // 0.0008
+ { 10.2382465206362470, 2.1608923446870087, 209.0610 }, // 0.0004
+ { 10.9976060250714000, 2.1536533525688935, 222.5010 }, // 0.0003
+ };
+
+ static const int Coeffs3Base = 6;
+ static const int Coeffs3Count = 10;
+ static const double Coeffs3[ Coeffs3Count ][ 3 ] = {
+ { 3.9888564562781847, 1.5869927184268915, 66.5701 }, // 0.0467
+ { 4.6986694038145007, 1.8086068597928262, 86.4715 }, // 0.0136
+ { 5.5995071329337822, 1.8930163360942349, 106.1195 }, // 0.0040
+ { 6.3627287800257228, 1.9945748322093975, 125.2307 }, // 0.0012
+ { 7.4299550711428308, 1.9893400572347544, 144.3469 }, // 0.0004
+ { 8.0667715944075642, 2.0928201458699909, 163.4099 }, // 0.0001
+ { 8.7469970226288822, 2.1640279784268355, 181.0694 }, // 0.0000
+ { 10.0823430069835230, 2.0896678025321922, 199.2880 }, // 0.0000
+ { 10.9222206090489510, 2.1221681162186004, 216.6865 }, // 0.0000
+ { 21.2017743894772010, 1.1856768080118900, 233.9188 }, // 0.0000
+ };
+
+ const double* Params;
+ int i = 0;
+
+ if( aIsThird )
+ {
+ while( i != Coeffs3Count - 1 && Coeffs3[ i ][ 2 ] < att )
+ {
+ i++;
+ }
+
+ Params = &Coeffs3[ i ][ 0 ];
+ att = Coeffs3[ i ][ 2 ];
+ fltlen = Coeffs3Base + i * 2;
+ }
+ else
+ {
+ while( i != Coeffs2Count - 1 && Coeffs2[ i ][ 2 ] < att )
+ {
+ i++;
+ }
+
+ Params = &Coeffs2[ i ][ 0 ];
+ att = Coeffs2[ i ][ 2 ];
+ fltlen = Coeffs2Base + i * 2;
+ }
+
+ return( Params );
+ }
+
+ /**
+ * Function shuffles 2 order-2 filter points for SIMD operation.
+ *
+ * @param p Filter table start pointer.
+ * @param pe Filter table end pointer.
+ */
+
+ static void shuffle2_2( double* p, double* const pe )
+ {
+ while( p != pe )
+ {
+ const double t = p[ 2 ];
+ p[ 2 ] = p[ 1 ];
+ p[ 1 ] = t;
+
+ p += 4;
+ }
+ }
+
+ /**
+ * Function shuffles 2 order-3 filter points for SIMD operation.
+ *
+ * @param p Filter table start pointer.
+ * @param pe Filter table end pointer.
+ */
+
+ static void shuffle2_3( double* p, double* const pe )
+ {
+ while( p != pe )
+ {
+ const double t1 = p[ 1 ];
+ const double t2 = p[ 2 ];
+ const double t3 = p[ 3 ];
+ const double t4 = p[ 4 ];
+ p[ 1 ] = t3;
+ p[ 2 ] = t1;
+ p[ 3 ] = t4;
+ p[ 4 ] = t2;
+
+ p += 6;
+ }
+ }
+
+ /**
+ * Function shuffles 2 order-4 filter points for SIMD operation.
+ *
+ * @param p Filter table start pointer.
+ * @param pe Filter table end pointer.
+ */
+
+ static void shuffle2_4( double* p, double* const pe )
+ {
+ while( p != pe )
+ {
+ const double t1 = p[ 1 ];
+ const double t2 = p[ 2 ];
+ const double t3 = p[ 3 ];
+ const double t4 = p[ 4 ];
+ const double t5 = p[ 5 ];
+ const double t6 = p[ 6 ];
+ p[ 1 ] = t4;
+ p[ 2 ] = t1;
+ p[ 3 ] = t5;
+ p[ 4 ] = t2;
+ p[ 5 ] = t6;
+ p[ 6 ] = t3;
+
+ p += 8;
+ }
+ }
+};
+
+/**
+ * @brief Fractional delay filter cache class.
+ *
+ * Class implements cache storage of fractional delay filter banks.
+ */
+
+class CDSPFracDelayFilterBankCache : public R8B_BASECLASS
+{
+ R8BNOCTOR( CDSPFracDelayFilterBankCache );
+
+ friend class CDSPFracDelayFilterBank;
+
+public:
+ /**
+ * @return The number of filters present in the cache now. This value can
+ * be monitored for debugging "forgotten" filters.
+ */
+
+ static int getObjCount()
+ {
+ R8BSYNC( StateSync );
+
+ return( ObjCount );
+ }
+
+ /**
+ * Function calculates or returns reference to a previously calculated
+ * (cached) fractional delay filter bank.
+ *
+ * @param aFilterFracs The number of fractional delay positions to sample,
+ * -1 - use default.
+ * @param aElementSize The size of each filter's tap, in "double" values.
+ * @param aInterpPoints The number of points the interpolation is based
+ * on.
+ * @param ReqAtten Required filter attentuation.
+ * @param IsThird "True" if one-third filter is required.
+ * @param IsStatic "True" if a permanent static filter should be returned
+ * that is never removed from the cache until application terminates.
+ */
+
+ static CDSPFracDelayFilterBank& getFilterBank( const int aFilterFracs,
+ const int aElementSize, const int aInterpPoints,
+ double ReqAtten, const bool IsThird, const bool IsStatic )
+ {
+ CDSPFracDelayFilterBank :: roundReqAtten( ReqAtten, IsThird );
+
+ R8BSYNC( StateSync );
+
+ if( IsStatic )
+ {
+ CDSPFracDelayFilterBank* CurObj = StaticObjects;
+
+ while( CurObj != NULL )
+ {
+ if( CurObj -> InitFilterFracs == aFilterFracs &&
+ CurObj -> ElementSize == aElementSize &&
+ CurObj -> InterpPoints == aInterpPoints &&
+ CurObj -> ReqAtten == ReqAtten &&
+ CurObj -> IsThird == IsThird )
+ {
+ return( *CurObj );
+ }
+
+ CurObj = CurObj -> Next;
+ }
+
+ // Create a new filter bank and build it.
+
+ CurObj = new CDSPFracDelayFilterBank( aFilterFracs, aElementSize,
+ aInterpPoints, ReqAtten, IsThird );
+
+ // Insert the bank at the start of the list.
+
+ CurObj -> Next = StaticObjects.unkeep();
+ StaticObjects = CurObj;
+
+ return( *CurObj );
+ }
+
+ CDSPFracDelayFilterBank* PrevObj = NULL;
+ CDSPFracDelayFilterBank* CurObj = Objects;
+
+ while( CurObj != NULL )
+ {
+ if( CurObj -> InitFilterFracs == aFilterFracs &&
+ CurObj -> ElementSize == aElementSize &&
+ CurObj -> InterpPoints == aInterpPoints &&
+ CurObj -> ReqAtten == ReqAtten &&
+ CurObj -> IsThird == IsThird )
+ {
+ break;
+ }
+
+ if( CurObj -> Next == NULL && ObjCount >= R8B_FRACBANK_CACHE_MAX )
+ {
+ if( CurObj -> RefCount == 0 )
+ {
+ // Delete the last bank which is not used.
+
+ PrevObj -> Next = NULL;
+ delete CurObj;
+ ObjCount--;
+ }
+ else
+ {
+ // Move the last bank to the top of the list since it
+ // seems to be in use for a long time.
+
+ PrevObj -> Next = NULL;
+ CurObj -> Next = Objects.unkeep();
+ Objects = CurObj;
+ }
+
+ CurObj = NULL;
+ break;
+ }
+
+ PrevObj = CurObj;
+ CurObj = CurObj -> Next;
+ }
+
+ if( CurObj != NULL )
+ {
+ CurObj -> RefCount++;
+
+ if( PrevObj == NULL )
+ {
+ return( *CurObj );
+ }
+
+ // Remove the bank from the list temporarily.
+
+ PrevObj -> Next = CurObj -> Next;
+ }
+ else
+ {
+ // Create a new filter bank (with RefCount == 1) and build it.
+
+ CurObj = new CDSPFracDelayFilterBank( aFilterFracs, aElementSize,
+ aInterpPoints, ReqAtten, IsThird );
+
+ ObjCount++;
+ }
+
+ // Insert the bank at the start of the list.
+
+ CurObj -> Next = Objects.unkeep();
+ Objects = CurObj;
+
+ return( *CurObj );
+ }
+
+private:
+ static CSyncObject StateSync; ///< Cache state synchronizer.
+ ///<
+ static CPtrKeeper< CDSPFracDelayFilterBank* > Objects; ///< The chain of
+ ///< cached objects.
+ ///<
+ static CPtrKeeper< CDSPFracDelayFilterBank* > StaticObjects; ///< The
+ ///< chain of static objects.
+ ///<
+ static int ObjCount; ///< The number of objects currently preset in the
+ ///< Objects cache.
+ ///<
+};
+
+// ---------------------------------------------------------------------------
+// CDSPFracDelayFilterBank PUBLIC
+// ---------------------------------------------------------------------------
+
+inline void CDSPFracDelayFilterBank :: unref()
+{
+ R8BSYNC( CDSPFracDelayFilterBankCache :: StateSync );
+
+ RefCount--;
+}
+
+/**
+ * @param l Number 1.
+ * @param s Number 2.
+ * @param[out] GCD Resulting GCD.
+ * @return "True" if the greatest common denominator of 2 numbers was
+ * found.
+ */
+
+inline bool findGCD( double l, double s, double& GCD )
+{
+ int it = 0;
+
+ while( it < 50 )
+ {
+ if( s <= 0.0 )
+ {
+ GCD = l;
+ return( true );
+ }
+
+ const double r = l - s;
+ l = s;
+ s = ( r < 0.0 ? -r : r );
+ it++;
+ }
+
+ return( false );
+}
+
+/**
+ * Function evaluates source and destination sample rate ratio and returns
+ * the required input and output stepping. Function returns "false" if
+ * whole stepping cannot be used to perform interpolation using these sample
+ * rates.
+ *
+ * @param SSampleRate Source sample rate.
+ * @param DSampleRate Destination sample rate.
+ * @param[out] ResInStep Resulting input step.
+ * @param[out] ResOutStep Resulting output step.
+ * @return "True" if stepping was acquired.
+ */
+
+inline bool getWholeStepping( const double SSampleRate,
+ const double DSampleRate, int& ResInStep, int& ResOutStep )
+{
+ double GCD;
+
+ if( !findGCD( SSampleRate, DSampleRate, GCD ) || GCD < 1.0 )
+ {
+ return( false );
+ }
+
+ const double InStep0 = SSampleRate / GCD;
+ ResInStep = (int) InStep0;
+ const double OutStep0 = DSampleRate / GCD;
+ ResOutStep = (int) OutStep0;
+
+ if( InStep0 != ResInStep || OutStep0 != ResOutStep )
+ {
+ return( false );
+ }
+
+ if( ResOutStep > 1500 )
+ {
+ // Do not allow large output stepping due to low cache
+ // performance of large filter banks.
+
+ return( false );
+ }
+
+ return( true );
+}
+
+/**
+ * @brief Fractional delay filter bank-based interpolator class.
+ *
+ * Class implements the fractional delay interpolator. This implementation at
+ * first puts the input signal into a ring buffer and then performs
+ * interpolation. The interpolation is performed using sinc-based fractional
+ * delay filters. These filters are contained in a bank, and for higher
+ * precision they are interpolated between adjacent filters.
+ *
+ * To increase sample timing precision, this class uses "resettable counter"
+ * approach. This gives zero overall sample timing error. With the
+ * R8B_FASTTIMING configuration option enabled, the sample timing experiences
+ * a very minor drift.
+ */
+
+class CDSPFracInterpolator : public CDSPProcessor
+{
+public:
+ /**
+ * Constructor initalizes the interpolator. It is important to call the
+ * getMaxOutLen() function afterwards to obtain the optimal output buffer
+ * length.
+ *
+ * @param aSrcSampleRate Source sample rate.
+ * @param aDstSampleRate Destination sample rate.
+ * @param ReqAtten Required filter attentuation.
+ * @param IsThird "True" if one-third filter is required.
+ * @param PrevLatency Latency, in samples (any value >=0), which was left
+ * in the output signal by a previous process. This latency will be
+ * consumed completely.
+ */
+
+ CDSPFracInterpolator( const double aSrcSampleRate,
+ const double aDstSampleRate, const double ReqAtten,
+ const bool IsThird, const double PrevLatency )
+ : SrcSampleRate( aSrcSampleRate )
+ , DstSampleRate( aDstSampleRate )
+ #if R8B_FASTTIMING
+ , FracStep( aSrcSampleRate / aDstSampleRate )
+ #endif // R8B_FASTTIMING
+ {
+ R8BASSERT( SrcSampleRate > 0.0 );
+ R8BASSERT( DstSampleRate > 0.0 );
+ R8BASSERT( PrevLatency >= 0.0 );
+ R8BASSERT( BufLenBits >= 5 );
+
+ InitFracPos = PrevLatency;
+ Latency = (int) InitFracPos;
+ InitFracPos -= Latency;
+
+ R8BASSERT( Latency >= 0 );
+
+ #if R8B_FLTTEST
+
+ IsWhole = false;
+ LatencyFrac = 0.0;
+ FilterBank = new CDSPFracDelayFilterBank( -1, 3, 8, ReqAtten,
+ IsThird );
+
+ #else // R8B_FLTTEST
+
+ IsWhole = getWholeStepping( SrcSampleRate, DstSampleRate, InStep,
+ OutStep );
+
+ if( IsWhole )
+ {
+ InitFracPosW = (int) ( InitFracPos * OutStep );
+ LatencyFrac = InitFracPos - (double) InitFracPosW / OutStep;
+ FilterBank = &CDSPFracDelayFilterBankCache :: getFilterBank(
+ OutStep, 1, 2, ReqAtten, IsThird, false );
+ }
+ else
+ {
+ LatencyFrac = 0.0;
+ FilterBank = &CDSPFracDelayFilterBankCache :: getFilterBank(
+ -1, 3, 8, ReqAtten, IsThird, true );
+ }
+
+ #endif // R8B_FLTTEST
+
+ FilterLen = FilterBank -> getFilterLen();
+ fl2 = FilterLen >> 1;
+ fll = fl2 - 1;
+ flo = fll + fl2;
+ flb = BufLen - fll;
+
+ R8BASSERT(( 1 << BufLenBits ) >= FilterLen * 3 );
+
+ static const CConvolveFn FltConvFn0[ 13 ] = {
+ &CDSPFracInterpolator :: convolve0< 6 >,
+ &CDSPFracInterpolator :: convolve0< 8 >,
+ &CDSPFracInterpolator :: convolve0< 10 >,
+ &CDSPFracInterpolator :: convolve0< 12 >,
+ &CDSPFracInterpolator :: convolve0< 14 >,
+ &CDSPFracInterpolator :: convolve0< 16 >,
+ &CDSPFracInterpolator :: convolve0< 18 >,
+ &CDSPFracInterpolator :: convolve0< 20 >,
+ &CDSPFracInterpolator :: convolve0< 22 >,
+ &CDSPFracInterpolator :: convolve0< 24 >,
+ &CDSPFracInterpolator :: convolve0< 26 >,
+ &CDSPFracInterpolator :: convolve0< 28 >,
+ &CDSPFracInterpolator :: convolve0< 30 >
+ };
+
+ convfn = ( IsWhole ? FltConvFn0[ fl2 - 3 ] :
+ &CDSPFracInterpolator :: convolve2 );
+
+ R8BCONSOLE( "CDSPFracInterpolator: src=%.2f dst=%.2f taps=%i "
+ "fracs=%i whole=%i third=%i step=%.6f\n", SrcSampleRate,
+ DstSampleRate, FilterLen, ( IsWhole ? OutStep :
+ FilterBank -> getFilterFracs() ), (int) IsWhole, (int) IsThird,
+ aSrcSampleRate / aDstSampleRate );
+
+ clear();
+ }
+
+ virtual ~CDSPFracInterpolator()
+ {
+ #if R8B_FLTTEST
+
+ delete FilterBank;
+
+ #else // R8B_FLTTEST
+
+ FilterBank -> unref();
+
+ #endif // R8B_FLTTEST
+ }
+
+ virtual int getLatency() const
+ {
+ return( 0 );
+ }
+
+ virtual double getLatencyFrac() const
+ {
+ return( LatencyFrac );
+ }
+
+ virtual int getMaxOutLen( const int MaxInLen ) const
+ {
+ R8BASSERT( MaxInLen >= 0 );
+
+ return( (int) ceil( MaxInLen * DstSampleRate / SrcSampleRate ) + 1 );
+ }
+
+ virtual void clear()
+ {
+ LatencyLeft = Latency;
+ BufLeft = 0;
+ WritePos = 0;
+ ReadPos = flb; // Set "read" position to account for filter's
+ // latency at zero fractional delay.
+
+ memset( &Buf[ ReadPos ], 0, ( BufLen - flb ) * sizeof( Buf[ 0 ]));
+
+ if( IsWhole )
+ {
+ InPosFracW = InitFracPosW;
+ }
+ else
+ {
+ InPosFrac = InitFracPos;
+
+ #if !R8B_FASTTIMING
+ InCounter = 0;
+ InPosInt = 0;
+ InPosShift = InitFracPos * DstSampleRate / SrcSampleRate;
+ #endif // !R8B_FASTTIMING
+ }
+ }
+
+ virtual int process( double* ip, int l, double*& op0 )
+ {
+ R8BASSERT( l >= 0 );
+ R8BASSERT( ip != op0 || l == 0 || SrcSampleRate > DstSampleRate );
+
+ if( LatencyLeft != 0 )
+ {
+ if( LatencyLeft >= l )
+ {
+ LatencyLeft -= l;
+ return( 0 );
+ }
+
+ l -= LatencyLeft;
+ ip += LatencyLeft;
+ LatencyLeft = 0;
+ }
+
+ double* op = op0;
+
+ while( l > 0 )
+ {
+ // Add new input samples to both halves of the ring buffer.
+
+ const int b = min( min( l, BufLen - WritePos ), flb - BufLeft );
+
+ double* const wp1 = Buf + WritePos;
+ memcpy( wp1, ip, b * sizeof( wp1[ 0 ]));
+
+ if( WritePos < flo )
+ {
+ const int c = min( b, flo - WritePos );
+ memcpy( wp1 + BufLen, wp1, c * sizeof( wp1[ 0 ]));
+ }
+
+ ip += b;
+ WritePos = ( WritePos + b ) & BufLenMask;
+ l -= b;
+ BufLeft += b;
+
+ // Produce as many output samples as possible.
+
+ op = ( *this.*convfn )( op );
+ }
+
+ #if !R8B_FASTTIMING
+
+ if( !IsWhole && InCounter > 1000 )
+ {
+ // Reset the interpolation position counter to achieve a
+ // higher sample timing precision.
+
+ InCounter = 0;
+ InPosInt = 0;
+ InPosShift = InPosFrac * DstSampleRate / SrcSampleRate;
+ }
+
+ #endif // !R8B_FASTTIMING
+
+ return( (int) ( op - op0 ));
+ }
+
+private:
+ static const int BufLenBits = 8; ///< The length of the ring buffer,
+ ///< expressed as Nth power of 2. This value can be reduced if it is
+ ///< known that only short input buffers will be passed to the
+ ///< interpolator. The minimum value of this parameter is 5, and
+ ///< 1 << BufLenBits should be at least 3 times larger than the
+ ///< FilterLen. However, this condition can be easily met if the input
+ ///< signal is suitably downsampled first before the interpolation is
+ ///< performed.
+ ///<
+ static const int BufLen = 1 << BufLenBits; ///< The length of the ring
+ ///< buffer. The actual length is twice as long to allow "beyond max
+ ///< position" positioning.
+ ///<
+ static const int BufLenMask = BufLen - 1; ///< Mask used for quick buffer
+ ///< position wrapping.
+ ///<
+ int FilterLen; ///< Filter length, in taps. Even value.
+ ///<
+ int fl2; ///< Right-side (half) filter length.
+ ///<
+ int fll; ///< Input latency.
+ ///<
+ int flo; ///< Overrun length.
+ ///<
+ int flb; ///< Initial read position and maximal buffer write length.
+ ///<
+ double Buf[ BufLen + 29 ]; ///< The ring buffer, including overrun
+ ///< protection for maximal filter length.
+ ///<
+ double SrcSampleRate; ///< Source sample rate.
+ ///<
+ double DstSampleRate; ///< Destination sample rate.
+ ///<
+ bool IsWhole; ///< "True" if whole-number stepping is in use.
+ ///<
+ int InStep; ///< Input whole-number stepping.
+ ///<
+ int OutStep; ///< Output whole-number stepping (corresponds to filter bank
+ ///< size).
+ ///<
+ double InitFracPos; ///< Initial fractional position, in samples, in the
+ ///< range [0; 1).
+ ///<
+ int InitFracPosW; ///< Initial fractional position for whole-number
+ ///< stepping.
+ ///<
+ int Latency; ///< Initial latency that should be removed from the input.
+ ///<
+ double LatencyFrac; ///< Left-over fractional latency.
+ ///<
+ int BufLeft; ///< The number of samples left in the buffer to process.
+ ///<
+ int WritePos; ///< The current buffer write position. Incremented together
+ ///< with the BufLeft variable.
+ ///<
+ int ReadPos; ///< The current buffer read position.
+ ///<
+ int LatencyLeft; ///< Input latency left to remove.
+ ///<
+ double InPosFrac; ///< Interpolation position (fractional part).
+ ///<
+ int InPosFracW; ///< Interpolation position (fractional part) for
+ ///< whole-number stepping. Corresponds to the index into the filter
+ ///< bank.
+ ///<
+ CDSPFracDelayFilterBank* FilterBank; ///< Filter bank in use, may be
+ ///< whole-number stepping filter bank or static bank.
+ ///<
+#if R8B_FASTTIMING
+ double FracStep; ///< Fractional sample timing step.
+#else // R8B_FASTTIMING
+ int InCounter; ///< Interpolation step counter.
+ ///<
+ int InPosInt; ///< Interpolation position (integer part).
+ ///<
+ double InPosShift; ///< Interpolation position fractional shift.
+ ///<
+#endif // R8B_FASTTIMING
+
+ typedef double*( CDSPFracInterpolator :: *CConvolveFn )( double* op ); ///<
+ ///< Convolution function type.
+ ///<
+ CConvolveFn convfn; ///< Convolution function in use.
+ ///<
+
+ /**
+ * Convolution function for 0th order resampling.
+ *
+ * @param[out] op Output buffer.
+ * @return Advanced "op" value.
+ * @tparam fltlen Filter length, in taps.
+ */
+
+ template< int fltlen >
+ double* convolve0( double* op )
+ {
+ while( BufLeft > fl2 )
+ {
+ const double* const ftp = &(*FilterBank)[ InPosFracW ];
+ const double* const rp = Buf + ReadPos;
+ int i;
+
+ #if defined( R8B_SSE2 ) && !defined( __INTEL_COMPILER )
+
+ __m128d s = _mm_setzero_pd();
+
+ for( i = 0; i < fltlen; i += 2 )
+ {
+ const __m128d m = _mm_mul_pd( _mm_load_pd( ftp + i ),
+ _mm_loadu_pd( rp + i ));
+
+ s = _mm_add_pd( s, m );
+ }
+
+ _mm_storel_pd( op, _mm_add_pd( s, _mm_shuffle_pd( s, s, 1 )));
+
+ #elif defined( R8B_NEON )
+
+ float64x2_t s = vdupq_n_f64( 0.0 );
+
+ for( i = 0; i < fltlen; i += 2 )
+ {
+ s = vmlaq_f64( s, vld1q_f64( ftp + i ), vld1q_f64( rp + i ));
+ }
+
+ *op = vaddvq_f64( s );
+
+ #else // SIMD
+
+ double s = 0.0;
+
+ for( i = 0; i < fltlen; i++ )
+ {
+ s += ftp[ i ] * rp[ i ];
+ }
+
+ *op = s;
+
+ #endif // SIMD
+
+ op++;
+
+ InPosFracW += InStep;
+ const int PosIncr = InPosFracW / OutStep;
+ InPosFracW -= PosIncr * OutStep;
+
+ ReadPos = ( ReadPos + PosIncr ) & BufLenMask;
+ BufLeft -= PosIncr;
+ }
+
+ return( op );
+ }
+
+ /**
+ * Convolution function for 2nd order resampling.
+ *
+ * @param[out] op Output buffer.
+ * @return Advanced "op" value.
+ */
+
+ double* convolve2( double* op )
+ {
+ const CDSPFracDelayFilterBank& fb = *FilterBank;
+ const int fltlen = FilterLen;
+
+ while( BufLeft > fl2 )
+ {
+ double x = InPosFrac * fb.getFilterFracs();
+ const int fti = (int) x; // Function table index.
+ x -= fti; // Coefficient for interpolation between
+ // adjacent fractional delay filters.
+ const double x2d = x * x;
+ const double* ftp = &fb[ fti ];
+ const double* const rp = Buf + ReadPos;
+ int i;
+
+ #if defined( R8B_SSE2 ) && defined( R8B_SIMD_ISH )
+
+ const __m128d x1 = _mm_set1_pd( x );
+ const __m128d x2 = _mm_set1_pd( x2d );
+ __m128d s = _mm_setzero_pd();
+
+ for( i = 0; i < fltlen; i += 2 )
+ {
+ const __m128d ftp2 = _mm_load_pd( ftp + 2 );
+ const __m128d xx1 = _mm_mul_pd( ftp2, x1 );
+ const __m128d ftp4 = _mm_load_pd( ftp + 4 );
+ const __m128d xx2 = _mm_mul_pd( ftp4, x2 );
+ const __m128d ftp0 = _mm_load_pd( ftp );
+ ftp += 6;
+
+ const __m128d rpi = _mm_loadu_pd( rp + i );
+ const __m128d xxs = _mm_add_pd( ftp0, _mm_add_pd( xx1, xx2 ));
+
+ s = _mm_add_pd( s, _mm_mul_pd( rpi, xxs ));
+ }
+
+ _mm_storel_pd( op, _mm_add_pd( s, _mm_shuffle_pd( s, s, 1 )));
+
+ #elif defined( R8B_NEON ) && defined( R8B_SIMD_ISH )
+
+ const float64x2_t x1 = vdupq_n_f64( x );
+ const float64x2_t x2 = vdupq_n_f64( x2d );
+ float64x2_t s = vdupq_n_f64( 0.0 );
+
+ for( i = 0; i < fltlen; i += 2 )
+ {
+ const float64x2_t ftp2 = vld1q_f64( ftp + 2 );
+ const float64x2_t xx1 = vmulq_f64( ftp2, x1 );
+ const float64x2_t ftp4 = vld1q_f64( ftp + 4 );
+ const float64x2_t xx2 = vmulq_f64( ftp4, x2 );
+ const float64x2_t ftp0 = vld1q_f64( ftp );
+ ftp += 6;
+
+ const float64x2_t rpi = vld1q_f64( rp + i );
+ const float64x2_t xxs = vaddq_f64( ftp0,
+ vaddq_f64( xx1, xx2 ));
+
+ s = vmlaq_f64( s, rpi, xxs );
+ }
+
+ *op = vaddvq_f64( s );
+
+ #else // SIMD
+
+ double s = 0.0;
+
+ for( i = 0; i < fltlen; i++ )
+ {
+ s += ( ftp[ 0 ] + ftp[ 1 ] * x + ftp[ 2 ] * x2d ) * rp[ i ];
+ ftp += 3;
+ }
+
+ *op = s;
+
+ #endif // SIMD
+
+ op++;
+
+ #if R8B_FASTTIMING
+
+ InPosFrac += FracStep;
+ const int PosIncr = (int) InPosFrac;
+ InPosFrac -= PosIncr;
+
+ #else // R8B_FASTTIMING
+
+ InCounter++;
+ const double NextInPos = ( InCounter + InPosShift ) *
+ SrcSampleRate / DstSampleRate;
+
+ const int NextInPosInt = (int) NextInPos;
+ const int PosIncr = NextInPosInt - InPosInt;
+ InPosInt = NextInPosInt;
+ InPosFrac = NextInPos - NextInPosInt;
+
+ #endif // R8B_FASTTIMING
+
+ ReadPos = ( ReadPos + PosIncr ) & BufLenMask;
+ BufLeft -= PosIncr;
+ }
+
+ return( op );
+ }
+};
+
+// ---------------------------------------------------------------------------
+
+} // namespace r8b
+
+#endif // R8B_CDSPFRACINTERPOLATOR_INCLUDED
diff --git a/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPHBDownsampler.h b/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPHBDownsampler.h
new file mode 100644
index 00000000..2a10f421
--- /dev/null
+++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPHBDownsampler.h
@@ -0,0 +1,401 @@
+//$ nobt
+//$ nocpp
+
+/**
+ * @file CDSPHBDownsampler.h
+ *
+ * @brief Half-band downsampling convolver class.
+ *
+ * This file includes half-band downsampling convolver class.
+ *
+ * r8brain-free-src Copyright (c) 2013-2022 Aleksey Vaneev
+ * See the "LICENSE" file for license.
+ */
+
+#ifndef R8B_CDSPHBDOWNSAMPLER_INCLUDED
+#define R8B_CDSPHBDOWNSAMPLER_INCLUDED
+
+#include "CDSPHBUpsampler.h"
+
+namespace r8b {
+
+/**
+ * @brief Half-band downsampler class.
+ *
+ * Class implements brute-force half-band 2X downsampling that uses small
+ * sparse symmetric FIR filters. The output has 2.0 gain.
+ */
+
+class CDSPHBDownsampler : public CDSPProcessor
+{
+public:
+ /**
+ * Constructor initalizes the half-band downsampler.
+ *
+ * @param ReqAtten Required half-band filter attentuation.
+ * @param SteepIndex Steepness index - 0=steepest. Corresponds to general
+ * downsampling ratio, e.g. at 4x downsampling 0 is used, at 8x
+ * downsampling 1 is used, etc.
+ * @param IsThird "True" if 1/3 resampling is performed.
+ * @param PrevLatency Latency, in samples (any value >=0), which was left
+ * in the output signal by a previous process. Whole-number latency will
+ * be consumed by *this object while remaining fractional latency can be
+ * obtained via the getLatencyFrac() function.
+ */
+
+ CDSPHBDownsampler( const double ReqAtten, const int SteepIndex,
+ const bool IsThird, const double PrevLatency )
+ {
+ static const CConvolveFn FltConvFn[ 14 ] = {
+ &CDSPHBDownsampler :: convolve1, &CDSPHBDownsampler :: convolve2,
+ &CDSPHBDownsampler :: convolve3, &CDSPHBDownsampler :: convolve4,
+ &CDSPHBDownsampler :: convolve5, &CDSPHBDownsampler :: convolve6,
+ &CDSPHBDownsampler :: convolve7, &CDSPHBDownsampler :: convolve8,
+ &CDSPHBDownsampler :: convolve9, &CDSPHBDownsampler :: convolve10,
+ &CDSPHBDownsampler :: convolve11, &CDSPHBDownsampler :: convolve12,
+ &CDSPHBDownsampler :: convolve13,
+ &CDSPHBDownsampler :: convolve14 };
+
+ int fltt;
+ double att;
+
+ if( IsThird )
+ {
+ CDSPHBUpsampler :: getHBFilterThird( ReqAtten, SteepIndex, fltp,
+ fltt, att );
+ }
+ else
+ {
+ CDSPHBUpsampler :: getHBFilter( ReqAtten, SteepIndex, fltp, fltt,
+ att );
+ }
+
+ convfn = FltConvFn[ fltt - 1 ];
+ fll = fltt * 2 - 1;
+ fl2 = fll;
+ flo = fll + fl2;
+ flb = BufLen - fll;
+ BufRP = Buf + fll;
+
+ LatencyFrac = PrevLatency * 0.5;
+ Latency = (int) LatencyFrac;
+ LatencyFrac -= Latency;
+
+ R8BASSERT( Latency >= 0 );
+
+ R8BCONSOLE( "CDSPHBDownsampler: taps=%i third=%i att=%.1f io=1/2\n",
+ fltt, (int) IsThird, att );
+
+ clear();
+ }
+
+ virtual int getLatency() const
+ {
+ return( 0 );
+ }
+
+ virtual double getLatencyFrac() const
+ {
+ return( LatencyFrac );
+ }
+
+ virtual int getMaxOutLen( const int MaxInLen ) const
+ {
+ R8BASSERT( MaxInLen >= 0 );
+
+ return(( MaxInLen + 1 ) >> 1 );
+ }
+
+ virtual void clear()
+ {
+ LatencyLeft = Latency;
+ BufLeft = 0;
+ WritePos = 0;
+ ReadPos = flb; // Set "read" position to account for filter's latency.
+
+ memset( &Buf[ ReadPos ], 0, ( BufLen - flb ) * sizeof( Buf[ 0 ]));
+ }
+
+ virtual int process( double* ip, int l, double*& op0 )
+ {
+ R8BASSERT( l >= 0 );
+
+ double* op = op0;
+
+ while( l > 0 )
+ {
+ // Add new input samples to both halves of the ring buffer.
+
+ const int b = min( min( l, BufLen - WritePos ), flb - BufLeft );
+
+ double* const wp1 = Buf + WritePos;
+ memcpy( wp1, ip, b * sizeof( wp1[ 0 ]));
+
+ if( WritePos < flo )
+ {
+ const int c = min( b, flo - WritePos );
+ memcpy( wp1 + BufLen, wp1, c * sizeof( wp1[ 0 ]));
+ }
+
+ ip += b;
+ WritePos = ( WritePos + b ) & BufLenMask;
+ l -= b;
+ BufLeft += b;
+
+ // Produce output.
+
+ if( BufLeft > fl2 )
+ {
+ const int c = ( BufLeft - fl2 + 1 ) >> 1;
+
+ double* const opend = op + c;
+ ( *convfn )( op, opend, fltp, BufRP, ReadPos );
+
+ op = opend;
+ const int c2 = c + c;
+ ReadPos = ( ReadPos + c2 ) & BufLenMask;
+ BufLeft -= c2;
+ }
+ }
+
+ int ol = (int) ( op - op0 );
+
+ if( LatencyLeft != 0 )
+ {
+ if( LatencyLeft >= ol )
+ {
+ LatencyLeft -= ol;
+ return( 0 );
+ }
+
+ ol -= LatencyLeft;
+ op0 += LatencyLeft;
+ LatencyLeft = 0;
+ }
+
+ return( ol );
+ }
+
+private:
+ static const int BufLenBits = 10; ///< The length of the ring buffer,
+ ///< expressed as Nth power of 2. This value can be reduced if it is
+ ///< known that only short input buffers will be passed to the
+ ///< interpolator. The minimum value of this parameter is 5, and
+ ///< 1 << BufLenBits should be at least 3 times larger than the
+ ///< FilterLen.
+ ///<
+ static const int BufLen = 1 << BufLenBits; ///< The length of the ring
+ ///< buffer. The actual length is twice as long to allow "beyond max
+ ///< position" positioning.
+ ///<
+ static const int BufLenMask = BufLen - 1; ///< Mask used for quick buffer
+ ///< position wrapping.
+ ///<
+ double Buf[ BufLen + 54 ]; ///< The ring buffer, including overrun
+ ///< protection for the largest filter.
+ ///<
+ const double* fltp; ///< Half-band filter taps.
+ ///<
+ int fll; ///< Input latency.
+ ///<
+ int fl2; ///< Right-side filter length.
+ ///<
+ int flo; ///< Overrun length.
+ ///<
+ int flb; ///< Initial read position and maximal buffer write length.
+ ///<
+ const double* BufRP; ///< Offseted Buf pointer at ReadPos=0.
+ ///<
+ int Latency; ///< Initial latency that should be removed from the output.
+ ///<
+ double LatencyFrac; ///< Fractional latency left on the output.
+ ///<
+ int BufLeft; ///< The number of samples left in the buffer to process.
+ ///< When this value is below FilterLenD2Plus1, the interpolation
+ ///< cycle ends.
+ ///<
+ int WritePos; ///< The current buffer write position. Incremented together
+ ///< with the BufLeft variable.
+ ///<
+ int ReadPos; ///< The current buffer read position.
+ ///<
+ int LatencyLeft; ///< Latency left to remove.
+ ///<
+ typedef void( *CConvolveFn )( double* op, double* const opend,
+ const double* const flt, const double* const rp0, int rpos ); ///<
+ ///< Convolution function type.
+ ///<
+ CConvolveFn convfn; ///< Convolution function in use.
+ ///<
+
+#define R8BHBC1( fn ) \
+ static void fn( double* op, double* const opend, const double* const flt, \
+ const double* const rp0, int rpos ) \
+ { \
+ while( op != opend ) \
+ { \
+ const double* const rp = rp0 + rpos; \
+ *op = rp[ 0 ] +
+
+#define R8BHBC2 \
+ rpos = ( rpos + 2 ) & BufLenMask; \
+ op++; \
+ } \
+ }
+
+ R8BHBC1( convolve1 )
+ flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]);
+ R8BHBC2
+
+ R8BHBC1( convolve2 )
+ flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) +
+ flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]);
+ R8BHBC2
+
+ R8BHBC1( convolve3 )
+ flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) +
+ flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) +
+ flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]);
+ R8BHBC2
+
+ R8BHBC1( convolve4 )
+ flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) +
+ flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) +
+ flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]) +
+ flt[ 3 ] * ( rp[ 7 ] + rp[ -7 ]);
+ R8BHBC2
+
+ R8BHBC1( convolve5 )
+ flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) +
+ flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) +
+ flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]) +
+ flt[ 3 ] * ( rp[ 7 ] + rp[ -7 ]) +
+ flt[ 4 ] * ( rp[ 9 ] + rp[ -9 ]);
+ R8BHBC2
+
+ R8BHBC1( convolve6 )
+ flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) +
+ flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) +
+ flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]) +
+ flt[ 3 ] * ( rp[ 7 ] + rp[ -7 ]) +
+ flt[ 4 ] * ( rp[ 9 ] + rp[ -9 ]) +
+ flt[ 5 ] * ( rp[ 11 ] + rp[ -11 ]);
+ R8BHBC2
+
+ R8BHBC1( convolve7 )
+ flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) +
+ flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) +
+ flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]) +
+ flt[ 3 ] * ( rp[ 7 ] + rp[ -7 ]) +
+ flt[ 4 ] * ( rp[ 9 ] + rp[ -9 ]) +
+ flt[ 5 ] * ( rp[ 11 ] + rp[ -11 ]) +
+ flt[ 6 ] * ( rp[ 13 ] + rp[ -13 ]);
+ R8BHBC2
+
+ R8BHBC1( convolve8 )
+ flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) +
+ flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) +
+ flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]) +
+ flt[ 3 ] * ( rp[ 7 ] + rp[ -7 ]) +
+ flt[ 4 ] * ( rp[ 9 ] + rp[ -9 ]) +
+ flt[ 5 ] * ( rp[ 11 ] + rp[ -11 ]) +
+ flt[ 6 ] * ( rp[ 13 ] + rp[ -13 ]) +
+ flt[ 7 ] * ( rp[ 15 ] + rp[ -15 ]);
+ R8BHBC2
+
+ R8BHBC1( convolve9 )
+ flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) +
+ flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) +
+ flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]) +
+ flt[ 3 ] * ( rp[ 7 ] + rp[ -7 ]) +
+ flt[ 4 ] * ( rp[ 9 ] + rp[ -9 ]) +
+ flt[ 5 ] * ( rp[ 11 ] + rp[ -11 ]) +
+ flt[ 6 ] * ( rp[ 13 ] + rp[ -13 ]) +
+ flt[ 7 ] * ( rp[ 15 ] + rp[ -15 ]) +
+ flt[ 8 ] * ( rp[ 17 ] + rp[ -17 ]);
+ R8BHBC2
+
+ R8BHBC1( convolve10 )
+ flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) +
+ flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) +
+ flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]) +
+ flt[ 3 ] * ( rp[ 7 ] + rp[ -7 ]) +
+ flt[ 4 ] * ( rp[ 9 ] + rp[ -9 ]) +
+ flt[ 5 ] * ( rp[ 11 ] + rp[ -11 ]) +
+ flt[ 6 ] * ( rp[ 13 ] + rp[ -13 ]) +
+ flt[ 7 ] * ( rp[ 15 ] + rp[ -15 ]) +
+ flt[ 8 ] * ( rp[ 17 ] + rp[ -17 ]) +
+ flt[ 9 ] * ( rp[ 19 ] + rp[ -19 ]);
+ R8BHBC2
+
+ R8BHBC1( convolve11 )
+ flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) +
+ flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) +
+ flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]) +
+ flt[ 3 ] * ( rp[ 7 ] + rp[ -7 ]) +
+ flt[ 4 ] * ( rp[ 9 ] + rp[ -9 ]) +
+ flt[ 5 ] * ( rp[ 11 ] + rp[ -11 ]) +
+ flt[ 6 ] * ( rp[ 13 ] + rp[ -13 ]) +
+ flt[ 7 ] * ( rp[ 15 ] + rp[ -15 ]) +
+ flt[ 8 ] * ( rp[ 17 ] + rp[ -17 ]) +
+ flt[ 9 ] * ( rp[ 19 ] + rp[ -19 ]) +
+ flt[ 10 ] * ( rp[ 21 ] + rp[ -21 ]);
+ R8BHBC2
+
+ R8BHBC1( convolve12 )
+ flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) +
+ flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) +
+ flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]) +
+ flt[ 3 ] * ( rp[ 7 ] + rp[ -7 ]) +
+ flt[ 4 ] * ( rp[ 9 ] + rp[ -9 ]) +
+ flt[ 5 ] * ( rp[ 11 ] + rp[ -11 ]) +
+ flt[ 6 ] * ( rp[ 13 ] + rp[ -13 ]) +
+ flt[ 7 ] * ( rp[ 15 ] + rp[ -15 ]) +
+ flt[ 8 ] * ( rp[ 17 ] + rp[ -17 ]) +
+ flt[ 9 ] * ( rp[ 19 ] + rp[ -19 ]) +
+ flt[ 10 ] * ( rp[ 21 ] + rp[ -21 ]) +
+ flt[ 11 ] * ( rp[ 23 ] + rp[ -23 ]);
+ R8BHBC2
+
+ R8BHBC1( convolve13 )
+ flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) +
+ flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) +
+ flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]) +
+ flt[ 3 ] * ( rp[ 7 ] + rp[ -7 ]) +
+ flt[ 4 ] * ( rp[ 9 ] + rp[ -9 ]) +
+ flt[ 5 ] * ( rp[ 11 ] + rp[ -11 ]) +
+ flt[ 6 ] * ( rp[ 13 ] + rp[ -13 ]) +
+ flt[ 7 ] * ( rp[ 15 ] + rp[ -15 ]) +
+ flt[ 8 ] * ( rp[ 17 ] + rp[ -17 ]) +
+ flt[ 9 ] * ( rp[ 19 ] + rp[ -19 ]) +
+ flt[ 10 ] * ( rp[ 21 ] + rp[ -21 ]) +
+ flt[ 11 ] * ( rp[ 23 ] + rp[ -23 ]) +
+ flt[ 12 ] * ( rp[ 25 ] + rp[ -25 ]);
+ R8BHBC2
+
+ R8BHBC1( convolve14 )
+ flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) +
+ flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) +
+ flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]) +
+ flt[ 3 ] * ( rp[ 7 ] + rp[ -7 ]) +
+ flt[ 4 ] * ( rp[ 9 ] + rp[ -9 ]) +
+ flt[ 5 ] * ( rp[ 11 ] + rp[ -11 ]) +
+ flt[ 6 ] * ( rp[ 13 ] + rp[ -13 ]) +
+ flt[ 7 ] * ( rp[ 15 ] + rp[ -15 ]) +
+ flt[ 8 ] * ( rp[ 17 ] + rp[ -17 ]) +
+ flt[ 9 ] * ( rp[ 19 ] + rp[ -19 ]) +
+ flt[ 10 ] * ( rp[ 21 ] + rp[ -21 ]) +
+ flt[ 11 ] * ( rp[ 23 ] + rp[ -23 ]) +
+ flt[ 12 ] * ( rp[ 25 ] + rp[ -25 ]) +
+ flt[ 13 ] * ( rp[ 27 ] + rp[ -27 ]);
+ R8BHBC2
+
+#undef R8BHBC1
+#undef R8BHBC2
+};
+
+// ---------------------------------------------------------------------------
+
+} // namespace r8b
+
+#endif // R8B_CDSPHBDOWNSAMPLER_INCLUDED
diff --git a/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPHBUpsampler.h b/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPHBUpsampler.h
new file mode 100644
index 00000000..e9b03c70
--- /dev/null
+++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPHBUpsampler.h
@@ -0,0 +1,804 @@
+//$ nobt
+//$ nocpp
+
+/**
+ * @file CDSPHBUpsampler.h
+ *
+ * @brief Half-band upsampling class.
+ *
+ * This file includes half-band upsampling class.
+ *
+ * r8brain-free-src Copyright (c) 2013-2022 Aleksey Vaneev
+ * See the "LICENSE" file for license.
+ */
+
+#ifndef R8B_CDSPHBUPSAMPLER_INCLUDED
+#define R8B_CDSPHBUPSAMPLER_INCLUDED
+
+#include "CDSPProcessor.h"
+
+namespace r8b {
+
+/**
+ * @brief Half-band upsampling class.
+ *
+ * Class implements brute-force half-band 2X upsampling that uses small
+ * sparse symmetric FIR filters. It is very efficient and should be used at
+ * latter upsampling steps after initial steep 2X upsampling.
+ */
+
+class CDSPHBUpsampler : public CDSPProcessor
+{
+public:
+ /**
+ * Function that provides filter data for various steepness indices and
+ * attenuations.
+ *
+ * @param ReqAtten Required half-band filter attentuation.
+ * @param SteepIndex Steepness index - 0=steepest. Corresponds to general
+ * upsampling/downsampling ratio, e.g. at 4x 0 is used, at 8x 1 is used,
+ * etc.
+ */
+
+ static void getHBFilter( const double ReqAtten, const int SteepIndex,
+ const double*& flt, int& fltt, double& att )
+ {
+ static const double HBKernel_4A[ 4 ] = { // -54.5176 dB, 4
+ 6.1729335650971517e-001, -1.5963945620743250e-001,
+ 5.5073370934086312e-002, -1.4603578989932850e-002,};
+ static const double HBKernel_5A[ 5 ] = { // -66.3075 dB, 4
+ 6.2068807424902472e-001, -1.6827573634467302e-001,
+ 6.5263016720721170e-002, -2.2483331611592005e-002,
+ 5.2917326684281110e-003,};
+ static const double HBKernel_6A[ 6 ] = { // -89.5271 dB, 4
+ 6.2187202340480707e-001, -1.7132842113816371e-001,
+ 6.9019169178765674e-002, -2.5799728312695277e-002,
+ 7.4880112525741666e-003, -1.2844465869952567e-003,};
+ static const double HBKernel_7A[ 7 ] = { // -105.2842 dB, 4
+ 6.2354494135775851e-001, -1.7571220703702045e-001,
+ 7.4529843603968457e-002, -3.0701736822442153e-002,
+ 1.0716755639039573e-002, -2.7833422930759735e-003,
+ 4.1118797093875510e-004,};
+ static const double HBKernel_8A[ 8 ] = { // -121.0063 dB, 4
+ 6.2488363107953926e-001, -1.7924942606514119e-001,
+ 7.9068155655640557e-002, -3.4907523415495731e-002,
+ 1.3710256799907897e-002, -4.3991142586987933e-003,
+ 1.0259190163889602e-003, -1.3278941979339359e-004,};
+ static const double HBKernel_9A[ 9 ] = { // -136.6982 dB, 4
+ 6.2597763804021977e-001, -1.8216414325139055e-001,
+ 8.2879104876726728e-002, -3.8563442248249404e-002,
+ 1.6471530499739394e-002, -6.0489108881335227e-003,
+ 1.7805283804140392e-003, -3.7533200112729561e-004,
+ 4.3172840558735476e-005,};
+ static const double HBKernel_10A[ 10 ] = { // -152.3572 dB, 4
+ 6.2688767582974092e-001, -1.8460766807559420e-001,
+ 8.6128943000481864e-002, -4.1774474147006607e-002,
+ 1.9014801985747346e-002, -7.6870397465866507e-003,
+ 2.6264590175341853e-003, -7.1106660285478562e-004,
+ 1.3645852036179345e-004, -1.4113888783332969e-005,};
+ static const double HBKernel_11A[ 11 ] = { // -183.7962 dB, 4
+ 6.2667167706948146e-001, -1.8407153342635879e-001,
+ 8.5529995610836046e-002, -4.1346831462361310e-002,
+ 1.8844831691322637e-002, -7.7125170365394992e-003,
+ 2.7268674860562087e-003, -7.9745028501057233e-004,
+ 1.8116344606360795e-004, -2.8569149754241848e-005,
+ 2.3667022010173616e-006,};
+ static const double HBKernel_12A[ 12 ] = { // -199.4768 dB, 4
+ 6.2747849730367999e-001, -1.8623616784506747e-001,
+ 8.8409755898467945e-002, -4.4207468821462342e-002,
+ 2.1149175945115381e-002, -9.2551508371115209e-003,
+ 3.5871562170822330e-003, -1.1923167653750219e-003,
+ 3.2627812189920129e-004, -6.9106902511490413e-005,
+ 1.0122897863125124e-005, -7.7531878906846174e-007,};
+ static const double HBKernel_13A[ 13 ] = { // -215.1364 dB, 4
+ 6.2816416252367324e-001, -1.8809076955230414e-001,
+ 9.0918539867353029e-002, -4.6765502683599310e-002,
+ 2.3287520498995663e-002, -1.0760627245014184e-002,
+ 4.4853922948425683e-003, -1.6438775426910800e-003,
+ 5.1441312354764978e-004, -1.3211725685765050e-004,
+ 2.6191319837779187e-005, -3.5802430606313093e-006,
+ 2.5491278270628601e-007,};
+ static const double HBKernel_14A[ 14 ] = { // -230.7526 dB, 4
+ 6.2875473120929948e-001, -1.8969941936903847e-001,
+ 9.3126094480960403e-002, -4.9067251179869126e-002,
+ 2.5273008851199916e-002, -1.2218646153393291e-002,
+ 5.4048942085580280e-003, -2.1409919546078581e-003,
+ 7.4250292812927973e-004, -2.1924542206832172e-004,
+ 5.3015808983125091e-005, -9.8743034923598196e-006,
+ 1.2650391141650221e-006, -8.4146674637474946e-008,};
+ static const int FltCountA = 11;
+ static const int FlttBaseA = 4;
+ static const double FltAttensA[ FltCountA ] = {
+ 54.5176, 66.3075, 89.5271, 105.2842, 121.0063, 136.6982, 152.3572, 183.7962, 199.4768, 215.1364, 230.7526, };
+ static const double* const FltPtrsA[ FltCountA ] = {
+ HBKernel_4A, HBKernel_5A, HBKernel_6A, HBKernel_7A, HBKernel_8A, HBKernel_9A, HBKernel_10A, HBKernel_11A, HBKernel_12A, HBKernel_13A, HBKernel_14A, };
+ static const double HBKernel_2B[ 2 ] = { // -56.6007 dB, 8
+ 5.7361525854329076e-001, -7.5092074924827903e-002,};
+ static const double HBKernel_3B[ 3 ] = { // -83.0295 dB, 8
+ 5.9277038608066912e-001, -1.0851340190268854e-001,
+ 1.5813570475513079e-002,};
+ static const double HBKernel_4B[ 4 ] = { // -123.4724 dB, 8
+ 6.0140277542879617e-001, -1.2564483854574138e-001,
+ 2.7446500598038322e-002, -3.2051079559057435e-003,};
+ static const double HBKernel_5B[ 5 ] = { // -152.4411 dB, 8
+ 6.0818642429088932e-001, -1.3981140187175697e-001,
+ 3.8489164054503623e-002, -7.6218861797853104e-003,
+ 7.5772358130952392e-004,};
+ static const double HBKernel_6B[ 6 ] = { // -181.2501 dB, 8
+ 6.1278392271464355e-001, -1.5000053762513338e-001,
+ 4.7575323511364960e-002, -1.2320702802243476e-002,
+ 2.1462442592348487e-003, -1.8425092381892940e-004,};
+ static const double HBKernel_7B[ 7 ] = { // -209.9472 dB, 8
+ 6.1610372263478952e-001, -1.5767891882524138e-001,
+ 5.5089691170294691e-002, -1.6895755656366061e-002,
+ 3.9416643438213977e-003, -6.0603623791604668e-004,
+ 4.5632602433393365e-005,};
+ static const double HBKernel_8B[ 8 ] = { // -238.5616 dB, 8
+ 6.1861282914465976e-001, -1.6367179451225150e-001,
+ 6.1369861342939716e-002, -2.1184466539006987e-002,
+ 5.9623357510842061e-003, -1.2483098507454090e-003,
+ 1.7099297537964702e-004, -1.1448313239478885e-005,};
+ static const int FltCountB = 7;
+ static const int FlttBaseB = 2;
+ static const double FltAttensB[ FltCountB ] = {
+ 56.6007, 83.0295, 123.4724, 152.4411, 181.2501, 209.9472, 238.5616, };
+ static const double* const FltPtrsB[ FltCountB ] = {
+ HBKernel_2B, HBKernel_3B, HBKernel_4B, HBKernel_5B, HBKernel_6B, HBKernel_7B, HBKernel_8B, };
+ static const double HBKernel_2C[ 2 ] = { // -89.0473 dB, 16
+ 5.6430278013478008e-001, -6.4338068855763375e-002,};
+ static const double HBKernel_3C[ 3 ] = { // -130.8951 dB, 16
+ 5.8706402915551448e-001, -9.9362380958670449e-002,
+ 1.2298637065869358e-002,};
+ static const double HBKernel_4C[ 4 ] = { // -172.3192 dB, 16
+ 5.9896586134984675e-001, -1.2111680603434927e-001,
+ 2.4763118076458895e-002, -2.6121758132212989e-003,};
+ static const double HBKernel_5C[ 5 ] = { // -213.4984 dB, 16
+ 6.0626808285230716e-001, -1.3588224032740795e-001,
+ 3.5544305238309003e-002, -6.5127022377289654e-003,
+ 5.8255449565950768e-004,};
+ static const double HBKernel_6C[ 6 ] = { // -254.5186 dB, 16
+ 6.1120171263351242e-001, -1.4654486853757870e-001,
+ 4.4582959299131253e-002, -1.0840543858123995e-002,
+ 1.7343706485509962e-003, -1.3363018567985596e-004,};
+ static const int FltCountC = 5;
+ static const int FlttBaseC = 2;
+ static const double FltAttensC[ FltCountC ] = {
+ 89.0473, 130.8951, 172.3192, 213.4984, 254.5186, };
+ static const double* const FltPtrsC[ FltCountC ] = {
+ HBKernel_2C, HBKernel_3C, HBKernel_4C, HBKernel_5C, HBKernel_6C, };
+ static const double HBKernel_1D[ 1 ] = { // -54.4754 dB, 32
+ 5.0188900022775451e-001,};
+ static const double HBKernel_2D[ 2 ] = { // -113.2139 dB, 32
+ 5.6295152180538044e-001, -6.2953706070191726e-002,};
+ static const double HBKernel_3D[ 3 ] = { // -167.1447 dB, 32
+ 5.8621968728755036e-001, -9.8080551656524531e-002,
+ 1.1860868761997080e-002,};
+ static const double HBKernel_4D[ 4 ] = { // -220.6519 dB, 32
+ 5.9835028657163591e-001, -1.1999986086623511e-001,
+ 2.4132530854004228e-002, -2.4829565686819706e-003,};
+ static const int FltCountD = 4;
+ static const int FlttBaseD = 1;
+ static const double FltAttensD[ FltCountD ] = {
+ 54.4754, 113.2139, 167.1447, 220.6519, };
+ static const double* const FltPtrsD[ FltCountD ] = {
+ HBKernel_1D, HBKernel_2D, HBKernel_3D, HBKernel_4D, };
+ static const double HBKernel_1E[ 1 ] = { // -66.5391 dB, 64
+ 5.0047102586416625e-001,};
+ static const double HBKernel_2E[ 2 ] = { // -137.3173 dB, 64
+ 5.6261293163933568e-001, -6.2613067826620017e-002,};
+ static const double HBKernel_3E[ 3 ] = { // -203.2997 dB, 64
+ 5.8600808139396787e-001, -9.7762185880067784e-002,
+ 1.1754104554493029e-002,};
+ static const double HBKernel_4E[ 4 ] = { // -268.8550 dB, 64
+ 5.9819599352772002e-001, -1.1972157555011861e-001,
+ 2.3977305567947922e-002, -2.4517235455853992e-003,};
+ static const int FltCountE = 4;
+ static const int FlttBaseE = 1;
+ static const double FltAttensE[ FltCountE ] = {
+ 66.5391, 137.3173, 203.2997, 268.8550, };
+ static const double* const FltPtrsE[ FltCountE ] = {
+ HBKernel_1E, HBKernel_2E, HBKernel_3E, HBKernel_4E, };
+ static const double HBKernel_1F[ 1 ] = { // -82.4633 dB, 128
+ 5.0007530666642896e-001,};
+ static const double HBKernel_2F[ 2 ] = { // -161.4049 dB, 128
+ 5.6252823610146030e-001, -6.2528244608044792e-002,};
+ static const double HBKernel_3F[ 3 ] = { // -239.4313 dB, 128
+ 5.8595514744674237e-001, -9.7682725156791952e-002,
+ 1.1727577711117231e-002,};
+ static const int FltCountF = 3;
+ static const int FlttBaseF = 1;
+ static const double FltAttensF[ FltCountF ] = {
+ 82.4633, 161.4049, 239.4313, };
+ static const double* const FltPtrsF[ FltCountF ] = {
+ HBKernel_1F, HBKernel_2F, HBKernel_3F, };
+ static const double HBKernel_1G[ 1 ] = { // -94.5052 dB, 256
+ 5.0001882524896712e-001,};
+ static const double HBKernel_2G[ 2 ] = { // -185.4886 dB, 256
+ 5.6250705922479682e-001, -6.2507059756378394e-002,};
+ static const double HBKernel_3G[ 3 ] = { // -275.5501 dB, 256
+ 5.8594191201187384e-001, -9.7662868266991207e-002,
+ 1.1720956255134043e-002,};
+ static const int FltCountG = 3;
+ static const int FlttBaseG = 1;
+ static const double FltAttensG[ FltCountG ] = {
+ 94.5052, 185.4886, 275.5501, };
+ static const double* const FltPtrsG[ FltCountG ] = {
+ HBKernel_1G, HBKernel_2G, HBKernel_3G, };
+
+ int k = 0;
+
+ if( SteepIndex <= 0 )
+ {
+ while( k != FltCountA - 1 && FltAttensA[ k ] < ReqAtten )
+ {
+ k++;
+ }
+
+ flt = FltPtrsA[ k ];
+ fltt = FlttBaseA + k;
+ att = FltAttensA[ k ];
+ }
+ else
+ if( SteepIndex == 1 )
+ {
+ while( k != FltCountB - 1 && FltAttensB[ k ] < ReqAtten )
+ {
+ k++;
+ }
+
+ flt = FltPtrsB[ k ];
+ fltt = FlttBaseB + k;
+ att = FltAttensB[ k ];
+ }
+ else
+ if( SteepIndex == 2 )
+ {
+ while( k != FltCountC - 1 && FltAttensC[ k ] < ReqAtten )
+ {
+ k++;
+ }
+
+ flt = FltPtrsC[ k ];
+ fltt = FlttBaseC + k;
+ att = FltAttensC[ k ];
+ }
+ else
+ if( SteepIndex == 3 )
+ {
+ while( k != FltCountD - 1 && FltAttensD[ k ] < ReqAtten )
+ {
+ k++;
+ }
+
+ flt = FltPtrsD[ k ];
+ fltt = FlttBaseD + k;
+ att = FltAttensD[ k ];
+ }
+ else
+ if( SteepIndex == 4 )
+ {
+ while( k != FltCountE - 1 && FltAttensE[ k ] < ReqAtten )
+ {
+ k++;
+ }
+
+ flt = FltPtrsE[ k ];
+ fltt = FlttBaseE + k;
+ att = FltAttensE[ k ];
+ }
+ else
+ if( SteepIndex == 5 )
+ {
+ while( k != FltCountF - 1 && FltAttensF[ k ] < ReqAtten )
+ {
+ k++;
+ }
+
+ flt = FltPtrsF[ k ];
+ fltt = FlttBaseF + k;
+ att = FltAttensF[ k ];
+ }
+ else
+ {
+ while( k != FltCountG - 1 && FltAttensG[ k ] < ReqAtten )
+ {
+ k++;
+ }
+
+ flt = FltPtrsG[ k ];
+ fltt = FlttBaseG + k;
+ att = FltAttensG[ k ];
+ }
+ }
+
+ /**
+ * Function that provides filter data for various steepness indices and
+ * attenuations. For 1/3 resamplings.
+ *
+ * @param ReqAtten Required half-band filter attentuation.
+ * @param SteepIndex Steepness index - 0=steepest. Corresponds to general
+ * upsampling/downsampling ratio, e.g. at 4x 0 is used, at 8x 1 is used,
+ * etc.
+ */
+
+ static void getHBFilterThird( const double ReqAtten, const int SteepIndex,
+ const double*& flt, int& fltt, double& att )
+ {
+ static const double HBKernel_3A[ 3 ] = { // -66.3726 dB, 6
+ 5.9811355069551475e-001, -1.1793396656733847e-001,
+ 2.0300557211946322e-002,};
+ static const double HBKernel_4A[ 4 ] = { // -90.2546 dB, 6
+ 6.0645499250612578e-001, -1.3555496505481171e-001,
+ 3.4022804962365975e-002, -4.9535418595798757e-003,};
+ static const double HBKernel_5A[ 5 ] = { // -126.5507 dB, 6
+ 6.1014115058940210e-001, -1.4393081816629907e-001,
+ 4.1760642892852244e-002, -8.9692183234056175e-003,
+ 9.9871340618342070e-004,};
+ static const double HBKernel_6A[ 6 ] = { // -150.1839 dB, 6
+ 6.1439563420546972e-001, -1.5360187826905250e-001,
+ 5.0840891345687034e-002, -1.4053648740561121e-002,
+ 2.6771286587305727e-003, -2.5815816044823123e-004,};
+ static const double HBKernel_7A[ 7 ] = { // -173.7068 dB, 6
+ 6.1747493476329918e-001, -1.6087373733313212e-001,
+ 5.8263075641409430e-002, -1.8872408173431318e-002,
+ 4.7421376543513687e-003, -8.0196529612267474e-004,
+ 6.7964807393798996e-005,};
+ static const double HBKernel_8A[ 8 ] = { // -197.1454 dB, 6
+ 6.1980610947775050e-001, -1.6654070578314714e-001,
+ 6.4416567441730327e-002, -2.3307744348719822e-002,
+ 6.9909157372312443e-003, -1.5871946293364403e-003,
+ 2.4017727382382763e-004, -1.8125308241541697e-005,};
+ static const double HBKernel_9A[ 9 ] = { // -220.5199 dB, 6
+ 6.2163188951899306e-001, -1.7108115323810941e-001,
+ 6.9588370095600260e-002, -2.7339625080613838e-002,
+ 9.2954469183791771e-003, -2.5537179959555429e-003,
+ 5.2572290897951021e-004, -7.1813356135154921e-005,
+ 4.8802382808892154e-006,};
+ static const int FltCountA = 7;
+ static const int FlttBaseA = 3;
+ static const double FltAttensA[ FltCountA ] = {
+ 66.3726, 90.2546, 126.5507, 150.1839, 173.7068, 197.1454, 220.5199, };
+ static const double* const FltPtrsA[ FltCountA ] = {
+ HBKernel_3A, HBKernel_4A, HBKernel_5A, HBKernel_6A, HBKernel_7A, HBKernel_8A, HBKernel_9A, };
+ static const double HBKernel_2B[ 2 ] = { // -71.0965 dB, 12
+ 5.6748544264806311e-001, -6.7764090509431732e-002,};
+ static const double HBKernel_3B[ 3 ] = { // -115.7707 dB, 12
+ 5.8793612182667199e-001, -1.0070583248877293e-001,
+ 1.2771337947163834e-002,};
+ static const double HBKernel_4B[ 4 ] = { // -152.1535 dB, 12
+ 5.9960155600862808e-001, -1.2228154335199336e-001,
+ 2.5433718917694709e-002, -2.7537562530837154e-003,};
+ static const double HBKernel_5B[ 5 ] = { // -188.2914 dB, 12
+ 6.0676859170554343e-001, -1.3689667009876413e-001,
+ 3.6288512631926818e-002, -6.7838855305035351e-003,
+ 6.2345167677087547e-004,};
+ static const double HBKernel_6B[ 6 ] = { // -224.2705 dB, 12
+ 6.1161456341904397e-001, -1.4743901958274458e-001,
+ 4.5344160157313275e-002, -1.1207371780924531e-002,
+ 1.8328497112594935e-003, -1.4518193006359589e-004,};
+ static const int FltCountB = 5;
+ static const int FlttBaseB = 2;
+ static const double FltAttensB[ FltCountB ] = {
+ 71.0965, 115.7707, 152.1535, 188.2914, 224.2705, };
+ static const double* const FltPtrsB[ FltCountB ] = {
+ HBKernel_2B, HBKernel_3B, HBKernel_4B, HBKernel_5B, HBKernel_6B, };
+ static const double HBKernel_1C[ 1 ] = { // -49.4544 dB, 24
+ 5.0336730531430562e-001,};
+ static const double HBKernel_2C[ 2 ] = { // -103.1970 dB, 24
+ 5.6330232648142819e-001, -6.3309247177420452e-002,};
+ static const double HBKernel_3C[ 3 ] = { // -152.1195 dB, 24
+ 5.8643891113580415e-001, -9.8411593011583087e-002,
+ 1.1972706651483846e-002,};
+ static const double HBKernel_4C[ 4 ] = { // -200.6182 dB, 24
+ 5.9851012363917222e-001, -1.2028885239978220e-001,
+ 2.4294521083140615e-002, -2.5157924156609776e-003,};
+ static const double HBKernel_5C[ 5 ] = { // -248.8730 dB, 24
+ 6.0590922882030196e-001, -1.3515953438018685e-001,
+ 3.5020857107815606e-002, -6.3256196990467053e-003,
+ 5.5506815147598793e-004,};
+ static const int FltCountC = 5;
+ static const int FlttBaseC = 1;
+ static const double FltAttensC[ FltCountC ] = {
+ 49.4544, 103.1970, 152.1195, 200.6182, 248.8730, };
+ static const double* const FltPtrsC[ FltCountC ] = {
+ HBKernel_1C, HBKernel_2C, HBKernel_3C, HBKernel_4C, HBKernel_5C, };
+ static const double HBKernel_1D[ 1 ] = { // -61.5357 dB, 48
+ 5.0083794231068057e-001,};
+ static const double HBKernel_2D[ 2 ] = { // -127.3167 dB, 48
+ 5.6270074379958690e-001, -6.2701174487726344e-002,};
+ static const double HBKernel_3D[ 3 ] = { // -188.2990 dB, 48
+ 5.8606296210323228e-001, -9.7844644765123029e-002,
+ 1.1781683046528768e-002,};
+ static const double HBKernel_4D[ 4 ] = { // -248.8580 dB, 48
+ 5.9823601243162516e-001, -1.1979368994739022e-001,
+ 2.4017458606412575e-002, -2.4597810910081913e-003,};
+ static const int FltCountD = 4;
+ static const int FlttBaseD = 1;
+ static const double FltAttensD[ FltCountD ] = {
+ 61.5357, 127.3167, 188.2990, 248.8580, };
+ static const double* const FltPtrsD[ FltCountD ] = {
+ HBKernel_1D, HBKernel_2D, HBKernel_3D, HBKernel_4D, };
+ static const double HBKernel_1E[ 1 ] = { // -77.4651 dB, 96
+ 5.0013388897382527e-001,};
+ static const double HBKernel_2E[ 2 ] = { // -151.4084 dB, 96
+ 5.6255019604317880e-001, -6.2550222932381064e-002,};
+ static const double HBKernel_3E[ 3 ] = { // -224.4365 dB, 96
+ 5.8596887234201078e-001, -9.7703321113080305e-002,
+ 1.1734448777069783e-002,};
+ static const int FltCountE = 3;
+ static const int FlttBaseE = 1;
+ static const double FltAttensE[ FltCountE ] = {
+ 77.4651, 151.4084, 224.4365, };
+ static const double* const FltPtrsE[ FltCountE ] = {
+ HBKernel_1E, HBKernel_2E, HBKernel_3E, };
+ static const double HBKernel_1F[ 1 ] = { // -89.5075 dB, 192
+ 5.0003346776264190e-001,};
+ static const double HBKernel_2F[ 2 ] = { // -175.4932 dB, 192
+ 5.6251254964097952e-001, -6.2512551321105267e-002,};
+ static const double HBKernel_3F[ 3 ] = { // -260.5645 dB, 192
+ 5.8594534336747051e-001, -9.7668015838639821e-002,
+ 1.1722672471262996e-002,};
+ static const int FltCountF = 3;
+ static const int FlttBaseF = 1;
+ static const double FltAttensF[ FltCountF ] = {
+ 89.5075, 175.4932, 260.5645, };
+ static const double* const FltPtrsF[ FltCountF ] = {
+ HBKernel_1F, HBKernel_2F, HBKernel_3F, };
+ static const double HBKernel_1G[ 1 ] = { // -101.5490 dB, 384
+ 5.0000836666064941e-001,};
+ static const double HBKernel_2G[ 2 ] = { // -199.5761 dB, 384
+ 5.6250313744943459e-001, -6.2503137554435345e-002,};
+ static const double HBKernel_3G[ 3 ] = { // -296.5185 dB, 384
+ 5.8593945786963764e-001, -9.7659186853499613e-002,
+ 1.1719728983863425e-002,};
+ static const int FltCountG = 3;
+ static const int FlttBaseG = 1;
+ static const double FltAttensG[ FltCountG ] = {
+ 101.5490, 199.5761, 296.5185, };
+ static const double* const FltPtrsG[ FltCountG ] = {
+ HBKernel_1G, HBKernel_2G, HBKernel_3G, };
+
+ int k = 0;
+
+ if( SteepIndex <= 0 )
+ {
+ while( k != FltCountA - 1 && FltAttensA[ k ] < ReqAtten )
+ {
+ k++;
+ }
+
+ flt = FltPtrsA[ k ];
+ fltt = FlttBaseA + k;
+ att = FltAttensA[ k ];
+ }
+ else
+ if( SteepIndex == 1 )
+ {
+ while( k != FltCountB - 1 && FltAttensB[ k ] < ReqAtten )
+ {
+ k++;
+ }
+
+ flt = FltPtrsB[ k ];
+ fltt = FlttBaseB + k;
+ att = FltAttensB[ k ];
+ }
+ else
+ if( SteepIndex == 2 )
+ {
+ while( k != FltCountC - 1 && FltAttensC[ k ] < ReqAtten )
+ {
+ k++;
+ }
+
+ flt = FltPtrsC[ k ];
+ fltt = FlttBaseC + k;
+ att = FltAttensC[ k ];
+ }
+ else
+ if( SteepIndex == 3 )
+ {
+ while( k != FltCountD - 1 && FltAttensD[ k ] < ReqAtten )
+ {
+ k++;
+ }
+
+ flt = FltPtrsD[ k ];
+ fltt = FlttBaseD + k;
+ att = FltAttensD[ k ];
+ }
+ else
+ if( SteepIndex == 4 )
+ {
+ while( k != FltCountE - 1 && FltAttensE[ k ] < ReqAtten )
+ {
+ k++;
+ }
+
+ flt = FltPtrsE[ k ];
+ fltt = FlttBaseE + k;
+ att = FltAttensE[ k ];
+ }
+ else
+ if( SteepIndex == 5 )
+ {
+ while( k != FltCountF - 1 && FltAttensF[ k ] < ReqAtten )
+ {
+ k++;
+ }
+
+ flt = FltPtrsF[ k ];
+ fltt = FlttBaseF + k;
+ att = FltAttensF[ k ];
+ }
+ else
+ {
+ while( k != FltCountG - 1 && FltAttensG[ k ] < ReqAtten )
+ {
+ k++;
+ }
+
+ flt = FltPtrsG[ k ];
+ fltt = FlttBaseG + k;
+ att = FltAttensG[ k ];
+ }
+ }
+
+ /**
+ * Constructor initalizes the half-band upsampler.
+ *
+ * @param ReqAtten Required half-band filter attentuation.
+ * @param SteepIndex Steepness index - 0=steepest. Corresponds to general
+ * upsampling ratio, e.g. at 4x upsampling 0 is used, at 8x upsampling 1
+ * is used, etc.
+ * @param IsThird "True" if 1/3 of frequency response resampling is
+ * performed.
+ * @param PrevLatency Latency, in samples (any value >=0), which was left
+ * in the output signal by a previous process. Whole-number latency will
+ * be consumed by *this object while remaining fractional latency can be
+ * obtained via the getLatencyFrac() function.
+ * @param aDoConsumeLatency "True" if the output latency should be
+ * consumed. Does not apply to the fractional part of the latency (if such
+ * part is available).
+ */
+
+ CDSPHBUpsampler( const double ReqAtten, const int SteepIndex,
+ const bool IsThird, const double PrevLatency,
+ const bool aDoConsumeLatency = true )
+ : DoConsumeLatency( aDoConsumeLatency )
+ {
+ static const CConvolveFn FltConvFn[ 14 ] = {
+ &CDSPHBUpsampler :: convolve1, &CDSPHBUpsampler :: convolve2,
+ &CDSPHBUpsampler :: convolve3, &CDSPHBUpsampler :: convolve4,
+ &CDSPHBUpsampler :: convolve5, &CDSPHBUpsampler :: convolve6,
+ &CDSPHBUpsampler :: convolve7, &CDSPHBUpsampler :: convolve8,
+ &CDSPHBUpsampler :: convolve9, &CDSPHBUpsampler :: convolve10,
+ &CDSPHBUpsampler :: convolve11, &CDSPHBUpsampler :: convolve12,
+ &CDSPHBUpsampler :: convolve13, &CDSPHBUpsampler :: convolve14 };
+
+ const double* fltp0;
+ int fltt;
+ double att;
+
+ if( IsThird )
+ {
+ getHBFilterThird( ReqAtten, SteepIndex, fltp0, fltt, att );
+ }
+ else
+ {
+ getHBFilter( ReqAtten, SteepIndex, fltp0, fltt, att );
+ }
+
+ // Copy obtained filter to address-aligned buffer.
+
+ fltp = alignptr( FltBuf, 16 );
+ memcpy( fltp, fltp0, fltt * sizeof( fltp[ 0 ]));
+
+ convfn = FltConvFn[ fltt - 1 ];
+ fll = fltt - 1;
+ fl2 = fltt;
+ flo = fll + fl2;
+ BufRP = Buf + fll;
+
+ LatencyFrac = PrevLatency * 2.0;
+ Latency = (int) LatencyFrac;
+ LatencyFrac -= Latency;
+
+ R8BASSERT( Latency >= 0 );
+
+ if( DoConsumeLatency )
+ {
+ flb = BufLen - fll;
+ }
+ else
+ {
+ Latency += fl2 + fl2;
+ flb = BufLen - flo;
+ }
+
+ R8BCONSOLE( "CDSPHBUpsampler: sti=%i third=%i taps=%i att=%.1f "
+ "io=2/1\n", SteepIndex, (int) IsThird, fltt, att );
+
+ clear();
+ }
+
+ virtual int getLatency() const
+ {
+ return( DoConsumeLatency ? 0 : Latency );
+ }
+
+ virtual double getLatencyFrac() const
+ {
+ return( LatencyFrac );
+ }
+
+ virtual int getMaxOutLen( const int MaxInLen ) const
+ {
+ R8BASSERT( MaxInLen >= 0 );
+
+ return( MaxInLen << 1 );
+ }
+
+ virtual void clear()
+ {
+ if( DoConsumeLatency )
+ {
+ LatencyLeft = Latency;
+ BufLeft = 0;
+ }
+ else
+ {
+ LatencyLeft = 0;
+ BufLeft = fl2;
+ }
+
+ WritePos = 0;
+ ReadPos = flb; // Set "read" position to account for filter's latency.
+
+ memset( &Buf[ ReadPos ], 0, ( BufLen - flb ) * sizeof( Buf[ 0 ]));
+ }
+
+ virtual int process( double* ip, int l, double*& op0 )
+ {
+ R8BASSERT( l >= 0 );
+
+ double* op = op0;
+
+ while( l > 0 )
+ {
+ // Add new input samples to both halves of the ring buffer.
+
+ const int b = min( min( l, BufLen - WritePos ), flb - BufLeft );
+
+ double* const wp1 = Buf + WritePos;
+ memcpy( wp1, ip, b * sizeof( wp1[ 0 ]));
+
+ if( WritePos < flo )
+ {
+ const int c = min( b, flo - WritePos );
+ memcpy( wp1 + BufLen, wp1, c * sizeof( wp1[ 0 ]));
+ }
+
+ ip += b;
+ WritePos = ( WritePos + b ) & BufLenMask;
+ l -= b;
+ BufLeft += b;
+
+ // Produce output.
+
+ if( BufLeft > fl2 )
+ {
+ const int c = BufLeft - fl2;
+
+ double* const opend = op + ( c + c );
+ ( *convfn )( op, opend, fltp, BufRP, ReadPos );
+
+ op = opend;
+ ReadPos = ( ReadPos + c ) & BufLenMask;
+ BufLeft -= c;
+ }
+ }
+
+ int ol = (int) ( op - op0 );
+
+ if( LatencyLeft != 0 )
+ {
+ if( LatencyLeft >= ol )
+ {
+ LatencyLeft -= ol;
+ return( 0 );
+ }
+
+ ol -= LatencyLeft;
+ op0 += LatencyLeft;
+ LatencyLeft = 0;
+ }
+
+ return( ol );
+ }
+
+private:
+ static const int BufLenBits = 9; ///< The length of the ring buffer,
+ ///< expressed as Nth power of 2. This value can be reduced if it is
+ ///< known that only short input buffers will be passed to the
+ ///< interpolator. The minimum value of this parameter is 5, and
+ ///< 1 << BufLenBits should be at least 3 times larger than the
+ ///< FilterLen.
+ ///<
+ static const int BufLen = 1 << BufLenBits; ///< The length of the ring
+ ///< buffer. The actual length is twice as long to allow "beyond max
+ ///< position" positioning.
+ ///<
+ static const int BufLenMask = BufLen - 1; ///< Mask used for quick buffer
+ ///< position wrapping.
+ ///<
+ double Buf[ BufLen + 27 ]; ///< The ring buffer, including overrun
+ ///< protection for the largest filter.
+ ///<
+ double FltBuf[ 14 + 2 ]; ///< Holder for half-band filter taps, used with
+ ///< 16-byte address-aligning, for SIMD use.
+ ///<
+ double* fltp; ///< Half-band filter taps, points to FltBuf.
+ ///<
+ int fll; ///< Input latency.
+ ///<
+ int fl2; ///< Right-side filter length.
+ ///<
+ int flo; ///< Overrrun length.
+ ///<
+ int flb; ///< Initial read position and maximal buffer write length.
+ ///<
+ const double* BufRP; ///< Offseted Buf pointer at ReadPos=0.
+ ///<
+ bool DoConsumeLatency; ///< "True" if the output latency should be
+ ///< consumed. Does not apply to the fractional part of the latency
+ ///< (if such part is available).
+ ///<
+ int Latency; ///< Initial latency that should be removed from the output.
+ ///<
+ double LatencyFrac; ///< Fractional latency left on the output.
+ ///<
+ int BufLeft; ///< The number of samples left in the buffer to process.
+ ///< When this value is below FilterLenD2Plus1, the interpolation
+ ///< cycle ends.
+ ///<
+ int WritePos; ///< The current buffer write position. Incremented together
+ ///< with the BufLeft variable.
+ ///<
+ int ReadPos; ///< The current buffer read position.
+ ///<
+ int LatencyLeft; ///< Latency left to remove.
+ ///<
+ typedef void( *CConvolveFn )( double* op, double* const opend,
+ const double* const flt, const double* const rp0, int rpos ); ///<
+ ///< Convolution function type.
+ ///<
+ CConvolveFn convfn; ///< Convolution function in use.
+ ///<
+
+#define R8BHBC1( fn ) \
+ static void fn( double* op, double* const opend, const double* const flt, \
+ const double* const rp0, int rpos ) \
+ { \
+ while( op != opend ) \
+ { \
+ const double* const rp = rp0 + rpos; \
+ op[ 0 ] = rp[ 0 ];
+
+#define R8BHBC2 \
+ rpos = ( rpos + 1 ) & BufLenMask; \
+ op += 2; \
+ } \
+ }
+
+#include "CDSPHBUpsampler.inc"
+
+#undef R8BHBC1
+#undef R8BHBC2
+};
+
+// ---------------------------------------------------------------------------
+
+} // namespace r8b
+
+#endif // R8B_CDSPHBUPSAMPLER_INCLUDED
diff --git a/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPHBUpsampler.inc b/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPHBUpsampler.inc
new file mode 100644
index 00000000..f9431b1e
--- /dev/null
+++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPHBUpsampler.inc
@@ -0,0 +1,711 @@
+// Auto-generated by `genhbc`, do not edit!
+
+#if defined( R8B_SSE2 )
+
+R8BHBC1( convolve1 )
+op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ]);
+R8BHBC2
+
+R8BHBC1( convolve2 )
+__m128d v1, v2, m1, s1;
+v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
+ _mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = m1;
+_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
+R8BHBC2
+
+R8BHBC1( convolve3 )
+__m128d v1, v2, m1, s1;
+v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
+ _mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = m1;
+_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
+op[ 1 ] += flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ]);
+R8BHBC2
+
+R8BHBC1( convolve4 )
+__m128d v1, v2, m1, s1;
+v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
+ _mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = m1;
+__m128d v3, v4, m3, s3;
+v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 );
+m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ),
+ _mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
+s3 = m3;
+s1 = _mm_add_pd( s1, s3 );
+_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
+R8BHBC2
+
+R8BHBC1( convolve5 )
+__m128d v1, v2, m1, s1;
+v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
+ _mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = m1;
+__m128d v3, v4, m3, s3;
+v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 );
+m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ),
+ _mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
+s3 = m3;
+s1 = _mm_add_pd( s1, s3 );
+_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
+op[ 1 ] += flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ]);
+R8BHBC2
+
+R8BHBC1( convolve6 )
+__m128d v1, v2, m1, s1;
+v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
+ _mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = m1;
+__m128d v3, v4, m3, s3;
+v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 );
+m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ),
+ _mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
+s3 = m3;
+v2 = _mm_loadu_pd( rp - 5 ); v1 = _mm_loadu_pd( rp + 5 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 4 ),
+ _mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = _mm_add_pd( s1, m1 );
+s1 = _mm_add_pd( s1, s3 );
+_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
+R8BHBC2
+
+R8BHBC1( convolve7 )
+__m128d v1, v2, m1, s1;
+v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
+ _mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = m1;
+__m128d v3, v4, m3, s3;
+v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 );
+m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ),
+ _mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
+s3 = m3;
+v2 = _mm_loadu_pd( rp - 5 ); v1 = _mm_loadu_pd( rp + 5 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 4 ),
+ _mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = _mm_add_pd( s1, m1 );
+s1 = _mm_add_pd( s1, s3 );
+_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
+op[ 1 ] += flt[ 6 ] * ( rp[ 7 ] + rp[ -6 ]);
+R8BHBC2
+
+R8BHBC1( convolve8 )
+__m128d v1, v2, m1, s1;
+v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
+ _mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = m1;
+__m128d v3, v4, m3, s3;
+v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 );
+m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ),
+ _mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
+s3 = m3;
+v2 = _mm_loadu_pd( rp - 5 ); v1 = _mm_loadu_pd( rp + 5 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 4 ),
+ _mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = _mm_add_pd( s1, m1 );
+v4 = _mm_loadu_pd( rp - 7 ); v3 = _mm_loadu_pd( rp + 7 );
+m3 = _mm_mul_pd( _mm_load_pd( flt + 6 ),
+ _mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
+s3 = _mm_add_pd( s3, m3 );
+s1 = _mm_add_pd( s1, s3 );
+_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
+R8BHBC2
+
+R8BHBC1( convolve9 )
+__m128d v1, v2, m1, s1;
+v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
+ _mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = m1;
+__m128d v3, v4, m3, s3;
+v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 );
+m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ),
+ _mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
+s3 = m3;
+v2 = _mm_loadu_pd( rp - 5 ); v1 = _mm_loadu_pd( rp + 5 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 4 ),
+ _mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = _mm_add_pd( s1, m1 );
+v4 = _mm_loadu_pd( rp - 7 ); v3 = _mm_loadu_pd( rp + 7 );
+m3 = _mm_mul_pd( _mm_load_pd( flt + 6 ),
+ _mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
+s3 = _mm_add_pd( s3, m3 );
+s1 = _mm_add_pd( s1, s3 );
+_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
+op[ 1 ] += flt[ 8 ] * ( rp[ 9 ] + rp[ -8 ]);
+R8BHBC2
+
+R8BHBC1( convolve10 )
+__m128d v1, v2, m1, s1;
+v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
+ _mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = m1;
+__m128d v3, v4, m3, s3;
+v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 );
+m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ),
+ _mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
+s3 = m3;
+v2 = _mm_loadu_pd( rp - 5 ); v1 = _mm_loadu_pd( rp + 5 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 4 ),
+ _mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = _mm_add_pd( s1, m1 );
+v4 = _mm_loadu_pd( rp - 7 ); v3 = _mm_loadu_pd( rp + 7 );
+m3 = _mm_mul_pd( _mm_load_pd( flt + 6 ),
+ _mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
+s3 = _mm_add_pd( s3, m3 );
+v2 = _mm_loadu_pd( rp - 9 ); v1 = _mm_loadu_pd( rp + 9 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 8 ),
+ _mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = _mm_add_pd( s1, m1 );
+s1 = _mm_add_pd( s1, s3 );
+_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
+R8BHBC2
+
+R8BHBC1( convolve11 )
+__m128d v1, v2, m1, s1;
+v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
+ _mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = m1;
+__m128d v3, v4, m3, s3;
+v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 );
+m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ),
+ _mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
+s3 = m3;
+v2 = _mm_loadu_pd( rp - 5 ); v1 = _mm_loadu_pd( rp + 5 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 4 ),
+ _mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = _mm_add_pd( s1, m1 );
+v4 = _mm_loadu_pd( rp - 7 ); v3 = _mm_loadu_pd( rp + 7 );
+m3 = _mm_mul_pd( _mm_load_pd( flt + 6 ),
+ _mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
+s3 = _mm_add_pd( s3, m3 );
+v2 = _mm_loadu_pd( rp - 9 ); v1 = _mm_loadu_pd( rp + 9 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 8 ),
+ _mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = _mm_add_pd( s1, m1 );
+s1 = _mm_add_pd( s1, s3 );
+_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
+op[ 1 ] += flt[ 10 ] * ( rp[ 11 ] + rp[ -10 ]);
+R8BHBC2
+
+R8BHBC1( convolve12 )
+__m128d v1, v2, m1, s1;
+v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
+ _mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = m1;
+__m128d v3, v4, m3, s3;
+v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 );
+m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ),
+ _mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
+s3 = m3;
+v2 = _mm_loadu_pd( rp - 5 ); v1 = _mm_loadu_pd( rp + 5 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 4 ),
+ _mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = _mm_add_pd( s1, m1 );
+v4 = _mm_loadu_pd( rp - 7 ); v3 = _mm_loadu_pd( rp + 7 );
+m3 = _mm_mul_pd( _mm_load_pd( flt + 6 ),
+ _mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
+s3 = _mm_add_pd( s3, m3 );
+v2 = _mm_loadu_pd( rp - 9 ); v1 = _mm_loadu_pd( rp + 9 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 8 ),
+ _mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = _mm_add_pd( s1, m1 );
+v4 = _mm_loadu_pd( rp - 11 ); v3 = _mm_loadu_pd( rp + 11 );
+m3 = _mm_mul_pd( _mm_load_pd( flt + 10 ),
+ _mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
+s3 = _mm_add_pd( s3, m3 );
+s1 = _mm_add_pd( s1, s3 );
+_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
+R8BHBC2
+
+R8BHBC1( convolve13 )
+__m128d v1, v2, m1, s1;
+v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
+ _mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = m1;
+__m128d v3, v4, m3, s3;
+v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 );
+m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ),
+ _mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
+s3 = m3;
+v2 = _mm_loadu_pd( rp - 5 ); v1 = _mm_loadu_pd( rp + 5 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 4 ),
+ _mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = _mm_add_pd( s1, m1 );
+v4 = _mm_loadu_pd( rp - 7 ); v3 = _mm_loadu_pd( rp + 7 );
+m3 = _mm_mul_pd( _mm_load_pd( flt + 6 ),
+ _mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
+s3 = _mm_add_pd( s3, m3 );
+v2 = _mm_loadu_pd( rp - 9 ); v1 = _mm_loadu_pd( rp + 9 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 8 ),
+ _mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = _mm_add_pd( s1, m1 );
+v4 = _mm_loadu_pd( rp - 11 ); v3 = _mm_loadu_pd( rp + 11 );
+m3 = _mm_mul_pd( _mm_load_pd( flt + 10 ),
+ _mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
+s3 = _mm_add_pd( s3, m3 );
+s1 = _mm_add_pd( s1, s3 );
+_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
+op[ 1 ] += flt[ 12 ] * ( rp[ 13 ] + rp[ -12 ]);
+R8BHBC2
+
+R8BHBC1( convolve14 )
+__m128d v1, v2, m1, s1;
+v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
+ _mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = m1;
+__m128d v3, v4, m3, s3;
+v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 );
+m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ),
+ _mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
+s3 = m3;
+v2 = _mm_loadu_pd( rp - 5 ); v1 = _mm_loadu_pd( rp + 5 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 4 ),
+ _mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = _mm_add_pd( s1, m1 );
+v4 = _mm_loadu_pd( rp - 7 ); v3 = _mm_loadu_pd( rp + 7 );
+m3 = _mm_mul_pd( _mm_load_pd( flt + 6 ),
+ _mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
+s3 = _mm_add_pd( s3, m3 );
+v2 = _mm_loadu_pd( rp - 9 ); v1 = _mm_loadu_pd( rp + 9 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 8 ),
+ _mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = _mm_add_pd( s1, m1 );
+v4 = _mm_loadu_pd( rp - 11 ); v3 = _mm_loadu_pd( rp + 11 );
+m3 = _mm_mul_pd( _mm_load_pd( flt + 10 ),
+ _mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
+s3 = _mm_add_pd( s3, m3 );
+v2 = _mm_loadu_pd( rp - 13 ); v1 = _mm_loadu_pd( rp + 13 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 12 ),
+ _mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = _mm_add_pd( s1, m1 );
+s1 = _mm_add_pd( s1, s3 );
+_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
+R8BHBC2
+
+#elif defined( R8B_NEON )
+
+R8BHBC1( convolve1 )
+op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ]);
+R8BHBC2
+
+R8BHBC1( convolve2 )
+float64x2_t v1, v2, s1;
+s1 = vdupq_n_f64( 0.0 );
+v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
+ vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+op[ 1 ] = vaddvq_f64( s1 );
+R8BHBC2
+
+R8BHBC1( convolve3 )
+float64x2_t v1, v2, s1;
+s1 = vdupq_n_f64( 0.0 );
+v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
+ vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+op[ 1 ] = vaddvq_f64( s1 ) + flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ]);
+R8BHBC2
+
+R8BHBC1( convolve4 )
+float64x2_t v1, v2, s1;
+s1 = vdupq_n_f64( 0.0 );
+v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
+ vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+float64x2_t v3, v4, s3;
+s3 = vdupq_n_f64( 0.0 );
+v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 );
+s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ),
+ vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
+s1 = vaddq_f64( s1, s3 );
+op[ 1 ] = vaddvq_f64( s1 );
+R8BHBC2
+
+R8BHBC1( convolve5 )
+float64x2_t v1, v2, s1;
+s1 = vdupq_n_f64( 0.0 );
+v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
+ vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+float64x2_t v3, v4, s3;
+s3 = vdupq_n_f64( 0.0 );
+v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 );
+s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ),
+ vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
+s1 = vaddq_f64( s1, s3 );
+op[ 1 ] = vaddvq_f64( s1 ) + flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ]);
+R8BHBC2
+
+R8BHBC1( convolve6 )
+float64x2_t v1, v2, s1;
+s1 = vdupq_n_f64( 0.0 );
+v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
+ vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+float64x2_t v3, v4, s3;
+s3 = vdupq_n_f64( 0.0 );
+v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 );
+s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ),
+ vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
+v2 = vld1q_f64( rp - 5 ); v1 = vld1q_f64( rp + 5 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 4 ),
+ vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+s1 = vaddq_f64( s1, s3 );
+op[ 1 ] = vaddvq_f64( s1 );
+R8BHBC2
+
+R8BHBC1( convolve7 )
+float64x2_t v1, v2, s1;
+s1 = vdupq_n_f64( 0.0 );
+v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
+ vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+float64x2_t v3, v4, s3;
+s3 = vdupq_n_f64( 0.0 );
+v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 );
+s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ),
+ vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
+v2 = vld1q_f64( rp - 5 ); v1 = vld1q_f64( rp + 5 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 4 ),
+ vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+s1 = vaddq_f64( s1, s3 );
+op[ 1 ] = vaddvq_f64( s1 ) + flt[ 6 ] * ( rp[ 7 ] + rp[ -6 ]);
+R8BHBC2
+
+R8BHBC1( convolve8 )
+float64x2_t v1, v2, s1;
+s1 = vdupq_n_f64( 0.0 );
+v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
+ vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+float64x2_t v3, v4, s3;
+s3 = vdupq_n_f64( 0.0 );
+v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 );
+s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ),
+ vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
+v2 = vld1q_f64( rp - 5 ); v1 = vld1q_f64( rp + 5 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 4 ),
+ vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+v4 = vld1q_f64( rp - 7 ); v3 = vld1q_f64( rp + 7 );
+s3 = vmlaq_f64( s3, vld1q_f64( flt + 6 ),
+ vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
+s1 = vaddq_f64( s1, s3 );
+op[ 1 ] = vaddvq_f64( s1 );
+R8BHBC2
+
+R8BHBC1( convolve9 )
+float64x2_t v1, v2, s1;
+s1 = vdupq_n_f64( 0.0 );
+v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
+ vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+float64x2_t v3, v4, s3;
+s3 = vdupq_n_f64( 0.0 );
+v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 );
+s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ),
+ vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
+v2 = vld1q_f64( rp - 5 ); v1 = vld1q_f64( rp + 5 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 4 ),
+ vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+v4 = vld1q_f64( rp - 7 ); v3 = vld1q_f64( rp + 7 );
+s3 = vmlaq_f64( s3, vld1q_f64( flt + 6 ),
+ vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
+s1 = vaddq_f64( s1, s3 );
+op[ 1 ] = vaddvq_f64( s1 ) + flt[ 8 ] * ( rp[ 9 ] + rp[ -8 ]);
+R8BHBC2
+
+R8BHBC1( convolve10 )
+float64x2_t v1, v2, s1;
+s1 = vdupq_n_f64( 0.0 );
+v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
+ vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+float64x2_t v3, v4, s3;
+s3 = vdupq_n_f64( 0.0 );
+v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 );
+s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ),
+ vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
+v2 = vld1q_f64( rp - 5 ); v1 = vld1q_f64( rp + 5 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 4 ),
+ vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+v4 = vld1q_f64( rp - 7 ); v3 = vld1q_f64( rp + 7 );
+s3 = vmlaq_f64( s3, vld1q_f64( flt + 6 ),
+ vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
+v2 = vld1q_f64( rp - 9 ); v1 = vld1q_f64( rp + 9 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 8 ),
+ vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+s1 = vaddq_f64( s1, s3 );
+op[ 1 ] = vaddvq_f64( s1 );
+R8BHBC2
+
+R8BHBC1( convolve11 )
+float64x2_t v1, v2, s1;
+s1 = vdupq_n_f64( 0.0 );
+v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
+ vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+float64x2_t v3, v4, s3;
+s3 = vdupq_n_f64( 0.0 );
+v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 );
+s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ),
+ vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
+v2 = vld1q_f64( rp - 5 ); v1 = vld1q_f64( rp + 5 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 4 ),
+ vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+v4 = vld1q_f64( rp - 7 ); v3 = vld1q_f64( rp + 7 );
+s3 = vmlaq_f64( s3, vld1q_f64( flt + 6 ),
+ vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
+v2 = vld1q_f64( rp - 9 ); v1 = vld1q_f64( rp + 9 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 8 ),
+ vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+s1 = vaddq_f64( s1, s3 );
+op[ 1 ] = vaddvq_f64( s1 ) + flt[ 10 ] * ( rp[ 11 ] + rp[ -10 ]);
+R8BHBC2
+
+R8BHBC1( convolve12 )
+float64x2_t v1, v2, s1;
+s1 = vdupq_n_f64( 0.0 );
+v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
+ vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+float64x2_t v3, v4, s3;
+s3 = vdupq_n_f64( 0.0 );
+v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 );
+s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ),
+ vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
+v2 = vld1q_f64( rp - 5 ); v1 = vld1q_f64( rp + 5 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 4 ),
+ vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+v4 = vld1q_f64( rp - 7 ); v3 = vld1q_f64( rp + 7 );
+s3 = vmlaq_f64( s3, vld1q_f64( flt + 6 ),
+ vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
+v2 = vld1q_f64( rp - 9 ); v1 = vld1q_f64( rp + 9 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 8 ),
+ vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+v4 = vld1q_f64( rp - 11 ); v3 = vld1q_f64( rp + 11 );
+s3 = vmlaq_f64( s3, vld1q_f64( flt + 10 ),
+ vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
+s1 = vaddq_f64( s1, s3 );
+op[ 1 ] = vaddvq_f64( s1 );
+R8BHBC2
+
+R8BHBC1( convolve13 )
+float64x2_t v1, v2, s1;
+s1 = vdupq_n_f64( 0.0 );
+v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
+ vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+float64x2_t v3, v4, s3;
+s3 = vdupq_n_f64( 0.0 );
+v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 );
+s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ),
+ vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
+v2 = vld1q_f64( rp - 5 ); v1 = vld1q_f64( rp + 5 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 4 ),
+ vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+v4 = vld1q_f64( rp - 7 ); v3 = vld1q_f64( rp + 7 );
+s3 = vmlaq_f64( s3, vld1q_f64( flt + 6 ),
+ vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
+v2 = vld1q_f64( rp - 9 ); v1 = vld1q_f64( rp + 9 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 8 ),
+ vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+v4 = vld1q_f64( rp - 11 ); v3 = vld1q_f64( rp + 11 );
+s3 = vmlaq_f64( s3, vld1q_f64( flt + 10 ),
+ vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
+s1 = vaddq_f64( s1, s3 );
+op[ 1 ] = vaddvq_f64( s1 ) + flt[ 12 ] * ( rp[ 13 ] + rp[ -12 ]);
+R8BHBC2
+
+R8BHBC1( convolve14 )
+float64x2_t v1, v2, s1;
+s1 = vdupq_n_f64( 0.0 );
+v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
+ vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+float64x2_t v3, v4, s3;
+s3 = vdupq_n_f64( 0.0 );
+v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 );
+s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ),
+ vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
+v2 = vld1q_f64( rp - 5 ); v1 = vld1q_f64( rp + 5 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 4 ),
+ vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+v4 = vld1q_f64( rp - 7 ); v3 = vld1q_f64( rp + 7 );
+s3 = vmlaq_f64( s3, vld1q_f64( flt + 6 ),
+ vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
+v2 = vld1q_f64( rp - 9 ); v1 = vld1q_f64( rp + 9 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 8 ),
+ vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+v4 = vld1q_f64( rp - 11 ); v3 = vld1q_f64( rp + 11 );
+s3 = vmlaq_f64( s3, vld1q_f64( flt + 10 ),
+ vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
+v2 = vld1q_f64( rp - 13 ); v1 = vld1q_f64( rp + 13 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 12 ),
+ vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+s1 = vaddq_f64( s1, s3 );
+op[ 1 ] = vaddvq_f64( s1 );
+R8BHBC2
+
+#else // SIMD
+
+R8BHBC1( convolve1 )
+op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ]);
+R8BHBC2
+
+R8BHBC1( convolve2 )
+op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
+ + flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ]);
+R8BHBC2
+
+R8BHBC1( convolve3 )
+op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
+ + flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
+ + flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ]);
+R8BHBC2
+
+R8BHBC1( convolve4 )
+op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
+ + flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
+ + flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ])
+ + flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ]);
+R8BHBC2
+
+R8BHBC1( convolve5 )
+op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
+ + flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
+ + flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ])
+ + flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ])
+ + flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ]);
+R8BHBC2
+
+R8BHBC1( convolve6 )
+op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
+ + flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
+ + flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ])
+ + flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ])
+ + flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ])
+ + flt[ 5 ] * ( rp[ 6 ] + rp[ -5 ]);
+R8BHBC2
+
+R8BHBC1( convolve7 )
+op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
+ + flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
+ + flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ])
+ + flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ])
+ + flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ])
+ + flt[ 5 ] * ( rp[ 6 ] + rp[ -5 ])
+ + flt[ 6 ] * ( rp[ 7 ] + rp[ -6 ]);
+R8BHBC2
+
+R8BHBC1( convolve8 )
+op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
+ + flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
+ + flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ])
+ + flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ])
+ + flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ])
+ + flt[ 5 ] * ( rp[ 6 ] + rp[ -5 ])
+ + flt[ 6 ] * ( rp[ 7 ] + rp[ -6 ])
+ + flt[ 7 ] * ( rp[ 8 ] + rp[ -7 ]);
+R8BHBC2
+
+R8BHBC1( convolve9 )
+op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
+ + flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
+ + flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ])
+ + flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ])
+ + flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ])
+ + flt[ 5 ] * ( rp[ 6 ] + rp[ -5 ])
+ + flt[ 6 ] * ( rp[ 7 ] + rp[ -6 ])
+ + flt[ 7 ] * ( rp[ 8 ] + rp[ -7 ])
+ + flt[ 8 ] * ( rp[ 9 ] + rp[ -8 ]);
+R8BHBC2
+
+R8BHBC1( convolve10 )
+op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
+ + flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
+ + flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ])
+ + flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ])
+ + flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ])
+ + flt[ 5 ] * ( rp[ 6 ] + rp[ -5 ])
+ + flt[ 6 ] * ( rp[ 7 ] + rp[ -6 ])
+ + flt[ 7 ] * ( rp[ 8 ] + rp[ -7 ])
+ + flt[ 8 ] * ( rp[ 9 ] + rp[ -8 ])
+ + flt[ 9 ] * ( rp[ 10 ] + rp[ -9 ]);
+R8BHBC2
+
+R8BHBC1( convolve11 )
+op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
+ + flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
+ + flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ])
+ + flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ])
+ + flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ])
+ + flt[ 5 ] * ( rp[ 6 ] + rp[ -5 ])
+ + flt[ 6 ] * ( rp[ 7 ] + rp[ -6 ])
+ + flt[ 7 ] * ( rp[ 8 ] + rp[ -7 ])
+ + flt[ 8 ] * ( rp[ 9 ] + rp[ -8 ])
+ + flt[ 9 ] * ( rp[ 10 ] + rp[ -9 ])
+ + flt[ 10 ] * ( rp[ 11 ] + rp[ -10 ]);
+R8BHBC2
+
+R8BHBC1( convolve12 )
+op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
+ + flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
+ + flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ])
+ + flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ])
+ + flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ])
+ + flt[ 5 ] * ( rp[ 6 ] + rp[ -5 ])
+ + flt[ 6 ] * ( rp[ 7 ] + rp[ -6 ])
+ + flt[ 7 ] * ( rp[ 8 ] + rp[ -7 ])
+ + flt[ 8 ] * ( rp[ 9 ] + rp[ -8 ])
+ + flt[ 9 ] * ( rp[ 10 ] + rp[ -9 ])
+ + flt[ 10 ] * ( rp[ 11 ] + rp[ -10 ])
+ + flt[ 11 ] * ( rp[ 12 ] + rp[ -11 ]);
+R8BHBC2
+
+R8BHBC1( convolve13 )
+op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
+ + flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
+ + flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ])
+ + flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ])
+ + flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ])
+ + flt[ 5 ] * ( rp[ 6 ] + rp[ -5 ])
+ + flt[ 6 ] * ( rp[ 7 ] + rp[ -6 ])
+ + flt[ 7 ] * ( rp[ 8 ] + rp[ -7 ])
+ + flt[ 8 ] * ( rp[ 9 ] + rp[ -8 ])
+ + flt[ 9 ] * ( rp[ 10 ] + rp[ -9 ])
+ + flt[ 10 ] * ( rp[ 11 ] + rp[ -10 ])
+ + flt[ 11 ] * ( rp[ 12 ] + rp[ -11 ])
+ + flt[ 12 ] * ( rp[ 13 ] + rp[ -12 ]);
+R8BHBC2
+
+R8BHBC1( convolve14 )
+op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
+ + flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
+ + flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ])
+ + flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ])
+ + flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ])
+ + flt[ 5 ] * ( rp[ 6 ] + rp[ -5 ])
+ + flt[ 6 ] * ( rp[ 7 ] + rp[ -6 ])
+ + flt[ 7 ] * ( rp[ 8 ] + rp[ -7 ])
+ + flt[ 8 ] * ( rp[ 9 ] + rp[ -8 ])
+ + flt[ 9 ] * ( rp[ 10 ] + rp[ -9 ])
+ + flt[ 10 ] * ( rp[ 11 ] + rp[ -10 ])
+ + flt[ 11 ] * ( rp[ 12 ] + rp[ -11 ])
+ + flt[ 12 ] * ( rp[ 13 ] + rp[ -12 ])
+ + flt[ 13 ] * ( rp[ 14 ] + rp[ -13 ]);
+R8BHBC2
+
+#endif // SIMD
diff --git a/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPProcessor.h b/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPProcessor.h
new file mode 100644
index 00000000..51c7cbc9
--- /dev/null
+++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPProcessor.h
@@ -0,0 +1,103 @@
+//$ nobt
+//$ nocpp
+
+/**
+ * @file CDSPProcessor.h
+ *
+ * @brief The base virtual class for DSP processing algorithms.
+ *
+ * This file includes the base virtual class for DSP processing algorithm
+ * classes like FIR filtering and interpolation.
+ *
+ * r8brain-free-src Copyright (c) 2013-2021 Aleksey Vaneev
+ * See the "LICENSE" file for license.
+ */
+
+#ifndef R8B_CDSPPROCESSOR_INCLUDED
+#define R8B_CDSPPROCESSOR_INCLUDED
+
+#include "r8bbase.h"
+
+namespace r8b {
+
+/**
+ * @brief The base virtual class for DSP processing algorithms.
+ *
+ * This class can be used as a base class for various DSP processing
+ * algorithms (processors). DSP processors that are derived from this class
+ * can be seamlessly integrated into various DSP processing graphs.
+ */
+
+class CDSPProcessor : public R8B_BASECLASS
+{
+ R8BNOCTOR( CDSPProcessor );
+
+public:
+ CDSPProcessor()
+ {
+ }
+
+ virtual ~CDSPProcessor()
+ {
+ }
+
+ /**
+ * @return The latency, in samples, which is present in the output signal.
+ * This value is usually zero if the DSP processor "consumes" the latency
+ * automatically.
+ */
+
+ virtual int getLatency() const = 0;
+
+ /**
+ * @return Fractional latency, in samples, which is present in the output
+ * signal. This value is usually zero if a linear-phase filtering is used.
+ * With minimum-phase filters in use, this value can be non-zero even if
+ * the getLatency() function returns zero.
+ */
+
+ virtual double getLatencyFrac() const = 0;
+
+ /**
+ * @param MaxInLen The number of samples planned to process at once, at
+ * most.
+ * @return The maximal length of the output buffer required when
+ * processing the "MaxInLen" number of input samples.
+ */
+
+ virtual int getMaxOutLen( const int MaxInLen ) const = 0;
+
+ /**
+ * Function clears (resets) the state of *this object and returns it to
+ * the state after construction. All input data accumulated in the
+ * internal buffer so far will be discarded.
+ */
+
+ virtual void clear() = 0;
+
+ /**
+ * Function performs DSP processing.
+ *
+ * @param ip Input data pointer.
+ * @param l0 How many samples to process.
+ * @param[out] op0 Output data pointer. The capacity of this buffer should
+ * be equal to the value returned by the getMaxOutLen() function for the
+ * given "l0". This buffer can be equal to "ip" only if the
+ * getMaxOutLen( l0 ) function returned a value lesser than "l0". This
+ * pointer can be incremented on function's return if latency compensation
+ * was performed by the processor. Note that on function's return, this
+ * pointer may point to some internal buffers, including the "ip" buffer,
+ * ignoring the originally passed value.
+ * @return The number of output samples written to the "op0" buffer and
+ * available after processing. This value can be smaller or larger in
+ * comparison to the original "l0" value due to processing and filter's
+ * latency compensation that took place, and due to resampling if it was
+ * performed.
+ */
+
+ virtual int process( double* ip, int l0, double*& op0 ) = 0;
+};
+
+} // namespace r8b
+
+#endif // R8B_CDSPPROCESSOR_INCLUDED
diff --git a/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPRealFFT.h b/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPRealFFT.h
new file mode 100644
index 00000000..5d88fa33
--- /dev/null
+++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPRealFFT.h
@@ -0,0 +1,820 @@
+//$ nobt
+//$ nocpp
+
+/**
+ * @file CDSPRealFFT.h
+ *
+ * @brief Real-valued FFT transform class.
+ *
+ * This file includes FFT object implementation. All created FFT objects are
+ * kept in a global list after use, for a future reusal. Such approach
+ * minimizes time necessary to initialize the FFT object of the required
+ * length.
+ *
+ * r8brain-free-src Copyright (c) 2013-2022 Aleksey Vaneev
+ * See the "LICENSE" file for license.
+ */
+
+#ifndef R8B_CDSPREALFFT_INCLUDED
+#define R8B_CDSPREALFFT_INCLUDED
+
+#include "r8bbase.h"
+
+#if !R8B_IPP && !R8B_PFFFT && !R8B_PFFFT_DOUBLE
+ #include "fft4g.h"
+#endif // !R8B_IPP && !R8B_PFFFT && !R8B_PFFFT_DOUBLE
+
+#if R8B_PFFFT
+ #include "pffft.h"
+#endif // R8B_PFFFT
+
+#if R8B_PFFFT_DOUBLE
+ #include "pffft_double/pffft_double.h"
+#endif // R8B_PFFFT_DOUBLE
+
+namespace r8b {
+
+/**
+ * @brief Real-valued FFT transform class.
+ *
+ * Class implements a wrapper for real-valued discrete fast Fourier transform
+ * functions. The object of this class can only be obtained via the
+ * CDSPRealFFTKeeper class.
+ *
+ * Uses functions from the FFT package by: Copyright(C) 1996-2001 Takuya OOURA
+ * http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html
+ *
+ * Also uses Intel IPP library functions if available (if the R8B_IPP=1 macro
+ * was defined). Note that IPP library's FFT functions are 2-3 times more
+ * efficient on the modern Intel Core i7-3770K processor than Ooura's
+ * functions. It may be worthwhile investing in IPP. Note, that FFT functions
+ * take less than 20% of the overall sample rate conversion time. However,
+ * when the "power of 2" resampling is used the performance of FFT functions
+ * becomes "everything".
+ */
+
+class CDSPRealFFT : public R8B_BASECLASS
+{
+ R8BNOCTOR( CDSPRealFFT );
+
+ friend class CDSPRealFFTKeeper;
+
+public:
+ /**
+ * @return A multiplication constant that should be used after inverse
+ * transform to obtain a correct value scale.
+ */
+
+ double getInvMulConst() const
+ {
+ return( InvMulConst );
+ }
+
+ /**
+ * @return The length (the number of real values in a transform) of *this
+ * FFT object, expressed as Nth power of 2.
+ */
+
+ int getLenBits() const
+ {
+ return( LenBits );
+ }
+
+ /**
+ * @return The length (the number of real values in a transform) of *this
+ * FFT object.
+ */
+
+ int getLen() const
+ {
+ return( Len );
+ }
+
+ /**
+ * Function performs in-place forward FFT.
+ *
+ * @param[in,out] p Pointer to data block to transform, length should be
+ * equal to *this object's getLen().
+ */
+
+ void forward( double* const p ) const
+ {
+ #if R8B_FLOATFFT
+
+ float* const op = (float*) p;
+ int i;
+
+ for( i = 0; i < Len; i++ )
+ {
+ op[ i ] = (float) p[ i ];
+ }
+
+ #endif // R8B_FLOATFFT
+
+ #if R8B_IPP
+
+ ippsFFTFwd_RToPerm_64f( p, p, SPtr, WorkBuffer );
+
+ #elif R8B_PFFFT
+
+ pffft_transform_ordered( setup, op, op, work, PFFFT_FORWARD );
+
+ #elif R8B_PFFFT_DOUBLE
+
+ pffftd_transform_ordered( setup, p, p, work, PFFFT_FORWARD );
+
+ #else // R8B_PFFFT_DOUBLE
+
+ ooura_fft :: rdft( Len, 1, p, wi.getPtr(), wd.getPtr() );
+
+ #endif // R8B_IPP
+ }
+
+ /**
+ * Function performs in-place inverse FFT.
+ *
+ * @param[in,out] p Pointer to data block to transform, length should be
+ * equal to *this object's getLen().
+ */
+
+ void inverse( double* const p ) const
+ {
+ #if R8B_IPP
+
+ ippsFFTInv_PermToR_64f( p, p, SPtr, WorkBuffer );
+
+ #elif R8B_PFFFT
+
+ pffft_transform_ordered( setup, (float*) p, (float*) p, work,
+ PFFFT_BACKWARD );
+
+ #elif R8B_PFFFT_DOUBLE
+
+ pffftd_transform_ordered( setup, p, p, work, PFFFT_BACKWARD );
+
+ #else // R8B_PFFFT_DOUBLE
+
+ ooura_fft :: rdft( Len, -1, p, wi.getPtr(), wd.getPtr() );
+
+ #endif // R8B_IPP
+
+ #if R8B_FLOATFFT
+
+ const float* const ip = (const float*) p;
+ int i;
+
+ for( i = Len - 1; i >= 0; i-- )
+ {
+ p[ i ] = ip[ i ];
+ }
+
+ #endif // R8B_FLOATFFT
+ }
+
+ /**
+ * Function multiplies two complex-valued data blocks and places result in
+ * a new data block. Length of all data blocks should be equal to *this
+ * object's block length. Input blocks should have been produced with the
+ * forward() function of *this object.
+ *
+ * @param aip1 Input data block 1.
+ * @param aip2 Input data block 2.
+ * @param[out] aop Output data block, should not be equal to aip1 nor
+ * aip2.
+ */
+
+ void multiplyBlocks( const double* const aip1, const double* const aip2,
+ double* const aop ) const
+ {
+ #if R8B_FLOATFFT
+
+ const float* const ip1 = (const float*) aip1;
+ const float* const ip2 = (const float*) aip2;
+ float* const op = (float*) aop;
+
+ #else // R8B_FLOATFFT
+
+ const double* const ip1 = aip1;
+ const double* const ip2 = aip2;
+ double* const op = aop;
+
+ #endif // R8B_FLOATFFT
+
+ #if R8B_IPP
+
+ ippsMulPerm_64f( (Ipp64f*) ip1, (Ipp64f*) ip2, (Ipp64f*) op, Len );
+
+ #else // R8B_IPP
+
+ op[ 0 ] = ip1[ 0 ] * ip2[ 0 ];
+ op[ 1 ] = ip1[ 1 ] * ip2[ 1 ];
+
+ int i = 2;
+
+ while( i < Len )
+ {
+ op[ i ] = ip1[ i ] * ip2[ i ] - ip1[ i + 1 ] * ip2[ i + 1 ];
+ op[ i + 1 ] = ip1[ i ] * ip2[ i + 1 ] + ip1[ i + 1 ] * ip2[ i ];
+ i += 2;
+ }
+
+ #endif // R8B_IPP
+ }
+
+ /**
+ * Function multiplies two complex-valued data blocks in-place. Length of
+ * both data blocks should be equal to *this object's block length. Blocks
+ * should have been produced with the forward() function of *this object.
+ *
+ * @param aip Input data block 1.
+ * @param[in,out] aop Output/input data block 2.
+ */
+
+ void multiplyBlocks( const double* const aip, double* const aop ) const
+ {
+ #if R8B_FLOATFFT
+
+ const float* const ip = (const float*) aip;
+ float* const op = (float*) aop;
+ float t;
+
+ #else // R8B_FLOATFFT
+
+ const double* const ip = aip;
+ double* const op = aop;
+
+ #if !R8B_IPP
+ double t;
+ #endif // !R8B_IPP
+
+ #endif // R8B_FLOATFFT
+
+ #if R8B_IPP
+
+ ippsMulPerm_64f( (Ipp64f*) op, (Ipp64f*) ip, (Ipp64f*) op, Len );
+
+ #else // R8B_IPP
+
+ op[ 0 ] *= ip[ 0 ];
+ op[ 1 ] *= ip[ 1 ];
+
+ int i = 2;
+
+ while( i < Len )
+ {
+ t = op[ i ] * ip[ i ] - op[ i + 1 ] * ip[ i + 1 ];
+ op[ i + 1 ] = op[ i ] * ip[ i + 1 ] + op[ i + 1 ] * ip[ i ];
+ op[ i ] = t;
+ i += 2;
+ }
+
+ #endif // R8B_IPP
+ }
+
+ /**
+ * Function multiplies two complex-valued data blocks in-place,
+ * considering that the "ip" block contains "zero-phase" response. Length
+ * of both data blocks should be equal to *this object's block length.
+ * Blocks should have been produced with the forward() function of *this
+ * object.
+ *
+ * @param aip Input data block 1, "zero-phase" response. This block should
+ * be first transformed via the convertToZP() function.
+ * @param[in,out] aop Output/input data block 2.
+ */
+
+ void multiplyBlocksZP( const double* const aip, double* const aop ) const
+ {
+ #if R8B_FLOATFFT
+
+ const float* const ip = (const float*) aip;
+ float* const op = (float*) aop;
+
+ #else // R8B_FLOATFFT
+
+ const double* ip = aip;
+ double* op = aop;
+
+ #endif // R8B_FLOATFFT
+
+ // SIMD implementations assume that pointers are address-aligned.
+
+ #if !R8B_FLOATFFT && defined( R8B_SSE2 )
+
+ int c8 = Len >> 3;
+
+ while( c8 != 0 )
+ {
+ const __m128d iv1 = _mm_load_pd( ip );
+ const __m128d iv2 = _mm_load_pd( ip + 2 );
+ const __m128d ov1 = _mm_load_pd( op );
+ const __m128d ov2 = _mm_load_pd( op + 2 );
+ _mm_store_pd( op, _mm_mul_pd( iv1, ov1 ));
+ _mm_store_pd( op + 2, _mm_mul_pd( iv2, ov2 ));
+
+ const __m128d iv3 = _mm_load_pd( ip + 4 );
+ const __m128d ov3 = _mm_load_pd( op + 4 );
+ const __m128d iv4 = _mm_load_pd( ip + 6 );
+ const __m128d ov4 = _mm_load_pd( op + 6 );
+ _mm_store_pd( op + 4, _mm_mul_pd( iv3, ov3 ));
+ _mm_store_pd( op + 6, _mm_mul_pd( iv4, ov4 ));
+
+ ip += 8;
+ op += 8;
+ c8--;
+ }
+
+ int c = Len & 7;
+
+ while( c != 0 )
+ {
+ *op *= *ip;
+ ip++;
+ op++;
+ c--;
+ }
+
+ #elif !R8B_FLOATFFT && defined( R8B_NEON )
+
+ int c8 = Len >> 3;
+
+ while( c8 != 0 )
+ {
+ const float64x2_t iv1 = vld1q_f64( ip );
+ const float64x2_t iv2 = vld1q_f64( ip + 2 );
+ const float64x2_t ov1 = vld1q_f64( op );
+ const float64x2_t ov2 = vld1q_f64( op + 2 );
+ vst1q_f64( op, vmulq_f64( iv1, ov1 ));
+ vst1q_f64( op + 2, vmulq_f64( iv2, ov2 ));
+
+ const float64x2_t iv3 = vld1q_f64( ip + 4 );
+ const float64x2_t iv4 = vld1q_f64( ip + 6 );
+ const float64x2_t ov3 = vld1q_f64( op + 4 );
+ const float64x2_t ov4 = vld1q_f64( op + 6 );
+ vst1q_f64( op + 4, vmulq_f64( iv3, ov3 ));
+ vst1q_f64( op + 6, vmulq_f64( iv4, ov4 ));
+
+ ip += 8;
+ op += 8;
+ c8--;
+ }
+
+ int c = Len & 7;
+
+ while( c != 0 )
+ {
+ *op *= *ip;
+ ip++;
+ op++;
+ c--;
+ }
+
+ #else // SIMD
+
+ int i;
+
+ for( i = 0; i < Len; i++ )
+ {
+ op[ i ] *= ip[ i ];
+ }
+
+ #endif // SIMD
+ }
+
+ /**
+ * Function converts the specified forward-transformed block into
+ * "zero-phase" form suitable for use with the multiplyBlocksZ() function.
+ *
+ * @param[in,out] ap Block to transform.
+ */
+
+ void convertToZP( double* const ap ) const
+ {
+ #if R8B_FLOATFFT
+
+ float* const p = (float*) ap;
+
+ #else // R8B_FLOATFFT
+
+ double* const p = ap;
+
+ #endif // R8B_FLOATFFT
+
+ int i = 2;
+
+ while( i < Len )
+ {
+ p[ i + 1 ] = p[ i ];
+ i += 2;
+ }
+ }
+
+private:
+ int LenBits; ///< Length of FFT block (expressed as Nth power of 2).
+ ///<
+ int Len; ///< Length of FFT block (number of real values).
+ ///<
+ double InvMulConst; ///< Inverse FFT multiply constant.
+ ///<
+ CDSPRealFFT* Next; ///< Next object in a singly-linked list.
+ ///<
+
+ #if R8B_IPP
+ IppsFFTSpec_R_64f* SPtr; ///< Pointer to initialized data buffer
+ ///< to be passed to IPP's FFT functions.
+ ///<
+ CFixedBuffer< unsigned char > SpecBuffer; ///< Working buffer.
+ ///<
+ CFixedBuffer< unsigned char > WorkBuffer; ///< Working buffer.
+ ///<
+ #elif R8B_PFFFT
+ PFFFT_Setup* setup; ///< PFFFT setup object.
+ ///<
+ CFixedBuffer< float > work; ///< Working buffer.
+ ///<
+ #elif R8B_PFFFT_DOUBLE
+ PFFFTD_Setup* setup; ///< PFFFTD setup object.
+ ///<
+ CFixedBuffer< double > work; ///< Working buffer.
+ ///<
+ #else // R8B_PFFFT_DOUBLE
+ CFixedBuffer< int > wi; ///< Working buffer (ints).
+ ///<
+ CFixedBuffer< double > wd; ///< Working buffer (doubles).
+ ///<
+ #endif // R8B_IPP
+
+ /**
+ * A simple class that keeps the pointer to the object and deletes it
+ * automatically.
+ */
+
+ class CObjKeeper
+ {
+ R8BNOCTOR( CObjKeeper );
+
+ public:
+ CObjKeeper()
+ : Object( NULL )
+ {
+ }
+
+ ~CObjKeeper()
+ {
+ delete Object;
+ }
+
+ CObjKeeper& operator = ( CDSPRealFFT* const aObject )
+ {
+ Object = aObject;
+ return( *this );
+ }
+
+ operator CDSPRealFFT* () const
+ {
+ return( Object );
+ }
+
+ private:
+ CDSPRealFFT* Object; ///< FFT object being kept.
+ ///<
+ };
+
+ CDSPRealFFT()
+ {
+ }
+
+ /**
+ * Constructor initializes FFT object.
+ *
+ * @param aLenBits The length of FFT block (Nth power of 2), specifies the
+ * number of real values in a block. Values from 1 to 30 inclusive are
+ * supported.
+ */
+
+ CDSPRealFFT( const int aLenBits )
+ : LenBits( aLenBits )
+ , Len( 1 << aLenBits )
+ #if R8B_IPP
+ , InvMulConst( 1.0 / Len )
+ #elif R8B_PFFFT
+ , InvMulConst( 1.0 / Len )
+ #elif R8B_PFFFT_DOUBLE
+ , InvMulConst( 1.0 / Len )
+ #else // R8B_PFFFT_DOUBLE
+ , InvMulConst( 2.0 / Len )
+ #endif // R8B_IPP
+ {
+ #if R8B_IPP
+
+ int SpecSize;
+ int SpecBufferSize;
+ int BufferSize;
+
+ ippsFFTGetSize_R_64f( LenBits, IPP_FFT_NODIV_BY_ANY,
+ ippAlgHintFast, &SpecSize, &SpecBufferSize, &BufferSize );
+
+ CFixedBuffer< unsigned char > InitBuffer( SpecBufferSize );
+ SpecBuffer.alloc( SpecSize );
+ WorkBuffer.alloc( BufferSize );
+
+ ippsFFTInit_R_64f( &SPtr, LenBits, IPP_FFT_NODIV_BY_ANY,
+ ippAlgHintFast, SpecBuffer, InitBuffer );
+
+ #elif R8B_PFFFT
+
+ setup = pffft_new_setup( Len, PFFFT_REAL );
+ work.alloc( Len );
+
+ #elif R8B_PFFFT_DOUBLE
+
+ setup = pffftd_new_setup( Len, PFFFT_REAL );
+ work.alloc( Len );
+
+ #else // R8B_PFFFT_DOUBLE
+
+ wi.alloc( (int) ceil( 2.0 + sqrt( (double) ( Len >> 1 ))));
+ wi[ 0 ] = 0;
+ wd.alloc( Len >> 1 );
+
+ #endif // R8B_IPP
+ }
+
+ ~CDSPRealFFT()
+ {
+ #if R8B_PFFFT
+ pffft_destroy_setup( setup );
+ #elif R8B_PFFFT_DOUBLE
+ pffftd_destroy_setup( setup );
+ #endif // R8B_PFFFT_DOUBLE
+
+ delete Next;
+ }
+};
+
+/**
+ * @brief A "keeper" class for real-valued FFT transform objects.
+ *
+ * Class implements "keeper" functionality for handling CDSPRealFFT objects.
+ * The allocated FFT objects are placed on the global static list of objects
+ * for future reuse instead of deallocation.
+ */
+
+class CDSPRealFFTKeeper : public R8B_BASECLASS
+{
+ R8BNOCTOR( CDSPRealFFTKeeper );
+
+public:
+ CDSPRealFFTKeeper()
+ : Object( NULL )
+ {
+ }
+
+ /**
+ * Function acquires FFT object with the specified block length.
+ *
+ * @param LenBits The length of FFT block (Nth power of 2), in the range
+ * [1; 30] inclusive, specifies the number of real values in a FFT block.
+ */
+
+ CDSPRealFFTKeeper( const int LenBits )
+ {
+ Object = acquire( LenBits );
+ }
+
+ ~CDSPRealFFTKeeper()
+ {
+ if( Object != NULL )
+ {
+ release( Object );
+ }
+ }
+
+ /**
+ * @return Pointer to the acquired FFT object.
+ */
+
+ const CDSPRealFFT* operator -> () const
+ {
+ R8BASSERT( Object != NULL );
+
+ return( Object );
+ }
+
+ /**
+ * Function acquires FFT object with the specified block length. This
+ * function can be called any number of times.
+ *
+ * @param LenBits The length of FFT block (Nth power of 2), in the range
+ * [1; 30] inclusive, specifies the number of real values in a FFT block.
+ */
+
+ void init( const int LenBits )
+ {
+ if( Object != NULL )
+ {
+ if( Object -> LenBits == LenBits )
+ {
+ return;
+ }
+
+ release( Object );
+ }
+
+ Object = acquire( LenBits );
+ }
+
+ /**
+ * Function releases a previously acquired FFT object.
+ */
+
+ void reset()
+ {
+ if( Object != NULL )
+ {
+ release( Object );
+ Object = NULL;
+ }
+ }
+
+private:
+ CDSPRealFFT* Object; ///< FFT object.
+ ///<
+
+ static CSyncObject StateSync; ///< FFTObjects synchronizer.
+ ///<
+ static CDSPRealFFT :: CObjKeeper FFTObjects[]; ///< Pool of FFT objects of
+ ///< various lengths.
+ ///<
+
+ /**
+ * Function acquires FFT object from the global pool.
+ *
+ * @param LenBits FFT block length (expressed as Nth power of 2).
+ */
+
+ CDSPRealFFT* acquire( const int LenBits )
+ {
+ R8BASSERT( LenBits > 0 && LenBits <= 30 );
+
+ R8BSYNC( StateSync );
+
+ if( FFTObjects[ LenBits ] == NULL )
+ {
+ return( new CDSPRealFFT( LenBits ));
+ }
+
+ CDSPRealFFT* ffto = FFTObjects[ LenBits ];
+ FFTObjects[ LenBits ] = ffto -> Next;
+
+ return( ffto );
+ }
+
+ /**
+ * Function releases a previously acquired FFT object.
+ *
+ * @param ffto FFT object to release.
+ */
+
+ void release( CDSPRealFFT* const ffto )
+ {
+ R8BSYNC( StateSync );
+
+ ffto -> Next = FFTObjects[ ffto -> LenBits ];
+ FFTObjects[ ffto -> LenBits ] = ffto;
+ }
+};
+
+/**
+ * Function calculates the minimum-phase transform of the filter kernel, using
+ * a discrete Hilbert transform in cepstrum domain.
+ *
+ * For more details, see part III.B of
+ * http://www.hpl.hp.com/personal/Niranjan_Damera-Venkata/files/ComplexMinPhase.pdf
+ *
+ * @param[in,out] Kernel Filter kernel buffer.
+ * @param KernelLen Filter kernel's length, in samples.
+ * @param LenMult Kernel length multiplier. Used as a coefficient of the
+ * "oversampling" in the frequency domain. Such oversampling is needed to
+ * improve the precision of the minimum-phase transform. If the filter's
+ * attenuation is high, this multiplier should be increased or otherwise the
+ * required attenuation will not be reached due to "smoothing" effect of this
+ * transform.
+ * @param DoFinalMul "True" if the final multiplication after transform should
+ * be performed or not. Such multiplication returns the gain of the signal to
+ * its original value. This parameter can be set to "false" if normalization
+ * of the resulting filter kernel is planned to be used.
+ * @param[out] DCGroupDelay If not NULL, this variable receives group delay
+ * at DC offset, in samples (can be a non-integer value).
+ */
+
+inline void calcMinPhaseTransform( double* const Kernel, const int KernelLen,
+ const int LenMult = 2, const bool DoFinalMul = true,
+ double* const DCGroupDelay = NULL )
+{
+ R8BASSERT( KernelLen > 0 );
+ R8BASSERT( LenMult >= 2 );
+
+ const int LenBits = getBitOccupancy(( KernelLen * LenMult ) - 1 );
+ const int Len = 1 << LenBits;
+ const int Len2 = Len >> 1;
+ int i;
+
+ CFixedBuffer< double > ip( Len );
+ CFixedBuffer< double > ip2( Len2 + 1 );
+
+ memcpy( &ip[ 0 ], Kernel, KernelLen * sizeof( ip[ 0 ]));
+ memset( &ip[ KernelLen ], 0, ( Len - KernelLen ) * sizeof( ip[ 0 ]));
+
+ CDSPRealFFTKeeper ffto( LenBits );
+ ffto -> forward( ip );
+
+ // Create the "log |c|" spectrum while saving the original power spectrum
+ // in the "ip2" buffer.
+
+ #if R8B_FLOATFFT
+ float* const aip = (float*) &ip[ 0 ];
+ float* const aip2 = (float*) &ip2[ 0 ];
+ const float nzbias = 1e-35;
+ #else // R8B_FLOATFFT
+ double* const aip = &ip[ 0 ];
+ double* const aip2 = &ip2[ 0 ];
+ const double nzbias = 1e-300;
+ #endif // R8B_FLOATFFT
+
+ aip2[ 0 ] = aip[ 0 ];
+ aip[ 0 ] = log( fabs( aip[ 0 ]) + nzbias );
+ aip2[ Len2 ] = aip[ 1 ];
+ aip[ 1 ] = log( fabs( aip[ 1 ]) + nzbias );
+
+ for( i = 1; i < Len2; i++ )
+ {
+ aip2[ i ] = sqrt( aip[ i * 2 ] * aip[ i * 2 ] +
+ aip[ i * 2 + 1 ] * aip[ i * 2 + 1 ]);
+
+ aip[ i * 2 ] = log( aip2[ i ] + nzbias );
+ aip[ i * 2 + 1 ] = 0.0;
+ }
+
+ // Convert to cepstrum and apply discrete Hilbert transform.
+
+ ffto -> inverse( ip );
+
+ const double m1 = ffto -> getInvMulConst();
+ const double m2 = -m1;
+
+ ip[ 0 ] = 0.0;
+
+ for( i = 1; i < Len2; i++ )
+ {
+ ip[ i ] *= m1;
+ }
+
+ ip[ Len2 ] = 0.0;
+
+ for( i = Len2 + 1; i < Len; i++ )
+ {
+ ip[ i ] *= m2;
+ }
+
+ // Convert Hilbert-transformed cepstrum back to the "log |c|" spectrum and
+ // perform its exponentiation, multiplied by the power spectrum previously
+ // saved in the "ip2" buffer.
+
+ ffto -> forward( ip );
+
+ aip[ 0 ] = aip2[ 0 ];
+ aip[ 1 ] = aip2[ Len2 ];
+
+ for( i = 1; i < Len2; i++ )
+ {
+ aip[ i * 2 + 0 ] = cos( aip[ i * 2 + 1 ]) * aip2[ i ];
+ aip[ i * 2 + 1 ] = sin( aip[ i * 2 + 1 ]) * aip2[ i ];
+ }
+
+ ffto -> inverse( ip );
+
+ if( DoFinalMul )
+ {
+ for( i = 0; i < KernelLen; i++ )
+ {
+ Kernel[ i ] = ip[ i ] * m1;
+ }
+ }
+ else
+ {
+ memcpy( &Kernel[ 0 ], &ip[ 0 ], KernelLen * sizeof( Kernel[ 0 ]));
+ }
+
+ if( DCGroupDelay != NULL )
+ {
+ double tmp;
+
+ calcFIRFilterResponseAndGroupDelay( Kernel, KernelLen, 0.0,
+ tmp, tmp, *DCGroupDelay );
+ }
+}
+
+} // namespace r8b
+
+#endif // VOX_CDSPREALFFT_INCLUDED
diff --git a/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPResampler.h b/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPResampler.h
new file mode 100644
index 00000000..9eb4a43d
--- /dev/null
+++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPResampler.h
@@ -0,0 +1,769 @@
+//$ nobt
+//$ nocpp
+
+/**
+ * @file CDSPResampler.h
+ *
+ * @brief The master sample rate converter (resampler) class.
+ *
+ * This file includes the master sample rate converter (resampler) class that
+ * combines all elements of this library into a single front-end class.
+ *
+ * r8brain-free-src Copyright (c) 2013-2022 Aleksey Vaneev
+ * See the "LICENSE" file for license.
+ */
+
+#ifndef R8B_CDSPRESAMPLER_INCLUDED
+#define R8B_CDSPRESAMPLER_INCLUDED
+
+#include "CDSPHBDownsampler.h"
+#include "CDSPHBUpsampler.h"
+#include "CDSPBlockConvolver.h"
+#include "CDSPFracInterpolator.h"
+
+namespace r8b {
+
+/**
+ * @brief The master sample rate converter (resampler) class.
+ *
+ * This class can be considered the "master" sample rate converter (resampler)
+ * class since it combines all functionality of this library into a single
+ * front-end class to perform sample rate conversion to/from any sample rate,
+ * including non-integer sample rates.
+ *
+ * Note that objects of this class can be constructed on the stack as it has a
+ * small member data size. The default template parameters of this class are
+ * suited for 27-bit fixed point resampling.
+ *
+ * Use the CDSPResampler16 class for 16-bit resampling.
+ *
+ * Use the CDSPResampler16IR class for 16-bit impulse response resampling.
+ *
+ * Use the CDSPResampler24 class for 24-bit resampling (including 32-bit
+ * floating point resampling).
+ */
+
+class CDSPResampler : public CDSPProcessor
+{
+public:
+ /**
+ * Constructor initalizes the resampler object.
+ *
+ * Note that increasing the transition band and decreasing attenuation
+ * reduces the filter length, this in turn reduces the "input before
+ * output" delay. However, the filter length has only a minor influence on
+ * the overall resampling speed.
+ *
+ * It should be noted that the ReqAtten specifies the minimal difference
+ * between the loudest input signal component and the produced aliasing
+ * artifacts during resampling. For example, if ReqAtten=100 was specified
+ * when performing 2x upsampling, the analysis of the resulting signal may
+ * display high-frequency components which are quieter than the loudest
+ * part of the input signal by only 100 decibel meaning the high-frequency
+ * part did not become "magically" completely silent after resampling. You
+ * have to specify a higher ReqAtten value if you need a totally clean
+ * high-frequency content. On the other hand, it may not be reasonable to
+ * have a high-frequency content cleaner than the input signal itself: if
+ * the input signal is 16-bit, setting ReqAtten to 180 will make its
+ * high-frequency content 24-bit, but the original part of the signal will
+ * remain 16-bit.
+ *
+ * @param SrcSampleRate Source signal sample rate. Both sample rates can
+ * be specified as a ratio, e.g. SrcSampleRate = 1.0, DstSampleRate = 2.0.
+ * @param DstSampleRate Destination signal sample rate. The "power of 2"
+ * ratios between the source and destination sample rates force resampler
+ * to use several fast "power of 2" resampling steps, without using
+ * fractional interpolation at all.
+ * @param aMaxInLen The maximal planned length of the input buffer (in
+ * samples) that will be passed to the resampler. The resampler relies on
+ * this value as it allocates intermediate buffers. Input buffers longer
+ * than this value should never be supplied to the resampler. Note that
+ * upsampling produces more samples than was provided on input, so at
+ * higher upsampling ratios it is advisable to use smaller MaxInLen
+ * values to reduce memory footprint. When downsampling, a larger MaxInLen
+ * is suggested in order to increase downsampling performance.
+ * @param ReqTransBand Required transition band, in percent of the
+ * spectral space of the input signal (or the output signal if
+ * downsampling is performed) between filter's -3 dB point and the Nyquist
+ * frequency. The range is from CDSPFIRFilter::getLPMinTransBand() to
+ * CDSPFIRFilter::getLPMaxTransBand(), inclusive. When upsampling 88200 or
+ * 96000 audio to a higher sample rates the ReqTransBand can be
+ * considerably increased, up to 30. The selection of ReqTransBand depends
+ * on the level of desire to preserve the high-frequency content. While
+ * values 0.5 to 2 are extremely "greedy" settings, not necessary in most
+ * cases, values 2 to 3 can be used in most cases. Values 3 to 4 are
+ * relaxed settings, but they still offer a flat frequency response up to
+ * 21kHz with 44.1k source or destination sample rate.
+ * @param ReqAtten Required stop-band attenuation in decibel, in the
+ * range CDSPFIRFilter::getLPMinAtten() to CDSPFIRFilter::getLPMaxAtten(),
+ * inclusive. The actual attenuation may be 0.40-4.46 dB higher. The
+ * general formula for selecting the ReqAtten is 6.02 * Bits + 40, where
+ * "Bits" is the bit resolution (e.g. 16, 24), "40" is an added resolution
+ * for dynamic signals; this value can be decreased to 20 to 10 if the
+ * signal being resampled is non-dynamic (e.g., an impulse response or
+ * filter, with a non-steep frequency response).
+ * @param ReqPhase Required filter's phase response. Note that this
+ * setting does not affect interpolator's phase response which is always
+ * linear-phase. Also note that if the "power of 2" resampling was engaged
+ * by the resampler together with the minimum-phase response, the audio
+ * stream may become fractionally delayed, depending on the minimum-phase
+ * filter's actual fractional delay. Linear-phase filters do not have
+ * fractional delay.
+ * @see CDSPFIRFilterCache::getLPFilter()
+ */
+
+ CDSPResampler( const double SrcSampleRate, const double DstSampleRate,
+ const int aMaxInLen, const double ReqTransBand = 2.0,
+ const double ReqAtten = 206.91,
+ const EDSPFilterPhaseResponse ReqPhase = fprLinearPhase )
+ : StepCapacity( 0 )
+ , StepCount( 0 )
+ , MaxInLen( aMaxInLen )
+ , CurMaxOutLen( aMaxInLen )
+ , LatencyFrac( 0.0 )
+ {
+ R8BASSERT( SrcSampleRate > 0.0 );
+ R8BASSERT( DstSampleRate > 0.0 );
+ R8BASSERT( MaxInLen > 0 );
+
+ R8BCONSOLE( "* CDSPResampler: src=%.1f dst=%.1f len=%i tb=%.1f "
+ "att=%.2f ph=%i\n", SrcSampleRate, DstSampleRate, aMaxInLen,
+ ReqTransBand, ReqAtten, (int) ReqPhase );
+
+ if( SrcSampleRate == DstSampleRate )
+ {
+ return;
+ }
+
+ TmpBufCapacities[ 0 ] = 0;
+ TmpBufCapacities[ 1 ] = 0;
+ CurTmpBuf = 0;
+
+ // Try some common efficient ratios requiring only a single step.
+
+ const int CommonRatioCount = 5;
+ const int CommonRatios[ CommonRatioCount ][ 2 ] = {
+ { 1, 2 },
+ { 1, 3 },
+ { 2, 3 },
+ { 3, 2 },
+ { 3, 4 }
+ };
+
+ int i;
+
+ for( i = 0; i < CommonRatioCount; i++ )
+ {
+ const int num = CommonRatios[ i ][ 0 ];
+ const int den = CommonRatios[ i ][ 1 ];
+
+ if( SrcSampleRate * num == DstSampleRate * den )
+ {
+ addProcessor( new CDSPBlockConvolver(
+ CDSPFIRFilterCache :: getLPFilter(
+ 1.0 / ( num > den ? num : den ), ReqTransBand,
+ ReqAtten, ReqPhase, num ), num, den, LatencyFrac ));
+
+ createTmpBuffers();
+ return;
+ }
+ }
+
+ // Try whole-number power-of-2 or 3*power-of-2 upsampling.
+
+ for( i = 2; i <= 3; i++ )
+ {
+ bool WasFound = false;
+ int c = 0;
+
+ while( true )
+ {
+ const double NewSR = SrcSampleRate * ( i << c );
+
+ if( NewSR == DstSampleRate )
+ {
+ WasFound = true;
+ break;
+ }
+
+ if( NewSR > DstSampleRate )
+ {
+ break;
+ }
+
+ c++;
+ }
+
+ if( WasFound )
+ {
+ addProcessor( new CDSPBlockConvolver(
+ CDSPFIRFilterCache :: getLPFilter( 1.0 / i, ReqTransBand,
+ ReqAtten, ReqPhase, i ), i, 1, LatencyFrac ));
+
+ const bool IsThird = ( i == 3 );
+
+ for( i = 0; i < c; i++ )
+ {
+ addProcessor( new CDSPHBUpsampler( ReqAtten, i, IsThird,
+ LatencyFrac ));
+ }
+
+ createTmpBuffers();
+ return;
+ }
+ }
+
+ if( DstSampleRate * 2.0 > SrcSampleRate )
+ {
+ // Upsampling or fractional downsampling down to 2X.
+
+ const double NormFreq = ( DstSampleRate > SrcSampleRate ? 0.5 :
+ 0.5 * DstSampleRate / SrcSampleRate );
+
+ addProcessor( new CDSPBlockConvolver(
+ CDSPFIRFilterCache :: getLPFilter( NormFreq, ReqTransBand,
+ ReqAtten, ReqPhase, 2.0 ), 2, 1, LatencyFrac ));
+
+ // Try intermediate interpolated resampling with subsequent 2X
+ // or 3X upsampling.
+
+ const double tbw = 0.0175; // Intermediate filter's transition
+ // band extension coefficient.
+ const double ThreshSampleRate = SrcSampleRate /
+ ( 1.0 - tbw * ReqTransBand ); // Make sure intermediate
+ // filter's transition band is not steeper than ReqTransBand
+ // (this keeps the latency under control).
+
+ int c = 0;
+ int div = 1;
+
+ while( true )
+ {
+ const int ndiv = div * 2;
+
+ if( DstSampleRate < ThreshSampleRate * ndiv )
+ {
+ break;
+ }
+
+ div = ndiv;
+ c++;
+ }
+
+ int c2 = 0;
+ int div2 = 1;
+
+ while( true )
+ {
+ const int ndiv = div * ( c2 == 0 ? 3 : 2 );
+
+ if( DstSampleRate < ThreshSampleRate * ndiv )
+ {
+ break;
+ }
+
+ div2 = ndiv;
+ c2++;
+ }
+
+ const double SrcSampleRate2 = SrcSampleRate * 2.0;
+ int tmp1;
+ int tmp2;
+
+ if( c == 1 && getWholeStepping( SrcSampleRate2, DstSampleRate,
+ tmp1, tmp2 ))
+ {
+ // Do not use intermediate interpolation if whole stepping is
+ // available as it performs very fast.
+
+ c = 0;
+ }
+
+ if( c > 0 )
+ {
+ // Add steps using intermediate interpolation.
+
+ int num;
+
+ if( c2 > 0 && div2 > div )
+ {
+ div = div2;
+ c = c2;
+ num = 3;
+ }
+ else
+ {
+ num = 2;
+ }
+
+ addProcessor( new CDSPFracInterpolator( SrcSampleRate2 * div,
+ DstSampleRate, ReqAtten, false, LatencyFrac ));
+
+ double tb = ( 1.0 - SrcSampleRate * div / DstSampleRate ) /
+ tbw; // Divide TransBand by a constant that assures a
+ // linear response in the pass-band.
+
+ if( tb > CDSPFIRFilter :: getLPMaxTransBand() )
+ {
+ tb = CDSPFIRFilter :: getLPMaxTransBand();
+ }
+
+ addProcessor( new CDSPBlockConvolver(
+ CDSPFIRFilterCache :: getLPFilter( 1.0 / num, tb,
+ ReqAtten, ReqPhase, num ), num, 1, LatencyFrac ));
+
+ const bool IsThird = ( num == 3 );
+
+ for( i = 1; i < c; i++ )
+ {
+ addProcessor( new CDSPHBUpsampler( ReqAtten, i - 1,
+ IsThird, LatencyFrac ));
+ }
+ }
+ else
+ {
+ addProcessor( new CDSPFracInterpolator( SrcSampleRate2,
+ DstSampleRate, ReqAtten, false, LatencyFrac ));
+ }
+
+ createTmpBuffers();
+ return;
+ }
+
+ // Use downsampling steps, including power-of-2 downsampling.
+
+ double CheckSR = DstSampleRate * 4.0;
+ int c = 0;
+ double FinGain = 1.0;
+
+ while( CheckSR <= SrcSampleRate )
+ {
+ c++;
+ CheckSR *= 2.0;
+ FinGain *= 0.5;
+ }
+
+ const int SrcSRDiv = ( 1 << c );
+ int downf;
+ double NormFreq = 0.5;
+ bool UseInterp = true;
+ bool IsThird = false;
+
+ for( downf = 2; downf <= 3; downf++ )
+ {
+ if( DstSampleRate * SrcSRDiv * downf == SrcSampleRate )
+ {
+ NormFreq = 1.0 / downf;
+ UseInterp = false;
+ IsThird = ( downf == 3 );
+ break;
+ }
+ }
+
+ if( UseInterp )
+ {
+ downf = 1;
+ NormFreq = DstSampleRate * SrcSRDiv / SrcSampleRate;
+ IsThird = ( NormFreq * 3.0 <= 1.0 );
+ }
+
+ for( i = 0; i < c; i++ )
+ {
+ // Use a fixed very relaxed 2X downsampling filters, that at
+ // the final stage only guarantees stop-band between 0.75 and
+ // pi. 0.5-0.75 range will be aliased to 0.25-0.5 range which
+ // will then be filtered out by the final filter.
+
+ addProcessor( new CDSPHBDownsampler( ReqAtten, c - 1 - i, IsThird,
+ LatencyFrac ));
+ }
+
+ addProcessor( new CDSPBlockConvolver(
+ CDSPFIRFilterCache :: getLPFilter( NormFreq, ReqTransBand,
+ ReqAtten, ReqPhase, FinGain ), 1, downf, LatencyFrac ));
+
+ if( UseInterp )
+ {
+ addProcessor( new CDSPFracInterpolator( SrcSampleRate,
+ DstSampleRate * SrcSRDiv, ReqAtten, IsThird, LatencyFrac ));
+ }
+
+ createTmpBuffers();
+ }
+
+ virtual ~CDSPResampler()
+ {
+ int i;
+
+ for( i = 0; i < StepCount; i++ )
+ {
+ delete Steps[ i ];
+ }
+ }
+
+ virtual int getLatency() const
+ {
+ return( 0 );
+ }
+
+ virtual double getLatencyFrac() const
+ {
+ return( LatencyFrac );
+ }
+
+ /**
+ * This function ignores the supplied parameter and returns the maximal
+ * output buffer length that depends on the MaxInLen supplied to the
+ * constructor.
+ */
+
+ virtual int getMaxOutLen( const int/* MaxInLen */ ) const
+ {
+ return( CurMaxOutLen );
+ }
+
+ /**
+ * Function clears (resets) the state of *this object and returns it to
+ * the state after construction. All input data accumulated in the
+ * internal buffer so far will be discarded.
+ *
+ * This function makes it possible to use *this object for converting
+ * separate streams from the same source sample rate to the same
+ * destination sample rate without reconstructing the object. It is more
+ * efficient to clear the state of the resampler object than to destroy it
+ * and create a new object.
+ */
+
+ virtual void clear()
+ {
+ int i;
+
+ for( i = 0; i < StepCount; i++ )
+ {
+ Steps[ i ] -> clear();
+ }
+ }
+
+ /**
+ * Function performs sample rate conversion.
+ *
+ * If the source and destination sample rates are equal, the resampler
+ * will do nothing and will simply return the input buffer unchanged.
+ *
+ * You do not need to allocate an intermediate output buffer for use with
+ * this function. If required, the resampler will allocate a suitable
+ * intermediate output buffer itself.
+ *
+ * @param ip0 Input buffer. This buffer is never used as output buffer by
+ * this function. This pointer may be returned in "op0" if no resampling
+ * is happening (source sample rate equals destination sample rate).
+ * @param l The number of samples available in the input buffer. Should
+ * not exceed the MaxInLen supplied in the constructor.
+ * @param[out] op0 This variable receives the pointer to the resampled
+ * data. On function's return, this pointer points to *this object's
+ * internal buffer. In real-time applications it is suggested to pass this
+ * pointer to the next output audio block and consume any data left from
+ * the previous output audio block first before calling the process()
+ * function again. The buffer pointed to by the "op0" on return is owned
+ * by the resampler, so it should not be freed by the caller.
+ * @return The number of samples available in the "op0" output buffer. If
+ * the data from the output buffer "op0" is going to be written to a
+ * bigger output buffer, it is suggested to check the returned number of
+ * samples so that no overflow of the bigger output buffer happens.
+ */
+
+ virtual int process( double* ip0, int l, double*& op0 )
+ {
+ R8BASSERT( l >= 0 );
+
+ double* ip = ip0;
+ int i;
+
+ for( i = 0; i < StepCount; i++ )
+ {
+ double* op = TmpBufs[ i & 1 ];
+ l = Steps[ i ] -> process( ip, l, op );
+ ip = op;
+ }
+
+ op0 = ip;
+ return( l );
+ }
+
+ /**
+ * Function performs resampling of an input sample buffer of the specified
+ * length in the "one-shot" mode. This function can be useful when impulse
+ * response resampling is required.
+ *
+ * @param ip Input buffer pointer.
+ * @param iplen Length of the input buffer in samples.
+ * @param[out] op Output buffer pointer.
+ * @param oplen Length of the output buffer in samples.
+ * @tparam Tin Input buffer's element type.
+ * @tparam Tout Output buffer's element type.
+ */
+
+ template< typename Tin, typename Tout >
+ void oneshot( const Tin* ip, int iplen, Tout* op, int oplen )
+ {
+ CFixedBuffer< double > Buf( MaxInLen );
+ bool IsZero = false;
+
+ while( oplen > 0 )
+ {
+ int rc;
+ double* p;
+ int i;
+
+ if( iplen == 0 )
+ {
+ rc = MaxInLen;
+ p = &Buf[ 0 ];
+
+ if( !IsZero )
+ {
+ IsZero = true;
+ memset( p, 0, MaxInLen * sizeof( p[ 0 ]));
+ }
+ }
+ else
+ {
+ rc = min( iplen, MaxInLen );
+
+ if( sizeof( Tin ) == sizeof( double ))
+ {
+ p = (double*) ip;
+ }
+ else
+ {
+ p = &Buf[ 0 ];
+
+ for( i = 0; i < rc; i++ )
+ {
+ p[ i ] = ip[ i ];
+ }
+ }
+
+ ip += rc;
+ iplen -= rc;
+ }
+
+ double* op0;
+ int wc = process( p, rc, op0 );
+ wc = min( oplen, wc );
+
+ for( i = 0; i < wc; i++ )
+ {
+ op[ i ] = (Tout) op0[ i ];
+ }
+
+ op += wc;
+ oplen -= wc;
+ }
+
+ clear();
+ }
+
+ /**
+ * Function obtains overall input sample count required to produce first
+ * output sample. Function works by iteratively passing 1 sample at a time
+ * until output begins. This is a relatively CPU-consuming operation. This
+ * function should be called after the clear() function call or after
+ * object's construction. The function itself calls the clear() function
+ * before return.
+ *
+ * Note that it is advisable to cache the value returned by this function,
+ * for each SrcSampleRate/DstSampleRate pair, if it is called frequently.
+ */
+
+ int getInLenBeforeOutStart()
+ {
+ int inc = 0;
+
+ while( true )
+ {
+ double ins = 0.0;
+ double* op;
+
+ if( process( &ins, 1, op ) > 0 )
+ {
+ clear();
+ return( inc );
+ }
+
+ inc++;
+ }
+ }
+
+private:
+ CFixedBuffer< CDSPProcessor* > Steps; ///< Array of processing steps.
+ ///<
+ int StepCapacity; ///< The capacity of the Steps array.
+ ///<
+ int StepCount; ///< The number of created processing steps.
+ ///<
+ int MaxInLen; ///< Maximal input length.
+ ///<
+ CFixedBuffer< double > TmpBufAll; ///< Buffer containing both temporary
+ ///< buffers.
+ ///<
+ double* TmpBufs[ 2 ]; ///< Temporary output buffers.
+ ///<
+ int TmpBufCapacities[ 2 ]; ///< Capacities of temporary buffers, updated
+ ///< during processing steps building.
+ ///<
+ int CurTmpBuf; ///< Current temporary buffer.
+ ///<
+ int CurMaxOutLen; ///< Current maximal output length.
+ ///<
+ double LatencyFrac; ///< Current fractional latency. After object's
+ ///< construction, equals to the remaining fractional latency in the
+ ///< output.
+ ///<
+
+ /**
+ * Function adds processor, updates MaxOutLen variable and adjusts length
+ * of temporary internal buffers.
+ *
+ * @param Proc Processor to add. This pointer is inherited and will be
+ * destroyed on *this object's destruction.
+ */
+
+ void addProcessor( CDSPProcessor* const Proc )
+ {
+ if( StepCount == StepCapacity )
+ {
+ // Reallocate and increase Steps array's capacity.
+
+ const int NewCapacity = StepCapacity + 8;
+ Steps.realloc( StepCapacity, NewCapacity );
+ StepCapacity = NewCapacity;
+ }
+
+ LatencyFrac = Proc -> getLatencyFrac();
+ CurMaxOutLen = Proc -> getMaxOutLen( CurMaxOutLen );
+
+ if( CurMaxOutLen > TmpBufCapacities[ CurTmpBuf ])
+ {
+ TmpBufCapacities[ CurTmpBuf ] = CurMaxOutLen;
+ }
+
+ CurTmpBuf ^= 1;
+
+ Steps[ StepCount ] = Proc;
+ StepCount++;
+ }
+
+ /**
+ * Function creates temporary buffers.
+ */
+
+ void createTmpBuffers()
+ {
+ const int ol = TmpBufCapacities[ 0 ] + TmpBufCapacities[ 1 ];
+
+ if( ol > 0 )
+ {
+ TmpBufAll.alloc( ol );
+ TmpBufs[ 0 ] = &TmpBufAll[ 0 ];
+ TmpBufs[ 1 ] = &TmpBufAll[ TmpBufCapacities[ 0 ]];
+ }
+
+ R8BCONSOLE( "* CDSPResampler: init done\n" );
+ }
+};
+
+/**
+ * @brief The resampler class for 16-bit resampling.
+ *
+ * This class defines resampling parameters suitable for 16-bit resampling,
+ * using linear-phase low-pass filter. See the r8b::CDSPResampler class for
+ * details.
+ */
+
+class CDSPResampler16 : public CDSPResampler
+{
+public:
+ /**
+ * Constructor initializes the 16-bit resampler. See the
+ * r8b::CDSPResampler class for details.
+ *
+ * @param SrcSampleRate Source signal sample rate.
+ * @param DstSampleRate Destination signal sample rate.
+ * @param aMaxInLen The maximal planned length of the input buffer (in
+ * samples) that will be passed to the resampler.
+ * @param ReqTransBand Required transition band, in percent.
+ */
+
+ CDSPResampler16( const double SrcSampleRate, const double DstSampleRate,
+ const int aMaxInLen, const double ReqTransBand = 2.0 )
+ : CDSPResampler( SrcSampleRate, DstSampleRate, aMaxInLen, ReqTransBand,
+ 136.45, fprLinearPhase )
+ {
+ }
+};
+
+/**
+ * @brief The resampler class for 16-bit impulse response resampling.
+ *
+ * This class defines resampling parameters suitable for 16-bit impulse
+ * response resampling, using linear-phase low-pass filter. Impulse responses
+ * are non-dynamic signals, and thus need resampler with a lesser SNR. See the
+ * r8b::CDSPResampler class for details.
+ */
+
+class CDSPResampler16IR : public CDSPResampler
+{
+public:
+ /**
+ * Constructor initializes the 16-bit impulse response resampler. See the
+ * r8b::CDSPResampler class for details.
+ *
+ * @param SrcSampleRate Source signal sample rate.
+ * @param DstSampleRate Destination signal sample rate.
+ * @param aMaxInLen The maximal planned length of the input buffer (in
+ * samples) that will be passed to the resampler.
+ * @param ReqTransBand Required transition band, in percent.
+ */
+
+ CDSPResampler16IR( const double SrcSampleRate, const double DstSampleRate,
+ const int aMaxInLen, const double ReqTransBand = 2.0 )
+ : CDSPResampler( SrcSampleRate, DstSampleRate, aMaxInLen, ReqTransBand,
+ 109.56, fprLinearPhase )
+ {
+ }
+};
+
+/**
+ * @brief The resampler class for 24-bit resampling.
+ *
+ * This class defines resampling parameters suitable for 24-bit resampling
+ * (including 32-bit floating point resampling), using linear-phase low-pass
+ * filter. See the r8b::CDSPResampler class for details.
+ */
+
+class CDSPResampler24 : public CDSPResampler
+{
+public:
+ /**
+ * Constructor initializes the 24-bit resampler (including 32-bit floating
+ * point). See the r8b::CDSPResampler class for details.
+ *
+ * @param SrcSampleRate Source signal sample rate.
+ * @param DstSampleRate Destination signal sample rate.
+ * @param aMaxInLen The maximal planned length of the input buffer (in
+ * samples) that will be passed to the resampler.
+ * @param ReqTransBand Required transition band, in percent.
+ */
+
+ CDSPResampler24( const double SrcSampleRate, const double DstSampleRate,
+ const int aMaxInLen, const double ReqTransBand = 2.0 )
+ : CDSPResampler( SrcSampleRate, DstSampleRate, aMaxInLen, ReqTransBand,
+ 180.15, fprLinearPhase )
+ {
+ }
+};
+
+} // namespace r8b
+
+#endif // R8B_CDSPRESAMPLER_INCLUDED
diff --git a/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPSincFilterGen.h b/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPSincFilterGen.h
new file mode 100644
index 00000000..32aa2e5a
--- /dev/null
+++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPSincFilterGen.h
@@ -0,0 +1,687 @@
+//$ nobt
+//$ nocpp
+
+/**
+ * @file CDSPSincFilterGen.h
+ *
+ * @brief Sinc function-based FIR filter generator class.
+ *
+ * This file includes the CDSPSincFilterGen class implementation that
+ * generates FIR filters.
+ *
+ * r8brain-free-src Copyright (c) 2013-2022 Aleksey Vaneev
+ * See the "LICENSE" file for license.
+ */
+
+#ifndef R8B_CDSPSINCFILTERGEN_INCLUDED
+#define R8B_CDSPSINCFILTERGEN_INCLUDED
+
+#include "r8bbase.h"
+
+namespace r8b {
+
+/**
+ * @brief Sinc function-based FIR filter generator class.
+ *
+ * Structure that holds state used to perform generation of sinc functions of
+ * various types, windowed by the Blackman window by default (but the window
+ * function can be changed if necessary).
+ */
+
+class CDSPSincFilterGen
+{
+public:
+ double Len2; ///< Required half filter kernel's length in samples (can be
+ ///< a fractional value). Final physical kernel length will be
+ ///< provided in the KernelLen variable. Len2 should be >= 2.
+ ///<
+ int KernelLen; ///< Resulting length of the filter kernel, this variable
+ ///< is set after the call to one of the "init" functions.
+ ///<
+ int fl2; ///< Internal "half kernel length" value. This value can be used
+ ///< as filter's latency in samples (taps), this variable is set after
+ ///< the call to one of the "init" functions.
+ ///<
+
+ union
+ {
+ struct
+ {
+ double Freq1; ///< Required corner circular frequency 1 [0; pi].
+ ///< Used only in the generateBand() function.
+ ///<
+ double Freq2; ///< Required corner circular frequency 2 [0; pi].
+ ///< Used only in the generateBand() function. The range
+ ///< [Freq1; Freq2] defines a pass band for the generateBand()
+ ///< function.
+ ///<
+ };
+
+ struct
+ {
+ double FracDelay; ///< Fractional delay in the range [0; 1], used
+ ///< only in the generateFrac() function. Note that the
+ ///< FracDelay parameter is actually inversed. At 0.0 value it
+ ///< produces 1 sample delay (with the latency equal to fl2),
+ ///< at 1.0 value it produces 0 sample delay (with the latency
+ ///< equal to fl2 - 1).
+ ///<
+ };
+ };
+
+ /**
+ * Window function type.
+ */
+
+ enum EWindowFunctionType
+ {
+ wftCosine, ///< Generalized cosine window function. No parameters
+ ///< required. The "Power" parameter is optional.
+ ///<
+ wftKaiser, ///< Kaiser window function. Requires the "Beta" parameter.
+ ///< The "Power" parameter is optional.
+ ///<
+ wftGaussian ///< Gaussian window function. Requires the "Sigma"
+ ///< parameter. The "Power" parameter is optional.
+ ///<
+ };
+
+ typedef double( CDSPSincFilterGen :: *CWindowFunc )(); ///< Window
+ ///< calculation function pointer type.
+ ///<
+
+ /**
+ * Function initializes *this structure for generation of a window
+ * function, odd-sized.
+ *
+ * @param WinType Window function type.
+ * @param Params Window function's parameters. If NULL, the table values
+ * may be used.
+ * @param UsePower "True" if the power factor should be used to raise the
+ * window function. If "true", the power factor should be specified as the
+ * last value in the Params array. If Params is NULL, the table or default
+ * value of -1.0 (off) will be used.
+ */
+
+ void initWindow( const EWindowFunctionType WinType = wftCosine,
+ const double* const Params = NULL, const bool UsePower = false )
+ {
+ R8BASSERT( Len2 >= 2.0 );
+
+ fl2 = (int) floor( Len2 );
+ KernelLen = fl2 + fl2 + 1;
+
+ setWindow( WinType, Params, UsePower, true );
+ }
+
+ /**
+ * Function initializes *this structure for generation of band-limited
+ * sinc filter kernel. The generateBand() function should be used to
+ * calculate the filter.
+ *
+ * @param WinType Window function type.
+ * @param Params Window function's parameters. If NULL, the table values
+ * may be used.
+ * @param UsePower "True" if the power factor should be used to raise the
+ * window function. If "true", the power factor should be specified as the
+ * last value in the Params array. If Params is NULL, the table or default
+ * value of -1.0 (off) will be used.
+ */
+
+ void initBand( const EWindowFunctionType WinType = wftCosine,
+ const double* const Params = NULL, const bool UsePower = false )
+ {
+ R8BASSERT( Len2 >= 2.0 );
+
+ fl2 = (int) floor( Len2 );
+ KernelLen = fl2 + fl2 + 1;
+ f1.init( Freq1, 0.0 );
+ f2.init( Freq2, 0.0 );
+
+ setWindow( WinType, Params, UsePower, true );
+ }
+
+ /**
+ * Function initializes *this structure for Hilbert transformation filter
+ * calculation. Freq1 and Freq2 variables are not used.
+ * The generateHilbert() function should be used to calculate the filter.
+ *
+ * @param WinType Window function type.
+ * @param Params Window function's parameters. If NULL, the table values
+ * may be used.
+ * @param UsePower "True" if the power factor should be used to raise the
+ * window function. If "true", the power factor should be specified as the
+ * last value in the Params array. If Params is NULL, the table or default
+ * value of -1.0 (off) will be used.
+ */
+
+ void initHilbert( const EWindowFunctionType WinType = wftCosine,
+ const double* const Params = NULL, const bool UsePower = false )
+ {
+ R8BASSERT( Len2 >= 2.0 );
+
+ fl2 = (int) floor( Len2 );
+ KernelLen = fl2 + fl2 + 1;
+
+ setWindow( WinType, Params, UsePower, true );
+ }
+
+ /**
+ * Function initializes *this structure for generation of full-bandwidth
+ * fractional delay sinc filter kernel. Freq1 and Freq2 variables are not
+ * used. The generateFrac() function should be used to calculate the
+ * filter.
+ *
+ * @param WinType Window function type.
+ * @param Params Window function's parameters. If NULL, the table values
+ * may be used.
+ * @param UsePower "True" if the power factor should be used to raise the
+ * window function. If "true", the power factor should be specified as the
+ * last value in the Params array. If Params is NULL, the table or default
+ * value of -1.0 (off) will be used.
+ */
+
+ void initFrac( const EWindowFunctionType WinType = wftCosine,
+ const double* const Params = NULL, const bool UsePower = false )
+ {
+ R8BASSERT( Len2 >= 2.0 );
+
+ fl2 = (int) ceil( Len2 );
+ KernelLen = fl2 + fl2;
+
+ setWindow( WinType, Params, UsePower, false, FracDelay );
+ }
+
+ /**
+ * @return The next "Hann" window function coefficient.
+ */
+
+ double calcWindowHann()
+ {
+ return( 0.5 + 0.5 * w1.generate() );
+ }
+
+ /**
+ * @return The next "Hamming" window function coefficient.
+ */
+
+ double calcWindowHamming()
+ {
+ return( 0.54 + 0.46 * w1.generate() );
+ }
+
+ /**
+ * @return The next "Blackman" window function coefficient.
+ */
+
+ double calcWindowBlackman()
+ {
+ return( 0.42 + 0.5 * w1.generate() + 0.08 * w2.generate() );
+ }
+
+ /**
+ * @return The next "Nuttall" window function coefficient.
+ */
+
+ double calcWindowNuttall()
+ {
+ return( 0.355768 + 0.487396 * w1.generate() +
+ 0.144232 * w2.generate() + 0.012604 * w3.generate() );
+ }
+
+ /**
+ * @return The next "Blackman-Nuttall" window function coefficient.
+ */
+
+ double calcWindowBlackmanNuttall()
+ {
+ return( 0.3635819 + 0.4891775 * w1.generate() +
+ 0.1365995 * w2.generate() + 0.0106411 * w3.generate() );
+ }
+
+ /**
+ * @return The next "Kaiser" window function coefficient.
+ */
+
+ double calcWindowKaiser()
+ {
+ const double n = 1.0 - sqr( wn / Len2 + KaiserLen2Frac );
+ wn++;
+
+ if( n < 0.0 )
+ {
+ return( 0.0 );
+ }
+
+ return( besselI0( KaiserBeta * sqrt( n )) / KaiserDiv );
+ }
+
+ /**
+ * @return The next "Gaussian" window function coefficient.
+ */
+
+ double calcWindowGaussian()
+ {
+ const double f = exp( -0.5 * sqr( wn / GaussianSigma +
+ GaussianSigmaFrac ));
+
+ wn++;
+
+ return( f );
+ }
+
+ /**
+ * Function calculates window function only.
+ *
+ * @param[out] op Output buffer, length = KernelLen.
+ * @param wfunc Window calculation function to use.
+ */
+
+ void generateWindow( double* op,
+ CWindowFunc wfunc = &CDSPSincFilterGen :: calcWindowBlackman )
+ {
+ op += fl2;
+ double* op2 = op;
+
+ int l = fl2;
+
+ if( Power < 0.0 )
+ {
+ *op = ( *this.*wfunc )();
+
+ while( l > 0 )
+ {
+ const double v = ( *this.*wfunc )();
+
+ op++;
+ op2--;
+ *op = v;
+ *op2 = v;
+ l--;
+ }
+ }
+ else
+ {
+ *op = pows(( *this.*wfunc )(), Power );
+
+ while( l > 0 )
+ {
+ const double v = pows(( *this.*wfunc )(), Power );
+
+ op++;
+ op2--;
+ *op = v;
+ *op2 = v;
+ l--;
+ }
+ }
+ }
+
+ /**
+ * Function calculates band-limited windowed sinc function-based filter
+ * kernel.
+ *
+ * @param[out] op Output buffer, length = KernelLen.
+ * @param wfunc Window calculation function to use.
+ */
+
+ void generateBand( double* op,
+ CWindowFunc wfunc = &CDSPSincFilterGen :: calcWindowBlackman )
+ {
+ op += fl2;
+ double* op2 = op;
+ f1.generate();
+ f2.generate();
+ int t = 1;
+
+ if( Power < 0.0 )
+ {
+ *op = ( Freq2 - Freq1 ) * ( *this.*wfunc )() / R8B_PI;
+
+ while( t <= fl2 )
+ {
+ const double v = ( f2.generate() - f1.generate() ) *
+ ( *this.*wfunc )() / ( t * R8B_PI );
+
+ op++;
+ op2--;
+ *op = v;
+ *op2 = v;
+ t++;
+ }
+ }
+ else
+ {
+ *op = ( Freq2 - Freq1 ) * pows(( *this.*wfunc )(), Power ) /
+ R8B_PI;
+
+ while( t <= fl2 )
+ {
+ const double v = ( f2.generate() - f1.generate() ) *
+ pows(( *this.*wfunc )(), Power ) / ( t * R8B_PI );
+
+ op++;
+ op2--;
+ *op = v;
+ *op2 = v;
+ t++;
+ }
+ }
+ }
+
+ /**
+ * Function calculates windowed Hilbert transformer filter kernel.
+ *
+ * @param[out] op Output buffer, length = KernelLen.
+ * @param wfunc Window calculation function to use.
+ */
+
+ void generateHilbert( double* op,
+ CWindowFunc wfunc = &CDSPSincFilterGen :: calcWindowBlackman )
+ {
+ static const double fvalues[ 2 ] = { 0.0, 2.0 / R8B_PI };
+ op += fl2;
+ double* op2 = op;
+
+ ( *this.*wfunc )();
+ *op = 0.0;
+
+ int t = 1;
+
+ if( Power < 0.0 )
+ {
+ while( t <= fl2 )
+ {
+ const double v = fvalues[ t & 1 ] * ( *this.*wfunc )() / t;
+ op++;
+ op2--;
+ *op = v;
+ *op2 = -v;
+ t++;
+ }
+ }
+ else
+ {
+ while( t <= fl2 )
+ {
+ const double v = fvalues[ t & 1 ] *
+ pows( ( *this.*wfunc )(), Power ) / t;
+
+ op++;
+ op2--;
+ *op = v;
+ *op2 = -v;
+ t++;
+ }
+ }
+ }
+
+ /**
+ * Function calculates windowed fractional delay filter kernel.
+ *
+ * @param[out] op Output buffer, length = KernelLen.
+ * @param wfunc Window calculation function to use.
+ * @param opinc Output buffer increment, in "op" elements.
+ */
+
+ void generateFrac( double* op,
+ CWindowFunc wfunc = &CDSPSincFilterGen :: calcWindowBlackman,
+ const int opinc = 1 )
+ {
+ R8BASSERT( opinc != 0 );
+
+ double f[ 2 ];
+ f[ 0 ] = sin( FracDelay * R8B_PI ) / R8B_PI;
+ f[ 1 ] = -f[ 0 ];
+
+ int t = -fl2;
+
+ if( t + FracDelay < -Len2 )
+ {
+ ( *this.*wfunc )();
+ *op = 0.0;
+ op += opinc;
+ t++;
+ }
+
+ int IsZeroX = ( fabs( FracDelay - 1.0 ) < 0x1p-42 );
+ int mt = 0 - IsZeroX;
+ IsZeroX = ( IsZeroX || fabs( FracDelay ) < 0x1p-42 );
+
+ if( Power < 0.0 )
+ {
+ while( t < mt )
+ {
+ *op = f[ t & 1 ] * ( *this.*wfunc )() / ( t + FracDelay );
+ op += opinc;
+ t++;
+ }
+
+ if( IsZeroX ) // t+FracDelay==0
+ {
+ *op = ( *this.*wfunc )();
+ }
+ else
+ {
+ *op = f[ t & 1 ] * ( *this.*wfunc )() / FracDelay; // t==0
+ }
+
+ mt = fl2 - 2;
+
+ while( t < mt )
+ {
+ op += opinc;
+ t++;
+ *op = f[ t & 1 ] * ( *this.*wfunc )() / ( t + FracDelay );
+ }
+
+ op += opinc;
+ t++;
+ const double ut = t + FracDelay;
+ *op = ( ut > Len2 ? 0.0 : f[ t & 1 ] * ( *this.*wfunc )() / ut );
+ }
+ else
+ {
+ while( t < mt )
+ {
+ *op = f[ t & 1 ] * pows( ( *this.*wfunc )(), Power ) /
+ ( t + FracDelay );
+
+ op += opinc;
+ t++;
+ }
+
+ if( IsZeroX ) // t+FracDelay==0
+ {
+ *op = pows( ( *this.*wfunc )(), Power );
+ }
+ else
+ {
+ *op = f[ t & 1 ] * pows( ( *this.*wfunc )(), Power ) /
+ FracDelay; // t==0
+ }
+
+ mt = fl2 - 2;
+
+ while( t < mt )
+ {
+ op += opinc;
+ t++;
+ *op = f[ t & 1 ] * pows( ( *this.*wfunc )(), Power ) /
+ ( t + FracDelay );
+ }
+
+ op += opinc;
+ t++;
+ const double ut = t + FracDelay;
+ *op = ( ut > Len2 ? 0.0 : f[ t & 1 ] *
+ pows( ( *this.*wfunc )(), Power ) / ut );
+ }
+ }
+
+private:
+ double Power; ///< The power factor used to raise the window function.
+ ///< Equals a negative value if the power factor should not be used.
+ ///<
+ CSineGen f1; ///< Sine function 1. Used in the generateBand() function.
+ ///<
+ CSineGen f2; ///< Sine function 2. Used in the generateBand() function.
+ ///<
+ int wn; ///< Window function integer position. 0 - center of the window
+ ///< function. This variable may not be used by some window functions.
+ ///<
+ CSineGen w1; ///< Cosine wave 1 for window function.
+ ///<
+ CSineGen w2; ///< Cosine wave 2 for window function.
+ ///<
+ CSineGen w3; ///< Cosine wave 3 for window function.
+ ///<
+
+ union
+ {
+ struct
+ {
+ double KaiserBeta; ///< Kaiser window function's "Beta"
+ ///< coefficient.
+ ///<
+ double KaiserDiv; ///< Kaiser window function's divisor.
+ ///<
+ double KaiserLen2Frac; ///< Equals FracDelay / Len2.
+ ///<
+ };
+
+ struct
+ {
+ double GaussianSigma; ///< Gaussian window function's "Sigma"
+ ///< coefficient.
+ ///<
+ double GaussianSigmaFrac; ///< Equals FracDelay / GaussianSigma.
+ ///<
+ };
+ };
+
+ /**
+ * Function initializes Kaiser window function calculation. The FracDelay
+ * variable should be initialized when using this window function.
+ *
+ * @param Params Function parameters. If NULL, the default values will be
+ * used. If not NULL, the first parameter should specify the "Beta" value.
+ * @param UsePower "True" if the power factor should be used to raise the
+ * window function.
+ * @param IsCentered "True" if centered window should be used. This
+ * parameter usually equals to "false" for fractional delay filters only.
+ */
+
+ void setWindowKaiser( const double* Params, const bool UsePower,
+ const bool IsCentered )
+ {
+ wn = ( IsCentered ? 0 : -fl2 );
+
+ if( Params == NULL )
+ {
+ KaiserBeta = 9.5945013206755156;
+ Power = ( UsePower ? 1.9718457932433306 : -1.0 );
+ }
+ else
+ {
+ KaiserBeta = clampr( Params[ 0 ], 1.0, 350.0 );
+ Power = ( UsePower ? fabs( Params[ 1 ]) : -1.0 );
+ }
+
+ KaiserDiv = besselI0( KaiserBeta );
+ KaiserLen2Frac = FracDelay / Len2;
+ }
+
+ /**
+ * Function initializes Gaussian window function calculation. The FracDelay
+ * variable should be initialized when using this window function.
+ *
+ * @param Params Function parameters. If NULL, the table values will be
+ * used. If not NULL, the first parameter should specify the "Sigma"
+ * value.
+ * @param UsePower "True" if the power factor should be used to raise the
+ * window function.
+ * @param IsCentered "True" if centered window should be used. This
+ * parameter usually equals to "false" for fractional delay filters only.
+ */
+
+ void setWindowGaussian( const double* Params, const bool UsePower,
+ const bool IsCentered )
+ {
+ wn = ( IsCentered ? 0 : -fl2 );
+
+ if( Params == NULL )
+ {
+ GaussianSigma = 1.0;
+ Power = -1.0;
+ }
+ else
+ {
+ GaussianSigma = clampr( fabs( Params[ 0 ]), 1e-1, 100.0 );
+ Power = ( UsePower ? fabs( Params[ 1 ]) : -1.0 );
+ }
+
+ GaussianSigma *= Len2;
+ GaussianSigmaFrac = FracDelay / GaussianSigma;
+ }
+
+ /**
+ * Function initializes calculation of window function of the specified
+ * type.
+ *
+ * @param WinType Window function type.
+ * @param Params Window function's parameters. If NULL, the table values
+ * may be used.
+ * @param UsePower "True" if the power factor should be used to raise the
+ * window function. If "true", the power factor should be specified as the
+ * last value in the Params array. If Params is NULL, the table or default
+ * value of -1.0 (off) will be used.
+ * @param IsCentered "True" if centered window should be used. This
+ * parameter usually equals to "false" for fractional delay filters only.
+ * @param UseFracDelay Fractional delay to use.
+ */
+
+ void setWindow( const EWindowFunctionType WinType,
+ const double* const Params, const bool UsePower,
+ const bool IsCentered, const double UseFracDelay = 0.0 )
+ {
+ FracDelay = UseFracDelay;
+
+ if( WinType == wftCosine )
+ {
+ if( IsCentered )
+ {
+ w1.init( R8B_PI / Len2, R8B_PId2 );
+ w2.init( R8B_2PI / Len2, R8B_PId2 );
+ w3.init( R8B_3PI / Len2, R8B_PId2 );
+ }
+ else
+ {
+ const double step1 = R8B_PI / Len2;
+ w1.init( step1, R8B_PId2 - step1 * fl2 + step1 * FracDelay );
+
+ const double step2 = R8B_2PI / Len2;
+ w2.init( step2, R8B_PId2 - step2 * fl2 + step2 * FracDelay );
+
+ const double step3 = R8B_3PI / Len2;
+ w3.init( step3, R8B_PId2 - step3 * fl2 + step3 * FracDelay );
+ }
+
+ Power = ( UsePower && Params != NULL ? Params[ 0 ] : -1.0 );
+ }
+ else
+ if( WinType == wftKaiser )
+ {
+ setWindowKaiser( Params, UsePower, IsCentered );
+ }
+ else
+ if( WinType == wftGaussian )
+ {
+ setWindowGaussian( Params, UsePower, IsCentered );
+ }
+ }
+};
+
+} // namespace r8b
+
+#endif // R8B_CDSPSINCFILTERGEN_INCLUDED
diff --git a/Src/external_dependencies/openmpt-trunk/include/r8brain/LICENSE b/Src/external_dependencies/openmpt-trunk/include/r8brain/LICENSE
new file mode 100644
index 00000000..669a7839
--- /dev/null
+++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2013-2022 Aleksey Vaneev
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/Src/external_dependencies/openmpt-trunk/include/r8brain/OpenMPT.txt b/Src/external_dependencies/openmpt-trunk/include/r8brain/OpenMPT.txt
new file mode 100644
index 00000000..2fcb3b9d
--- /dev/null
+++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/OpenMPT.txt
@@ -0,0 +1,6 @@
+r8brain-free resampling library from https://github.com/avaneev/r8brain-free-src
+version 5.6.
+The example program, DLL and "other" folder, PFFFT sources and logo file have been removed.
+No further local changes have been made.
+For building, premake is used to generate Visual Studio project files.
+See ../build/premake/ for details.
diff --git a/Src/external_dependencies/openmpt-trunk/include/r8brain/README.md b/Src/external_dependencies/openmpt-trunk/include/r8brain/README.md
new file mode 100644
index 00000000..af0412c1
--- /dev/null
+++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/README.md
@@ -0,0 +1,408 @@
+# r8brain-free-src - High-Quality Resampler #
+
+## Introduction ##
+
+Open source (under the MIT license) high-quality professional audio sample
+rate converter (SRC) / resampler C++ library. Features routines for SRC,
+both up- and downsampling, to/from any sample rate, including non-integer
+sample rates: it can be also used for conversion to/from SACD/DSD sample
+rates, and even go beyond that. SRC routines were implemented in a
+multi-platform C++ code, and have a high level of optimality. Also suitable
+for fast general-purpose 1D time-series resampling / interpolation (with
+relaxed filter parameters).
+
+The structure of this library's objects is such that they can be frequently
+created and destroyed in large applications with a minimal performance impact
+due to a high level of reusability of its most "initialization-expensive"
+objects: the fast Fourier transform and FIR filter objects.
+
+The SRC algorithm at first produces 2X oversampled (relative to the source
+sample rate, or the destination sample rate if the downsampling is performed)
+signal, then performs interpolation using a bank of short (8 to 30 taps,
+depending on the required precision) polynomial-interpolated sinc
+function-based fractional delay filters. This puts the algorithm into the
+league of the fastest among the most precise SRC algorithms. The more precise
+alternative being only the whole number-factored SRC, which can be slower.
+
+P.S. Please credit the creator of this library in your documentation in the
+following way: "Sample rate converter designed by Aleksey Vaneev of Voxengo".
+
+## Requirements ##
+
+C++ compiler and system with the "double" floating point type (53-bit
+mantissa) support. No explicit code for the "float" type is present in this
+library, because as practice has shown the "float"-based code performs
+considerably slower on a modern processor, at least in this library. This
+library does not have dependencies beside the standard C library, the
+"windows.h" on Windows and the "pthread.h" on macOS and Linux.
+
+## Links ##
+
+* [Documentation](https://www.voxengo.com/public/r8brain-free-src/Documentation/)
+* [Discussion](https://www.kvraudio.com/forum/viewtopic.php?t=389711)
+
+## Usage Information ##
+
+The sample rate converter (resampler) is represented by the
+**r8b::CDSPResampler** class, which is a single front-end class for the
+whole library. You do not basically need to use nor understand any other
+classes beside this class. Several derived classes that have varying levels
+of precision are also available (for full-resolution 16-bit and 24-bit
+resampling).
+
+The code of the library resides in the "r8b" C++ namespace, effectively
+isolating it from all other code. The code is thread-safe. A separate
+resampler object should be created for each audio channel or stream being
+processed concurrently.
+
+Note that you will need to compile the "r8bbase.cpp" source file and include
+the resulting object file into your application build. This source file
+includes definitions of several global static objects used by the library.
+You may also need to include to your project: the "Kernel32" library (on
+Windows) and the "pthread" library on macOS and Linux.
+
+The library is able to process signal of any scale and loudness: it is not
+limited to just a "usual" -1.0 to 1.0 range.
+
+By defining the `R8B_IPP` configuration macro it is possible to enable Intel
+IPP back-end for FFT functions, instead of the default Ooura FFT. IPP FFT
+makes sample rate conversion faster by 23% on average.
+
+ #define R8B_IPP 1
+
+If a larger initial processing delay and a very minor sample timing error are
+not an issue, for the most efficiency you can define these macros at
+the beginning of the `r8bconf.h` file, or during compilation:
+
+ #define R8B_IPP 1
+ #define R8B_FASTTIMING 1
+ #define R8B_EXTFFT 1
+
+If you do not have access to the Intel IPP then you may consider enabling the
+PFFFT which is only slightly slower than Intel IPP FFT in performance. There
+are two macros available: `R8B_PFFFT` and `R8B_PFFFT_DOUBLE`. The first macro
+enables PFFFT that works in single-precision resolution, thus limiting the
+overall resampler's precision to 24-bit sample rate conversions (for
+mission-critical professional audio applications, using the `R8B_PFFFT` macro
+is not recommended as its peak error is quite large). The second macro
+enables PFFFT implementation that works in double-precision resolution, making
+use of SSE2, AVX, and NEON intrinsics, yielding precision that is equal to
+both Intel IPP and Ooura FFT implementations.
+
+To use the PFFFT, define the `R8B_PFFFT` or `R8B_PFFFT_DOUBLE` macro, compile
+and include the supplied `pffft.cpp` or `pffft_double/pffft_double.c` file to
+your project build.
+
+ #define R8B_PFFFT 1
+
+ or
+
+ #define R8B_PFFFT_DOUBLE 1
+
+The code of this library was commented in the [Doxygen](http://www.doxygen.org/)
+style. To generate the documentation locally you may run the
+`doxygen ./other/r8bdoxy.txt` command from the library's folder.
+
+Preliminary tests show that the r8b::CDSPResampler24 resampler class achieves
+`31.8*n_cores` Mrops (`44*n_cores` for Intel IPP FFT) when converting 1
+channel of 24-bit audio from 44100 to 96000 sample rate (2% transition band),
+on a Ryzen 3700X processor-based 64-bit system. This approximately translates
+to a real-time resampling of `720*n_cores` (`1000*n_cores`) audio streams, at
+100% CPU load. Speed performance when converting to other sample rates may
+vary greatly. When comparing performance of this resampler library to another
+library make sure that the competing library is also tuned to produce a fully
+linear-phase response, has similar stop-band characteristics and similar
+sample timing precision.
+
+## Dynamic Link Library ##
+
+The functions of this SRC library are also accessible in simplified form via
+the DLL file on Windows, requiring a processor with SSE2 support (Win64
+version includes AVX2 auto-dispatch code). Delphi Pascal interface unit file
+for the DLL file is available. DLL and C LIB files are distributed in the DLL
+folder on the project's homepage. On non-Windows systems it is preferrable
+to use the C++ library directly. Note that the DLL was compiled with the
+Intel IPP enabled.
+
+## Real-Time Applications ##
+
+The resampler class of this library was designed as an asynchronous processor:
+it may produce any number of output samples, depending on the input sample
+data length and the resampling parameters. The resampler must be fed with the
+input sample data until enough output sample data were produced, with any
+excess output samples used before feeding the resampler with more input data.
+A "relief" factor here is that the resampler removes the initial processing
+latency automatically, and that after initial moments of processing the output
+becomes steady, with only minor output sample data length fluctuations.
+
+So, while for an off-line resampling a "push" method can be used,
+demonstrated in the `example.cpp` file, for a real-time resampling a "pull"
+method should be used which calls the resampling process until the output
+buffer is filled.
+
+## Notes ##
+
+When using the **r8b::CDSPResampler** class directly, you may select the
+transition band/steepness of the low-pass (reconstruction) filter, expressed
+as a percentage of the full spectral bandwidth of the input signal (or the
+output signal if the downsampling is performed), and the desired stop-band
+attenuation in decibel.
+
+The transition band is specified as the normalized spectral space of the input
+signal (or the output signal if the downsampling is performed) between the
+low-pass filter's -3 dB point and the Nyquist frequency, and ranges from 0.5%
+to 45%. Stop-band attenuation can be specified in the range from 49 to 218
+decibel. Both the transition band and stop-band attenuation affect
+resampler's overall speed performance and initial output delay. For your
+information, transition frequency range spans 175% of the specified transition
+band, which means that for 2% transition band, frequency response below
+0.965\*Nyquist is linear.
+
+This SRC library also implements a much faster "power of 2" resampling (e.g.
+2X, 4X, 8X, 16X, 3X, 3\*2X, 3\*4X, 3\*8X, etc. upsampling and downsampling),
+which is engaged automatically if the resampling parameters permit.
+
+This library was tested for compatibility with [GNU C++](https://gcc.gnu.org/),
+[Microsoft Visual C++](https://visualstudio.microsoft.com/),
+[LLVM](https://llvm.org/) and [Intel C++](https://software.intel.com/en-us/c-compilers)
+compilers, on 32- and 64-bit Windows, macOS, and CentOS Linux.
+
+Most code is "inline", without the need to compile many source files. The
+memory footprint is quite modest.
+
+## Acknowledgements ##
+
+r8brain-free-src is bundled with the following code:
+
+* FFT routines Copyright (c) 1996-2001 Takuya OOURA.
+[Homepage](http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html)
+* PFFFT Copyright (c) 2013 Julien Pommier.
+[Homepage](https://bitbucket.org/jpommier/pffft)
+* PFFFT DOUBLE Copyright (c) 2020 Hayati Ayguen, Dario Mambro.
+[Homepage](https://github.com/unevens/pffft)
+
+## Users ##
+
+This library is used by:
+
+* [REAPER](https://reaper.fm/)
+* [AUDIRVANA](https://audirvana.com/)
+* [Red Dead Redemption 2](https://www.rockstargames.com/reddeadredemption2/credits)
+* [Mini Piano Lite](https://play.google.com/store/apps/details?id=umito.android.minipiano)
+* [OpenMPT](https://openmpt.org/)
+* [Boogex Guitar Amp audio plugin](https://www.voxengo.com/product/boogex/)
+* [r8brain free sample rate converter](https://www.voxengo.com/product/r8brain/)
+* [Voice Aloud Reader](https://play.google.com/store/apps/details?id=com.hyperionics.avarLic)
+* [Zynewave Podium](https://zynewave.com/podium/)
+* [Phonometrica](http://www.phonometrica-ling.org/index.html)
+* [Ripcord](https://cancel.fm/ripcord/)
+* [TensorVox](https://github.com/ZDisket/TensorVox)
+* [Curvessor](https://github.com/unevens/Curvessor)
+
+Please send me a note via aleksey.vaneev@gmail.com and I will include a link
+to your software product to this list of users. This list is important in
+maintaining confidence in this library among the interested parties. The
+inclusion into this list is not mandatory.
+
+## Change Log ##
+
+Version 5.6:
+
+* Added SSE and NEON implementations to `CDSPHBUpsampler` yielding 15%
+performance improvement of power-of-2 upsampling.
+* Added SSE and NEON implementations to the `CDSPRealFFT::multiplyBlocksZP`
+function, for 2-3% performance improvement.
+* Added intermediate interpolator's transition band limitation, for logical
+hardness (not practically needed).
+* Added the `aDoConsumeLatency` parameter to `CDSPHBUpsampler` constructor,
+for "inline" DSP uses of the class.
+* Made various minor changes across the codebase.
+
+Version 5.5:
+
+* Hardened positional logic of fractional filter calculation, removed
+redundant multiplications.
+* Removed unnecessary function templating from the `CDSPSincFilterGen` class.
+* Added the `__ARM_NEON` macro to NEON availability detection.
+
+Version 5.4:
+
+* Added compiler specializations to previously optimized inner loops.
+"Shuffled" SIMD interpolation code is not efficient on Apple M1. Intel C++
+Compiler vectorizes "whole stepping" interpolation as good as a
+manually-written SSE.
+* Reorganized SIMD instructions for a slightly better performance.
+* Changed internal buffer sizes of half-band resamplers (1-2% performance
+boost).
+* Fixed compiler warnings in PFFFT code.
+* Added several asserts to the code.
+
+Version 5.3:
+
+* Optimized inner loops of the fractional interpolator, added SSE2 and NEON
+intrinsics, resulting in a measurable (8-25%) performance gain.
+* Optimized filter calculation functions: changed some divisions by a constant
+to multiplications.
+* Renamed M_PI macros to R8B_PI, to avoid macro collisions.
+* Removed redundant code and macros.
+
+Version 5.2:
+
+* Modified `PFFFT` and `PFFFT DOUBLE` conditional pre-processor directives to
+always enable NEON on `aarch64`/`arm64` (this includes code built for
+Apple M1).
+
+Version 5.1:
+
+* Changed alignment in the `CFixedBuffer` class to 64 bytes. This improves AVX
+performance of the `PFFFT DOUBLE` implementation by a few percent.
+* Removed redundant files from the `pffft_double` folder, integrated the
+`pffft_common.c` file into the `pffft_double.c` file.
+
+Version 5.0:
+
+* Removed a long-outdated macros from the `r8bconf.h` file.
+* Changed a conditional pre-processor directive in the `pf_sse2_double.h` file
+as per PFFFT DOUBLE author's suggestion, to allow SSE2 intrinsics in most
+compilers.
+* Fixed "License.txt" misnaming in the source files to "LICENSE".
+
+Version 4.10:
+
+* Added the `PFFFT DOUBLE` implementation support. Now available via the
+`R8B_PFFFT_DOUBLE` definition macro.
+
+Version 4.9:
+
+* Reoptimized half-band and fractional interpolation filters with a stricter
+frequency response linearity constraints. This did not impact the average
+speed performance.
+
+Version 4.8:
+
+* Added a limit to the intermediate filter's transition band, to keep the
+latency under control at any resampling ratio.
+
+Version 4.7:
+
+* Added `#ifndef _USE_MATH_DEFINES` to `pffft.cpp`.
+* Moved `#include "pffft.h"` to `CDSPRealFFT.h`.
+
+Version 4.6:
+
+* Removed the `MaxInLen` parameter from the `oneshot()` function.
+* Decreased intermediate low-pass filter's transition band slightly, for more
+stable quality.
+
+Version 4.5:
+
+* Fixed VS2017 compiler warnings.
+
+Version 4.4:
+
+* Fixed the "Declaration hides a member" Intel C++ compiler warnings.
+
+Version 4.3:
+
+* Added //$ markers for internal debugging purposes.
+
+Version 4.2:
+
+* Backed-off max transition band to 45 and MinAtten to 49.
+* Implemented Wave64 and AIFF file input in the `r8bfreesrc` bench tool. The
+tool is now compiled with the `R8B_IPP 1` and `R8B_EXTFFT 1` macros to
+demonstrate the maximal achievable performance.
+
+Version 4.1:
+
+* Updated allowed ReqAtten range to 52-218, ReqTransBand 0.5-56. It is
+possible to specify filter parameters slightly beyond these values, but the
+resulting filter will be slightly out of specification as well.
+* Optimized static filter banks allocation.
+
+Version 4.0:
+
+* A major overhaul of interpolation classes: now templated parameters are not
+used, all required parameters are calculated at runtime. Static filter bank
+object is not used, it is created when necessary, and then cached.
+* Implemented one-third interpolation filters, however, this did not
+measurably increase resampler's speed.
+
+Version 3.7:
+
+* Used ippsMul_64f_I() in the CDSPRealFFT::multiplyBlockZ() function for a
+minor conversion speed increase in Intel IPP mode.
+
+Version 3.6:
+
+* Added memory alignment to allocated buffers which boosts performance by 1.5%
+when Intel IPP FFT is in use.
+* Implemented PFFFT support.
+
+Version 3.5:
+
+* Improved resampling speed very slightly.
+* Updated the `r8bfreesrc` benchmark tool to support RF64 WAV files.
+
+Version 3.4:
+
+* Added a more efficient half-band filters for >= 256 resampling ratios.
+
+Version 3.3:
+
+* Made minor fix to downsampling for some use cases of CDSPBlockConvolver,
+did not affect resampler.
+* Converted CDSPHBUpsampler and CDSPHBDownsampler's inner functions to
+static functions, which boosted high-ratio resampling performance measurably.
+
+Version 3.2:
+
+* Minor fix to the latency consumption mechanism.
+
+Version 3.1:
+
+* Reoptimized fractional delay filter's windowing function.
+
+Version 3.0:
+
+* Implemented a new variant of the getInLenBeforeOutStart() function.
+* Reimplemented oneshot() function to support `float` buffer types.
+* Considerably improved downsampling performance at high resampling ratios.
+* Implemented intermediate interpolation technique which boosted upsampling
+performance for most resampling ratios considerably.
+* Removed the ConvCount constant - now resampler supports virtually any
+resampling ratios.
+* Removed the UsePower2 parameter from the resampler constructor.
+* Now resampler's process() function always returns pointer to the internal
+buffer, input buffer is returned only if no resampling happens.
+* Resampler's getMaxOutLen() function can now be used to obtain the maximal
+output length that can be produced by the resampler in a single call.
+* Added a more efficient "one-third" filters to half-band upsampler and
+downsampler.
+
+Version 2.1:
+
+* Optimized 2X half-band downsampler.
+
+Version 2.0:
+
+* Optimized power-of-2 upsampling.
+
+Version 1.9:
+
+* Optimized half-band downsampling filter.
+* Implemented whole-number stepping resampling.
+* Added `R8B_EXTFFT` configuration option.
+* Fixed initial sub-sample offseting on downsampling.
+
+Version 1.8:
+
+* Added `R8B_FASTTIMING` configuration option.
+
+Version 1.7:
+
+* Improved sample timing precision.
+* Increased CDSPResampler :: ConvCountMax to 28 to support a lot wider
+resampling ratios.
+* Added `bench` tools.
+* Removed getInLenBeforeOutStart() due to incorrect calculation.
diff --git a/Src/external_dependencies/openmpt-trunk/include/r8brain/fft4g.h b/Src/external_dependencies/openmpt-trunk/include/r8brain/fft4g.h
new file mode 100644
index 00000000..f6f2bc2a
--- /dev/null
+++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/fft4g.h
@@ -0,0 +1,1316 @@
+//$ nobt
+//$ nocpp
+
+/**
+ * @file fft4g.h
+ *
+ * @brief Wrapper class for Takuya OOURA's FFT functions.
+ *
+ * Functions from the FFT package by: Copyright(C) 1996-2001 Takuya OOURA
+ * http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html
+ *
+ * Modified and used with permission granted by the license.
+ *
+ * Here, the original "fft4g.c" file was wrapped into the "ooura_fft" class.
+ */
+
+#ifndef R8B_FFT4G_INCLUDED
+#define R8B_FFT4G_INCLUDED
+
+/*
+Fast Fourier/Cosine/Sine Transform
+ dimension :one
+ data length :power of 2
+ decimation :frequency
+ radix :4, 2
+ data :inplace
+ table :use
+functions
+ cdft: Complex Discrete Fourier Transform
+ rdft: Real Discrete Fourier Transform
+ ddct: Discrete Cosine Transform
+ ddst: Discrete Sine Transform
+ dfct: Cosine Transform of RDFT (Real Symmetric DFT)
+ dfst: Sine Transform of RDFT (Real Anti-symmetric DFT)
+function prototypes
+ void cdft(int, int, FPType *, int *, FPType *);
+ void rdft(int, int, FPType *, int *, FPType *);
+ void ddct(int, int, FPType *, int *, FPType *);
+ void ddst(int, int, FPType *, int *, FPType *);
+ void dfct(int, FPType *, FPType *, int *, FPType *);
+ void dfst(int, FPType *, FPType *, int *, FPType *);
+
+
+-------- Complex DFT (Discrete Fourier Transform) --------
+ [definition]
+ <case1>
+ X[k] = sum_j=0^n-1 x[j]*exp(2*pi*i*j*k/n), 0<=k<n
+ <case2>
+ X[k] = sum_j=0^n-1 x[j]*exp(-2*pi*i*j*k/n), 0<=k<n
+ (notes: sum_j=0^n-1 is a summation from j=0 to n-1)
+ [usage]
+ <case1>
+ ip[0] = 0; // first time only
+ cdft(2*n, 1, a, ip, w);
+ <case2>
+ ip[0] = 0; // first time only
+ cdft(2*n, -1, a, ip, w);
+ [parameters]
+ 2*n :data length (int)
+ n >= 1, n = power of 2
+ a[0...2*n-1] :input/output data (FPType *)
+ input data
+ a[2*j] = Re(x[j]),
+ a[2*j+1] = Im(x[j]), 0<=j<n
+ output data
+ a[2*k] = Re(X[k]),
+ a[2*k+1] = Im(X[k]), 0<=k<n
+ ip[0...*] :work area for bit reversal (int *)
+ length of ip >= 2+sqrt(n)
+ strictly,
+ length of ip >=
+ 2+(1<<(int)(log(n+0.5)/log(2))/2).
+ ip[0],ip[1] are pointers of the cos/sin table.
+ w[0...n/2-1] :cos/sin table (FPType *)
+ w[],ip[] are initialized if ip[0] == 0.
+ [remark]
+ Inverse of
+ cdft(2*n, -1, a, ip, w);
+ is
+ cdft(2*n, 1, a, ip, w);
+ for (j = 0; j <= 2 * n - 1; j++) {
+ a[j] *= 1.0 / n;
+ }
+ .
+
+
+-------- Real DFT / Inverse of Real DFT --------
+ [definition]
+ <case1> RDFT
+ R[k] = sum_j=0^n-1 a[j]*cos(2*pi*j*k/n), 0<=k<=n/2
+ I[k] = sum_j=0^n-1 a[j]*sin(2*pi*j*k/n), 0<k<n/2
+ <case2> IRDFT (excluding scale)
+ a[k] = (R[0] + R[n/2]*cos(pi*k))/2 +
+ sum_j=1^n/2-1 R[j]*cos(2*pi*j*k/n) +
+ sum_j=1^n/2-1 I[j]*sin(2*pi*j*k/n), 0<=k<n
+ [usage]
+ <case1>
+ ip[0] = 0; // first time only
+ rdft(n, 1, a, ip, w);
+ <case2>
+ ip[0] = 0; // first time only
+ rdft(n, -1, a, ip, w);
+ [parameters]
+ n :data length (int)
+ n >= 2, n = power of 2
+ a[0...n-1] :input/output data (FPType *)
+ <case1>
+ output data
+ a[2*k] = R[k], 0<=k<n/2
+ a[2*k+1] = I[k], 0<k<n/2
+ a[1] = R[n/2]
+ <case2>
+ input data
+ a[2*j] = R[j], 0<=j<n/2
+ a[2*j+1] = I[j], 0<j<n/2
+ a[1] = R[n/2]
+ ip[0...*] :work area for bit reversal (int *)
+ length of ip >= 2+sqrt(n/2)
+ strictly,
+ length of ip >=
+ 2+(1<<(int)(log(n/2+0.5)/log(2))/2).
+ ip[0],ip[1] are pointers of the cos/sin table.
+ w[0...n/2-1] :cos/sin table (FPType *)
+ w[],ip[] are initialized if ip[0] == 0.
+ [remark]
+ Inverse of
+ rdft(n, 1, a, ip, w);
+ is
+ rdft(n, -1, a, ip, w);
+ for (j = 0; j <= n - 1; j++) {
+ a[j] *= 2.0 / n;
+ }
+ .
+
+
+-------- DCT (Discrete Cosine Transform) / Inverse of DCT --------
+ [definition]
+ <case1> IDCT (excluding scale)
+ C[k] = sum_j=0^n-1 a[j]*cos(pi*j*(k+1/2)/n), 0<=k<n
+ <case2> DCT
+ C[k] = sum_j=0^n-1 a[j]*cos(pi*(j+1/2)*k/n), 0<=k<n
+ [usage]
+ <case1>
+ ip[0] = 0; // first time only
+ ddct(n, 1, a, ip, w);
+ <case2>
+ ip[0] = 0; // first time only
+ ddct(n, -1, a, ip, w);
+ [parameters]
+ n :data length (int)
+ n >= 2, n = power of 2
+ a[0...n-1] :input/output data (FPType *)
+ output data
+ a[k] = C[k], 0<=k<n
+ ip[0...*] :work area for bit reversal (int *)
+ length of ip >= 2+sqrt(n/2)
+ strictly,
+ length of ip >=
+ 2+(1<<(int)(log(n/2+0.5)/log(2))/2).
+ ip[0],ip[1] are pointers of the cos/sin table.
+ w[0...n*5/4-1] :cos/sin table (FPType *)
+ w[],ip[] are initialized if ip[0] == 0.
+ [remark]
+ Inverse of
+ ddct(n, -1, a, ip, w);
+ is
+ a[0] *= 0.5;
+ ddct(n, 1, a, ip, w);
+ for (j = 0; j <= n - 1; j++) {
+ a[j] *= 2.0 / n;
+ }
+ .
+
+
+-------- DST (Discrete Sine Transform) / Inverse of DST --------
+ [definition]
+ <case1> IDST (excluding scale)
+ S[k] = sum_j=1^n A[j]*sin(pi*j*(k+1/2)/n), 0<=k<n
+ <case2> DST
+ S[k] = sum_j=0^n-1 a[j]*sin(pi*(j+1/2)*k/n), 0<k<=n
+ [usage]
+ <case1>
+ ip[0] = 0; // first time only
+ ddst(n, 1, a, ip, w);
+ <case2>
+ ip[0] = 0; // first time only
+ ddst(n, -1, a, ip, w);
+ [parameters]
+ n :data length (int)
+ n >= 2, n = power of 2
+ a[0...n-1] :input/output data (FPType *)
+ <case1>
+ input data
+ a[j] = A[j], 0<j<n
+ a[0] = A[n]
+ output data
+ a[k] = S[k], 0<=k<n
+ <case2>
+ output data
+ a[k] = S[k], 0<k<n
+ a[0] = S[n]
+ ip[0...*] :work area for bit reversal (int *)
+ length of ip >= 2+sqrt(n/2)
+ strictly,
+ length of ip >=
+ 2+(1<<(int)(log(n/2+0.5)/log(2))/2).
+ ip[0],ip[1] are pointers of the cos/sin table.
+ w[0...n*5/4-1] :cos/sin table (FPType *)
+ w[],ip[] are initialized if ip[0] == 0.
+ [remark]
+ Inverse of
+ ddst(n, -1, a, ip, w);
+ is
+ a[0] *= 0.5;
+ ddst(n, 1, a, ip, w);
+ for (j = 0; j <= n - 1; j++) {
+ a[j] *= 2.0 / n;
+ }
+ .
+
+
+-------- Cosine Transform of RDFT (Real Symmetric DFT) --------
+ [definition]
+ C[k] = sum_j=0^n a[j]*cos(pi*j*k/n), 0<=k<=n
+ [usage]
+ ip[0] = 0; // first time only
+ dfct(n, a, t, ip, w);
+ [parameters]
+ n :data length - 1 (int)
+ n >= 2, n = power of 2
+ a[0...n] :input/output data (FPType *)
+ output data
+ a[k] = C[k], 0<=k<=n
+ t[0...n/2] :work area (FPType *)
+ ip[0...*] :work area for bit reversal (int *)
+ length of ip >= 2+sqrt(n/4)
+ strictly,
+ length of ip >=
+ 2+(1<<(int)(log(n/4+0.5)/log(2))/2).
+ ip[0],ip[1] are pointers of the cos/sin table.
+ w[0...n*5/8-1] :cos/sin table (FPType *)
+ w[],ip[] are initialized if ip[0] == 0.
+ [remark]
+ Inverse of
+ a[0] *= 0.5;
+ a[n] *= 0.5;
+ dfct(n, a, t, ip, w);
+ is
+ a[0] *= 0.5;
+ a[n] *= 0.5;
+ dfct(n, a, t, ip, w);
+ for (j = 0; j <= n; j++) {
+ a[j] *= 2.0 / n;
+ }
+ .
+
+
+-------- Sine Transform of RDFT (Real Anti-symmetric DFT) --------
+ [definition]
+ S[k] = sum_j=1^n-1 a[j]*sin(pi*j*k/n), 0<k<n
+ [usage]
+ ip[0] = 0; // first time only
+ dfst(n, a, t, ip, w);
+ [parameters]
+ n :data length + 1 (int)
+ n >= 2, n = power of 2
+ a[0...n-1] :input/output data (FPType *)
+ output data
+ a[k] = S[k], 0<k<n
+ (a[0] is used for work area)
+ t[0...n/2-1] :work area (FPType *)
+ ip[0...*] :work area for bit reversal (int *)
+ length of ip >= 2+sqrt(n/4)
+ strictly,
+ length of ip >=
+ 2+(1<<(int)(log(n/4+0.5)/log(2))/2).
+ ip[0],ip[1] are pointers of the cos/sin table.
+ w[0...n*5/8-1] :cos/sin table (FPType *)
+ w[],ip[] are initialized if ip[0] == 0.
+ [remark]
+ Inverse of
+ dfst(n, a, t, ip, w);
+ is
+ dfst(n, a, t, ip, w);
+ for (j = 1; j <= n - 1; j++) {
+ a[j] *= 2.0 / n;
+ }
+ .
+
+
+Appendix :
+ The cos/sin table is recalculated when the larger table required.
+ w[] and ip[] are compatible with all routines.
+*/
+
+namespace r8b {
+
+/**
+ * @brief A wrapper class around Takuya OOURA's FFT functions.
+ *
+ * A wrapper class around fft4g.c file's FFT functions by Takuya OOURA.
+ * Provides static private functions for use by the CDSPRealFFT class.
+ */
+
+class ooura_fft
+{
+friend class CDSPRealFFT;
+
+private:
+typedef double FPType;
+
+static void cdft(int n, int isgn, FPType *a, int *ip, FPType *w)
+{
+ if (n > (ip[0] << 2)) {
+ makewt(n >> 2, ip, w);
+ }
+ if (n > 4) {
+ if (isgn >= 0) {
+ bitrv2(n, ip + 2, a);
+ cftfsub(n, a, w);
+ } else {
+ bitrv2conj(n, ip + 2, a);
+ cftbsub(n, a, w);
+ }
+ } else if (n == 4) {
+ cftfsub(n, a, w);
+ }
+}
+
+static void rdft(int n, int isgn, FPType *a, int *ip, FPType *w)
+{
+ int nw, nc;
+ double xi;
+
+ nw = ip[0];
+ if (n > (nw << 2)) {
+ nw = n >> 2;
+ makewt(nw, ip, w);
+ }
+ nc = ip[1];
+ if (n > (nc << 2)) {
+ nc = n >> 2;
+ makect(nc, ip, w + nw);
+ }
+ if (isgn >= 0) {
+ if (n > 4) {
+ bitrv2(n, ip + 2, a);
+ cftfsub(n, a, w);
+ rftfsub(n, a, nc, w + nw);
+ } else if (n == 4) {
+ cftfsub(n, a, w);
+ }
+ xi = a[0] - a[1];
+ a[0] += a[1];
+ a[1] = xi;
+ } else {
+ a[1] = 0.5 * (a[0] - a[1]);
+ a[0] -= a[1];
+ if (n > 4) {
+ rftbsub(n, a, nc, w + nw);
+ bitrv2(n, ip + 2, a);
+ cftbsub(n, a, w);
+ } else if (n == 4) {
+ cftfsub(n, a, w);
+ }
+ }
+}
+
+static void ddct(int n, int isgn, FPType *a, int *ip, FPType *w)
+{
+ int j, nw, nc;
+ double xr;
+
+ nw = ip[0];
+ if (n > (nw << 2)) {
+ nw = n >> 2;
+ makewt(nw, ip, w);
+ }
+ nc = ip[1];
+ if (n > nc) {
+ nc = n;
+ makect(nc, ip, w + nw);
+ }
+ if (isgn < 0) {
+ xr = a[n - 1];
+ for (j = n - 2; j >= 2; j -= 2) {
+ a[j + 1] = a[j] - a[j - 1];
+ a[j] += a[j - 1];
+ }
+ a[1] = a[0] - xr;
+ a[0] += xr;
+ if (n > 4) {
+ rftbsub(n, a, nc, w + nw);
+ bitrv2(n, ip + 2, a);
+ cftbsub(n, a, w);
+ } else if (n == 4) {
+ cftfsub(n, a, w);
+ }
+ }
+ dctsub(n, a, nc, w + nw);
+ if (isgn >= 0) {
+ if (n > 4) {
+ bitrv2(n, ip + 2, a);
+ cftfsub(n, a, w);
+ rftfsub(n, a, nc, w + nw);
+ } else if (n == 4) {
+ cftfsub(n, a, w);
+ }
+ xr = a[0] - a[1];
+ a[0] += a[1];
+ for (j = 2; j < n; j += 2) {
+ a[j - 1] = a[j] - a[j + 1];
+ a[j] += a[j + 1];
+ }
+ a[n - 1] = xr;
+ }
+}
+
+static void ddst(int n, int isgn, FPType *a, int *ip, FPType *w)
+{
+ int j, nw, nc;
+ double xr;
+
+ nw = ip[0];
+ if (n > (nw << 2)) {
+ nw = n >> 2;
+ makewt(nw, ip, w);
+ }
+ nc = ip[1];
+ if (n > nc) {
+ nc = n;
+ makect(nc, ip, w + nw);
+ }
+ if (isgn < 0) {
+ xr = a[n - 1];
+ for (j = n - 2; j >= 2; j -= 2) {
+ a[j + 1] = -a[j] - a[j - 1];
+ a[j] -= a[j - 1];
+ }
+ a[1] = a[0] + xr;
+ a[0] -= xr;
+ if (n > 4) {
+ rftbsub(n, a, nc, w + nw);
+ bitrv2(n, ip + 2, a);
+ cftbsub(n, a, w);
+ } else if (n == 4) {
+ cftfsub(n, a, w);
+ }
+ }
+ dstsub(n, a, nc, w + nw);
+ if (isgn >= 0) {
+ if (n > 4) {
+ bitrv2(n, ip + 2, a);
+ cftfsub(n, a, w);
+ rftfsub(n, a, nc, w + nw);
+ } else if (n == 4) {
+ cftfsub(n, a, w);
+ }
+ xr = a[0] - a[1];
+ a[0] += a[1];
+ for (j = 2; j < n; j += 2) {
+ a[j - 1] = -a[j] - a[j + 1];
+ a[j] -= a[j + 1];
+ }
+ a[n - 1] = -xr;
+ }
+}
+
+static void dfct(int n, FPType *a, FPType *t, int *ip, FPType *w)
+{
+ int j, k, l, m, mh, nw, nc;
+ double xr, xi, yr, yi;
+
+ nw = ip[0];
+ if (n > (nw << 3)) {
+ nw = n >> 3;
+ makewt(nw, ip, w);
+ }
+ nc = ip[1];
+ if (n > (nc << 1)) {
+ nc = n >> 1;
+ makect(nc, ip, w + nw);
+ }
+ m = n >> 1;
+ yi = a[m];
+ xi = a[0] + a[n];
+ a[0] -= a[n];
+ t[0] = xi - yi;
+ t[m] = xi + yi;
+ if (n > 2) {
+ mh = m >> 1;
+ for (j = 1; j < mh; j++) {
+ k = m - j;
+ xr = a[j] - a[n - j];
+ xi = a[j] + a[n - j];
+ yr = a[k] - a[n - k];
+ yi = a[k] + a[n - k];
+ a[j] = xr;
+ a[k] = yr;
+ t[j] = xi - yi;
+ t[k] = xi + yi;
+ }
+ t[mh] = a[mh] + a[n - mh];
+ a[mh] -= a[n - mh];
+ dctsub(m, a, nc, w + nw);
+ if (m > 4) {
+ bitrv2(m, ip + 2, a);
+ cftfsub(m, a, w);
+ rftfsub(m, a, nc, w + nw);
+ } else if (m == 4) {
+ cftfsub(m, a, w);
+ }
+ a[n - 1] = a[0] - a[1];
+ a[1] = a[0] + a[1];
+ for (j = m - 2; j >= 2; j -= 2) {
+ a[2 * j + 1] = a[j] + a[j + 1];
+ a[2 * j - 1] = a[j] - a[j + 1];
+ }
+ l = 2;
+ m = mh;
+ while (m >= 2) {
+ dctsub(m, t, nc, w + nw);
+ if (m > 4) {
+ bitrv2(m, ip + 2, t);
+ cftfsub(m, t, w);
+ rftfsub(m, t, nc, w + nw);
+ } else if (m == 4) {
+ cftfsub(m, t, w);
+ }
+ a[n - l] = t[0] - t[1];
+ a[l] = t[0] + t[1];
+ k = 0;
+ for (j = 2; j < m; j += 2) {
+ k += l << 2;
+ a[k - l] = t[j] - t[j + 1];
+ a[k + l] = t[j] + t[j + 1];
+ }
+ l <<= 1;
+ mh = m >> 1;
+ for (j = 0; j < mh; j++) {
+ k = m - j;
+ t[j] = t[m + k] - t[m + j];
+ t[k] = t[m + k] + t[m + j];
+ }
+ t[mh] = t[m + mh];
+ m = mh;
+ }
+ a[l] = t[0];
+ a[n] = t[2] - t[1];
+ a[0] = t[2] + t[1];
+ } else {
+ a[1] = a[0];
+ a[2] = t[0];
+ a[0] = t[1];
+ }
+}
+
+static void dfst(int n, FPType *a, FPType *t, int *ip, FPType *w)
+{
+ int j, k, l, m, mh, nw, nc;
+ double xr, xi, yr, yi;
+
+ nw = ip[0];
+ if (n > (nw << 3)) {
+ nw = n >> 3;
+ makewt(nw, ip, w);
+ }
+ nc = ip[1];
+ if (n > (nc << 1)) {
+ nc = n >> 1;
+ makect(nc, ip, w + nw);
+ }
+ if (n > 2) {
+ m = n >> 1;
+ mh = m >> 1;
+ for (j = 1; j < mh; j++) {
+ k = m - j;
+ xr = a[j] + a[n - j];
+ xi = a[j] - a[n - j];
+ yr = a[k] + a[n - k];
+ yi = a[k] - a[n - k];
+ a[j] = xr;
+ a[k] = yr;
+ t[j] = xi + yi;
+ t[k] = xi - yi;
+ }
+ t[0] = a[mh] - a[n - mh];
+ a[mh] += a[n - mh];
+ a[0] = a[m];
+ dstsub(m, a, nc, w + nw);
+ if (m > 4) {
+ bitrv2(m, ip + 2, a);
+ cftfsub(m, a, w);
+ rftfsub(m, a, nc, w + nw);
+ } else if (m == 4) {
+ cftfsub(m, a, w);
+ }
+ a[n - 1] = a[1] - a[0];
+ a[1] = a[0] + a[1];
+ for (j = m - 2; j >= 2; j -= 2) {
+ a[2 * j + 1] = a[j] - a[j + 1];
+ a[2 * j - 1] = -a[j] - a[j + 1];
+ }
+ l = 2;
+ m = mh;
+ while (m >= 2) {
+ dstsub(m, t, nc, w + nw);
+ if (m > 4) {
+ bitrv2(m, ip + 2, t);
+ cftfsub(m, t, w);
+ rftfsub(m, t, nc, w + nw);
+ } else if (m == 4) {
+ cftfsub(m, t, w);
+ }
+ a[n - l] = t[1] - t[0];
+ a[l] = t[0] + t[1];
+ k = 0;
+ for (j = 2; j < m; j += 2) {
+ k += l << 2;
+ a[k - l] = -t[j] - t[j + 1];
+ a[k + l] = t[j] - t[j + 1];
+ }
+ l <<= 1;
+ mh = m >> 1;
+ for (j = 1; j < mh; j++) {
+ k = m - j;
+ t[j] = t[m + k] + t[m + j];
+ t[k] = t[m + k] - t[m + j];
+ }
+ t[0] = t[m + mh];
+ m = mh;
+ }
+ a[l] = t[0];
+ }
+ a[0] = 0;
+}
+
+/* -------- initializing routines -------- */
+
+static void makewt(int nw, int *ip, FPType *w)
+{
+ int j, nwh;
+ double delta, x, y;
+
+ ip[0] = nw;
+ ip[1] = 1;
+ if (nw > 2) {
+ nwh = nw >> 1;
+ delta = atan(1.0) / nwh;
+ w[0] = 1;
+ w[1] = 0;
+ w[nwh] = cos(delta * nwh);
+ w[nwh + 1] = w[nwh];
+ if (nwh > 2) {
+ for (j = 2; j < nwh; j += 2) {
+ x = cos(delta * j);
+ y = sin(delta * j);
+ w[j] = x;
+ w[j + 1] = y;
+ w[nw - j] = y;
+ w[nw - j + 1] = x;
+ }
+ bitrv2(nw, ip + 2, w);
+ }
+ }
+}
+
+static void makect(int nc, int *ip, FPType *c)
+{
+ int j, nch;
+ double delta;
+
+ ip[1] = nc;
+ if (nc > 1) {
+ nch = nc >> 1;
+ delta = atan(1.0) / nch;
+ c[0] = cos(delta * nch);
+ c[nch] = 0.5 * c[0];
+ for (j = 1; j < nch; j++) {
+ c[j] = 0.5 * cos(delta * j);
+ c[nc - j] = 0.5 * sin(delta * j);
+ }
+ }
+}
+
+/* -------- child routines -------- */
+
+static void bitrv2(int n, int *ip, FPType *a)
+{
+ int j, j1, k, k1, l, m, m2;
+ double xr, xi, yr, yi;
+
+ ip[0] = 0;
+ l = n;
+ m = 1;
+ while ((m << 3) < l) {
+ l >>= 1;
+ for (j = 0; j < m; j++) {
+ ip[m + j] = ip[j] + l;
+ }
+ m <<= 1;
+ }
+ m2 = 2 * m;
+ if ((m << 3) == l) {
+ for (k = 0; k < m; k++) {
+ for (j = 0; j < k; j++) {
+ j1 = 2 * j + ip[k];
+ k1 = 2 * k + ip[j];
+ xr = a[j1];
+ xi = a[j1 + 1];
+ yr = a[k1];
+ yi = a[k1 + 1];
+ a[j1] = yr;
+ a[j1 + 1] = yi;
+ a[k1] = xr;
+ a[k1 + 1] = xi;
+ j1 += m2;
+ k1 += 2 * m2;
+ xr = a[j1];
+ xi = a[j1 + 1];
+ yr = a[k1];
+ yi = a[k1 + 1];
+ a[j1] = yr;
+ a[j1 + 1] = yi;
+ a[k1] = xr;
+ a[k1 + 1] = xi;
+ j1 += m2;
+ k1 -= m2;
+ xr = a[j1];
+ xi = a[j1 + 1];
+ yr = a[k1];
+ yi = a[k1 + 1];
+ a[j1] = yr;
+ a[j1 + 1] = yi;
+ a[k1] = xr;
+ a[k1 + 1] = xi;
+ j1 += m2;
+ k1 += 2 * m2;
+ xr = a[j1];
+ xi = a[j1 + 1];
+ yr = a[k1];
+ yi = a[k1 + 1];
+ a[j1] = yr;
+ a[j1 + 1] = yi;
+ a[k1] = xr;
+ a[k1 + 1] = xi;
+ }
+ j1 = 2 * k + m2 + ip[k];
+ k1 = j1 + m2;
+ xr = a[j1];
+ xi = a[j1 + 1];
+ yr = a[k1];
+ yi = a[k1 + 1];
+ a[j1] = yr;
+ a[j1 + 1] = yi;
+ a[k1] = xr;
+ a[k1 + 1] = xi;
+ }
+ } else {
+ for (k = 1; k < m; k++) {
+ for (j = 0; j < k; j++) {
+ j1 = 2 * j + ip[k];
+ k1 = 2 * k + ip[j];
+ xr = a[j1];
+ xi = a[j1 + 1];
+ yr = a[k1];
+ yi = a[k1 + 1];
+ a[j1] = yr;
+ a[j1 + 1] = yi;
+ a[k1] = xr;
+ a[k1 + 1] = xi;
+ j1 += m2;
+ k1 += m2;
+ xr = a[j1];
+ xi = a[j1 + 1];
+ yr = a[k1];
+ yi = a[k1 + 1];
+ a[j1] = yr;
+ a[j1 + 1] = yi;
+ a[k1] = xr;
+ a[k1 + 1] = xi;
+ }
+ }
+ }
+}
+
+static void bitrv2conj(int n, int *ip, FPType *a)
+{
+ int j, j1, k, k1, l, m, m2;
+ double xr, xi, yr, yi;
+
+ ip[0] = 0;
+ l = n;
+ m = 1;
+ while ((m << 3) < l) {
+ l >>= 1;
+ for (j = 0; j < m; j++) {
+ ip[m + j] = ip[j] + l;
+ }
+ m <<= 1;
+ }
+ m2 = 2 * m;
+ if ((m << 3) == l) {
+ for (k = 0; k < m; k++) {
+ for (j = 0; j < k; j++) {
+ j1 = 2 * j + ip[k];
+ k1 = 2 * k + ip[j];
+ xr = a[j1];
+ xi = -a[j1 + 1];
+ yr = a[k1];
+ yi = -a[k1 + 1];
+ a[j1] = yr;
+ a[j1 + 1] = yi;
+ a[k1] = xr;
+ a[k1 + 1] = xi;
+ j1 += m2;
+ k1 += 2 * m2;
+ xr = a[j1];
+ xi = -a[j1 + 1];
+ yr = a[k1];
+ yi = -a[k1 + 1];
+ a[j1] = yr;
+ a[j1 + 1] = yi;
+ a[k1] = xr;
+ a[k1 + 1] = xi;
+ j1 += m2;
+ k1 -= m2;
+ xr = a[j1];
+ xi = -a[j1 + 1];
+ yr = a[k1];
+ yi = -a[k1 + 1];
+ a[j1] = yr;
+ a[j1 + 1] = yi;
+ a[k1] = xr;
+ a[k1 + 1] = xi;
+ j1 += m2;
+ k1 += 2 * m2;
+ xr = a[j1];
+ xi = -a[j1 + 1];
+ yr = a[k1];
+ yi = -a[k1 + 1];
+ a[j1] = yr;
+ a[j1 + 1] = yi;
+ a[k1] = xr;
+ a[k1 + 1] = xi;
+ }
+ k1 = 2 * k + ip[k];
+ a[k1 + 1] = -a[k1 + 1];
+ j1 = k1 + m2;
+ k1 = j1 + m2;
+ xr = a[j1];
+ xi = -a[j1 + 1];
+ yr = a[k1];
+ yi = -a[k1 + 1];
+ a[j1] = yr;
+ a[j1 + 1] = yi;
+ a[k1] = xr;
+ a[k1 + 1] = xi;
+ k1 += m2;
+ a[k1 + 1] = -a[k1 + 1];
+ }
+ } else {
+ a[1] = -a[1];
+ a[m2 + 1] = -a[m2 + 1];
+ for (k = 1; k < m; k++) {
+ for (j = 0; j < k; j++) {
+ j1 = 2 * j + ip[k];
+ k1 = 2 * k + ip[j];
+ xr = a[j1];
+ xi = -a[j1 + 1];
+ yr = a[k1];
+ yi = -a[k1 + 1];
+ a[j1] = yr;
+ a[j1 + 1] = yi;
+ a[k1] = xr;
+ a[k1 + 1] = xi;
+ j1 += m2;
+ k1 += m2;
+ xr = a[j1];
+ xi = -a[j1 + 1];
+ yr = a[k1];
+ yi = -a[k1 + 1];
+ a[j1] = yr;
+ a[j1 + 1] = yi;
+ a[k1] = xr;
+ a[k1 + 1] = xi;
+ }
+ k1 = 2 * k + ip[k];
+ a[k1 + 1] = -a[k1 + 1];
+ a[k1 + m2 + 1] = -a[k1 + m2 + 1];
+ }
+ }
+}
+
+static void cftfsub(int n, FPType *a, const FPType *w)
+{
+ int j, j1, j2, j3, l;
+ double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+
+ l = 2;
+ if (n > 8) {
+ cft1st(n, a, w);
+ l = 8;
+ while ((l << 2) < n) {
+ cftmdl(n, l, a, w);
+ l <<= 2;
+ }
+ }
+ if ((l << 2) == n) {
+ for (j = 0; j < l; j += 2) {
+ j1 = j + l;
+ j2 = j1 + l;
+ j3 = j2 + l;
+ x0r = a[j] + a[j1];
+ x0i = a[j + 1] + a[j1 + 1];
+ x1r = a[j] - a[j1];
+ x1i = a[j + 1] - a[j1 + 1];
+ x2r = a[j2] + a[j3];
+ x2i = a[j2 + 1] + a[j3 + 1];
+ x3r = a[j2] - a[j3];
+ x3i = a[j2 + 1] - a[j3 + 1];
+ a[j] = x0r + x2r;
+ a[j + 1] = x0i + x2i;
+ a[j2] = x0r - x2r;
+ a[j2 + 1] = x0i - x2i;
+ a[j1] = x1r - x3i;
+ a[j1 + 1] = x1i + x3r;
+ a[j3] = x1r + x3i;
+ a[j3 + 1] = x1i - x3r;
+ }
+ } else {
+ for (j = 0; j < l; j += 2) {
+ j1 = j + l;
+ x0r = a[j] - a[j1];
+ x0i = a[j + 1] - a[j1 + 1];
+ a[j] += a[j1];
+ a[j + 1] += a[j1 + 1];
+ a[j1] = x0r;
+ a[j1 + 1] = x0i;
+ }
+ }
+}
+
+static void cftbsub(int n, FPType *a, const FPType *w)
+{
+ int j, j1, j2, j3, l;
+ double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+
+ l = 2;
+ if (n > 8) {
+ cft1st(n, a, w);
+ l = 8;
+ while ((l << 2) < n) {
+ cftmdl(n, l, a, w);
+ l <<= 2;
+ }
+ }
+ if ((l << 2) == n) {
+ for (j = 0; j < l; j += 2) {
+ j1 = j + l;
+ j2 = j1 + l;
+ j3 = j2 + l;
+ x0r = a[j] + a[j1];
+ x0i = -a[j + 1] - a[j1 + 1];
+ x1r = a[j] - a[j1];
+ x1i = -a[j + 1] + a[j1 + 1];
+ x2r = a[j2] + a[j3];
+ x2i = a[j2 + 1] + a[j3 + 1];
+ x3r = a[j2] - a[j3];
+ x3i = a[j2 + 1] - a[j3 + 1];
+ a[j] = x0r + x2r;
+ a[j + 1] = x0i - x2i;
+ a[j2] = x0r - x2r;
+ a[j2 + 1] = x0i + x2i;
+ a[j1] = x1r - x3i;
+ a[j1 + 1] = x1i - x3r;
+ a[j3] = x1r + x3i;
+ a[j3 + 1] = x1i + x3r;
+ }
+ } else {
+ for (j = 0; j < l; j += 2) {
+ j1 = j + l;
+ x0r = a[j] - a[j1];
+ x0i = -a[j + 1] + a[j1 + 1];
+ a[j] += a[j1];
+ a[j + 1] = -a[j + 1] - a[j1 + 1];
+ a[j1] = x0r;
+ a[j1 + 1] = x0i;
+ }
+ }
+}
+
+static void cft1st(int n, FPType *a, const FPType *w)
+{
+ int j, k1, k2;
+ double wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
+ double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+
+ x0r = a[0] + a[2];
+ x0i = a[1] + a[3];
+ x1r = a[0] - a[2];
+ x1i = a[1] - a[3];
+ x2r = a[4] + a[6];
+ x2i = a[5] + a[7];
+ x3r = a[4] - a[6];
+ x3i = a[5] - a[7];
+ a[0] = x0r + x2r;
+ a[1] = x0i + x2i;
+ a[4] = x0r - x2r;
+ a[5] = x0i - x2i;
+ a[2] = x1r - x3i;
+ a[3] = x1i + x3r;
+ a[6] = x1r + x3i;
+ a[7] = x1i - x3r;
+ wk1r = w[2];
+ x0r = a[8] + a[10];
+ x0i = a[9] + a[11];
+ x1r = a[8] - a[10];
+ x1i = a[9] - a[11];
+ x2r = a[12] + a[14];
+ x2i = a[13] + a[15];
+ x3r = a[12] - a[14];
+ x3i = a[13] - a[15];
+ a[8] = x0r + x2r;
+ a[9] = x0i + x2i;
+ a[12] = x2i - x0i;
+ a[13] = x0r - x2r;
+ x0r = x1r - x3i;
+ x0i = x1i + x3r;
+ a[10] = wk1r * (x0r - x0i);
+ a[11] = wk1r * (x0r + x0i);
+ x0r = x3i + x1r;
+ x0i = x3r - x1i;
+ a[14] = wk1r * (x0i - x0r);
+ a[15] = wk1r * (x0i + x0r);
+ k1 = 0;
+ for (j = 16; j < n; j += 16) {
+ k1 += 2;
+ k2 = 2 * k1;
+ wk2r = w[k1];
+ wk2i = w[k1 + 1];
+ wk1r = w[k2];
+ wk1i = w[k2 + 1];
+ wk3r = wk1r - 2 * wk2i * wk1i;
+ wk3i = 2 * wk2i * wk1r - wk1i;
+ x0r = a[j] + a[j + 2];
+ x0i = a[j + 1] + a[j + 3];
+ x1r = a[j] - a[j + 2];
+ x1i = a[j + 1] - a[j + 3];
+ x2r = a[j + 4] + a[j + 6];
+ x2i = a[j + 5] + a[j + 7];
+ x3r = a[j + 4] - a[j + 6];
+ x3i = a[j + 5] - a[j + 7];
+ a[j] = x0r + x2r;
+ a[j + 1] = x0i + x2i;
+ x0r -= x2r;
+ x0i -= x2i;
+ a[j + 4] = wk2r * x0r - wk2i * x0i;
+ a[j + 5] = wk2r * x0i + wk2i * x0r;
+ x0r = x1r - x3i;
+ x0i = x1i + x3r;
+ a[j + 2] = wk1r * x0r - wk1i * x0i;
+ a[j + 3] = wk1r * x0i + wk1i * x0r;
+ x0r = x1r + x3i;
+ x0i = x1i - x3r;
+ a[j + 6] = wk3r * x0r - wk3i * x0i;
+ a[j + 7] = wk3r * x0i + wk3i * x0r;
+ wk1r = w[k2 + 2];
+ wk1i = w[k2 + 3];
+ wk3r = wk1r - 2 * wk2r * wk1i;
+ wk3i = 2 * wk2r * wk1r - wk1i;
+ x0r = a[j + 8] + a[j + 10];
+ x0i = a[j + 9] + a[j + 11];
+ x1r = a[j + 8] - a[j + 10];
+ x1i = a[j + 9] - a[j + 11];
+ x2r = a[j + 12] + a[j + 14];
+ x2i = a[j + 13] + a[j + 15];
+ x3r = a[j + 12] - a[j + 14];
+ x3i = a[j + 13] - a[j + 15];
+ a[j + 8] = x0r + x2r;
+ a[j + 9] = x0i + x2i;
+ x0r -= x2r;
+ x0i -= x2i;
+ a[j + 12] = -wk2i * x0r - wk2r * x0i;
+ a[j + 13] = -wk2i * x0i + wk2r * x0r;
+ x0r = x1r - x3i;
+ x0i = x1i + x3r;
+ a[j + 10] = wk1r * x0r - wk1i * x0i;
+ a[j + 11] = wk1r * x0i + wk1i * x0r;
+ x0r = x1r + x3i;
+ x0i = x1i - x3r;
+ a[j + 14] = wk3r * x0r - wk3i * x0i;
+ a[j + 15] = wk3r * x0i + wk3i * x0r;
+ }
+}
+
+static void cftmdl(int n, int l, FPType *a, const FPType *w)
+{
+ int j, j1, j2, j3, k, k1, k2, m, m2;
+ double wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
+ double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+
+ m = l << 2;
+ for (j = 0; j < l; j += 2) {
+ j1 = j + l;
+ j2 = j1 + l;
+ j3 = j2 + l;
+ x0r = a[j] + a[j1];
+ x0i = a[j + 1] + a[j1 + 1];
+ x1r = a[j] - a[j1];
+ x1i = a[j + 1] - a[j1 + 1];
+ x2r = a[j2] + a[j3];
+ x2i = a[j2 + 1] + a[j3 + 1];
+ x3r = a[j2] - a[j3];
+ x3i = a[j2 + 1] - a[j3 + 1];
+ a[j] = x0r + x2r;
+ a[j + 1] = x0i + x2i;
+ a[j2] = x0r - x2r;
+ a[j2 + 1] = x0i - x2i;
+ a[j1] = x1r - x3i;
+ a[j1 + 1] = x1i + x3r;
+ a[j3] = x1r + x3i;
+ a[j3 + 1] = x1i - x3r;
+ }
+ wk1r = w[2];
+ for (j = m; j < l + m; j += 2) {
+ j1 = j + l;
+ j2 = j1 + l;
+ j3 = j2 + l;
+ x0r = a[j] + a[j1];
+ x0i = a[j + 1] + a[j1 + 1];
+ x1r = a[j] - a[j1];
+ x1i = a[j + 1] - a[j1 + 1];
+ x2r = a[j2] + a[j3];
+ x2i = a[j2 + 1] + a[j3 + 1];
+ x3r = a[j2] - a[j3];
+ x3i = a[j2 + 1] - a[j3 + 1];
+ a[j] = x0r + x2r;
+ a[j + 1] = x0i + x2i;
+ a[j2] = x2i - x0i;
+ a[j2 + 1] = x0r - x2r;
+ x0r = x1r - x3i;
+ x0i = x1i + x3r;
+ a[j1] = wk1r * (x0r - x0i);
+ a[j1 + 1] = wk1r * (x0r + x0i);
+ x0r = x3i + x1r;
+ x0i = x3r - x1i;
+ a[j3] = wk1r * (x0i - x0r);
+ a[j3 + 1] = wk1r * (x0i + x0r);
+ }
+ k1 = 0;
+ m2 = 2 * m;
+ for (k = m2; k < n; k += m2) {
+ k1 += 2;
+ k2 = 2 * k1;
+ wk2r = w[k1];
+ wk2i = w[k1 + 1];
+ wk1r = w[k2];
+ wk1i = w[k2 + 1];
+ wk3r = wk1r - 2 * wk2i * wk1i;
+ wk3i = 2 * wk2i * wk1r - wk1i;
+ for (j = k; j < l + k; j += 2) {
+ j1 = j + l;
+ j2 = j1 + l;
+ j3 = j2 + l;
+ x0r = a[j] + a[j1];
+ x0i = a[j + 1] + a[j1 + 1];
+ x1r = a[j] - a[j1];
+ x1i = a[j + 1] - a[j1 + 1];
+ x2r = a[j2] + a[j3];
+ x2i = a[j2 + 1] + a[j3 + 1];
+ x3r = a[j2] - a[j3];
+ x3i = a[j2 + 1] - a[j3 + 1];
+ a[j] = x0r + x2r;
+ a[j + 1] = x0i + x2i;
+ x0r -= x2r;
+ x0i -= x2i;
+ a[j2] = wk2r * x0r - wk2i * x0i;
+ a[j2 + 1] = wk2r * x0i + wk2i * x0r;
+ x0r = x1r - x3i;
+ x0i = x1i + x3r;
+ a[j1] = wk1r * x0r - wk1i * x0i;
+ a[j1 + 1] = wk1r * x0i + wk1i * x0r;
+ x0r = x1r + x3i;
+ x0i = x1i - x3r;
+ a[j3] = wk3r * x0r - wk3i * x0i;
+ a[j3 + 1] = wk3r * x0i + wk3i * x0r;
+ }
+ wk1r = w[k2 + 2];
+ wk1i = w[k2 + 3];
+ wk3r = wk1r - 2 * wk2r * wk1i;
+ wk3i = 2 * wk2r * wk1r - wk1i;
+ for (j = k + m; j < l + (k + m); j += 2) {
+ j1 = j + l;
+ j2 = j1 + l;
+ j3 = j2 + l;
+ x0r = a[j] + a[j1];
+ x0i = a[j + 1] + a[j1 + 1];
+ x1r = a[j] - a[j1];
+ x1i = a[j + 1] - a[j1 + 1];
+ x2r = a[j2] + a[j3];
+ x2i = a[j2 + 1] + a[j3 + 1];
+ x3r = a[j2] - a[j3];
+ x3i = a[j2 + 1] - a[j3 + 1];
+ a[j] = x0r + x2r;
+ a[j + 1] = x0i + x2i;
+ x0r -= x2r;
+ x0i -= x2i;
+ a[j2] = -wk2i * x0r - wk2r * x0i;
+ a[j2 + 1] = -wk2i * x0i + wk2r * x0r;
+ x0r = x1r - x3i;
+ x0i = x1i + x3r;
+ a[j1] = wk1r * x0r - wk1i * x0i;
+ a[j1 + 1] = wk1r * x0i + wk1i * x0r;
+ x0r = x1r + x3i;
+ x0i = x1i - x3r;
+ a[j3] = wk3r * x0r - wk3i * x0i;
+ a[j3 + 1] = wk3r * x0i + wk3i * x0r;
+ }
+ }
+}
+
+static void rftfsub(int n, FPType *a, int nc, const FPType *c)
+{
+ int j, k, kk, ks, m;
+ double wkr, wki, xr, xi, yr, yi;
+
+ m = n >> 1;
+ ks = 2 * nc / m;
+ kk = 0;
+ for (j = 2; j < m; j += 2) {
+ k = n - j;
+ kk += ks;
+ wkr = 0.5 - c[nc - kk];
+ wki = c[kk];
+ xr = a[j] - a[k];
+ xi = a[j + 1] + a[k + 1];
+ yr = wkr * xr - wki * xi;
+ yi = wkr * xi + wki * xr;
+ a[j] -= yr;
+ a[j + 1] -= yi;
+ a[k] += yr;
+ a[k + 1] -= yi;
+ }
+}
+
+static void rftbsub(int n, FPType *a, int nc, const FPType *c)
+{
+ int j, k, kk, ks, m;
+ double wkr, wki, xr, xi, yr, yi;
+
+ a[1] = -a[1];
+ m = n >> 1;
+ ks = 2 * nc / m;
+ kk = 0;
+ for (j = 2; j < m; j += 2) {
+ k = n - j;
+ kk += ks;
+ wkr = 0.5 - c[nc - kk];
+ wki = c[kk];
+ xr = a[j] - a[k];
+ xi = a[j + 1] + a[k + 1];
+ yr = wkr * xr + wki * xi;
+ yi = wkr * xi - wki * xr;
+ a[j] -= yr;
+ a[j + 1] = yi - a[j + 1];
+ a[k] += yr;
+ a[k + 1] = yi - a[k + 1];
+ }
+ a[m + 1] = -a[m + 1];
+}
+
+static void dctsub(int n, FPType *a, int nc, const FPType *c)
+{
+ int j, k, kk, ks, m;
+ double wkr, wki, xr;
+
+ m = n >> 1;
+ ks = nc / n;
+ kk = 0;
+ for (j = 1; j < m; j++) {
+ k = n - j;
+ kk += ks;
+ wkr = c[kk] - c[nc - kk];
+ wki = c[kk] + c[nc - kk];
+ xr = wki * a[j] - wkr * a[k];
+ a[j] = wkr * a[j] + wki * a[k];
+ a[k] = xr;
+ }
+ a[m] *= c[0];
+}
+
+static void dstsub(int n, FPType *a, int nc, const FPType *c)
+{
+ int j, k, kk, ks, m;
+ double wkr, wki, xr;
+
+ m = n >> 1;
+ ks = nc / n;
+ kk = 0;
+ for (j = 1; j < m; j++) {
+ k = n - j;
+ kk += ks;
+ wkr = c[kk] - c[nc - kk];
+ wki = c[kk] + c[nc - kk];
+ xr = wki * a[k] - wkr * a[j];
+ a[k] = wkr * a[k] + wki * a[j];
+ a[j] = xr;
+ }
+ a[m] *= c[0];
+}
+};
+
+} // namespace r8b
+
+#endif // R8B_FFT4G_INCLUDED
diff --git a/Src/external_dependencies/openmpt-trunk/include/r8brain/r8bbase.cpp b/Src/external_dependencies/openmpt-trunk/include/r8brain/r8bbase.cpp
new file mode 100644
index 00000000..f877862e
--- /dev/null
+++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/r8bbase.cpp
@@ -0,0 +1,38 @@
+/**
+ * @file r8bbase.cpp
+ *
+ * @brief C++ file that should be compiled and included into your application.
+ *
+ * This is a single library file that should be compiled and included into the
+ * project that uses the "r8brain-free-src" sample rate converter. This file
+ * defines several global static objects used by the library.
+ *
+ * You may also need to include to your project: the "Kernel32" library
+ * (on Windows) and the "pthread" library on macOS and Linux.
+ *
+ * r8brain-free-src Copyright (c) 2013-2021 Aleksey Vaneev
+ * See the "LICENSE" file for license.
+ */
+
+#include "CDSPFIRFilter.h"
+#include "CDSPFracInterpolator.h"
+
+namespace r8b {
+
+#if R8B_FLTTEST
+ int InterpFilterFracs = -1;
+#endif // R8B_FLTTEST
+
+CSyncObject CDSPRealFFTKeeper :: StateSync;
+CDSPRealFFT :: CObjKeeper CDSPRealFFTKeeper :: FFTObjects[ 31 ];
+
+CSyncObject CDSPFIRFilterCache :: StateSync;
+CPtrKeeper< CDSPFIRFilter* > CDSPFIRFilterCache :: Objects;
+int CDSPFIRFilterCache :: ObjCount = 0;
+
+CSyncObject CDSPFracDelayFilterBankCache :: StateSync;
+CPtrKeeper< CDSPFracDelayFilterBank* > CDSPFracDelayFilterBankCache :: Objects;
+CPtrKeeper< CDSPFracDelayFilterBank* > CDSPFracDelayFilterBankCache :: StaticObjects;
+int CDSPFracDelayFilterBankCache :: ObjCount = 0;
+
+} // namespace r8b
diff --git a/Src/external_dependencies/openmpt-trunk/include/r8brain/r8bbase.h b/Src/external_dependencies/openmpt-trunk/include/r8brain/r8bbase.h
new file mode 100644
index 00000000..32054965
--- /dev/null
+++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/r8bbase.h
@@ -0,0 +1,1206 @@
+//$ nobt
+
+/**
+ * @file r8bbase.h
+ *
+ * @brief The "base" inclusion file with basic classes and functions.
+ *
+ * This is the "base" inclusion file for the "r8brain-free-src" sample rate
+ * converter. This inclusion file contains implementations of several small
+ * utility classes and functions used by the library.
+ *
+ * @mainpage
+ *
+ * @section intro_sec Introduction
+ *
+ * Open source (under the MIT license) high-quality professional audio sample
+ * rate converter (SRC) (resampling) library. Features routines for SRC, both
+ * up- and downsampling, to/from any sample rate, including non-integer sample
+ * rates: it can be also used for conversion to/from SACD sample rate and even
+ * go beyond that. SRC routines were implemented in multi-platform C++ code,
+ * and have a high level of optimality.
+ *
+ * For more information, please visit
+ * https://github.com/avaneev/r8brain-free-src
+ *
+ * @section license License
+ *
+ * The MIT License (MIT)
+ *
+ * r8brain-free-src Copyright (c) 2013-2022 Aleksey Vaneev
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Please credit the creator of this library in your documentation in the
+ * following way: "Sample rate converter designed by Aleksey Vaneev of
+ * Voxengo"
+ *
+ * @version 5.6
+ */
+
+#ifndef R8BBASE_INCLUDED
+#define R8BBASE_INCLUDED
+
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <math.h>
+#include "r8bconf.h"
+
+#if defined( _WIN32 )
+ #include <windows.h>
+#else // defined( _WIN32 )
+ #include <pthread.h>
+#endif // defined( _WIN32 )
+
+#if defined( __SSE4_2__ ) || defined( __SSE4_1__ ) || \
+ defined( __SSSE3__ ) || defined( __SSE3__ ) || defined( __SSE2__ ) || \
+ defined( __x86_64__ ) || defined( _M_AMD64 ) || defined( _M_X64 ) || \
+ defined( __amd64 )
+
+ #include <immintrin.h>
+
+ #define R8B_SSE2
+ #define R8B_SIMD_ISH
+
+#elif defined( __aarch64__ ) || defined( __arm64__ ) || defined( __ARM_NEON )
+
+ #include <arm_neon.h>
+
+ #define R8B_NEON
+
+ #if !defined( __APPLE__ )
+ #define R8B_SIMD_ISH // Shuffled interpolation is inefficient on M1.
+ #endif // !defined( __APPLE__ )
+
+#endif // ARM64
+
+/**
+ * @brief The "r8brain-free-src" library namespace.
+ *
+ * The "r8brain-free-src" sample rate converter library namespace.
+ */
+
+namespace r8b {
+
+/**
+ * Macro defines r8brain-free-src version string.
+ */
+
+#define R8B_VERSION "5.6"
+
+/**
+ * The macro equals to "pi" constant, fits 53-bit floating point mantissa.
+ */
+
+#define R8B_PI 3.14159265358979324
+
+/**
+ * The R8B_2PI macro equals to "2 * pi" constant, fits 53-bit floating point
+ * mantissa.
+ */
+
+#define R8B_2PI 6.28318530717958648
+
+/**
+ * The R8B_3PI macro equals to "3 * pi" constant, fits 53-bit floating point
+ * mantissa.
+ */
+
+#define R8B_3PI 9.42477796076937972
+
+/**
+ * The R8B_PId2 macro equals to "pi divided by 2" constant, fits 53-bit
+ * floating point mantissa.
+ */
+
+#define R8B_PId2 1.57079632679489662
+
+/**
+ * A special macro that defines empty copy-constructor and copy operator with
+ * the "private:" prefix. This macro should be used in classes that cannot be
+ * copied in a standard C++ way.
+ *
+ * This macro does not need to be defined in classes derived from a class
+ * where such macro was already used.
+ *
+ * @param ClassName The name of the class which uses this macro.
+ */
+
+#define R8BNOCTOR( ClassName ) \
+ private: \
+ ClassName( const ClassName& ) { } \
+ ClassName& operator = ( const ClassName& ) { return( *this ); }
+
+/**
+ * @brief The default base class for objects created on heap.
+ *
+ * Class that implements "new" and "delete" operators that use standard
+ * malloc() and free() functions.
+ */
+
+class CStdClassAllocator
+{
+public:
+ /**
+ * @param n The size of the object, in bytes.
+ * @param p Pointer to object's pre-allocated memory block.
+ * @return Pointer to object.
+ */
+
+ void* operator new( size_t, void* p )
+ {
+ return( p );
+ }
+
+ /**
+ * @param n The size of the object, in bytes.
+ * @return Pointer to the allocated memory block for the object.
+ */
+
+ void* operator new( size_t n )
+ {
+ return( :: malloc( n ));
+ }
+
+ /**
+ * @param n The size of the object, in bytes.
+ * @return Pointer to the allocated memory block for the object.
+ */
+
+ void* operator new[]( size_t n )
+ {
+ return( :: malloc( n ));
+ }
+
+ /**
+ * Operator frees a previously allocated memory block for the object.
+ *
+ * @param p Pointer to the allocated memory block for the object.
+ */
+
+ void operator delete( void* p )
+ {
+ :: free( p );
+ }
+
+ /**
+ * Operator frees a previously allocated memory block for the object.
+ *
+ * @param p Pointer to the allocated memory block for the object.
+ */
+
+ void operator delete[]( void* p )
+ {
+ :: free( p );
+ }
+};
+
+/**
+ * @brief The default base class for objects that allocate blocks of memory.
+ *
+ * Memory buffer allocator that uses "stdlib" standard memory functions.
+ */
+
+class CStdMemAllocator : public CStdClassAllocator
+{
+public:
+ /**
+ * Function allocates memory block.
+ *
+ * @param Size The size of the block, in bytes.
+ * @result The pointer to the allocated block.
+ */
+
+ static void* allocmem( const size_t Size )
+ {
+ return( :: malloc( Size ));
+ }
+
+ /**
+ * Function reallocates a previously allocated memory block.
+ *
+ * @param p Pointer to the allocated block, can be NULL.
+ * @param Size The new size of the block, in bytes.
+ * @result The pointer to the (re)allocated block.
+ */
+
+ static void* reallocmem( void* p, const size_t Size )
+ {
+ return( :: realloc( p, Size ));
+ }
+
+ /**
+ * Function frees a previously allocated memory block.
+ *
+ * @param p Pointer to the allocated block, can be NULL.
+ */
+
+ static void freemem( void* p )
+ {
+ :: free( p );
+ }
+};
+
+/**
+ * This function forces the provided "ptr" pointer to be aligned to
+ * "align" bytes. Works with power-of-2 alignments only.
+ *
+ * @param ptr Pointer to align.
+ * @param align Alignment, in bytes, power-of-2.
+ * @tparam T Pointer's element type.
+ */
+
+template< typename T >
+inline T* alignptr( T* const ptr, const uintptr_t align )
+{
+ return( (T*) (( (uintptr_t) ptr + align - 1 ) & ~( align - 1 )));
+}
+
+/**
+ * @brief Templated memory buffer class for element buffers of fixed capacity.
+ *
+ * Fixed memory buffer object. Supports allocation of a fixed amount of
+ * memory. Does not store buffer's capacity - the user should know the actual
+ * capacity of the buffer. Does not feature "internal" storage, memory is
+ * always allocated via the R8B_MEMALLOCCLASS class's functions. Thus the
+ * object of this class can be moved in memory.
+ *
+ * This class manages memory space only - it does not perform element class
+ * construction nor destruction operations.
+ *
+ * This class applies 64-byte memory address alignment to the allocated data
+ * block.
+ *
+ * @tparam T The type of the stored elements (e.g. "double").
+ */
+
+template< typename T >
+class CFixedBuffer : public R8B_MEMALLOCCLASS
+{
+ R8BNOCTOR( CFixedBuffer );
+
+public:
+ CFixedBuffer()
+ : Data0( NULL )
+ , Data( NULL )
+ {
+ }
+
+ /**
+ * Constructor allocates memory so that the specified number of elements
+ * of type T can be stored in *this buffer object.
+ *
+ * @param Capacity Storage for this number of elements to allocate.
+ */
+
+ CFixedBuffer( const int Capacity )
+ {
+ R8BASSERT( Capacity > 0 || Capacity == 0 );
+
+ Data0 = allocmem( Capacity * sizeof( T ) + Alignment );
+ Data = (T*) alignptr( Data0, Alignment );
+
+ R8BASSERT( Data0 != NULL || Capacity == 0 );
+ }
+
+ ~CFixedBuffer()
+ {
+ freemem( Data0 );
+ }
+
+ /**
+ * Function allocates memory so that the specified number of elements of
+ * type T can be stored in *this buffer object.
+ *
+ * @param Capacity Storage for this number of elements to allocate.
+ */
+
+ void alloc( const int Capacity )
+ {
+ R8BASSERT( Capacity > 0 || Capacity == 0 );
+
+ freemem( Data0 );
+ Data0 = allocmem( Capacity * sizeof( T ) + Alignment );
+ Data = (T*) alignptr( Data0, Alignment );
+
+ R8BASSERT( Data0 != NULL || Capacity == 0 );
+ }
+
+ /**
+ * Function reallocates memory so that the specified number of elements of
+ * type T can be stored in *this buffer object. Previously allocated data
+ * is copied to the new memory buffer.
+ *
+ * @param PrevCapacity Previous capacity of *this buffer.
+ * @param NewCapacity Storage for this number of elements to allocate.
+ */
+
+ void realloc( const int PrevCapacity, const int NewCapacity )
+ {
+ R8BASSERT( PrevCapacity >= 0 );
+ R8BASSERT( NewCapacity >= 0 );
+
+ void* const NewData0 = allocmem( NewCapacity * sizeof( T ) +
+ Alignment );
+
+ T* const NewData = (T*) alignptr( NewData0, Alignment );
+ const size_t CopySize = ( PrevCapacity > NewCapacity ?
+ NewCapacity : PrevCapacity ) * sizeof( T );
+
+ if( CopySize > 0 )
+ {
+ memcpy( NewData, Data, CopySize );
+ }
+
+ freemem( Data0 );
+ Data0 = NewData0;
+ Data = NewData;
+
+ R8BASSERT( Data0 != NULL || NewCapacity == 0 );
+ }
+
+ /**
+ * Function deallocates a previously allocated buffer.
+ */
+
+ void free()
+ {
+ freemem( Data0 );
+ Data0 = NULL;
+ Data = NULL;
+ }
+
+ /**
+ * @return Pointer to the first element of the allocated buffer, NULL if
+ * not allocated.
+ */
+
+ T* getPtr() const
+ {
+ return( Data );
+ }
+
+ /**
+ * @return Pointer to the first element of the allocated buffer, NULL if
+ * not allocated.
+ */
+
+ operator T* () const
+ {
+ return( Data );
+ }
+
+private:
+ static const size_t Alignment = 64; ///< Buffer address alignment, in
+ ///< bytes.
+ ///<
+ void* Data0; ///< Buffer pointer, original unaligned.
+ ///<
+ T* Data; ///< Element buffer pointer, aligned.
+ ///<
+};
+
+/**
+ * @brief Pointer-to-object "keeper" class with automatic deletion.
+ *
+ * An auxiliary class that can be used for keeping a pointer to object that
+ * should be deleted together with the "keeper" by calling object's "delete"
+ * operator.
+ *
+ * @tparam T Pointer type to operate with, must include the asterisk (e.g.
+ * "CDSPFIRFilter*").
+ */
+
+template< class T >
+class CPtrKeeper
+{
+ R8BNOCTOR( CPtrKeeper );
+
+public:
+ CPtrKeeper()
+ : Object( NULL )
+ {
+ }
+
+ /**
+ * Constructor assigns a pointer to object to *this keeper.
+ *
+ * @param aObject Pointer to object to keep, can be NULL.
+ * @tparam T2 Object's pointer type.
+ */
+
+ template< class T2 >
+ CPtrKeeper( T2 const aObject )
+ : Object( aObject )
+ {
+ }
+
+ ~CPtrKeeper()
+ {
+ delete Object;
+ }
+
+ /**
+ * Function assigns a pointer to object to *this keeper. A previously
+ * keeped pointer will be reset and object deleted.
+ *
+ * @param aObject Pointer to object to keep, can be NULL.
+ * @tparam T2 Object's pointer type.
+ */
+
+ template< class T2 >
+ void operator = ( T2 const aObject )
+ {
+ reset();
+ Object = aObject;
+ }
+
+ /**
+ * @return Pointer to keeped object, NULL if no object is being kept.
+ */
+
+ T operator -> () const
+ {
+ return( Object );
+ }
+
+ /**
+ * @return Pointer to keeped object, NULL if no object is being kept.
+ */
+
+ operator T () const
+ {
+ return( Object );
+ }
+
+ /**
+ * Function resets the keeped pointer and deletes the keeped object.
+ */
+
+ void reset()
+ {
+ T DelObj = Object;
+ Object = NULL;
+ delete DelObj;
+ }
+
+ /**
+ * @return Function returns the keeped pointer and resets it in *this
+ * keeper without object deletion.
+ */
+
+ T unkeep()
+ {
+ T ResObject = Object;
+ Object = NULL;
+ return( ResObject );
+ }
+
+private:
+ T Object; ///< Pointer to keeped object.
+ ///<
+};
+
+/**
+ * @brief Multi-threaded synchronization object class.
+ *
+ * This class uses standard OS thread-locking (mutex) mechanism which is
+ * fairly efficient in most cases.
+ *
+ * The acquire() function can be called recursively, in the same thread, for
+ * this kind of thread-locking mechanism. This will not produce a dead-lock.
+ */
+
+class CSyncObject
+{
+ R8BNOCTOR( CSyncObject );
+
+public:
+ CSyncObject()
+ {
+ #if defined( _WIN32 )
+ InitializeCriticalSectionAndSpinCount( &CritSec, 4000 );
+ #else // defined( _WIN32 )
+ pthread_mutexattr_t MutexAttrs;
+ pthread_mutexattr_init( &MutexAttrs );
+ pthread_mutexattr_settype( &MutexAttrs, PTHREAD_MUTEX_RECURSIVE );
+ pthread_mutex_init( &Mutex, &MutexAttrs );
+ pthread_mutexattr_destroy( &MutexAttrs );
+ #endif // defined( _WIN32 )
+ }
+
+ ~CSyncObject()
+ {
+ #if defined( _WIN32 )
+ DeleteCriticalSection( &CritSec );
+ #else // defined( _WIN32 )
+ pthread_mutex_destroy( &Mutex );
+ #endif // defined( _WIN32 )
+ }
+
+ /**
+ * Function "acquires" *this thread synchronizer object immediately or
+ * waits until another thread releases it.
+ */
+
+ void acquire()
+ {
+ #if defined( _WIN32 )
+ EnterCriticalSection( &CritSec );
+ #else // defined( _WIN32 )
+ pthread_mutex_lock( &Mutex );
+ #endif // defined( _WIN32 )
+ }
+
+ /**
+ * Function "releases" *this previously acquired thread synchronizer
+ * object.
+ */
+
+ void release()
+ {
+ #if defined( _WIN32 )
+ LeaveCriticalSection( &CritSec );
+ #else // defined( _WIN32 )
+ pthread_mutex_unlock( &Mutex );
+ #endif // defined( _WIN32 )
+ }
+
+private:
+ #if defined( _WIN32 )
+ CRITICAL_SECTION CritSec; ///< Standard Windows critical section
+ ///< structure.
+ ///<
+ #else // defined( _WIN32 )
+ pthread_mutex_t Mutex; ///< pthread.h mutex object.
+ ///<
+ #endif // defined( _WIN32 )
+};
+
+/**
+ * @brief A "keeper" class for CSyncObject-based synchronization.
+ *
+ * Sync keeper class. The object of this class can be used as auto-init and
+ * auto-deinit object for calling the acquire() and release() functions of an
+ * object of the CSyncObject class. This "keeper" object is best used in
+ * functions as an "automatic" object allocated on the stack, possibly via the
+ * R8BSYNC() macro.
+ */
+
+class CSyncKeeper
+{
+ R8BNOCTOR( CSyncKeeper );
+
+public:
+ CSyncKeeper()
+ : SyncObj( NULL )
+ {
+ }
+
+ /**
+ * @param aSyncObj Pointer to the sync object which should be used for
+ * sync'ing, can be NULL.
+ */
+
+ CSyncKeeper( CSyncObject* const aSyncObj )
+ : SyncObj( aSyncObj )
+ {
+ if( SyncObj != NULL )
+ {
+ SyncObj -> acquire();
+ }
+ }
+
+ /**
+ * @param aSyncObj Reference to the sync object which should be used for
+ * sync'ing.
+ */
+
+ CSyncKeeper( CSyncObject& aSyncObj )
+ : SyncObj( &aSyncObj )
+ {
+ SyncObj -> acquire();
+ }
+
+ ~CSyncKeeper()
+ {
+ if( SyncObj != NULL )
+ {
+ SyncObj -> release();
+ }
+ }
+
+protected:
+ CSyncObject* SyncObj; ///< Sync object in use (can be NULL).
+ ///<
+};
+
+/**
+ * The synchronization macro. The R8BSYNC( obj ) macro, which creates and
+ * object of the r8b::CSyncKeeper class on stack, should be put before
+ * sections of the code that may potentially change data asynchronously with
+ * other threads at the same time. The R8BSYNC( obj ) macro "acquires" the
+ * synchronization object thus blocking execution of other threads that also
+ * use the same R8BSYNC( obj ) macro. The blocked section begins with the
+ * R8BSYNC( obj ) macro and finishes at the end of the current C++ code block.
+ * Multiple R8BSYNC() macros may be defined from within the same code block.
+ *
+ * @param SyncObject An object of the CSyncObject type that is used for
+ * synchronization.
+ */
+
+#define R8BSYNC( SyncObject ) R8BSYNC_( SyncObject, __LINE__ )
+#define R8BSYNC_( SyncObject, id ) R8BSYNC__( SyncObject, id )
+#define R8BSYNC__( SyncObject, id ) CSyncKeeper SyncKeeper##id( SyncObject )
+
+/**
+ * @brief Sine signal generator class.
+ *
+ * Class implements sine signal generator without biasing.
+ */
+
+class CSineGen
+{
+public:
+ CSineGen()
+ {
+ }
+
+ /**
+ * Constructor initializes *this sine signal generator, with unity gain
+ * output.
+ *
+ * @param si Sine function increment, in radians.
+ * @param ph Starting phase, in radians. Add R8B_PId2 for cosine function.
+ */
+
+ CSineGen( const double si, const double ph )
+ : svalue1( sin( ph ))
+ , svalue2( sin( ph - si ))
+ , sincr( 2.0 * cos( si ))
+ {
+ }
+
+ /**
+ * Constructor initializes *this sine signal generator.
+ *
+ * @param si Sine function increment, in radians.
+ * @param ph Starting phase, in radians. Add R8B_PId2 for cosine function.
+ * @param g The overall gain factor, 1.0 for unity gain (-1.0 to 1.0
+ * amplitude).
+ */
+
+ CSineGen( const double si, const double ph, const double g )
+ : svalue1( sin( ph ) * g )
+ , svalue2( sin( ph - si ) * g )
+ , sincr( 2.0 * cos( si ))
+ {
+ }
+
+ /**
+ * Function initializes *this sine signal generator, with unity gain
+ * output.
+ *
+ * @param si Sine function increment, in radians.
+ * @param ph Starting phase, in radians. Add R8B_PId2 for cosine function.
+ */
+
+ void init( const double si, const double ph )
+ {
+ svalue1 = sin( ph );
+ svalue2 = sin( ph - si );
+ sincr = 2.0 * cos( si );
+ }
+
+ /**
+ * Function initializes *this sine signal generator.
+ *
+ * @param si Sine function increment, in radians.
+ * @param ph Starting phase, in radians. Add R8B_PId2 for cosine function.
+ * @param g The overall gain factor, 1.0 for unity gain (-1.0 to 1.0
+ * amplitude).
+ */
+
+ void init( const double si, const double ph, const double g )
+ {
+ svalue1 = sin( ph ) * g;
+ svalue2 = sin( ph - si ) * g;
+ sincr = 2.0 * cos( si );
+ }
+
+ /**
+ * @return Next value of the sine function, without biasing.
+ */
+
+ double generate()
+ {
+ const double res = svalue1;
+
+ svalue1 = sincr * res - svalue2;
+ svalue2 = res;
+
+ return( res );
+ }
+
+private:
+ double svalue1; ///< Current sine value.
+ ///<
+ double svalue2; ///< Previous sine value.
+ ///<
+ double sincr; ///< Sine value increment.
+ ///<
+};
+
+/**
+ * @param v Input value.
+ * @return Calculated bit occupancy of the specified input value. Bit
+ * occupancy means how many significant lower bits are necessary to store a
+ * specified value. Function treats the input value as unsigned.
+ */
+
+inline int getBitOccupancy( const int v )
+{
+ static const uint8_t OccupancyTable[] =
+ {
+ 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8
+ };
+
+ const int tt = v >> 16;
+
+ if( tt != 0 )
+ {
+ const int t = v >> 24;
+
+ return( t != 0 ? 24 + OccupancyTable[ t & 0xFF ] :
+ 16 + OccupancyTable[ tt ]);
+ }
+ else
+ {
+ const int t = v >> 8;
+
+ return( t != 0 ? 8 + OccupancyTable[ t ] : OccupancyTable[ v ]);
+ }
+}
+
+/**
+ * Function calculates frequency response of the specified FIR filter at the
+ * specified circular frequency. Phase can be calculated as atan2( im, re ).
+ *
+ * @param flt FIR filter's coefficients.
+ * @param fltlen Number of coefficients (taps) in the filter.
+ * @param th Circular frequency [0; pi].
+ * @param[out] re0 Resulting real part of the complex frequency response.
+ * @param[out] im0 Resulting imaginary part of the complex frequency response.
+ * @param fltlat Filter's latency, in samples.
+ */
+
+inline void calcFIRFilterResponse( const double* flt, int fltlen,
+ const double th, double& re0, double& im0, const int fltlat = 0 )
+{
+ const double sincr = 2.0 * cos( th );
+ double cvalue1;
+ double svalue1;
+
+ if( fltlat == 0 )
+ {
+ cvalue1 = 1.0;
+ svalue1 = 0.0;
+ }
+ else
+ {
+ cvalue1 = cos( -fltlat * th );
+ svalue1 = sin( -fltlat * th );
+ }
+
+ double cvalue2 = cos( -( fltlat + 1 ) * th );
+ double svalue2 = sin( -( fltlat + 1 ) * th );
+
+ double re = 0.0;
+ double im = 0.0;
+
+ while( fltlen > 0 )
+ {
+ re += cvalue1 * flt[ 0 ];
+ im += svalue1 * flt[ 0 ];
+ flt++;
+ fltlen--;
+
+ double tmp = cvalue1;
+ cvalue1 = sincr * cvalue1 - cvalue2;
+ cvalue2 = tmp;
+
+ tmp = svalue1;
+ svalue1 = sincr * svalue1 - svalue2;
+ svalue2 = tmp;
+ }
+
+ re0 = re;
+ im0 = im;
+}
+
+/**
+ * Function calculates frequency response and group delay of the specified FIR
+ * filter at the specified circular frequency. The group delay is calculated
+ * by evaluating the filter's response at close side-band frequencies of "th".
+ *
+ * @param flt FIR filter's coefficients.
+ * @param fltlen Number of coefficients (taps) in the filter.
+ * @param th Circular frequency [0; pi].
+ * @param[out] re Resulting real part of the complex frequency response.
+ * @param[out] im Resulting imaginary part of the complex frequency response.
+ * @param[out] gd Resulting group delay at the specified frequency, in
+ * samples.
+ */
+
+inline void calcFIRFilterResponseAndGroupDelay( const double* const flt,
+ const int fltlen, const double th, double& re, double& im, double& gd )
+{
+ // Calculate response at "th".
+
+ calcFIRFilterResponse( flt, fltlen, th, re, im );
+
+ // Calculate response at close sideband frequencies.
+
+ const int Count = 2;
+ const double thd2 = 1e-9;
+ double ths[ Count ] = { th - thd2, th + thd2 };
+
+ if( ths[ 0 ] < 0.0 )
+ {
+ ths[ 0 ] = 0.0;
+ }
+
+ if( ths[ 1 ] > R8B_PI )
+ {
+ ths[ 1 ] = R8B_PI;
+ }
+
+ double ph1[ Count ];
+ int i;
+
+ for( i = 0; i < Count; i++ )
+ {
+ double re1;
+ double im1;
+
+ calcFIRFilterResponse( flt, fltlen, ths[ i ], re1, im1 );
+ ph1[ i ] = atan2( im1, re1 );
+ }
+
+ if( fabs( ph1[ 1 ] - ph1[ 0 ]) > R8B_PI )
+ {
+ if( ph1[ 1 ] > ph1[ 0 ])
+ {
+ ph1[ 1 ] -= R8B_2PI;
+ }
+ else
+ {
+ ph1[ 1 ] += R8B_2PI;
+ }
+ }
+
+ const double thd = ths[ 1 ] - ths[ 0 ];
+ gd = ( ph1[ 1 ] - ph1[ 0 ]) / thd;
+}
+
+/**
+ * Function normalizes FIR filter so that its frequency response at DC is
+ * equal to DCGain.
+ *
+ * @param[in,out] p Filter coefficients.
+ * @param l Filter length.
+ * @param DCGain Filter's gain at DC (linear, non-decibel value).
+ * @param pstep "p" array step.
+ */
+
+inline void normalizeFIRFilter( double* const p, const int l,
+ const double DCGain, const int pstep = 1 )
+{
+ R8BASSERT( l > 0 );
+ R8BASSERT( pstep != 0 );
+
+ double s = 0.0;
+ double* pp = p;
+ int i = l;
+
+ while( i > 0 )
+ {
+ s += *pp;
+ pp += pstep;
+ i--;
+ }
+
+ s = DCGain / s;
+ pp = p;
+ i = l;
+
+ while( i > 0 )
+ {
+ *pp *= s;
+ pp += pstep;
+ i--;
+ }
+}
+
+/**
+ * Function calculates coefficients used to calculate 3rd order spline
+ * (polynomial) on the equidistant lattice, using 8 points.
+ *
+ * @param[out] c Output coefficients buffer, length = 4.
+ * @param xm3 Point at x-3 position.
+ * @param xm2 Point at x-2 position.
+ * @param xm1 Point at x-1 position.
+ * @param x0 Point at x position.
+ * @param x1 Point at x+1 position.
+ * @param x2 Point at x+2 position.
+ * @param x3 Point at x+3 position.
+ * @param x4 Point at x+4 position.
+ */
+
+inline void calcSpline3p8Coeffs( double* const c, const double xm3,
+ const double xm2, const double xm1, const double x0, const double x1,
+ const double x2, const double x3, const double x4 )
+{
+ c[ 0 ] = x0;
+ c[ 1 ] = ( 61.0 * ( x1 - xm1 ) + 16.0 * ( xm2 - x2 ) +
+ 3.0 * ( x3 - xm3 )) / 76.0;
+
+ c[ 2 ] = ( 106.0 * ( xm1 + x1 ) + 10.0 * x3 + 6.0 * xm3 - 3.0 * x4 -
+ 29.0 * ( xm2 + x2 ) - 167.0 * x0 ) / 76.0;
+
+ c[ 3 ] = ( 91.0 * ( x0 - x1 ) + 45.0 * ( x2 - xm1 ) +
+ 13.0 * ( xm2 - x3 ) + 3.0 * ( x4 - xm3 )) / 76.0;
+}
+
+/**
+ * Function calculates coefficients used to calculate 2rd order spline
+ * (polynomial) on the equidistant lattice, using 8 points. This function is
+ * based on the calcSpline3p8Coeffs() function, but without the 3rd order
+ * coefficient.
+ *
+ * @param[out] c Output coefficients buffer, length = 3.
+ * @param xm3 Point at x-3 position.
+ * @param xm2 Point at x-2 position.
+ * @param xm1 Point at x-1 position.
+ * @param x0 Point at x position.
+ * @param x1 Point at x+1 position.
+ * @param x2 Point at x+2 position.
+ * @param x3 Point at x+3 position.
+ * @param x4 Point at x+4 position.
+ */
+
+inline void calcSpline2p8Coeffs( double* const c, const double xm3,
+ const double xm2, const double xm1, const double x0, const double x1,
+ const double x2, const double x3, const double x4 )
+{
+ c[ 0 ] = x0;
+ c[ 1 ] = ( 61.0 * ( x1 - xm1 ) + 16.0 * ( xm2 - x2 ) +
+ 3.0 * ( x3 - xm3 )) / 76.0;
+
+ c[ 2 ] = ( 106.0 * ( xm1 + x1 ) + 10.0 * x3 + 6.0 * xm3 - 3.0 * x4 -
+ 29.0 * ( xm2 + x2 ) - 167.0 * x0 ) / 76.0;
+}
+
+/**
+ * Function calculates coefficients used to calculate 3rd order segment
+ * interpolation polynomial on the equidistant lattice, using 4 points.
+ *
+ * @param[out] c Output coefficients buffer, length = 4.
+ * @param[in] y Equidistant point values. Value at offset 1 corresponds to
+ * x=0 point.
+ */
+
+inline void calcSpline3p4Coeffs( double* const c, const double* const y )
+{
+ c[ 0 ] = y[ 1 ];
+ c[ 1 ] = 0.5 * ( y[ 2 ] - y[ 0 ]);
+ c[ 2 ] = y[ 0 ] - 2.5 * y[ 1 ] + y[ 2 ] + y[ 2 ] - 0.5 * y[ 3 ];
+ c[ 3 ] = 0.5 * ( y[ 3 ] - y[ 0 ] ) + 1.5 * ( y[ 1 ] - y[ 2 ]);
+}
+
+/**
+ * Function calculates coefficients used to calculate 3rd order segment
+ * interpolation polynomial on the equidistant lattice, using 6 points.
+ *
+ * @param[out] c Output coefficients buffer, length = 4.
+ * @param[in] y Equidistant point values. Value at offset 2 corresponds to
+ * x=0 point.
+ */
+
+inline void calcSpline3p6Coeffs( double* const c, const double* const y )
+{
+ c[ 0 ] = y[ 2 ];
+ c[ 1 ] = ( 11.0 * ( y[ 3 ] - y[ 1 ]) + 2.0 * ( y[ 0 ] - y[ 4 ])) / 14.0;
+ c[ 2 ] = ( 20.0 * ( y[ 1 ] + y[ 3 ]) + 2.0 * y[ 5 ] - 4.0 * y[ 0 ] -
+ 7.0 * y[ 4 ] - 31.0 * y[ 2 ]) / 14.0;
+
+ c[ 3 ] = ( 17.0 * ( y[ 2 ] - y[ 3 ]) + 9.0 * ( y[ 4 ] - y[ 1 ]) +
+ 2.0 * ( y[ 0 ] - y[ 5 ])) / 14.0;
+}
+
+#if !defined( min )
+
+/**
+ * @param v1 Value 1.
+ * @param v2 Value 2.
+ * @tparam T Values' type.
+ * @return The minimum of 2 values.
+ */
+
+template< typename T >
+inline T min( const T& v1, const T& v2 )
+{
+ return( v1 < v2 ? v1 : v2 );
+}
+
+#endif // min
+
+#if !defined( max )
+
+/**
+ * @param v1 Value 1.
+ * @param v2 Value 2.
+ * @tparam T Values' type.
+ * @return The maximum of 2 values.
+ */
+
+template< typename T >
+inline T max( const T& v1, const T& v2 )
+{
+ return( v1 > v2 ? v1 : v2 );
+}
+
+#endif // max
+
+/**
+ * Function "clamps" (clips) the specified value so that it is not lesser than
+ * "minv", and not greater than "maxv".
+ *
+ * @param Value Value to clamp.
+ * @param minv Minimal allowed value.
+ * @param maxv Maximal allowed value.
+ * @return "Clamped" value.
+ */
+
+inline double clampr( const double Value, const double minv,
+ const double maxv )
+{
+ if( Value < minv )
+ {
+ return( minv );
+ }
+ else
+ if( Value > maxv )
+ {
+ return( maxv );
+ }
+ else
+ {
+ return( Value );
+ }
+}
+
+/**
+ * @param x Value to square.
+ * @return Squared value of the argument.
+ */
+
+inline double sqr( const double x )
+{
+ return( x * x );
+}
+
+/**
+ * @param v Input value.
+ * @param p Power factor.
+ * @return Returns pow() function's value with input value's sign check.
+ */
+
+inline double pows( const double v, const double p )
+{
+ return( v < 0.0 ? -pow( -v, p ) : pow( v, p ));
+}
+
+/**
+ * @param v Input value.
+ * @return Calculated single-argument Gaussian function of the input value.
+ */
+
+inline double gauss( const double v )
+{
+ return( exp( -( v * v )));
+}
+
+/**
+ * @param v Input value.
+ * @return Calculated inverse hyperbolic sine of the input value.
+ */
+
+inline double asinh( const double v )
+{
+ return( log( v + sqrt( v * v + 1.0 )));
+}
+
+/**
+ * @param x Input value.
+ * @return Calculated zero-th order modified Bessel function of the first kind
+ * of the input value. Approximate value.
+ */
+
+inline double besselI0( const double x )
+{
+ const double ax = fabs( x );
+ double y;
+
+ if( ax < 3.75 )
+ {
+ y = x / 3.75;
+ y *= y;
+
+ return( 1.0 + y * ( 3.5156229 + y * ( 3.0899424 + y * ( 1.2067492 +
+ y * ( 0.2659732 + y * ( 0.360768e-1 + y * 0.45813e-2 ))))));
+ }
+
+ y = 3.75 / ax;
+
+ return( exp( ax ) / sqrt( ax ) * ( 0.39894228 + y * ( 0.1328592e-1 +
+ y * ( 0.225319e-2 + y * ( -0.157565e-2 + y * ( 0.916281e-2 +
+ y * ( -0.2057706e-1 + y * ( 0.2635537e-1 + y * ( -0.1647633e-1 +
+ y * 0.392377e-2 )))))))));
+}
+
+} // namespace r8b
+
+#endif // R8BBASE_INCLUDED
diff --git a/Src/external_dependencies/openmpt-trunk/include/r8brain/r8bconf.h b/Src/external_dependencies/openmpt-trunk/include/r8brain/r8bconf.h
new file mode 100644
index 00000000..ef08b16b
--- /dev/null
+++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/r8bconf.h
@@ -0,0 +1,176 @@
+//$ nobt
+//$ nocpp
+
+/**
+ * @file r8bconf.h
+ *
+ * @brief The "configuration" inclusion file you can modify.
+ *
+ * This is the "configuration" inclusion file for the "r8brain-free-src"
+ * sample rate converter. You may redefine the macros here as you see fit.
+ *
+ * r8brain-free-src Copyright (c) 2013-2021 Aleksey Vaneev
+ * See the "LICENSE" file for license.
+ */
+
+#ifndef R8BCONF_INCLUDED
+#define R8BCONF_INCLUDED
+
+#if !defined( R8B_IPP )
+ /**
+ * Set the R8B_IPP macro definition to 1 to enable the use of Intel IPP's
+ * fast Fourier transform functions. Also uncomment and correct the IPP
+ * header inclusion macros.
+ *
+ * Do not forget to call the ippInit() function at the start of the
+ * application, before using this library's functions.
+ */
+
+ #define R8B_IPP 0
+
+// #include <ippcore.h>
+// #include <ipps.h>
+#endif // !defined( R8B_IPP )
+
+#if !defined( R8BASSERT )
+ /**
+ * Assertion macro used to check for certain run-time conditions. By
+ * default no action is taken if assertion fails.
+ *
+ * @param e Expression to check.
+ */
+
+ #define R8BASSERT( e )
+#endif // !defined( R8BASSERT )
+
+#if !defined( R8BCONSOLE )
+ /**
+ * Console output macro, used to output various resampler status strings,
+ * including filter design parameters, convolver parameters.
+ *
+ * @param e Expression to send to the console, usually consists of a
+ * standard "printf" format string followed by several parameters
+ * (__VA_ARGS__).
+ */
+
+ #define R8BCONSOLE( ... )
+#endif // !defined( R8BCONSOLE )
+
+#if !defined( R8B_BASECLASS )
+ /**
+ * Macro defines the name of the class from which all classes that are
+ * designed to be created on heap are derived. The default
+ * r8b::CStdClassAllocator class uses "stdlib" memory allocation
+ * functions.
+ *
+ * The classes that are best placed on stack or as class members are not
+ * derived from any class.
+ */
+
+ #define R8B_BASECLASS :: r8b :: CStdClassAllocator
+#endif // !defined( R8B_BASECLASS )
+
+#if !defined( R8B_MEMALLOCCLASS )
+ /**
+ * Macro defines the name of the class that implements raw memory
+ * allocation functions, see the r8b::CStdMemAllocator class for details.
+ */
+
+ #define R8B_MEMALLOCCLASS :: r8b :: CStdMemAllocator
+#endif // !defined( R8B_MEMALLOCCLASS )
+
+#if !defined( R8B_FILTER_CACHE_MAX )
+ /**
+ * This macro specifies the number of filters kept in the cache at most.
+ * The actual number can be higher if many different filters are in use at
+ * the same time.
+ */
+
+ #define R8B_FILTER_CACHE_MAX 96
+#endif // !defined( R8B_FILTER_CACHE_MAX )
+
+#if !defined( R8B_FRACBANK_CACHE_MAX )
+ /**
+ * This macro specifies the number of whole-number stepping fractional
+ * delay filter banks kept in the cache at most. The actual number can be
+ * higher if many different filter banks are in use at the same time. As
+ * filter banks are usually big objects, it is advisable to keep this
+ * cache size small.
+ */
+
+ #define R8B_FRACBANK_CACHE_MAX 12
+#endif // !defined( R8B_FRACBANK_CACHE_MAX )
+
+#if !defined( R8B_FLTTEST )
+ /**
+ * This macro, when equal to 1, enables fractional delay filter bank
+ * testing: in this mode the filter bank becomes a dynamic member of the
+ * CDSPFracInterpolator object instead of being a global static object.
+ */
+
+ #define R8B_FLTTEST 0
+#endif // !defined( R8B_FLTTEST )
+
+#if !defined( R8B_FASTTIMING )
+ /**
+ * This macro, when equal to 1, enables a fast interpolation sample
+ * timing technique. This technique improves interpolation performance
+ * (by around 10%) at the expense of a minor sample timing drift which is
+ * on the order of 1e-6 samples per 10 billion output samples. This
+ * setting does not apply to whole-number stepping, if it is in use, as
+ * this stepping provides zero timing error without performance impact.
+ * Also does not apply to the cases when a whole-numbered (2X, 3X, etc.)
+ * resampling is in the actual use.
+ */
+
+ #define R8B_FASTTIMING 0
+#endif // !defined( R8B_FASTTIMING )
+
+#if !defined( R8B_EXTFFT )
+ /**
+ * This macro, when equal to 1, extends length of low-pass filters' FFT
+ * block by a factor of 2, by zero-padding it. This usually improves the
+ * overall time performance of the resampler at the expense of a higher
+ * overall latency (initial processing delay). If such delay is not an
+ * issue, setting this macro to 1 is preferrable. This macro can only have
+ * a value of 0 or 1.
+ */
+
+ #define R8B_EXTFFT 0
+#endif // !defined( R8B_EXTFFT )
+
+#if !defined( R8B_PFFFT )
+ /**
+ * When defined as 1, enables PFFFT routines which are fast, but which
+ * are limited to 24-bit precision. May be a good choice for time-series
+ * interpolation, when stop-band attenuation higher than 120 dB is not
+ * required.
+ */
+
+ #define R8B_PFFFT 0
+#endif // !defined( R8B_PFFFT )
+
+#if R8B_PFFFT
+ #define R8B_FLOATFFT 1
+#endif // R8B_PFFFT
+
+#if !defined( R8B_PFFFT_DOUBLE )
+ /**
+ * When defined as 1, enables PFFFT "double" routines which are fast, and
+ * which provide the highest precision.
+ */
+
+ #define R8B_PFFFT_DOUBLE 0
+#endif // !defined( R8B_PFFFT_DOUBLE )
+
+#if !defined( R8B_FLOATFFT )
+ /**
+ * The R8B_FLOATFFT definition enables double-to-float buffer conversions
+ * for FFT operations, for algorithms that work with "float" values. This
+ * macro should not be changed from the default "0" here.
+ */
+
+ #define R8B_FLOATFFT 0
+#endif // !defined( R8B_FLOATFFT )
+
+#endif // R8BCONF_INCLUDED
diff --git a/Src/external_dependencies/openmpt-trunk/include/r8brain/r8butil.h b/Src/external_dependencies/openmpt-trunk/include/r8brain/r8butil.h
new file mode 100644
index 00000000..2f967b4b
--- /dev/null
+++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/r8butil.h
@@ -0,0 +1,306 @@
+//$ nobt
+//$ nocpp
+
+/**
+ * @file r8butil.h
+ *
+ * @brief The inclusion file with several utility functions.
+ *
+ * This file includes several utility functions used by various utility
+ * programs like "calcErrorTable.cpp".
+ *
+ * r8brain-free-src Copyright (c) 2013-2021 Aleksey Vaneev
+ * See the "LICENSE" file for license.
+ */
+
+#ifndef R8BUTIL_INCLUDED
+#define R8BUTIL_INCLUDED
+
+#include "r8bbase.h"
+
+namespace r8b {
+
+/**
+ * @param re Real part of the frequency response.
+ * @param im Imaginary part of the frequency response.
+ * @return A magnitude response value converted from the linear scale to the
+ * logarithmic scale.
+ */
+
+inline double convertResponseToLog( const double re, const double im )
+{
+ return( 4.34294481903251828 * log( re * re + im * im + 1e-100 ));
+}
+
+/**
+ * An utility function that performs frequency response scanning step update
+ * based on the current magnitude response's slope.
+ *
+ * @param[in,out] step The current scanning step. Will be updated on
+ * function's return. Must be a positive value.
+ * @param curg Squared magnitude response at the current frequency point.
+ * @param[in,out] prevg_log Previous magnitude response, log scale. Will be
+ * updated on function's return.
+ * @param prec Precision multiplier, affects the size of the step.
+ * @param maxstep The maximal allowed step.
+ * @param minstep The minimal allowed step.
+ */
+
+inline void updateScanStep( double& step, const double curg,
+ double& prevg_log, const double prec, const double maxstep,
+ const double minstep = 1e-11 )
+{
+ double curg_log = 4.34294481903251828 * log( curg + 1e-100 );
+ curg_log += ( prevg_log - curg_log ) * 0.7;
+
+ const double slope = fabs( curg_log - prevg_log );
+ prevg_log = curg_log;
+
+ if( slope > 0.0 )
+ {
+ step /= prec * slope;
+ step = max( min( step, maxstep ), minstep );
+ }
+}
+
+/**
+ * Function locates normalized frequency at which the minimum filter gain
+ * is reached. The scanning is performed from lower (left) to higher
+ * (right) frequencies, the whole range is scanned.
+ *
+ * Function expects that the magnitude response is always reducing from lower
+ * to high frequencies, starting at "minth".
+ *
+ * @param flt Filter response.
+ * @param fltlen Filter response's length in samples (taps).
+ * @param[out] ming The current minimal gain (squared). On function's return
+ * will contain the minimal gain value found (squared).
+ * @param[out] minth The normalized frequency where the minimal gain is
+ * currently at. On function's return will point to the normalized frequency
+ * where the new minimum was found.
+ * @param thend The ending frequency, inclusive.
+ */
+
+inline void findFIRFilterResponseMinLtoR( const double* const flt,
+ const int fltlen, double& ming, double& minth, const double thend )
+{
+ const double maxstep = minth * 2e-3;
+ double curth = minth;
+ double re;
+ double im;
+ calcFIRFilterResponse( flt, fltlen, R8B_PI * curth, re, im );
+ double prevg_log = convertResponseToLog( re, im );
+ double step = 1e-11;
+
+ while( true )
+ {
+ curth += step;
+
+ if( curth > thend )
+ {
+ break;
+ }
+
+ calcFIRFilterResponse( flt, fltlen, R8B_PI * curth, re, im );
+ const double curg = re * re + im * im;
+
+ if( curg > ming )
+ {
+ ming = curg;
+ minth = curth;
+ break;
+ }
+
+ ming = curg;
+ minth = curth;
+
+ updateScanStep( step, curg, prevg_log, 0.31, maxstep );
+ }
+}
+
+/**
+ * Function locates normalized frequency at which the maximal filter gain
+ * is reached. The scanning is performed from lower (left) to higher
+ * (right) frequencies, the whole range is scanned.
+ *
+ * Note: this function may "stall" in very rare cases if the magnitude
+ * response happens to be "saw-tooth" like, requiring a very small stepping to
+ * be used. If this happens, it may take dozens of seconds to complete.
+ *
+ * @param flt Filter response.
+ * @param fltlen Filter response's length in samples (taps).
+ * @param[out] maxg The current maximal gain (squared). On function's return
+ * will contain the maximal gain value (squared).
+ * @param[out] maxth The normalized frequency where the maximal gain is
+ * currently at. On function's return will point to the normalized frequency
+ * where the maximum was reached.
+ * @param thend The ending frequency, inclusive.
+ */
+
+inline void findFIRFilterResponseMaxLtoR( const double* const flt,
+ const int fltlen, double& maxg, double& maxth, const double thend )
+{
+ const double maxstep = maxth * 1e-4;
+ double premaxth = maxth;
+ double premaxg = maxg;
+ double postmaxth = maxth;
+ double postmaxg = maxg;
+
+ double prevth = maxth;
+ double prevg = maxg;
+ double curth = maxth;
+ double re;
+ double im;
+ calcFIRFilterResponse( flt, fltlen, R8B_PI * curth, re, im );
+ double prevg_log = convertResponseToLog( re, im );
+ double step = 1e-11;
+
+ bool WasPeak = false;
+ int AfterPeakCount = 0;
+
+ while( true )
+ {
+ curth += step;
+
+ if( curth > thend )
+ {
+ break;
+ }
+
+ calcFIRFilterResponse( flt, fltlen, R8B_PI * curth, re, im );
+ const double curg = re * re + im * im;
+
+ if( curg > maxg )
+ {
+ premaxth = prevth;
+ premaxg = prevg;
+ maxg = curg;
+ maxth = curth;
+ WasPeak = true;
+ AfterPeakCount = 0;
+ }
+ else
+ if( WasPeak )
+ {
+ if( AfterPeakCount == 0 )
+ {
+ postmaxth = curth;
+ postmaxg = curg;
+ }
+
+ if( AfterPeakCount == 5 )
+ {
+ // Perform 2 approximate binary searches.
+
+ int k;
+
+ for( k = 0; k < 2; k++ )
+ {
+ double l = ( k == 0 ? premaxth : maxth );
+ double curgl = ( k == 0 ? premaxg : maxg );
+ double r = ( k == 0 ? maxth : postmaxth );
+ double curgr = ( k == 0 ? maxg : postmaxg );
+
+ while( true )
+ {
+ const double c = ( l + r ) * 0.5;
+ calcFIRFilterResponse( flt, fltlen, R8B_PI * c,
+ re, im );
+
+ const double curg = re * re + im * im;
+
+ if( curgl > curgr )
+ {
+ r = c;
+ curgr = curg;
+ }
+ else
+ {
+ l = c;
+ curgl = curg;
+ }
+
+ if( r - l < 1e-11 )
+ {
+ if( curgl > curgr )
+ {
+ maxth = l;
+ maxg = curgl;
+ }
+ else
+ {
+ maxth = r;
+ maxg = curgr;
+ }
+
+ break;
+ }
+ }
+ }
+
+ break;
+ }
+
+ AfterPeakCount++;
+ }
+
+ prevth = curth;
+ prevg = curg;
+
+ updateScanStep( step, curg, prevg_log, 1.0, maxstep );
+ }
+}
+
+/**
+ * Function locates normalized frequency at which the specified maximum
+ * filter gain is reached. The scanning is performed from higher (right)
+ * to lower (left) frequencies, scanning stops when the required gain
+ * value was crossed. Function uses an extremely efficient binary search and
+ * thus expects that the magnitude response has the "main lobe" form produced
+ * by windowing, with a minimal pass-band ripple.
+ *
+ * @param flt Filter response.
+ * @param fltlen Filter response's length in samples (taps).
+ * @param maxg Maximal gain (squared).
+ * @param[out] th The current normalized frequency. On function's return will
+ * point to the normalized frequency where "maxg" is reached.
+ * @param thend The leftmost frequency to scan, inclusive.
+ */
+
+inline void findFIRFilterResponseLevelRtoL( const double* const flt,
+ const int fltlen, const double maxg, double& th, const double thend )
+{
+ // Perform exact binary search.
+
+ double l = thend;
+ double r = th;
+
+ while( true )
+ {
+ const double c = ( l + r ) * 0.5;
+
+ if( r - l < 1e-14 )
+ {
+ th = c;
+ break;
+ }
+
+ double re;
+ double im;
+ calcFIRFilterResponse( flt, fltlen, R8B_PI * c, re, im );
+ const double curg = re * re + im * im;
+
+ if( curg > maxg )
+ {
+ l = c;
+ }
+ else
+ {
+ r = c;
+ }
+ }
+}
+
+} // namespace r8b
+
+#endif // R8BUTIL_INCLUDED