Initial community commit

author: Jef <jef@targetspot.com> 2024-09-24 08:54:57 -0400
committer: Jef <jef@targetspot.com> 2024-09-24 08:54:57 -0400
commit: 20d28e80a5c861a9d5f449ea911ab75b4f37ad0d (patch)
tree: 12f17f78986871dd2cfb0a56e5e93b545c1ae0d0 /Src/external_dependencies/openmpt-trunk/include/r8brain
parent: 537bcbc86291b32fc04ae4133ce4d7cac8ebe9a7 (diff)
download: winamp-20d28e80a5c861a9d5f449ea911ab75b4f37ad0d.tar.gz
18 files changed, 10313 insertions, 0 deletions
diff --git a/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPBlockConvolver.h b/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPBlockConvolver.h
new file mode 100644
index 00000000..98297b65
--- /dev/null
+++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPBlockConvolver.h
@@ -0,0 +1,642 @@
+//$ nobt
+//$ nocpp
+
+/**
+ * @file CDSPBlockConvolver.h
+ *
+ * @brief Single-block overlap-save convolution processor class.
+ *
+ * This file includes single-block overlap-save convolution processor class.
+ *
+ * r8brain-free-src Copyright (c) 2013-2022 Aleksey Vaneev
+ * See the "LICENSE" file for license.
+ */
+
+#ifndef R8B_CDSPBLOCKCONVOLVER_INCLUDED
+#define R8B_CDSPBLOCKCONVOLVER_INCLUDED
+
+#include "CDSPFIRFilter.h"
+#include "CDSPProcessor.h"
+
+namespace r8b {
+
+/**
+ * @brief Single-block overlap-save convolution processing class.
+ *
+ * Class that implements single-block overlap-save convolution processing. The
+ * length of a single FFT block used depends on the length of the filter
+ * kernel.
+ *
+ * The rationale behind "single-block" processing is that increasing the FFT
+ * block length by 2 is more efficient than performing convolution at the same
+ * FFT block length but using two blocks.
+ *
+ * This class also implements a built-in resampling by any whole-number
+ * factor, which simplifies the overall resampling objects topology.
+ */
+
+class CDSPBlockConvolver : public CDSPProcessor
+{
+public:
+	/**
+	 * Constructor initializes internal variables and constants of *this
+	 * object.
+	 *
+	 * @param aFilter Pre-calculated filter data. Reference to this object is
+	 * inhertied by *this object, and the object will be released when *this
+	 * object is destroyed. If upsampling is used, filter's gain should be
+	 * equal to the upsampling factor.
+	 * @param aUpFactor The upsampling factor, positive value. E.g. value of 2
+	 * means 2x upsampling should be performed over the input data.
+	 * @param aDownFactor The downsampling factor, positive value. E.g. value
+	 * of 2 means 2x downsampling should be performed over the output data.
+	 * @param PrevLatency Latency, in samples (any value >=0), which was left
+	 * in the output signal by a previous process. This value is usually
+	 * non-zero if the minimum-phase filters are in use. This value is always
+	 * zero if the linear-phase filters are in use.
+	 * @param aDoConsumeLatency "True" if the output latency should be
+	 * consumed. Does not apply to the fractional part of the latency (if such
+	 * part is available).
+	 */
+
+	CDSPBlockConvolver( CDSPFIRFilter& aFilter, const int aUpFactor,
+		const int aDownFactor, const double PrevLatency = 0.0,
+		const bool aDoConsumeLatency = true )
+		: Filter( &aFilter )
+		, UpFactor( aUpFactor )
+		, DownFactor( aDownFactor )
+		, DoConsumeLatency( aDoConsumeLatency )
+		, BlockLen2( 2 << Filter -> getBlockLenBits() )
+	{
+		R8BASSERT( UpFactor > 0 );
+		R8BASSERT( DownFactor > 0 );
+		R8BASSERT( PrevLatency >= 0.0 );
+
+		int fftinBits;
+		UpShift = getBitOccupancy( UpFactor ) - 1;
+
+		if(( 1 << UpShift ) == UpFactor )
+		{
+			fftinBits = Filter -> getBlockLenBits() + 1 - UpShift;
+			PrevInputLen = ( Filter -> getKernelLen() - 1 + UpFactor - 1 ) /
+				UpFactor;
+
+			InputLen = BlockLen2 - PrevInputLen * UpFactor;
+		}
+		else
+		{
+			UpShift = -1;
+			fftinBits = Filter -> getBlockLenBits() + 1;
+			PrevInputLen = Filter -> getKernelLen() - 1;
+			InputLen = BlockLen2 - PrevInputLen;
+		}
+
+		OutOffset = ( Filter -> isZeroPhase() ? Filter -> getLatency() : 0 );
+		LatencyFrac = Filter -> getLatencyFrac() + PrevLatency * UpFactor;
+		Latency = (int) LatencyFrac;
+		const int InLatency = Latency + Filter -> getLatency() - OutOffset;
+		LatencyFrac -= Latency;
+		LatencyFrac /= DownFactor;
+
+		Latency += InputLen + Filter -> getLatency();
+
+		int fftoutBits;
+		InputDelay = 0;
+		DownSkipInit = 0;
+		DownShift = getBitOccupancy( DownFactor ) - 1;
+
+		if(( 1 << DownShift ) == DownFactor )
+		{
+			fftoutBits = Filter -> getBlockLenBits() + 1 - DownShift;
+
+			if( DownFactor > 1 )
+			{
+				if( UpShift > 0 )
+				{
+					// This case never happens in practice due to mutual
+					// exclusion of "power of 2" DownFactor and UpFactor
+					// values.
+
+					R8BASSERT( UpShift == 0 );
+				}
+				else
+				{
+					// Make sure InputLen is divisible by DownFactor.
+
+					const int ilc = InputLen & ( DownFactor - 1 );
+					PrevInputLen += ilc;
+					InputLen -= ilc;
+					Latency -= ilc;
+
+					// Correct InputDelay for input and filter's latency.
+
+					const int lc = InLatency & ( DownFactor - 1 );
+
+					if( lc > 0 )
+					{
+						InputDelay = DownFactor - lc;
+					}
+
+					if( !DoConsumeLatency )
+					{
+						Latency /= DownFactor;
+					}
+				}
+			}
+		}
+		else
+		{
+			fftoutBits = Filter -> getBlockLenBits() + 1;
+			DownShift = -1;
+
+			if( !DoConsumeLatency && DownFactor > 1 )
+			{
+				DownSkipInit = Latency % DownFactor;
+				Latency /= DownFactor;
+			}
+		}
+
+		R8BASSERT( Latency >= 0 );
+
+		fftin = new CDSPRealFFTKeeper( fftinBits );
+
+		if( fftoutBits == fftinBits )
+		{
+			fftout = fftin;
+		}
+		else
+		{
+			ffto2 = new CDSPRealFFTKeeper( fftoutBits );
+			fftout = ffto2;
+		}
+
+		WorkBlocks.alloc( BlockLen2 * 2 + PrevInputLen );
+		CurInput = &WorkBlocks[ 0 ];
+		CurOutput = &WorkBlocks[ BlockLen2 ]; // CurInput and
+			// CurOutput are address-aligned.
+		PrevInput = &WorkBlocks[ BlockLen2 * 2 ];
+
+		clear();
+
+		R8BCONSOLE( "CDSPBlockConvolver: flt_len=%i in_len=%i io=%i/%i "
+			"fft=%i/%i latency=%i\n", Filter -> getKernelLen(), InputLen,
+			UpFactor, DownFactor, (*fftin) -> getLen(), (*fftout) -> getLen(),
+			getLatency() );
+	}
+
+	virtual ~CDSPBlockConvolver()
+	{
+		Filter -> unref();
+	}
+
+	virtual int getLatency() const
+	{
+		return( DoConsumeLatency ? 0 : Latency );
+	}
+
+	virtual double getLatencyFrac() const
+	{
+		return( LatencyFrac );
+	}
+
+	virtual int getMaxOutLen( const int MaxInLen ) const
+	{
+		R8BASSERT( MaxInLen >= 0 );
+
+		return(( MaxInLen * UpFactor + DownFactor - 1 ) / DownFactor );
+	}
+
+	virtual void clear()
+	{
+		memset( &PrevInput[ 0 ], 0, PrevInputLen * sizeof( PrevInput[ 0 ]));
+
+		if( DoConsumeLatency )
+		{
+			LatencyLeft = Latency;
+		}
+		else
+		{
+			LatencyLeft = 0;
+
+			if( DownShift > 0 )
+			{
+				memset( &CurOutput[ 0 ], 0, ( BlockLen2 >> DownShift ) *
+					sizeof( CurOutput[ 0 ]));
+			}
+			else
+			{
+				memset( &CurOutput[ BlockLen2 - OutOffset ], 0, OutOffset *
+					sizeof( CurOutput[ 0 ]));
+
+				memset( &CurOutput[ 0 ], 0, ( InputLen - OutOffset ) *
+					sizeof( CurOutput[ 0 ]));
+			}
+		}
+
+		memset( CurInput, 0, InputDelay * sizeof( CurInput[ 0 ]));
+
+		InDataLeft = InputLen - InputDelay;
+		UpSkip = 0;
+		DownSkip = DownSkipInit;
+	}
+
+	virtual int process( double* ip, int l0, double*& op0 )
+	{
+		R8BASSERT( l0 >= 0 );
+		R8BASSERT( UpFactor / DownFactor <= 1 || ip != op0 || l0 == 0 );
+
+		double* op = op0;
+		int l = l0 * UpFactor;
+		l0 = 0;
+
+		while( l > 0 )
+		{
+			const int Offs = InputLen - InDataLeft;
+
+			if( l < InDataLeft )
+			{
+				InDataLeft -= l;
+
+				if( UpShift >= 0 )
+				{
+					memcpy( &CurInput[ Offs >> UpShift ], ip,
+						( l >> UpShift ) * sizeof( CurInput[ 0 ]));
+				}
+				else
+				{
+					copyUpsample( ip, &CurInput[ Offs ], l );
+				}
+
+				copyToOutput( Offs - OutOffset, op, l, l0 );
+				break;
+			}
+
+			const int b = InDataLeft;
+			l -= b;
+			InDataLeft = InputLen;
+			int ilu;
+
+			if( UpShift >= 0 )
+			{
+				const int bu = b >> UpShift;
+				memcpy( &CurInput[ Offs >> UpShift ], ip,
+					bu * sizeof( CurInput[ 0 ]));
+
+				ip += bu;
+				ilu = InputLen >> UpShift;
+			}
+			else
+			{
+				copyUpsample( ip, &CurInput[ Offs ], b );
+				ilu = InputLen;
+			}
+
+			const size_t pil = PrevInputLen * sizeof( CurInput[ 0 ]);
+			memcpy( &CurInput[ ilu ], PrevInput, pil );
+			memcpy( PrevInput, &CurInput[ ilu - PrevInputLen ], pil );
+
+			(*fftin) -> forward( CurInput );
+
+			if( UpShift > 0 )
+			{
+				#if R8B_FLOATFFT
+					mirrorInputSpectrum( (float*) CurInput );
+				#else // R8B_FLOATFFT
+					mirrorInputSpectrum( CurInput );
+				#endif // R8B_FLOATFFT
+			}
+
+			if( Filter -> isZeroPhase() )
+			{
+				(*fftout) -> multiplyBlocksZP( Filter -> getKernelBlock(),
+					CurInput );
+			}
+			else
+			{
+				(*fftout) -> multiplyBlocks( Filter -> getKernelBlock(),
+					CurInput );
+			}
+
+			if( DownShift > 0 )
+			{
+				const int z = BlockLen2 >> DownShift;
+
+				#if R8B_FLOATFFT
+					float* const kb = (float*) Filter -> getKernelBlock();
+					float* const p = (float*) CurInput;
+				#else // R8B_FLOATFFT
+					const double* const kb = Filter -> getKernelBlock();
+					double* const p = CurInput;
+				#endif // R8B_FLOATFFT
+
+				p[ 1 ] = kb[ z ] * p[ z ] - kb[ z + 1 ] * p[ z + 1 ];
+			}
+
+			(*fftout) -> inverse( CurInput );
+
+			copyToOutput( Offs - OutOffset, op, b, l0 );
+
+			double* const tmp = CurInput;
+			CurInput = CurOutput;
+			CurOutput = tmp;
+		}
+
+		return( l0 );
+	}
+
+private:
+	CDSPFIRFilter* Filter; ///< Filter in use.
+		///<
+	CPtrKeeper< CDSPRealFFTKeeper* > fftin; ///< FFT object 1, used to produce
+		///< the input spectrum (can embed the "power of 2" upsampling).
+		///<
+	CPtrKeeper< CDSPRealFFTKeeper* > ffto2; ///< FFT object 2 (can be NULL).
+		///<
+	CDSPRealFFTKeeper* fftout; ///< FFT object used to produce the output
+		///< signal (can embed the "power of 2" downsampling), may point to
+		///< either "fftin" or "ffto2".
+		///<
+	int UpFactor; ///< Upsampling factor.
+		///<
+	int DownFactor; ///< Downsampling factor.
+		///<
+	bool DoConsumeLatency; ///< "True" if the output latency should be
+		///< consumed. Does not apply to the fractional part of the latency
+		///< (if such part is available).
+		///<
+	int BlockLen2; ///< Equals block length * 2.
+		///<
+	int OutOffset; ///< Output offset, depends on filter's introduced latency.
+		///<
+	int PrevInputLen; ///< The length of previous input data saved, used for
+		///< overlap.
+		///<
+	int InputLen; ///< The number of input samples that should be accumulated
+		///< before the input block is processed.
+		///<
+	int Latency; ///< Processing latency, in samples.
+		///<
+	double LatencyFrac; ///< Fractional latency, in samples, that is left in
+		///< the output signal.
+		///<
+	int UpShift; ///< "Power of 2" upsampling shift. Equals -1 if UpFactor is
+		///< not a "power of 2" value. Equals 0 if UpFactor equals 1.
+		///<
+	int DownShift; ///< "Power of 2" downsampling shift. Equals -1 if
+		///< DownFactor is not a "power of 2". Equals 0 if DownFactor equals
+		///< 1.
+		///<
+	int InputDelay; ///< Additional input delay, in samples. Used to make the
+		///< output delay divisible by DownShift. Used only if UpShift <= 0
+		///< and DownShift > 0.
+		///<
+	CFixedBuffer< double > WorkBlocks; ///< Previous input data, input and
+		///< output data blocks, overall capacity = BlockLen2 * 2 +
+		///< PrevInputLen. Used in the flip-flop manner.
+		///<
+	double* PrevInput; ///< Previous input data buffer, capacity = BlockLen.
+		///<
+	double* CurInput; ///< Input data buffer, capacity = BlockLen2.
+		///<
+	double* CurOutput; ///< Output data buffer, capacity = BlockLen2.
+		///<
+	int InDataLeft; ///< Samples left before processing input and output FFT
+		///< blocks. Initialized to InputLen on clear.
+		///<
+	int LatencyLeft; ///< Latency in samples left to skip.
+		///<
+	int UpSkip; ///< The current upsampling sample skip (value in the range
+		///< 0 to UpFactor - 1).
+		///<
+	int DownSkip; ///< The current downsampling sample skip (value in the
+		///< range 0 to DownFactor - 1). Not used if DownShift > 0.
+		///<
+	int DownSkipInit; ///< The initial DownSkip value after clear().
+		///<
+
+	/**
+	 * Function copies samples from the input buffer to the output buffer
+	 * while inserting zeros inbetween them to perform the whole-numbered
+	 * upsampling.
+	 *
+	 * @param[in,out] ip0 Input buffer. Will be advanced on function's return.
+	 * @param[out] op Output buffer.
+	 * @param l0 The number of samples to fill in the output buffer, including
+	 * both input samples and interpolation (zero) samples.
+	 */
+
+	void copyUpsample( double*& ip0, double* op, int l0 )
+	{
+		int b = min( UpSkip, l0 );
+
+		if( b != 0 )
+		{
+			UpSkip -= b;
+			l0 -= b;
+
+			*op = 0.0;
+			op++;
+
+			while( --b != 0 )
+			{
+				*op = 0.0;
+				op++;
+			}
+		}
+
+		double* ip = ip0;
+		const int upf = UpFactor;
+		int l = l0 / upf;
+		int lz = l0 - l * upf;
+
+		if( upf == 3 )
+		{
+			while( l != 0 )
+			{
+				op[ 0 ] = *ip;
+				op[ 1 ] = 0.0;
+				op[ 2 ] = 0.0;
+				ip++;
+				op += upf;
+				l--;
+			}
+		}
+		else
+		if( upf == 5 )
+		{
+			while( l != 0 )
+			{
+				op[ 0 ] = *ip;
+				op[ 1 ] = 0.0;
+				op[ 2 ] = 0.0;
+				op[ 3 ] = 0.0;
+				op[ 4 ] = 0.0;
+				ip++;
+				op += upf;
+				l--;
+			}
+		}
+		else
+		{
+			const size_t zc = ( upf - 1 ) * sizeof( op[ 0 ]);
+
+			while( l != 0 )
+			{
+				*op = *ip;
+				ip++;
+
+				memset( op + 1, 0, zc );
+				op += upf;
+				l--;
+			}
+		}
+
+		if( lz != 0 )
+		{
+			*op = *ip;
+			ip++;
+			op++;
+
+			UpSkip = upf - lz;
+
+			while( --lz != 0 )
+			{
+				*op = 0.0;
+				op++;
+			}
+		}
+
+		ip0 = ip;
+	}
+
+	/**
+	 * Function copies sample data from the CurOutput buffer to the specified
+	 * output buffer and advances its position. If necessary, this function
+	 * "consumes" latency and performs downsampling.
+	 *
+	 * @param Offs CurOutput buffer offset, can be negative.
+	 * @param[out] op0 Output buffer pointer, will be advanced.
+	 * @param b The number of output samples available, including those which
+	 * are discarded during whole-number downsampling.
+	 * @param l0 The overall output sample count, will be increased.
+	 */
+
+	void copyToOutput( int Offs, double*& op0, int b, int& l0 )
+	{
+		if( Offs < 0 )
+		{
+			if( Offs + b <= 0 )
+			{
+				Offs += BlockLen2;
+			}
+			else
+			{
+				copyToOutput( Offs + BlockLen2, op0, -Offs, l0 );
+				b += Offs;
+				Offs = 0;
+			}
+		}
+
+		if( LatencyLeft != 0 )
+		{
+			if( LatencyLeft >= b )
+			{
+				LatencyLeft -= b;
+				return;
+			}
+
+			Offs += LatencyLeft;
+			b -= LatencyLeft;
+			LatencyLeft = 0;
+		}
+
+		const int df = DownFactor;
+
+		if( DownShift > 0 )
+		{
+			int Skip = Offs & ( df - 1 );
+
+			if( Skip > 0 )
+			{
+				Skip = df - Skip;
+				b -= Skip;
+				Offs += Skip;
+			}
+
+			if( b > 0 )
+			{
+				b = ( b + df - 1 ) >> DownShift;
+				memcpy( op0, &CurOutput[ Offs >> DownShift ],
+					b * sizeof( op0[ 0 ]));
+
+				op0 += b;
+				l0 += b;
+			}
+		}
+		else
+		{
+			if( df > 1 )
+			{
+				const double* ip = &CurOutput[ Offs + DownSkip ];
+				int l = ( b + df - 1 - DownSkip ) / df;
+				DownSkip += l * df - b;
+
+				double* op = op0;
+				l0 += l;
+				op0 += l;
+
+				while( l > 0 )
+				{
+					*op = *ip;
+					ip += df;
+					op++;
+					l--;
+				}
+			}
+			else
+			{
+				memcpy( op0, &CurOutput[ Offs ], b * sizeof( op0[ 0 ]));
+				op0 += b;
+				l0 += b;
+			}
+		}
+	}
+
+	/**
+	 * Function performs input spectrum mirroring which is used to perform a
+	 * fast "power of 2" upsampling. Such mirroring is equivalent to insertion
+	 * of zeros into the input signal.
+	 *
+	 * @param p Spectrum data block to mirror.
+	 * @tparam T Buffer's element type.
+	 */
+
+	template< typename T >
+	void mirrorInputSpectrum( T* const p )
+	{
+		const int bl1 = BlockLen2 >> UpShift;
+		const int bl2 = bl1 + bl1;
+		int i;
+
+		for( i = bl1 + 2; i < bl2; i += 2 )
+		{
+			p[ i ] = p[ bl2 - i ];
+			p[ i + 1 ] = -p[ bl2 - i + 1 ];
+		}
+
+		p[ bl1 ] = p[ 1 ];
+		p[ bl1 + 1 ] = 0.0;
+		p[ 1 ] = p[ 0 ];
+
+		for( i = 1; i < UpShift; i++ )
+		{
+			const int z = bl1 << i;
+			memcpy( &p[ z ], p, z * sizeof( p[ 0 ]));
+			p[ z + 1 ] = 0.0;
+		}
+	}
+};
+
+} // namespace r8b
+
+#endif // R8B_CDSPBLOCKCONVOLVER_INCLUDED
diff --git a/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPFIRFilter.h b/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPFIRFilter.h
new file mode 100644
index 00000000..e2a23768
--- /dev/null
+++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPFIRFilter.h
@@ -0,0 +1,719 @@
+//$ nobt
+//$ nocpp
+
+/**
+ * @file CDSPFIRFilter.h
+ *
+ * @brief FIR filter generator and filter cache classes.
+ *
+ * This file includes low-pass FIR filter generator and filter cache.
+ *
+ * r8brain-free-src Copyright (c) 2013-2022 Aleksey Vaneev
+ * See the "LICENSE" file for license.
+ */
+
+#ifndef R8B_CDSPFIRFILTER_INCLUDED
+#define R8B_CDSPFIRFILTER_INCLUDED
+
+#include "CDSPSincFilterGen.h"
+#include "CDSPRealFFT.h"
+
+namespace r8b {
+
+/**
+ * Enumeration of filter's phase responses.
+ */
+
+enum EDSPFilterPhaseResponse
+{
+	fprLinearPhase = 0, ///< Linear-phase response. Features a linear-phase
+		///< high-latency response, with the latency expressed as integer
+		///< value.
+		///<
+	fprMinPhase ///< Minimum-phase response. Features a minimal latency
+		///< response, but the response's phase is non-linear. The latency is
+		///< usually expressed as non-integer value, and usually is small, but
+		///< is never equal to zero. The minimum-phase filter is transformed
+		///< from the linear-phase filter. The transformation has precision
+		///< limits which may skew both the -3 dB point and attenuation of the
+		///< filter being transformed: as it was measured, the skew happens
+		///< purely at random, and in most cases it is within tolerable range.
+		///< In a small (1%) random subset of cases the skew is bigger and
+		///< cannot be predicted. Minimum-phase transform requires 64-bit
+		///< floating point FFT precision, results with 32-bit float FFT are
+		///< far from optimal.
+		///<
+};
+
+/**
+ * @brief Calculation and storage class for FIR filters.
+ *
+ * Class that implements calculation and storing of a FIR filter (currently
+ * contains low-pass filter calculation routine designed for sample rate
+ * conversion). Objects of this class cannot be created directly, but can be
+ * obtained via the CDSPFilterCache::getLPFilter() static function.
+ */
+
+class CDSPFIRFilter : public R8B_BASECLASS
+{
+	R8BNOCTOR( CDSPFIRFilter );
+
+	friend class CDSPFIRFilterCache;
+
+public:
+	~CDSPFIRFilter()
+	{
+		R8BASSERT( RefCount == 0 );
+
+		delete Next;
+	}
+
+	/**
+	 * @return The minimal allowed low-pass filter's transition band, in
+	 * percent.
+	 */
+
+	static double getLPMinTransBand()
+	{
+		return( 0.5 );
+	}
+
+	/**
+	 * @return The maximal allowed low-pass filter's transition band, in
+	 * percent.
+	 */
+
+	static double getLPMaxTransBand()
+	{
+		return( 45.0 );
+	}
+
+	/**
+	 * @return The minimal allowed low-pass filter's stop-band attenuation, in
+	 * decibel.
+	 */
+
+	static double getLPMinAtten()
+	{
+		return( 49.0 );
+	}
+
+	/**
+	 * @return The maximal allowed low-pass filter's stop-band attenuation, in
+	 * decibel.
+	 */
+
+	static double getLPMaxAtten()
+	{
+		return( 218.0 );
+	}
+
+	/**
+	 * @return "True" if kernel block of *this filter has zero-phase response.
+	 */
+
+	bool isZeroPhase() const
+	{
+		return( IsZeroPhase );
+	}
+
+	/**
+	 * @return Filter's latency, in samples (integer part).
+	 */
+
+	int getLatency() const
+	{
+		return( Latency );
+	}
+
+	/**
+	 * @return Filter's latency, in samples (fractional part). Always zero for
+	 * linear-phase filters.
+	 */
+
+	double getLatencyFrac() const
+	{
+		return( LatencyFrac );
+	}
+
+	/**
+	 * @return Filter kernel length, in samples. Not to be confused with the
+	 * block length.
+	 */
+
+	int getKernelLen() const
+	{
+		return( KernelLen );
+	}
+
+	/**
+	 * @return Filter's block length, espressed as Nth power of 2. The actual
+	 * length is twice as large due to zero-padding.
+	 */
+
+	int getBlockLenBits() const
+	{
+		return( BlockLenBits );
+	}
+
+	/**
+	 * @return Filter's kernel block, in complex-numbered form obtained via
+	 * the CDSPRealFFT::forward() function call, zero-padded, gain-adjusted
+	 * with the CDSPRealFFT::getInvMulConst() * ReqGain constant, immediately
+	 * suitable for convolution. Kernel block may have "zero-phase" response,
+	 * depending on the isZeroPhase() function's result.
+	 */
+
+	const double* getKernelBlock() const
+	{
+		return( KernelBlock );
+	}
+
+	/**
+	 * This function should be called when the filter obtained via the
+	 * filter cache is no longer needed.
+	 */
+
+	void unref();
+
+private:
+	double ReqNormFreq; ///< Required normalized frequency, 0 to 1 inclusive.
+		///<
+	double ReqTransBand; ///< Required transition band in percent, as passed
+		///< by the user.
+		///<
+	double ReqAtten; ///< Required stop-band attenuation in decibel, as passed
+		///< by the user (positive value).
+		///<
+	EDSPFilterPhaseResponse ReqPhase; ///< Required filter's phase response.
+		///<
+	double ReqGain; ///< Required overall filter's gain.
+		///<
+	CDSPFIRFilter* Next; ///< Next FIR filter in cache's list.
+		///<
+	int RefCount; ///< The number of references made to *this FIR filter.
+		///<
+	bool IsZeroPhase; ///< "True" if kernel block of *this filter has
+		///< zero-phase response.
+		///<
+	int Latency; ///< Filter's latency in samples (integer part).
+		///<
+	double LatencyFrac; ///< Filter's latency in samples (fractional part).
+		///<
+	int KernelLen; ///< Filter kernel length, in samples.
+		///<
+	int BlockLenBits; ///< Block length used to store *this FIR filter,
+		///< expressed as Nth power of 2. This value is used directly by the
+		///< convolver.
+		///<
+	CFixedBuffer< double > KernelBlock; ///< FIR filter buffer, capacity
+		///< equals to 1 << ( BlockLenBits + 1 ). Second part of the buffer
+		///< contains zero-padding to allow alias-free convolution.
+		///< Address-aligned.
+		///<
+
+	CDSPFIRFilter()
+		: RefCount( 1 )
+	{
+	}
+
+	/**
+	 * Function builds filter kernel based on the "Req" parameters.
+	 *
+	 * @param ExtAttenCorrs External attentuation correction table, for
+	 * internal use.
+	 */
+
+	void buildLPFilter( const double* const ExtAttenCorrs )
+	{
+		const double tb = ReqTransBand * 0.01;
+		double pwr;
+		double fo1;
+		double hl;
+		double atten = -ReqAtten;
+
+		if( tb >= 0.25 )
+		{
+			if( ReqAtten >= 117.0 )
+			{
+				atten -= 1.60;
+			}
+			else
+			if( ReqAtten >= 60.0 )
+			{
+				atten -= 1.91;
+			}
+			else
+			{
+				atten -= 2.25;
+			}
+		}
+		else
+		if( tb >= 0.10 )
+		{
+			if( ReqAtten >= 117.0 )
+			{
+				atten -= 0.69;
+			}
+			else
+			if( ReqAtten >= 60.0 )
+			{
+				atten -= 0.73;
+			}
+			else
+			{
+				atten -= 1.13;
+			}
+		}
+		else
+		{
+			if( ReqAtten >= 117.0 )
+			{
+				atten -= 0.21;
+			}
+			else
+			if( ReqAtten >= 60.0 )
+			{
+				atten -= 0.25;
+			}
+			else
+			{
+				atten -= 0.36;
+			}
+		}
+
+		static const int AttenCorrCount = 264;
+		static const double AttenCorrMin = 49.0;
+		static const double AttenCorrDiff = 176.25;
+		int AttenCorr = (int) floor(( -atten - AttenCorrMin ) *
+			AttenCorrCount / AttenCorrDiff + 0.5 );
+
+		AttenCorr = min( AttenCorrCount, max( 0, AttenCorr ));
+
+		if( ExtAttenCorrs != NULL )
+		{
+			atten -= ExtAttenCorrs[ AttenCorr ];
+		}
+		else
+		if( tb >= 0.25 )
+		{
+			static const double AttenCorrScale = 101.0;
+			static const signed char AttenCorrs[] = {
+				-127, -127, -125, -125, -122, -119, -115, -110, -104, -97,
+				-91, -82, -75, -24, -16, -6, 4, 14, 24, 29, 30, 32, 37, 44,
+				51, 57, 63, 67, 65, 50, 53, 56, 58, 60, 63, 64, 66, 68, 74,
+				77, 78, 78, 78, 79, 79, 60, 60, 60, 61, 59, 52, 47, 41, 36,
+				30, 24, 17, 9, 0, -8, -10, -11, -14, -13, -18, -25, -31, -38,
+				-44, -50, -57, -63, -68, -74, -81, -89, -96, -101, -104, -107,
+				-109, -110, -86, -84, -85, -82, -80, -77, -73, -67, -62, -55,
+				-48, -42, -35, -30, -20, -11, -2, 5, 6, 6, 7, 11, 16, 21, 26,
+				34, 41, 46, 49, 52, 55, 56, 48, 49, 51, 51, 52, 52, 52, 52,
+				52, 51, 51, 50, 47, 47, 50, 48, 46, 42, 38, 35, 31, 27, 24,
+				20, 16, 12, 11, 12, 10, 8, 4, -1, -6, -11, -16, -19, -17, -21,
+				-24, -27, -32, -34, -37, -38, -40, -41, -40, -40, -42, -41,
+				-44, -45, -43, -41, -34, -31, -28, -24, -21, -18, -14, -10,
+				-5, -1, 2, 5, 8, 7, 4, 3, 2, 2, 4, 6, 8, 9, 9, 10, 10, 10, 10,
+				9, 8, 9, 11, 14, 13, 12, 11, 10, 8, 7, 6, 5, 3, 2, 2, -1, -1,
+				-3, -3, -4, -4, -5, -4, -6, -7, -9, -5, -1, -1, 0, 1, 0, -2,
+				-3, -4, -5, -5, -8, -13, -13, -13, -12, -13, -12, -11, -11,
+				-9, -8, -7, -5, -3, -1, 2, 4, 6, 9, 10, 11, 14, 18, 21, 24,
+				27, 30, 34, 37, 37, 39, 40 };
+
+			atten -= AttenCorrs[ AttenCorr ] / AttenCorrScale;
+		}
+		else
+		if( tb >= 0.10 )
+		{
+			static const double AttenCorrScale = 210.0;
+			static const signed char AttenCorrs[] = {
+				-113, -118, -122, -125, -126, -97, -95, -92, -92, -89, -82,
+				-75, -69, -48, -42, -36, -30, -22, -14, -5, -2, 1, 6, 13, 22,
+				28, 35, 41, 48, 55, 56, 56, 61, 65, 71, 77, 81, 83, 85, 85,
+				74, 74, 73, 72, 71, 70, 68, 64, 59, 56, 49, 52, 46, 42, 36,
+				32, 26, 20, 13, 7, -2, -6, -10, -15, -20, -27, -33, -38, -44,
+				-43, -48, -53, -57, -63, -69, -73, -75, -79, -81, -74, -76,
+				-77, -77, -78, -81, -80, -80, -78, -76, -65, -62, -59, -56,
+				-51, -48, -44, -38, -33, -25, -19, -13, -5, -1, 2, 7, 13, 17,
+				21, 25, 30, 35, 40, 45, 50, 53, 56, 57, 55, 58, 59, 62, 64,
+				67, 67, 68, 68, 62, 61, 61, 59, 59, 57, 57, 55, 52, 48, 42,
+				38, 35, 31, 26, 20, 15, 13, 10, 7, 3, -2, -8, -13, -17, -23,
+				-28, -34, -37, -40, -41, -45, -48, -50, -53, -57, -59, -62,
+				-63, -63, -57, -57, -56, -56, -54, -54, -53, -49, -48, -41,
+				-38, -33, -31, -26, -23, -18, -12, -9, -7, -7, -3, 0, 5, 9,
+				14, 16, 20, 22, 21, 23, 25, 27, 28, 29, 34, 33, 35, 33, 31,
+				30, 29, 29, 26, 26, 25, 24, 20, 19, 15, 10, 8, 4, 1, -2, -6,
+				-10, -16, -19, -23, -26, -27, -30, -34, -39, -43, -47, -51,
+				-52, -54, -56, -58, -59, -62, -63, -66, -65, -65, -64, -59,
+				-57, -54, -52, -48, -44, -42, -37, -32, -22, -17, -10, -3, 5,
+				13, 22, 30, 40, 50, 60, 72 };
+
+			atten -= AttenCorrs[ AttenCorr ] / AttenCorrScale;
+		}
+		else
+		{
+			static const double AttenCorrScale = 196.0;
+			static const signed char AttenCorrs[] = {
+				-15, -17, -20, -20, -20, -21, -20, -16, -17, -18, -17, -13,
+				-12, -11, -9, -7, -5, -4, -1, 1, 3, 4, 5, 6, 7, 9, 9, 10, 10,
+				10, 11, 11, 11, 12, 12, 12, 10, 11, 10, 10, 8, 10, 11, 10, 11,
+				11, 13, 14, 15, 19, 27, 26, 23, 18, 14, 8, 4, -2, -6, -12,
+				-17, -23, -28, -33, -37, -42, -46, -49, -53, -57, -60, -61,
+				-64, -65, -67, -66, -66, -66, -65, -64, -61, -59, -56, -52,
+				-48, -42, -38, -31, -27, -19, -13, -7, -1, 8, 14, 22, 29, 37,
+				45, 52, 59, 66, 73, 80, 86, 91, 96, 100, 104, 108, 111, 114,
+				115, 117, 118, 120, 120, 118, 117, 114, 113, 111, 107, 103,
+				99, 95, 89, 84, 78, 72, 66, 60, 52, 44, 37, 30, 21, 14, 6, -3,
+				-11, -18, -26, -34, -43, -51, -58, -65, -73, -78, -85, -90,
+				-97, -102, -107, -113, -115, -118, -121, -125, -125, -126,
+				-126, -126, -125, -124, -121, -119, -115, -111, -109, -101,
+				-102, -95, -88, -81, -73, -67, -63, -54, -47, -40, -33, -26,
+				-18, -11, -5, 2, 8, 14, 19, 25, 31, 36, 37, 43, 47, 49, 51,
+				52, 57, 57, 56, 57, 58, 58, 58, 57, 56, 52, 52, 50, 48, 44,
+				41, 39, 37, 33, 31, 26, 24, 21, 18, 14, 11, 8, 4, 2, -2, -5,
+				-7, -9, -11, -13, -15, -16, -18, -19, -20, -23, -24, -24, -25,
+				-27, -26, -27, -29, -30, -31, -32, -35, -36, -39, -40, -44,
+				-46, -51, -54, -59, -63, -69, -76, -83, -91, -98 };
+
+			atten -= AttenCorrs[ AttenCorr ] / AttenCorrScale;
+		}
+
+		pwr = 7.43932822146293e-8 * sqr( atten ) + 0.000102747434588003 *
+			cos( 0.00785021930010397 * atten ) * cos( 0.633854318781239 +
+			0.103208573657699 * atten ) - 0.00798132247867036 -
+			0.000903555213543865 * atten - 0.0969365532127236 * exp(
+			0.0779275237937911 * atten ) - 1.37304948662012e-5 * atten * cos(
+			0.00785021930010397 * atten );
+
+		if( pwr <= 0.067665322581 )
+		{
+			if( tb >= 0.25 )
+			{
+				hl = 2.6778150875894 / tb + 300.547590563091 * atan( atan(
+					2.68959772209918 * pwr )) / ( 5.5099277187035 * tb - tb *
+					tanh( cos( asinh( atten ))));
+
+				fo1 = 0.987205355829873 * tb + 1.00011788929851 * atan2(
+					-0.321432067051302 - 6.19131357321578 * sqrt( pwr ),
+					hl + -1.14861472207245 / ( hl - 14.1821147585957 ) + pow(
+					0.9521145021664, pow( atan2( 1.12018764830637, tb ),
+					2.10988901686912 * hl - 20.9691278378345 )));
+			}
+			else
+			if( tb >= 0.10 )
+			{
+				hl = ( 1.56688617018066 + 142.064321294568 * pwr +
+					0.00419441117131136 * cos( 243.633511747297 * pwr ) -
+					0.022953443903576 * atten - 0.026629568860284 * cos(
+					127.715550622571 * pwr )) / tb;
+
+				fo1 = 0.982299356642411 * tb + 0.999441744774215 * asinh((
+					-0.361783054039583 - 5.80540593623676 * sqrt( pwr )) /
+					hl );
+			}
+			else
+			{
+				hl = ( 2.45739657014937 + 269.183679500541 * pwr * cos(
+					5.73225668178813 + atan2( cosh( 0.988861169868941 -
+					17.2201556280744 * pwr ), 1.08340138240431 * pwr ))) / tb;
+
+				fo1 = 2.291956939 * tb + 0.01942450693 * sqr( tb ) * hl -
+					4.67538973161837 * pwr * tb - 1.668433124 * tb *
+					pow( pwr, pwr );
+			}
+		}
+		else
+		{
+			if( tb >= 0.25 )
+			{
+				hl = ( 1.50258368698213 + 158.556968859477 * asinh( pwr ) *
+					tanh( 57.9466246871383 * tanh( pwr )) -
+					0.0105440479814834 * atten ) / tb;
+
+				fo1 = 0.994024401639321 * tb + ( -0.236282717577215 -
+					6.8724924545387 * sqrt( sin( pwr ))) / hl;
+			}
+			else
+			if( tb >= 0.10 )
+			{
+				hl = ( 1.50277377248945 + 158.222625721046 * asinh( pwr ) *
+					tanh( 1.02875299001715 + 42.072277322604 * pwr ) -
+					0.0108380943845632 * atten ) / tb;
+
+				fo1 = 0.992539376734551 * tb + ( -0.251747813037178 -
+					6.74159892452584 * sqrt( tanh( tanh( tan( pwr ))))) / hl;
+			}
+			else
+			{
+				hl = ( 1.15990238966306 * pwr - 5.02124037125213 * sqr(
+					pwr ) - 0.158676856669827 * atten * cos( 1.1609073390614 *
+					pwr - 6.33932586197475 * pwr * sqr( pwr ))) / tb;
+
+				fo1 = 0.867344453126885 * tb + 0.052693817907757 * tb * log(
+					pwr ) + 0.0895511178735932 * tb * atan( 59.7538527741309 *
+					pwr ) - 0.0745653568081453 * pwr * tb;
+			}
+		}
+
+		double WinParams[ 2 ];
+		WinParams[ 0 ] = 125.0;
+		WinParams[ 1 ] = pwr;
+
+		CDSPSincFilterGen sinc;
+		sinc.Len2 = 0.25 * hl / ReqNormFreq;
+		sinc.Freq1 = 0.0;
+		sinc.Freq2 = R8B_PI * ( 1.0 - fo1 ) * ReqNormFreq;
+		sinc.initBand( CDSPSincFilterGen :: wftKaiser, WinParams, true );
+
+		KernelLen = sinc.KernelLen;
+		BlockLenBits = getBitOccupancy( KernelLen - 1 ) + R8B_EXTFFT;
+		const int BlockLen = 1 << BlockLenBits;
+
+		KernelBlock.alloc( BlockLen * 2 );
+		sinc.generateBand( &KernelBlock[ 0 ],
+			&CDSPSincFilterGen :: calcWindowKaiser );
+
+		if( ReqPhase == fprLinearPhase )
+		{
+			IsZeroPhase = true;
+			Latency = sinc.fl2;
+			LatencyFrac = 0.0;
+		}
+		else
+		{
+			IsZeroPhase = false;
+			double DCGroupDelay;
+
+			calcMinPhaseTransform( &KernelBlock[ 0 ], KernelLen, 16, false,
+				&DCGroupDelay );
+
+			Latency = (int) DCGroupDelay;
+			LatencyFrac = DCGroupDelay - Latency;
+		}
+
+		CDSPRealFFTKeeper ffto( BlockLenBits + 1 );
+
+		if( IsZeroPhase )
+		{
+			// Calculate DC gain.
+
+			double s = 0.0;
+			int i;
+
+			for( i = 0; i < KernelLen; i++ )
+			{
+				s += KernelBlock[ i ];
+			}
+
+			s = ffto -> getInvMulConst() * ReqGain / s;
+
+			// Time-shift the filter so that zero-phase response is produced.
+			// Simultaneously multiply by "s".
+
+			for( i = 0; i <= sinc.fl2; i++ )
+			{
+				KernelBlock[ i ] = KernelBlock[ sinc.fl2 + i ] * s;
+			}
+
+			for( i = 1; i <= sinc.fl2; i++ )
+			{
+				KernelBlock[ BlockLen * 2 - i ] = KernelBlock[ i ];
+			}
+
+			memset( &KernelBlock[ sinc.fl2 + 1 ], 0,
+				( BlockLen * 2 - KernelLen ) * sizeof( KernelBlock[ 0 ]));
+
+			ffto -> forward( KernelBlock );
+			ffto -> convertToZP( KernelBlock );
+		}
+		else
+		{
+			normalizeFIRFilter( &KernelBlock[ 0 ], KernelLen,
+				ffto -> getInvMulConst() * ReqGain );
+
+			memset( &KernelBlock[ KernelLen ], 0,
+				( BlockLen * 2 - KernelLen ) * sizeof( KernelBlock[ 0 ]));
+
+			ffto -> forward( KernelBlock );
+		}
+
+		R8BCONSOLE( "CDSPFIRFilter: flt_len=%i latency=%i nfreq=%.4f "
+			"tb=%.1f att=%.1f gain=%.3f\n", KernelLen, Latency,
+			ReqNormFreq, ReqTransBand, ReqAtten, ReqGain );
+	}
+};
+
+/**
+ * @brief FIR filter cache class.
+ *
+ * Class that implements cache for calculated FIR filters. The required FIR
+ * filter should be obtained via the getLPFilter() static function.
+ */
+
+class CDSPFIRFilterCache : public R8B_BASECLASS
+{
+	R8BNOCTOR( CDSPFIRFilterCache );
+
+	friend class CDSPFIRFilter;
+
+public:
+	/**
+	 * @return The number of filters present in the cache now. This value can
+	 * be monitored for debugging "forgotten" filters.
+	 */
+
+	static int getObjCount()
+	{
+		R8BSYNC( StateSync );
+
+		return( ObjCount );
+	}
+
+	/**
+	 * Function calculates or returns reference to a previously calculated
+	 * (cached) low-pass FIR filter. Note that the real transition band and
+	 * attenuation achieved by the filter varies with the magnitude of the
+	 * required attenuation, and are never 100% exact.
+	 *
+	 * @param ReqNormFreq Required normalized frequency, in the range 0 to 1,
+	 * inclusive. This is the point after which the stop-band spans.
+	 * @param ReqTransBand Required transition band, in percent of the
+	 * 0 to ReqNormFreq spectral bandwidth, in the range
+	 * CDSPFIRFilter::getLPMinTransBand() to
+	 * CDSPFIRFilter::getLPMaxTransBand(), inclusive. The transition band
+	 * specifies the part of the spectrum between the -3 dB and ReqNormFreq
+	 * points. The real resulting -3 dB point varies in the range from -3.00
+	 * to -3.05 dB, but is generally very close to -3 dB.
+	 * @param ReqAtten Required stop-band attenuation in decibel, in the range
+	 * CDSPFIRFilter::getLPMinAtten() to CDSPFIRFilter::getLPMaxAtten(),
+	 * inclusive. Note that the actual stop-band attenuation of the resulting
+	 * filter may be 0.40-4.46 dB higher.
+	 * @param ReqPhase Required filter's phase response.
+	 * @param ReqGain Required overall filter's gain (1.0 for unity gain).
+	 * @param AttenCorrs Attentuation correction table, to pass to the filter
+	 * generation function. For internal use.
+	 * @return A reference to a new or a previously calculated low-pass FIR
+	 * filter object with the required characteristics. A reference count is
+	 * incremented in the returned filter object which should be released
+	 * after use via the CDSPFIRFilter::unref() function.
+	 */
+
+	static CDSPFIRFilter& getLPFilter( const double ReqNormFreq,
+		const double ReqTransBand, const double ReqAtten,
+		const EDSPFilterPhaseResponse ReqPhase, const double ReqGain,
+		const double* const AttenCorrs = NULL )
+	{
+		R8BASSERT( ReqNormFreq > 0.0 && ReqNormFreq <= 1.0 );
+		R8BASSERT( ReqTransBand >= CDSPFIRFilter :: getLPMinTransBand() );
+		R8BASSERT( ReqTransBand <= CDSPFIRFilter :: getLPMaxTransBand() );
+		R8BASSERT( ReqAtten >= CDSPFIRFilter :: getLPMinAtten() );
+		R8BASSERT( ReqAtten <= CDSPFIRFilter :: getLPMaxAtten() );
+		R8BASSERT( ReqGain > 0.0 );
+
+		R8BSYNC( StateSync );
+
+		CDSPFIRFilter* PrevObj = NULL;
+		CDSPFIRFilter* CurObj = Objects;
+
+		while( CurObj != NULL )
+		{
+			if( CurObj -> ReqNormFreq == ReqNormFreq &&
+				CurObj -> ReqTransBand == ReqTransBand &&
+				CurObj -> ReqAtten == ReqAtten &&
+				CurObj -> ReqPhase == ReqPhase &&
+				CurObj -> ReqGain == ReqGain )
+			{
+				break;
+			}
+
+			if( CurObj -> Next == NULL && ObjCount >= R8B_FILTER_CACHE_MAX )
+			{
+				if( CurObj -> RefCount == 0 )
+				{
+					// Delete the last filter which is not used.
+
+					PrevObj -> Next = NULL;
+					delete CurObj;
+					ObjCount--;
+				}
+				else
+				{
+					// Move the last filter to the top of the list since it
+					// seems to be in use for a long time.
+
+					PrevObj -> Next = NULL;
+					CurObj -> Next = Objects.unkeep();
+					Objects = CurObj;
+				}
+
+				CurObj = NULL;
+				break;
+			}
+
+			PrevObj = CurObj;
+			CurObj = CurObj -> Next;
+		}
+
+		if( CurObj != NULL )
+		{
+			CurObj -> RefCount++;
+
+			if( PrevObj == NULL )
+			{
+				return( *CurObj );
+			}
+
+			// Remove the filter from the list temporarily.
+
+			PrevObj -> Next = CurObj -> Next;
+		}
+		else
+		{
+			// Create a new filter object (with RefCount == 1) and build the
+			// filter kernel.
+
+			CurObj = new CDSPFIRFilter();
+			CurObj -> ReqNormFreq = ReqNormFreq;
+			CurObj -> ReqTransBand = ReqTransBand;
+			CurObj -> ReqAtten = ReqAtten;
+			CurObj -> ReqPhase = ReqPhase;
+			CurObj -> ReqGain = ReqGain;
+			ObjCount++;
+
+			CurObj -> buildLPFilter( AttenCorrs );
+		}
+
+		// Insert the filter at the start of the list.
+
+		CurObj -> Next = Objects.unkeep();
+		Objects = CurObj;
+
+		return( *CurObj );
+	}
+
+private:
+	static CSyncObject StateSync; ///< Cache state synchronizer.
+		///<
+	static CPtrKeeper< CDSPFIRFilter* > Objects; ///< The chain of cached
+		///< objects.
+		///<
+	static int ObjCount; ///< The number of objects currently preset in the
+		///< cache.
+		///<
+};
+
+// ---------------------------------------------------------------------------
+// CDSPFIRFilter PUBLIC
+// ---------------------------------------------------------------------------
+
+inline void CDSPFIRFilter :: unref()
+{
+	R8BSYNC( CDSPFIRFilterCache :: StateSync );
+
+	RefCount--;
+}
+
+// ---------------------------------------------------------------------------
+
+} // namespace r8b
+
+#endif // R8B_CDSPFIRFILTER_INCLUDED
diff --git a/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPFracInterpolator.h b/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPFracInterpolator.h
new file mode 100644
index 00000000..9fb770c1
--- /dev/null
+++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPFracInterpolator.h
@@ -0,0 +1,1180 @@
+//$ nobt
+//$ nocpp
+
+/**
+ * @file CDSPFracInterpolator.h
+ *
+ * @brief Fractional delay interpolator and filter bank classes.
+ *
+ * This file includes fractional delay interpolator class.
+ *
+ * r8brain-free-src Copyright (c) 2013-2022 Aleksey Vaneev
+ * See the "LICENSE" file for license.
+ */
+
+#ifndef R8B_CDSPFRACINTERPOLATOR_INCLUDED
+#define R8B_CDSPFRACINTERPOLATOR_INCLUDED
+
+#include "CDSPSincFilterGen.h"
+#include "CDSPProcessor.h"
+
+namespace r8b {
+
+#if R8B_FLTTEST
+	extern int InterpFilterFracs; ///< Force this number of fractional filter
+		///< positions. -1 - use default.
+		///<
+#endif // R8B_FLTTEST
+
+/**
+ * @brief Sinc function-based fractional delay filter bank class.
+ *
+ * Class implements storage and initialization of a bank of sinc-based
+ * fractional delay filters, expressed as 0th, 1st, 2nd or 3rd order
+ * polynomial interpolation coefficients. The filters are windowed by the
+ * "Kaiser" power-raised window function.
+ */
+
+class CDSPFracDelayFilterBank : public R8B_BASECLASS
+{
+	R8BNOCTOR( CDSPFracDelayFilterBank );
+
+	friend class CDSPFracDelayFilterBankCache;
+
+public:
+	/**
+	 * Constructor.
+	 *
+	 * @param aFilterFracs The number of fractional delay positions to sample,
+	 * -1 - use default.
+	 * @param aElementSize The size of each filter's tap, in "double" values.
+	 * This parameter corresponds to the complexity of interpolation. 4 should
+	 * be set for 3rd order, 3 for 2nd order, 2 for linear interpolation, 1
+	 * for whole-numbered stepping.
+	 * @param aInterpPoints The number of points the interpolation is based
+	 * on. This value should not be confused with the ElementSize. Set to 2
+	 * for linear or no interpolation.
+	 * @param aReqAtten Required filter attentuation.
+	 * @param aIsThird "True" if one-third filter is required.
+	 */
+
+	CDSPFracDelayFilterBank( const int aFilterFracs, const int aElementSize,
+		const int aInterpPoints, const double aReqAtten, const bool aIsThird )
+		: InitFilterFracs( aFilterFracs )
+		, ElementSize( aElementSize )
+		, InterpPoints( aInterpPoints )
+		, ReqAtten( aReqAtten )
+		, IsThird( aIsThird )
+		, Next( NULL )
+		, RefCount( 1 )
+	{
+		R8BASSERT( ElementSize >= 1 && ElementSize <= 4 );
+
+		// Kaiser window function Params, for half and third-band.
+
+		const double* const Params = getWinParams( ReqAtten, IsThird,
+			FilterLen );
+
+		FilterSize = FilterLen * ElementSize;
+
+		if( InitFilterFracs == -1 )
+		{
+			FilterFracs = (int) ceil( pow( 6.4, ReqAtten / 50.0 ));
+
+			#if R8B_FLTTEST
+
+			if( InterpFilterFracs != -1 )
+			{
+				FilterFracs = InterpFilterFracs;
+			}
+
+			#endif // R8B_FLTTEST
+		}
+		else
+		{
+			FilterFracs = InitFilterFracs;
+		}
+
+		Table.alloc( FilterSize * ( FilterFracs + InterpPoints ));
+
+		CDSPSincFilterGen sinc;
+		sinc.Len2 = FilterLen / 2;
+
+		double* p = Table;
+		const int pc2 = InterpPoints / 2;
+		int i;
+
+		for( i = -pc2 + 1; i <= FilterFracs + pc2; i++ )
+		{
+			sinc.FracDelay = (double) ( FilterFracs - i ) / FilterFracs;
+			sinc.initFrac( CDSPSincFilterGen :: wftKaiser, Params, true );
+			sinc.generateFrac( p, &CDSPSincFilterGen :: calcWindowKaiser,
+				ElementSize );
+
+			normalizeFIRFilter( p, FilterLen, 1.0, ElementSize );
+			p += FilterSize;
+		}
+
+		const int TablePos2 = FilterSize;
+		const int TablePos3 = FilterSize * 2;
+		const int TablePos4 = FilterSize * 3;
+		const int TablePos5 = FilterSize * 4;
+		const int TablePos6 = FilterSize * 5;
+		const int TablePos7 = FilterSize * 6;
+		const int TablePos8 = FilterSize * 7;
+		double* const TableEnd = Table + ( FilterFracs + 1 ) * FilterSize;
+		p = Table;
+
+		if( InterpPoints == 8 )
+		{
+			if( ElementSize == 3 )
+			{
+				// Calculate 2nd order spline (polynomial) interpolation
+				// coefficients using 8 points.
+
+				while( p < TableEnd )
+				{
+					calcSpline2p8Coeffs( p, p[ 0 ], p[ TablePos2 ],
+						p[ TablePos3 ], p[ TablePos4 ], p[ TablePos5 ],
+						p[ TablePos6 ], p[ TablePos7 ], p[ TablePos8 ]);
+
+					p += ElementSize;
+				}
+
+				#if defined( R8B_SIMD_ISH )
+					shuffle2_3( Table, TableEnd );
+				#endif // SIMD
+			}
+			else
+			if( ElementSize == 4 )
+			{
+				// Calculate 3rd order spline (polynomial) interpolation
+				// coefficients using 8 points.
+
+				while( p < TableEnd )
+				{
+					calcSpline3p8Coeffs( p, p[ 0 ], p[ TablePos2 ],
+						p[ TablePos3 ], p[ TablePos4 ], p[ TablePos5 ],
+						p[ TablePos6 ], p[ TablePos7 ], p[ TablePos8 ]);
+
+					p += ElementSize;
+				}
+
+				#if defined( R8B_SIMD_ISH )
+					shuffle2_4( Table, TableEnd );
+				#endif // SIMD
+			}
+		}
+		else
+		{
+			if( ElementSize == 2 )
+			{
+				// Calculate linear interpolation coefficients.
+
+				while( p < TableEnd )
+				{
+					p[ 1 ] = p[ TablePos2 ] - p[ 0 ];
+					p += ElementSize;
+				}
+
+				#if defined( R8B_SIMD_ISH )
+					shuffle2_2( Table, TableEnd );
+				#endif // SIMD
+			}
+		}
+
+		R8BCONSOLE( "CDSPFracDelayFilterBank: fracs=%i order=%i taps=%i "
+			"att=%.1f third=%i\n", FilterFracs, ElementSize - 1, FilterLen,
+			ReqAtten, (int) IsThird );
+	}
+
+	~CDSPFracDelayFilterBank()
+	{
+		delete Next;
+	}
+
+	/**
+	 * Function "rounds" the specified attenuation to the nearest effective
+	 * value.
+	 *
+	 * @param[in,out] att Required filter attentuation. Will be rounded to the
+	 * nearest value.
+	 * @param aIsThird "True" if one-third filter is required.
+	 */
+
+	static void roundReqAtten( double& att, const bool aIsThird )
+	{
+		int tmp;
+		getWinParams( att, aIsThird, tmp );
+	}
+
+	/**
+	 * Function returns the length of the filter. Always an even number, not
+	 * less than 6.
+	 */
+
+	int getFilterLen() const
+	{
+		return( FilterLen );
+	}
+
+	/**
+	 * Function returns the number of fractional positions sampled by the
+	 * bank.
+	 */
+
+	int getFilterFracs() const
+	{
+		return( FilterFracs );
+	}
+
+	/**
+	 * @param i Filter index, in the range 0 to FilterFracs, inclusive.
+	 * @return Reference to the filter.
+	 */
+
+	const double& operator []( const int i ) const
+	{
+		R8BASSERT( i >= 0 && i <= FilterFracs );
+
+		return( Table[ i * FilterSize ]);
+	}
+
+	/**
+	 * This function should be called when the filter bank obtained via the
+	 * filter bank cache is no longer needed.
+	 */
+
+	void unref();
+
+private:
+	int FilterLen; ///< Filter length. Always an even number, not less than 6.
+		///<
+	int FilterFracs; ///< Fractional position count.
+		///<
+	int InitFilterFracs; ///< Fractional position count as supplied to the
+		///< constructor, may equal -1.
+		///<
+	int ElementSize; ///< Filter element size.
+		///<
+	int InterpPoints; ///< Interpolation points to use.
+		///<
+	double ReqAtten; ///< Filter's attentuation.
+		///<
+	bool IsThird; ///< "True" if one-third filter is in use.
+		///<
+	int FilterSize; ///< This constant specifies the "size" of a single filter
+		///< in "double" elements.
+		///<
+	CFixedBuffer< double > Table; ///< The table of fractional delay filters
+		///< for all discrete fractional x = 0..1 sample positions, and
+		///< interpolation coefficients.
+		///<
+	CDSPFracDelayFilterBank* Next; ///< Next filter bank in cache's list.
+		///<
+	int RefCount; ///< The number of references made to *this filter bank.
+		///< Not considered for "static" filter bank objects.
+		///<
+
+	/**
+	 * Function returns windowing function parameters for the specified
+	 * attenuation and filter type.
+	 *
+	 * @param[in,out] att Required filter attentuation. Will be rounded to the
+	 * nearest value.
+	 * @param aIsThird "True" if one-third filter is required.
+	 * @param[out] fltlen Resulting filter length.
+	 */
+
+	static const double* getWinParams( double& att, const bool aIsThird,
+		int& fltlen )
+	{
+		static const int Coeffs2Base = 8;
+		static const int Coeffs2Count = 12;
+		static const double Coeffs2[ Coeffs2Count ][ 3 ] = {
+			{ 4.1308468534586913, 1.1752580009977263, 55.5446 }, // 0.0256
+			{ 4.4241520324148826, 1.8004881791443044, 81.4191 }, // 0.0886
+			{ 5.2615232289173663, 1.8133318236025469, 96.3392 }, // 0.0481
+			{ 5.9433751227216174, 1.8730186391986436, 111.1315 }, // 0.0264
+			{ 6.8308658290513815, 1.8549555110340281, 125.4653 }, // 0.0146
+			{ 7.6648458290312904, 1.8565766090828464, 139.7379 }, // 0.0081
+			{ 8.2038728664307605, 1.9269521820570166, 154.0532 }, // 0.0045
+			{ 8.7865150946655142, 1.9775307667441668, 168.2101 }, // 0.0025
+			{ 9.5945017884101773, 1.9718456992078597, 182.1076 }, // 0.0014
+			{ 10.5163141145985240, 1.9504067820201083, 195.5668 }, // 0.0008
+			{ 10.2382465206362470, 2.1608923446870087, 209.0610 }, // 0.0004
+			{ 10.9976060250714000, 2.1536533525688935, 222.5010 }, // 0.0003
+		};
+
+		static const int Coeffs3Base = 6;
+		static const int Coeffs3Count = 10;
+		static const double Coeffs3[ Coeffs3Count ][ 3 ] = {
+			{ 3.9888564562781847, 1.5869927184268915, 66.5701 }, // 0.0467
+			{ 4.6986694038145007, 1.8086068597928262, 86.4715 }, // 0.0136
+			{ 5.5995071329337822, 1.8930163360942349, 106.1195 }, // 0.0040
+			{ 6.3627287800257228, 1.9945748322093975, 125.2307 }, // 0.0012
+			{ 7.4299550711428308, 1.9893400572347544, 144.3469 }, // 0.0004
+			{ 8.0667715944075642, 2.0928201458699909, 163.4099 }, // 0.0001
+			{ 8.7469970226288822, 2.1640279784268355, 181.0694 }, // 0.0000
+			{ 10.0823430069835230, 2.0896678025321922, 199.2880 }, // 0.0000
+			{ 10.9222206090489510, 2.1221681162186004, 216.6865 }, // 0.0000
+			{ 21.2017743894772010, 1.1856768080118900, 233.9188 }, // 0.0000
+		};
+
+		const double* Params;
+		int i = 0;
+
+		if( aIsThird )
+		{
+			while( i != Coeffs3Count - 1 && Coeffs3[ i ][ 2 ] < att )
+			{
+				i++;
+			}
+
+			Params = &Coeffs3[ i ][ 0 ];
+			att = Coeffs3[ i ][ 2 ];
+			fltlen = Coeffs3Base + i * 2;
+		}
+		else
+		{
+			while( i != Coeffs2Count - 1 && Coeffs2[ i ][ 2 ] < att )
+			{
+				i++;
+			}
+
+			Params = &Coeffs2[ i ][ 0 ];
+			att = Coeffs2[ i ][ 2 ];
+			fltlen = Coeffs2Base + i * 2;
+		}
+
+		return( Params );
+	}
+
+	/**
+	 * Function shuffles 2 order-2 filter points for SIMD operation.
+	 *
+	 * @param p Filter table start pointer.
+	 * @param pe Filter table end pointer.
+	 */
+
+	static void shuffle2_2( double* p, double* const pe )
+	{
+		while( p != pe )
+		{
+			const double t = p[ 2 ];
+			p[ 2 ] = p[ 1 ];
+			p[ 1 ] = t;
+
+			p += 4;
+		}
+	}
+
+	/**
+	 * Function shuffles 2 order-3 filter points for SIMD operation.
+	 *
+	 * @param p Filter table start pointer.
+	 * @param pe Filter table end pointer.
+	 */
+
+	static void shuffle2_3( double* p, double* const pe )
+	{
+		while( p != pe )
+		{
+			const double t1 = p[ 1 ];
+			const double t2 = p[ 2 ];
+			const double t3 = p[ 3 ];
+			const double t4 = p[ 4 ];
+			p[ 1 ] = t3;
+			p[ 2 ] = t1;
+			p[ 3 ] = t4;
+			p[ 4 ] = t2;
+
+			p += 6;
+		}
+	}
+
+	/**
+	 * Function shuffles 2 order-4 filter points for SIMD operation.
+	 *
+	 * @param p Filter table start pointer.
+	 * @param pe Filter table end pointer.
+	 */
+
+	static void shuffle2_4( double* p, double* const pe )
+	{
+		while( p != pe )
+		{
+			const double t1 = p[ 1 ];
+			const double t2 = p[ 2 ];
+			const double t3 = p[ 3 ];
+			const double t4 = p[ 4 ];
+			const double t5 = p[ 5 ];
+			const double t6 = p[ 6 ];
+			p[ 1 ] = t4;
+			p[ 2 ] = t1;
+			p[ 3 ] = t5;
+			p[ 4 ] = t2;
+			p[ 5 ] = t6;
+			p[ 6 ] = t3;
+
+			p += 8;
+		}
+	}
+};
+
+/**
+ * @brief Fractional delay filter cache class.
+ *
+ * Class implements cache storage of fractional delay filter banks.
+ */
+
+class CDSPFracDelayFilterBankCache : public R8B_BASECLASS
+{
+	R8BNOCTOR( CDSPFracDelayFilterBankCache );
+
+	friend class CDSPFracDelayFilterBank;
+
+public:
+	/**
+	 * @return The number of filters present in the cache now. This value can
+	 * be monitored for debugging "forgotten" filters.
+	 */
+
+	static int getObjCount()
+	{
+		R8BSYNC( StateSync );
+
+		return( ObjCount );
+	}
+
+	/**
+	 * Function calculates or returns reference to a previously calculated
+	 * (cached) fractional delay filter bank.
+	 *
+	 * @param aFilterFracs The number of fractional delay positions to sample,
+	 * -1 - use default.
+	 * @param aElementSize The size of each filter's tap, in "double" values.
+	 * @param aInterpPoints The number of points the interpolation is based
+	 * on.
+	 * @param ReqAtten Required filter attentuation.
+	 * @param IsThird "True" if one-third filter is required.
+	 * @param IsStatic "True" if a permanent static filter should be returned
+	 * that is never removed from the cache until application terminates.
+	 */
+
+	static CDSPFracDelayFilterBank& getFilterBank( const int aFilterFracs,
+		const int aElementSize, const int aInterpPoints,
+		double ReqAtten, const bool IsThird, const bool IsStatic )
+	{
+		CDSPFracDelayFilterBank :: roundReqAtten( ReqAtten, IsThird );
+
+		R8BSYNC( StateSync );
+
+		if( IsStatic )
+		{
+			CDSPFracDelayFilterBank* CurObj = StaticObjects;
+
+			while( CurObj != NULL )
+			{
+				if( CurObj -> InitFilterFracs == aFilterFracs &&
+					CurObj -> ElementSize == aElementSize &&
+					CurObj -> InterpPoints == aInterpPoints &&
+					CurObj -> ReqAtten == ReqAtten &&
+					CurObj -> IsThird == IsThird )
+				{
+					return( *CurObj );
+				}
+
+				CurObj = CurObj -> Next;
+			}
+
+			// Create a new filter bank and build it.
+
+			CurObj = new CDSPFracDelayFilterBank( aFilterFracs, aElementSize,
+				aInterpPoints, ReqAtten, IsThird );
+
+			// Insert the bank at the start of the list.
+
+			CurObj -> Next = StaticObjects.unkeep();
+			StaticObjects = CurObj;
+
+			return( *CurObj );
+		}
+
+		CDSPFracDelayFilterBank* PrevObj = NULL;
+		CDSPFracDelayFilterBank* CurObj = Objects;
+
+		while( CurObj != NULL )
+		{
+			if( CurObj -> InitFilterFracs == aFilterFracs &&
+				CurObj -> ElementSize == aElementSize &&
+				CurObj -> InterpPoints == aInterpPoints &&
+				CurObj -> ReqAtten == ReqAtten &&
+				CurObj -> IsThird == IsThird )
+			{
+				break;
+			}
+
+			if( CurObj -> Next == NULL && ObjCount >= R8B_FRACBANK_CACHE_MAX )
+			{
+				if( CurObj -> RefCount == 0 )
+				{
+					// Delete the last bank which is not used.
+
+					PrevObj -> Next = NULL;
+					delete CurObj;
+					ObjCount--;
+				}
+				else
+				{
+					// Move the last bank to the top of the list since it
+					// seems to be in use for a long time.
+
+					PrevObj -> Next = NULL;
+					CurObj -> Next = Objects.unkeep();
+					Objects = CurObj;
+				}
+
+				CurObj = NULL;
+				break;
+			}
+
+			PrevObj = CurObj;
+			CurObj = CurObj -> Next;
+		}
+
+		if( CurObj != NULL )
+		{
+			CurObj -> RefCount++;
+
+			if( PrevObj == NULL )
+			{
+				return( *CurObj );
+			}
+
+			// Remove the bank from the list temporarily.
+
+			PrevObj -> Next = CurObj -> Next;
+		}
+		else
+		{
+			// Create a new filter bank (with RefCount == 1) and build it.
+
+			CurObj = new CDSPFracDelayFilterBank( aFilterFracs, aElementSize,
+				aInterpPoints, ReqAtten, IsThird );
+
+			ObjCount++;
+		}
+
+		// Insert the bank at the start of the list.
+
+		CurObj -> Next = Objects.unkeep();
+		Objects = CurObj;
+
+		return( *CurObj );
+	}
+
+private:
+	static CSyncObject StateSync; ///< Cache state synchronizer.
+		///<
+	static CPtrKeeper< CDSPFracDelayFilterBank* > Objects; ///< The chain of
+		///< cached objects.
+		///<
+	static CPtrKeeper< CDSPFracDelayFilterBank* > StaticObjects; ///< The
+		///< chain of static objects.
+		///<
+	static int ObjCount; ///< The number of objects currently preset in the
+		///< Objects cache.
+		///<
+};
+
+// ---------------------------------------------------------------------------
+// CDSPFracDelayFilterBank PUBLIC
+// ---------------------------------------------------------------------------
+
+inline void CDSPFracDelayFilterBank :: unref()
+{
+	R8BSYNC( CDSPFracDelayFilterBankCache :: StateSync );
+
+	RefCount--;
+}
+
+/**
+ * @param l Number 1.
+ * @param s Number 2.
+ * @param[out] GCD Resulting GCD.
+ * @return "True" if the greatest common denominator of 2 numbers was
+ * found.
+ */
+
+inline bool findGCD( double l, double s, double& GCD )
+{
+	int it = 0;
+
+	while( it < 50 )
+	{
+		if( s <= 0.0 )
+		{
+			GCD = l;
+			return( true );
+		}
+
+		const double r = l - s;
+		l = s;
+		s = ( r < 0.0 ? -r : r );
+		it++;
+	}
+
+	return( false );
+}
+
+/**
+ * Function evaluates source and destination sample rate ratio and returns
+ * the required input and output stepping. Function returns "false" if
+ * whole stepping cannot be used to perform interpolation using these sample
+ * rates.
+ *
+ * @param SSampleRate Source sample rate.
+ * @param DSampleRate Destination sample rate.
+ * @param[out] ResInStep Resulting input step.
+ * @param[out] ResOutStep Resulting output step.
+ * @return "True" if stepping was acquired.
+ */
+
+inline bool getWholeStepping( const double SSampleRate,
+	const double DSampleRate, int& ResInStep, int& ResOutStep )
+{
+	double GCD;
+
+	if( !findGCD( SSampleRate, DSampleRate, GCD ) || GCD < 1.0 )
+	{
+		return( false );
+	}
+
+	const double InStep0 = SSampleRate / GCD;
+	ResInStep = (int) InStep0;
+	const double OutStep0 = DSampleRate / GCD;
+	ResOutStep = (int) OutStep0;
+
+	if( InStep0 != ResInStep || OutStep0 != ResOutStep )
+	{
+		return( false );
+	}
+
+	if( ResOutStep > 1500 )
+	{
+		// Do not allow large output stepping due to low cache
+		// performance of large filter banks.
+
+		return( false );
+	}
+
+	return( true );
+}
+
+/**
+ * @brief Fractional delay filter bank-based interpolator class.
+ *
+ * Class implements the fractional delay interpolator. This implementation at
+ * first puts the input signal into a ring buffer and then performs
+ * interpolation. The interpolation is performed using sinc-based fractional
+ * delay filters. These filters are contained in a bank, and for higher
+ * precision they are interpolated between adjacent filters.
+ *
+ * To increase sample timing precision, this class uses "resettable counter"
+ * approach. This gives zero overall sample timing error. With the
+ * R8B_FASTTIMING configuration option enabled, the sample timing experiences
+ * a very minor drift.
+ */
+
+class CDSPFracInterpolator : public CDSPProcessor
+{
+public:
+	/**
+	 * Constructor initalizes the interpolator. It is important to call the
+	 * getMaxOutLen() function afterwards to obtain the optimal output buffer
+	 * length.
+	 *
+	 * @param aSrcSampleRate Source sample rate.
+	 * @param aDstSampleRate Destination sample rate.
+	 * @param ReqAtten Required filter attentuation.
+	 * @param IsThird "True" if one-third filter is required.
+	 * @param PrevLatency Latency, in samples (any value >=0), which was left
+	 * in the output signal by a previous process. This latency will be
+	 * consumed completely.
+	 */
+
+	CDSPFracInterpolator( const double aSrcSampleRate,
+		const double aDstSampleRate, const double ReqAtten,
+		const bool IsThird, const double PrevLatency )
+		: SrcSampleRate( aSrcSampleRate )
+		, DstSampleRate( aDstSampleRate )
+	#if R8B_FASTTIMING
+		, FracStep( aSrcSampleRate / aDstSampleRate )
+	#endif // R8B_FASTTIMING
+	{
+		R8BASSERT( SrcSampleRate > 0.0 );
+		R8BASSERT( DstSampleRate > 0.0 );
+		R8BASSERT( PrevLatency >= 0.0 );
+		R8BASSERT( BufLenBits >= 5 );
+
+		InitFracPos = PrevLatency;
+		Latency = (int) InitFracPos;
+		InitFracPos -= Latency;
+
+		R8BASSERT( Latency >= 0 );
+
+		#if R8B_FLTTEST
+
+			IsWhole = false;
+			LatencyFrac = 0.0;
+			FilterBank = new CDSPFracDelayFilterBank( -1, 3, 8, ReqAtten,
+				IsThird );
+
+		#else // R8B_FLTTEST
+
+			IsWhole = getWholeStepping( SrcSampleRate, DstSampleRate, InStep,
+				OutStep );
+
+			if( IsWhole )
+			{
+				InitFracPosW = (int) ( InitFracPos * OutStep );
+				LatencyFrac = InitFracPos - (double) InitFracPosW / OutStep;
+				FilterBank = &CDSPFracDelayFilterBankCache :: getFilterBank(
+					OutStep, 1, 2, ReqAtten, IsThird, false );
+			}
+			else
+			{
+				LatencyFrac = 0.0;
+				FilterBank = &CDSPFracDelayFilterBankCache :: getFilterBank(
+					-1, 3, 8, ReqAtten, IsThird, true );
+			}
+
+		#endif // R8B_FLTTEST
+
+		FilterLen = FilterBank -> getFilterLen();
+		fl2 = FilterLen >> 1;
+		fll = fl2 - 1;
+		flo = fll + fl2;
+		flb = BufLen - fll;
+
+		R8BASSERT(( 1 << BufLenBits ) >= FilterLen * 3 );
+
+		static const CConvolveFn FltConvFn0[ 13 ] = {
+			&CDSPFracInterpolator :: convolve0< 6 >,
+			&CDSPFracInterpolator :: convolve0< 8 >,
+			&CDSPFracInterpolator :: convolve0< 10 >,
+			&CDSPFracInterpolator :: convolve0< 12 >,
+			&CDSPFracInterpolator :: convolve0< 14 >,
+			&CDSPFracInterpolator :: convolve0< 16 >,
+			&CDSPFracInterpolator :: convolve0< 18 >,
+			&CDSPFracInterpolator :: convolve0< 20 >,
+			&CDSPFracInterpolator :: convolve0< 22 >,
+			&CDSPFracInterpolator :: convolve0< 24 >,
+			&CDSPFracInterpolator :: convolve0< 26 >,
+			&CDSPFracInterpolator :: convolve0< 28 >,
+			&CDSPFracInterpolator :: convolve0< 30 >
+		};
+
+		convfn = ( IsWhole ? FltConvFn0[ fl2 - 3 ] :
+			&CDSPFracInterpolator :: convolve2 );
+
+		R8BCONSOLE( "CDSPFracInterpolator: src=%.2f dst=%.2f taps=%i "
+			"fracs=%i whole=%i third=%i step=%.6f\n", SrcSampleRate,
+			DstSampleRate, FilterLen, ( IsWhole ? OutStep :
+			FilterBank -> getFilterFracs() ), (int) IsWhole, (int) IsThird,
+			aSrcSampleRate / aDstSampleRate );
+
+		clear();
+	}
+
+	virtual ~CDSPFracInterpolator()
+	{
+		#if R8B_FLTTEST
+
+			delete FilterBank;
+
+		#else // R8B_FLTTEST
+
+			FilterBank -> unref();
+
+		#endif // R8B_FLTTEST
+	}
+
+	virtual int getLatency() const
+	{
+		return( 0 );
+	}
+
+	virtual double getLatencyFrac() const
+	{
+		return( LatencyFrac );
+	}
+
+	virtual int getMaxOutLen( const int MaxInLen ) const
+	{
+		R8BASSERT( MaxInLen >= 0 );
+
+		return( (int) ceil( MaxInLen * DstSampleRate / SrcSampleRate ) + 1 );
+	}
+
+	virtual void clear()
+	{
+		LatencyLeft = Latency;
+		BufLeft = 0;
+		WritePos = 0;
+		ReadPos = flb; // Set "read" position to account for filter's
+			// latency at zero fractional delay.
+
+		memset( &Buf[ ReadPos ], 0, ( BufLen - flb ) * sizeof( Buf[ 0 ]));
+
+		if( IsWhole )
+		{
+			InPosFracW = InitFracPosW;
+		}
+		else
+		{
+			InPosFrac = InitFracPos;
+
+			#if !R8B_FASTTIMING
+				InCounter = 0;
+				InPosInt = 0;
+				InPosShift = InitFracPos * DstSampleRate / SrcSampleRate;
+			#endif // !R8B_FASTTIMING
+		}
+	}
+
+	virtual int process( double* ip, int l, double*& op0 )
+	{
+		R8BASSERT( l >= 0 );
+		R8BASSERT( ip != op0 || l == 0 || SrcSampleRate > DstSampleRate );
+
+		if( LatencyLeft != 0 )
+		{
+			if( LatencyLeft >= l )
+			{
+				LatencyLeft -= l;
+				return( 0 );
+			}
+
+			l -= LatencyLeft;
+			ip += LatencyLeft;
+			LatencyLeft = 0;
+		}
+
+		double* op = op0;
+
+		while( l > 0 )
+		{
+			// Add new input samples to both halves of the ring buffer.
+
+			const int b = min( min( l, BufLen - WritePos ), flb - BufLeft );
+
+			double* const wp1 = Buf + WritePos;
+			memcpy( wp1, ip, b * sizeof( wp1[ 0 ]));
+
+			if( WritePos < flo )
+			{
+				const int c = min( b, flo - WritePos );
+				memcpy( wp1 + BufLen, wp1, c * sizeof( wp1[ 0 ]));
+			}
+
+			ip += b;
+			WritePos = ( WritePos + b ) & BufLenMask;
+			l -= b;
+			BufLeft += b;
+
+			// Produce as many output samples as possible.
+
+			op = ( *this.*convfn )( op );
+		}
+
+		#if !R8B_FASTTIMING
+
+			if( !IsWhole && InCounter > 1000 )
+			{
+				// Reset the interpolation position counter to achieve a
+				// higher sample timing precision.
+
+				InCounter = 0;
+				InPosInt = 0;
+				InPosShift = InPosFrac * DstSampleRate / SrcSampleRate;
+			}
+
+		#endif // !R8B_FASTTIMING
+
+		return( (int) ( op - op0 ));
+	}
+
+private:
+	static const int BufLenBits = 8; ///< The length of the ring buffer,
+		///< expressed as Nth power of 2. This value can be reduced if it is
+		///< known that only short input buffers will be passed to the
+		///< interpolator. The minimum value of this parameter is 5, and
+		///< 1 << BufLenBits should be at least 3 times larger than the
+		///< FilterLen. However, this condition can be easily met if the input
+		///< signal is suitably downsampled first before the interpolation is
+		///< performed.
+		///<
+	static const int BufLen = 1 << BufLenBits; ///< The length of the ring
+		///< buffer. The actual length is twice as long to allow "beyond max
+		///< position" positioning.
+		///<
+	static const int BufLenMask = BufLen - 1; ///< Mask used for quick buffer
+		///< position wrapping.
+		///<
+	int FilterLen; ///< Filter length, in taps. Even value.
+		///<
+	int fl2; ///< Right-side (half) filter length.
+		///<
+	int fll; ///< Input latency.
+		///<
+	int flo; ///< Overrun length.
+		///<
+	int flb; ///< Initial read position and maximal buffer write length.
+		///<
+	double Buf[ BufLen + 29 ]; ///< The ring buffer, including overrun
+		///< protection for maximal filter length.
+		///<
+	double SrcSampleRate; ///< Source sample rate.
+		///<
+	double DstSampleRate; ///< Destination sample rate.
+		///<
+	bool IsWhole; ///< "True" if whole-number stepping is in use.
+		///<
+	int InStep; ///< Input whole-number stepping.
+		///<
+	int OutStep; ///< Output whole-number stepping (corresponds to filter bank
+		///< size).
+		///<
+	double InitFracPos; ///< Initial fractional position, in samples, in the
+		///< range [0; 1).
+		///<
+	int InitFracPosW; ///< Initial fractional position for whole-number
+		///< stepping.
+		///<
+	int Latency; ///< Initial latency that should be removed from the input.
+		///<
+	double LatencyFrac; ///< Left-over fractional latency.
+		///<
+	int BufLeft; ///< The number of samples left in the buffer to process.
+		///<
+	int WritePos; ///< The current buffer write position. Incremented together
+		///< with the BufLeft variable.
+		///<
+	int ReadPos; ///< The current buffer read position.
+		///<
+	int LatencyLeft; ///< Input latency left to remove.
+		///<
+	double InPosFrac; ///< Interpolation position (fractional part).
+		///<
+	int InPosFracW; ///< Interpolation position (fractional part) for
+		///< whole-number stepping. Corresponds to the index into the filter
+		///< bank.
+		///<
+	CDSPFracDelayFilterBank* FilterBank; ///< Filter bank in use, may be
+		///< whole-number stepping filter bank or static bank.
+		///<
+#if R8B_FASTTIMING
+	double FracStep; ///< Fractional sample timing step.
+#else // R8B_FASTTIMING
+	int InCounter; ///< Interpolation step counter.
+		///<
+	int InPosInt; ///< Interpolation position (integer part).
+		///<
+	double InPosShift; ///< Interpolation position fractional shift.
+		///<
+#endif // R8B_FASTTIMING
+
+	typedef double*( CDSPFracInterpolator :: *CConvolveFn )( double* op ); ///<
+		///< Convolution function type.
+		///<
+	CConvolveFn convfn; ///< Convolution function in use.
+		///<
+
+	/**
+	 * Convolution function for 0th order resampling.
+	 *
+	 * @param[out] op Output buffer.
+	 * @return Advanced "op" value.
+	 * @tparam fltlen Filter length, in taps.
+	 */
+
+	template< int fltlen >
+	double* convolve0( double* op )
+	{
+		while( BufLeft > fl2 )
+		{
+			const double* const ftp = &(*FilterBank)[ InPosFracW ];
+			const double* const rp = Buf + ReadPos;
+			int i;
+
+		#if defined( R8B_SSE2 ) && !defined( __INTEL_COMPILER )
+
+			__m128d s = _mm_setzero_pd();
+
+			for( i = 0; i < fltlen; i += 2 )
+			{
+				const __m128d m = _mm_mul_pd( _mm_load_pd( ftp + i ),
+					_mm_loadu_pd( rp + i ));
+
+				s = _mm_add_pd( s, m );
+			}
+
+			_mm_storel_pd( op, _mm_add_pd( s, _mm_shuffle_pd( s, s, 1 )));
+
+		#elif defined( R8B_NEON )
+
+			float64x2_t s = vdupq_n_f64( 0.0 );
+
+			for( i = 0; i < fltlen; i += 2 )
+			{
+				s = vmlaq_f64( s, vld1q_f64( ftp + i ), vld1q_f64( rp + i ));
+			}
+
+			*op = vaddvq_f64( s );
+
+		#else // SIMD
+
+			double s = 0.0;
+
+			for( i = 0; i < fltlen; i++ )
+			{
+				s += ftp[ i ] * rp[ i ];
+			}
+
+			*op = s;
+
+		#endif // SIMD
+
+			op++;
+
+			InPosFracW += InStep;
+			const int PosIncr = InPosFracW / OutStep;
+			InPosFracW -= PosIncr * OutStep;
+
+			ReadPos = ( ReadPos + PosIncr ) & BufLenMask;
+			BufLeft -= PosIncr;
+		}
+
+		return( op );
+	}
+
+	/**
+	 * Convolution function for 2nd order resampling.
+	 *
+	 * @param[out] op Output buffer.
+	 * @return Advanced "op" value.
+	 */
+
+	double* convolve2( double* op )
+	{
+		const CDSPFracDelayFilterBank& fb = *FilterBank;
+		const int fltlen = FilterLen;
+
+		while( BufLeft > fl2 )
+		{
+			double x = InPosFrac * fb.getFilterFracs();
+			const int fti = (int) x; // Function table index.
+			x -= fti; // Coefficient for interpolation between
+				// adjacent fractional delay filters.
+			const double x2d = x * x;
+			const double* ftp = &fb[ fti ];
+			const double* const rp = Buf + ReadPos;
+			int i;
+
+		#if defined( R8B_SSE2 ) && defined( R8B_SIMD_ISH )
+
+			const __m128d x1 = _mm_set1_pd( x );
+			const __m128d x2 = _mm_set1_pd( x2d );
+			__m128d s = _mm_setzero_pd();
+
+			for( i = 0; i < fltlen; i += 2 )
+			{
+				const __m128d ftp2 = _mm_load_pd( ftp + 2 );
+				const __m128d xx1 = _mm_mul_pd( ftp2, x1 );
+				const __m128d ftp4 = _mm_load_pd( ftp + 4 );
+				const __m128d xx2 = _mm_mul_pd( ftp4, x2 );
+				const __m128d ftp0 = _mm_load_pd( ftp );
+				ftp += 6;
+
+				const __m128d rpi = _mm_loadu_pd( rp + i );
+				const __m128d xxs = _mm_add_pd( ftp0, _mm_add_pd( xx1, xx2 ));
+
+				s = _mm_add_pd( s, _mm_mul_pd( rpi, xxs ));
+			}
+
+			_mm_storel_pd( op, _mm_add_pd( s, _mm_shuffle_pd( s, s, 1 )));
+
+		#elif defined( R8B_NEON ) && defined( R8B_SIMD_ISH )
+
+			const float64x2_t x1 = vdupq_n_f64( x );
+			const float64x2_t x2 = vdupq_n_f64( x2d );
+			float64x2_t s = vdupq_n_f64( 0.0 );
+
+			for( i = 0; i < fltlen; i += 2 )
+			{
+				const float64x2_t ftp2 = vld1q_f64( ftp + 2 );
+				const float64x2_t xx1 = vmulq_f64( ftp2, x1 );
+				const float64x2_t ftp4 = vld1q_f64( ftp + 4 );
+				const float64x2_t xx2 = vmulq_f64( ftp4, x2 );
+				const float64x2_t ftp0 = vld1q_f64( ftp );
+				ftp += 6;
+
+				const float64x2_t rpi = vld1q_f64( rp + i );
+				const float64x2_t xxs = vaddq_f64( ftp0,
+					vaddq_f64( xx1, xx2 ));
+
+				s = vmlaq_f64( s, rpi, xxs );
+			}
+
+			*op = vaddvq_f64( s );
+
+		#else // SIMD
+
+			double s = 0.0;
+
+			for( i = 0; i < fltlen; i++ )
+			{
+				s += ( ftp[ 0 ] + ftp[ 1 ] * x + ftp[ 2 ] * x2d ) * rp[ i ];
+				ftp += 3;
+			}
+
+			*op = s;
+
+		#endif // SIMD
+
+			op++;
+
+			#if R8B_FASTTIMING
+
+				InPosFrac += FracStep;
+				const int PosIncr = (int) InPosFrac;
+				InPosFrac -= PosIncr;
+
+			#else // R8B_FASTTIMING
+
+				InCounter++;
+				const double NextInPos = ( InCounter + InPosShift ) *
+					SrcSampleRate / DstSampleRate;
+
+				const int NextInPosInt = (int) NextInPos;
+				const int PosIncr = NextInPosInt - InPosInt;
+				InPosInt = NextInPosInt;
+				InPosFrac = NextInPos - NextInPosInt;
+
+			#endif // R8B_FASTTIMING
+
+			ReadPos = ( ReadPos + PosIncr ) & BufLenMask;
+			BufLeft -= PosIncr;
+		}
+
+		return( op );
+	}
+};
+
+// ---------------------------------------------------------------------------
+
+} // namespace r8b
+
+#endif // R8B_CDSPFRACINTERPOLATOR_INCLUDED
diff --git a/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPHBDownsampler.h b/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPHBDownsampler.h
new file mode 100644
index 00000000..2a10f421
--- /dev/null
+++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPHBDownsampler.h
@@ -0,0 +1,401 @@
+//$ nobt
+//$ nocpp
+
+/**
+ * @file CDSPHBDownsampler.h
+ *
+ * @brief Half-band downsampling convolver class.
+ *
+ * This file includes half-band downsampling convolver class.
+ *
+ * r8brain-free-src Copyright (c) 2013-2022 Aleksey Vaneev
+ * See the "LICENSE" file for license.
+ */
+
+#ifndef R8B_CDSPHBDOWNSAMPLER_INCLUDED
+#define R8B_CDSPHBDOWNSAMPLER_INCLUDED
+
+#include "CDSPHBUpsampler.h"
+
+namespace r8b {
+
+/**
+ * @brief Half-band downsampler class.
+ *
+ * Class implements brute-force half-band 2X downsampling that uses small
+ * sparse symmetric FIR filters. The output has 2.0 gain.
+ */
+
+class CDSPHBDownsampler : public CDSPProcessor
+{
+public:
+	/**
+	 * Constructor initalizes the half-band downsampler.
+	 *
+	 * @param ReqAtten Required half-band filter attentuation.
+	 * @param SteepIndex Steepness index - 0=steepest. Corresponds to general
+	 * downsampling ratio, e.g. at 4x downsampling 0 is used, at 8x
+	 * downsampling 1 is used, etc.
+	 * @param IsThird "True" if 1/3 resampling is performed.
+	 * @param PrevLatency Latency, in samples (any value >=0), which was left
+	 * in the output signal by a previous process. Whole-number latency will
+	 * be consumed by *this object while remaining fractional latency can be
+	 * obtained via the getLatencyFrac() function.
+	 */
+
+	CDSPHBDownsampler( const double ReqAtten, const int SteepIndex,
+		const bool IsThird, const double PrevLatency )
+	{
+		static const CConvolveFn FltConvFn[ 14 ] = {
+			&CDSPHBDownsampler :: convolve1, &CDSPHBDownsampler :: convolve2,
+			&CDSPHBDownsampler :: convolve3, &CDSPHBDownsampler :: convolve4,
+			&CDSPHBDownsampler :: convolve5, &CDSPHBDownsampler :: convolve6,
+			&CDSPHBDownsampler :: convolve7, &CDSPHBDownsampler :: convolve8,
+			&CDSPHBDownsampler :: convolve9, &CDSPHBDownsampler :: convolve10,
+			&CDSPHBDownsampler :: convolve11, &CDSPHBDownsampler :: convolve12,
+			&CDSPHBDownsampler :: convolve13,
+			&CDSPHBDownsampler :: convolve14 };
+
+		int fltt;
+		double att;
+
+		if( IsThird )
+		{
+			CDSPHBUpsampler :: getHBFilterThird( ReqAtten, SteepIndex, fltp,
+				fltt, att );
+		}
+		else
+		{
+			CDSPHBUpsampler :: getHBFilter( ReqAtten, SteepIndex, fltp, fltt,
+				att );
+		}
+
+		convfn = FltConvFn[ fltt - 1 ];
+		fll = fltt * 2 - 1;
+		fl2 = fll;
+		flo = fll + fl2;
+		flb = BufLen - fll;
+		BufRP = Buf + fll;
+
+		LatencyFrac = PrevLatency * 0.5;
+		Latency = (int) LatencyFrac;
+		LatencyFrac -= Latency;
+
+		R8BASSERT( Latency >= 0 );
+
+		R8BCONSOLE( "CDSPHBDownsampler: taps=%i third=%i att=%.1f io=1/2\n",
+			fltt, (int) IsThird, att );
+
+		clear();
+	}
+
+	virtual int getLatency() const
+	{
+		return( 0 );
+	}
+
+	virtual double getLatencyFrac() const
+	{
+		return( LatencyFrac );
+	}
+
+	virtual int getMaxOutLen( const int MaxInLen ) const
+	{
+		R8BASSERT( MaxInLen >= 0 );
+
+		return(( MaxInLen + 1 ) >> 1 );
+	}
+
+	virtual void clear()
+	{
+		LatencyLeft = Latency;
+		BufLeft = 0;
+		WritePos = 0;
+		ReadPos = flb; // Set "read" position to account for filter's latency.
+
+		memset( &Buf[ ReadPos ], 0, ( BufLen - flb ) * sizeof( Buf[ 0 ]));
+	}
+
+	virtual int process( double* ip, int l, double*& op0 )
+	{
+		R8BASSERT( l >= 0 );
+
+		double* op = op0;
+
+		while( l > 0 )
+		{
+			// Add new input samples to both halves of the ring buffer.
+
+			const int b = min( min( l, BufLen - WritePos ), flb - BufLeft );
+
+			double* const wp1 = Buf + WritePos;
+			memcpy( wp1, ip, b * sizeof( wp1[ 0 ]));
+
+			if( WritePos < flo )
+			{
+				const int c = min( b, flo - WritePos );
+				memcpy( wp1 + BufLen, wp1, c * sizeof( wp1[ 0 ]));
+			}
+
+			ip += b;
+			WritePos = ( WritePos + b ) & BufLenMask;
+			l -= b;
+			BufLeft += b;
+
+			// Produce output.
+
+			if( BufLeft > fl2 )
+			{
+				const int c = ( BufLeft - fl2 + 1 ) >> 1;
+
+				double* const opend = op + c;
+				( *convfn )( op, opend, fltp, BufRP, ReadPos );
+
+				op = opend;
+				const int c2 = c + c;
+				ReadPos = ( ReadPos + c2 ) & BufLenMask;
+				BufLeft -= c2;
+			}
+		}
+
+		int ol = (int) ( op - op0 );
+
+		if( LatencyLeft != 0 )
+		{
+			if( LatencyLeft >= ol )
+			{
+				LatencyLeft -= ol;
+				return( 0 );
+			}
+
+			ol -= LatencyLeft;
+			op0 += LatencyLeft;
+			LatencyLeft = 0;
+		}
+
+		return( ol );
+	}
+
+private:
+	static const int BufLenBits = 10; ///< The length of the ring buffer,
+		///< expressed as Nth power of 2. This value can be reduced if it is
+		///< known that only short input buffers will be passed to the
+		///< interpolator. The minimum value of this parameter is 5, and
+		///< 1 << BufLenBits should be at least 3 times larger than the
+		///< FilterLen.
+		///<
+	static const int BufLen = 1 << BufLenBits; ///< The length of the ring
+		///< buffer. The actual length is twice as long to allow "beyond max
+		///< position" positioning.
+		///<
+	static const int BufLenMask = BufLen - 1; ///< Mask used for quick buffer
+		///< position wrapping.
+		///<
+	double Buf[ BufLen + 54 ]; ///< The ring buffer, including overrun
+		///< protection for the largest filter.
+		///<
+	const double* fltp; ///< Half-band filter taps.
+		///<
+	int fll; ///< Input latency.
+		///<
+	int fl2; ///< Right-side filter length.
+		///<
+	int flo; ///< Overrun length.
+		///<
+	int flb; ///< Initial read position and maximal buffer write length.
+		///<
+	const double* BufRP; ///< Offseted Buf pointer at ReadPos=0.
+		///<
+	int Latency; ///< Initial latency that should be removed from the output.
+		///<
+	double LatencyFrac; ///< Fractional latency left on the output.
+		///<
+	int BufLeft; ///< The number of samples left in the buffer to process.
+		///< When this value is below FilterLenD2Plus1, the interpolation
+		///< cycle ends.
+		///<
+	int WritePos; ///< The current buffer write position. Incremented together
+		///< with the BufLeft variable.
+		///<
+	int ReadPos; ///< The current buffer read position.
+		///<
+	int LatencyLeft; ///< Latency left to remove.
+		///<
+	typedef void( *CConvolveFn )( double* op, double* const opend,
+		const double* const flt, const double* const rp0, int rpos ); ///<
+		///< Convolution function type.
+		///<
+	CConvolveFn convfn; ///< Convolution function in use.
+		///<
+
+#define R8BHBC1( fn ) \
+	static void fn( double* op, double* const opend, const double* const flt, \
+		const double* const rp0, int rpos ) \
+	{ \
+		while( op != opend ) \
+		{ \
+			const double* const rp = rp0 + rpos; \
+			*op = rp[ 0 ] +
+
+#define R8BHBC2 \
+			rpos = ( rpos + 2 ) & BufLenMask; \
+			op++; \
+		} \
+	}
+
+	R8BHBC1( convolve1 )
+				flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]);
+	R8BHBC2
+
+	R8BHBC1( convolve2 )
+				flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) +
+				flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]);
+	R8BHBC2
+
+	R8BHBC1( convolve3 )
+				flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) +
+				flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) +
+				flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]);
+	R8BHBC2
+
+	R8BHBC1( convolve4 )
+				flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) +
+				flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) +
+				flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]) +
+				flt[ 3 ] * ( rp[ 7 ] + rp[ -7 ]);
+	R8BHBC2
+
+	R8BHBC1( convolve5 )
+				flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) +
+				flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) +
+				flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]) +
+				flt[ 3 ] * ( rp[ 7 ] + rp[ -7 ]) +
+				flt[ 4 ] * ( rp[ 9 ] + rp[ -9 ]);
+	R8BHBC2
+
+	R8BHBC1( convolve6 )
+				flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) +
+				flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) +
+				flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]) +
+				flt[ 3 ] * ( rp[ 7 ] + rp[ -7 ]) +
+				flt[ 4 ] * ( rp[ 9 ] + rp[ -9 ]) +
+				flt[ 5 ] * ( rp[ 11 ] + rp[ -11 ]);
+	R8BHBC2
+
+	R8BHBC1( convolve7 )
+				flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) +
+				flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) +
+				flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]) +
+				flt[ 3 ] * ( rp[ 7 ] + rp[ -7 ]) +
+				flt[ 4 ] * ( rp[ 9 ] + rp[ -9 ]) +
+				flt[ 5 ] * ( rp[ 11 ] + rp[ -11 ]) +
+				flt[ 6 ] * ( rp[ 13 ] + rp[ -13 ]);
+	R8BHBC2
+
+	R8BHBC1( convolve8 )
+				flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) +
+				flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) +
+				flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]) +
+				flt[ 3 ] * ( rp[ 7 ] + rp[ -7 ]) +
+				flt[ 4 ] * ( rp[ 9 ] + rp[ -9 ]) +
+				flt[ 5 ] * ( rp[ 11 ] + rp[ -11 ]) +
+				flt[ 6 ] * ( rp[ 13 ] + rp[ -13 ]) +
+				flt[ 7 ] * ( rp[ 15 ] + rp[ -15 ]);
+	R8BHBC2
+
+	R8BHBC1( convolve9 )
+				flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) +
+				flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) +
+				flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]) +
+				flt[ 3 ] * ( rp[ 7 ] + rp[ -7 ]) +
+				flt[ 4 ] * ( rp[ 9 ] + rp[ -9 ]) +
+				flt[ 5 ] * ( rp[ 11 ] + rp[ -11 ]) +
+				flt[ 6 ] * ( rp[ 13 ] + rp[ -13 ]) +
+				flt[ 7 ] * ( rp[ 15 ] + rp[ -15 ]) +
+				flt[ 8 ] * ( rp[ 17 ] + rp[ -17 ]);
+	R8BHBC2
+
+	R8BHBC1( convolve10 )
+				flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) +
+				flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) +
+				flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]) +
+				flt[ 3 ] * ( rp[ 7 ] + rp[ -7 ]) +
+				flt[ 4 ] * ( rp[ 9 ] + rp[ -9 ]) +
+				flt[ 5 ] * ( rp[ 11 ] + rp[ -11 ]) +
+				flt[ 6 ] * ( rp[ 13 ] + rp[ -13 ]) +
+				flt[ 7 ] * ( rp[ 15 ] + rp[ -15 ]) +
+				flt[ 8 ] * ( rp[ 17 ] + rp[ -17 ]) +
+				flt[ 9 ] * ( rp[ 19 ] + rp[ -19 ]);
+	R8BHBC2
+
+	R8BHBC1( convolve11 )
+				flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) +
+				flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) +
+				flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]) +
+				flt[ 3 ] * ( rp[ 7 ] + rp[ -7 ]) +
+				flt[ 4 ] * ( rp[ 9 ] + rp[ -9 ]) +
+				flt[ 5 ] * ( rp[ 11 ] + rp[ -11 ]) +
+				flt[ 6 ] * ( rp[ 13 ] + rp[ -13 ]) +
+				flt[ 7 ] * ( rp[ 15 ] + rp[ -15 ]) +
+				flt[ 8 ] * ( rp[ 17 ] + rp[ -17 ]) +
+				flt[ 9 ] * ( rp[ 19 ] + rp[ -19 ]) +
+				flt[ 10 ] * ( rp[ 21 ] + rp[ -21 ]);
+	R8BHBC2
+
+	R8BHBC1( convolve12 )
+				flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) +
+				flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) +
+				flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]) +
+				flt[ 3 ] * ( rp[ 7 ] + rp[ -7 ]) +
+				flt[ 4 ] * ( rp[ 9 ] + rp[ -9 ]) +
+				flt[ 5 ] * ( rp[ 11 ] + rp[ -11 ]) +
+				flt[ 6 ] * ( rp[ 13 ] + rp[ -13 ]) +
+				flt[ 7 ] * ( rp[ 15 ] + rp[ -15 ]) +
+				flt[ 8 ] * ( rp[ 17 ] + rp[ -17 ]) +
+				flt[ 9 ] * ( rp[ 19 ] + rp[ -19 ]) +
+				flt[ 10 ] * ( rp[ 21 ] + rp[ -21 ]) +
+				flt[ 11 ] * ( rp[ 23 ] + rp[ -23 ]);
+	R8BHBC2
+
+	R8BHBC1( convolve13 )
+				flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) +
+				flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) +
+				flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]) +
+				flt[ 3 ] * ( rp[ 7 ] + rp[ -7 ]) +
+				flt[ 4 ] * ( rp[ 9 ] + rp[ -9 ]) +
+				flt[ 5 ] * ( rp[ 11 ] + rp[ -11 ]) +
+				flt[ 6 ] * ( rp[ 13 ] + rp[ -13 ]) +
+				flt[ 7 ] * ( rp[ 15 ] + rp[ -15 ]) +
+				flt[ 8 ] * ( rp[ 17 ] + rp[ -17 ]) +
+				flt[ 9 ] * ( rp[ 19 ] + rp[ -19 ]) +
+				flt[ 10 ] * ( rp[ 21 ] + rp[ -21 ]) +
+				flt[ 11 ] * ( rp[ 23 ] + rp[ -23 ]) +
+				flt[ 12 ] * ( rp[ 25 ] + rp[ -25 ]);
+	R8BHBC2
+
+	R8BHBC1( convolve14 )
+				flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) +
+				flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) +
+				flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]) +
+				flt[ 3 ] * ( rp[ 7 ] + rp[ -7 ]) +
+				flt[ 4 ] * ( rp[ 9 ] + rp[ -9 ]) +
+				flt[ 5 ] * ( rp[ 11 ] + rp[ -11 ]) +
+				flt[ 6 ] * ( rp[ 13 ] + rp[ -13 ]) +
+				flt[ 7 ] * ( rp[ 15 ] + rp[ -15 ]) +
+				flt[ 8 ] * ( rp[ 17 ] + rp[ -17 ]) +
+				flt[ 9 ] * ( rp[ 19 ] + rp[ -19 ]) +
+				flt[ 10 ] * ( rp[ 21 ] + rp[ -21 ]) +
+				flt[ 11 ] * ( rp[ 23 ] + rp[ -23 ]) +
+				flt[ 12 ] * ( rp[ 25 ] + rp[ -25 ]) +
+				flt[ 13 ] * ( rp[ 27 ] + rp[ -27 ]);
+	R8BHBC2
+
+#undef R8BHBC1
+#undef R8BHBC2
+};
+
+// ---------------------------------------------------------------------------
+
+} // namespace r8b
+
+#endif // R8B_CDSPHBDOWNSAMPLER_INCLUDED
diff --git a/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPHBUpsampler.h b/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPHBUpsampler.h
new file mode 100644
index 00000000..e9b03c70
--- /dev/null
+++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPHBUpsampler.h
@@ -0,0 +1,804 @@
+//$ nobt
+//$ nocpp
+
+/**
+ * @file CDSPHBUpsampler.h
+ *
+ * @brief Half-band upsampling class.
+ *
+ * This file includes half-band upsampling class.
+ *
+ * r8brain-free-src Copyright (c) 2013-2022 Aleksey Vaneev
+ * See the "LICENSE" file for license.
+ */
+
+#ifndef R8B_CDSPHBUPSAMPLER_INCLUDED
+#define R8B_CDSPHBUPSAMPLER_INCLUDED
+
+#include "CDSPProcessor.h"
+
+namespace r8b {
+
+/**
+ * @brief Half-band upsampling class.
+ *
+ * Class implements brute-force half-band 2X upsampling that uses small
+ * sparse symmetric FIR filters. It is very efficient and should be used at
+ * latter upsampling steps after initial steep 2X upsampling.
+ */
+
+class CDSPHBUpsampler : public CDSPProcessor
+{
+public:
+	/**
+	 * Function that provides filter data for various steepness indices and
+	 * attenuations.
+	 *
+	 * @param ReqAtten Required half-band filter attentuation.
+	 * @param SteepIndex Steepness index - 0=steepest. Corresponds to general
+	 * upsampling/downsampling ratio, e.g. at 4x 0 is used, at 8x 1 is used,
+	 * etc.
+	 */
+
+	static void getHBFilter( const double ReqAtten, const int SteepIndex,
+		const double*& flt, int& fltt, double& att )
+	{
+		static const double HBKernel_4A[ 4 ] = { // -54.5176 dB, 4
+			6.1729335650971517e-001, -1.5963945620743250e-001,
+			5.5073370934086312e-002, -1.4603578989932850e-002,};
+		static const double HBKernel_5A[ 5 ] = { // -66.3075 dB, 4
+			6.2068807424902472e-001, -1.6827573634467302e-001,
+			6.5263016720721170e-002, -2.2483331611592005e-002,
+			5.2917326684281110e-003,};
+		static const double HBKernel_6A[ 6 ] = { // -89.5271 dB, 4
+			6.2187202340480707e-001, -1.7132842113816371e-001,
+			6.9019169178765674e-002, -2.5799728312695277e-002,
+			7.4880112525741666e-003, -1.2844465869952567e-003,};
+		static const double HBKernel_7A[ 7 ] = { // -105.2842 dB, 4
+			6.2354494135775851e-001, -1.7571220703702045e-001,
+			7.4529843603968457e-002, -3.0701736822442153e-002,
+			1.0716755639039573e-002, -2.7833422930759735e-003,
+			4.1118797093875510e-004,};
+		static const double HBKernel_8A[ 8 ] = { // -121.0063 dB, 4
+			6.2488363107953926e-001, -1.7924942606514119e-001,
+			7.9068155655640557e-002, -3.4907523415495731e-002,
+			1.3710256799907897e-002, -4.3991142586987933e-003,
+			1.0259190163889602e-003, -1.3278941979339359e-004,};
+		static const double HBKernel_9A[ 9 ] = { // -136.6982 dB, 4
+			6.2597763804021977e-001, -1.8216414325139055e-001,
+			8.2879104876726728e-002, -3.8563442248249404e-002,
+			1.6471530499739394e-002, -6.0489108881335227e-003,
+			1.7805283804140392e-003, -3.7533200112729561e-004,
+			4.3172840558735476e-005,};
+		static const double HBKernel_10A[ 10 ] = { // -152.3572 dB, 4
+			6.2688767582974092e-001, -1.8460766807559420e-001,
+			8.6128943000481864e-002, -4.1774474147006607e-002,
+			1.9014801985747346e-002, -7.6870397465866507e-003,
+			2.6264590175341853e-003, -7.1106660285478562e-004,
+			1.3645852036179345e-004, -1.4113888783332969e-005,};
+		static const double HBKernel_11A[ 11 ] = { // -183.7962 dB, 4
+			6.2667167706948146e-001, -1.8407153342635879e-001,
+			8.5529995610836046e-002, -4.1346831462361310e-002,
+			1.8844831691322637e-002, -7.7125170365394992e-003,
+			2.7268674860562087e-003, -7.9745028501057233e-004,
+			1.8116344606360795e-004, -2.8569149754241848e-005,
+			2.3667022010173616e-006,};
+		static const double HBKernel_12A[ 12 ] = { // -199.4768 dB, 4
+			6.2747849730367999e-001, -1.8623616784506747e-001,
+			8.8409755898467945e-002, -4.4207468821462342e-002,
+			2.1149175945115381e-002, -9.2551508371115209e-003,
+			3.5871562170822330e-003, -1.1923167653750219e-003,
+			3.2627812189920129e-004, -6.9106902511490413e-005,
+			1.0122897863125124e-005, -7.7531878906846174e-007,};
+		static const double HBKernel_13A[ 13 ] = { // -215.1364 dB, 4
+			6.2816416252367324e-001, -1.8809076955230414e-001,
+			9.0918539867353029e-002, -4.6765502683599310e-002,
+			2.3287520498995663e-002, -1.0760627245014184e-002,
+			4.4853922948425683e-003, -1.6438775426910800e-003,
+			5.1441312354764978e-004, -1.3211725685765050e-004,
+			2.6191319837779187e-005, -3.5802430606313093e-006,
+			2.5491278270628601e-007,};
+		static const double HBKernel_14A[ 14 ] = { // -230.7526 dB, 4
+			6.2875473120929948e-001, -1.8969941936903847e-001,
+			9.3126094480960403e-002, -4.9067251179869126e-002,
+			2.5273008851199916e-002, -1.2218646153393291e-002,
+			5.4048942085580280e-003, -2.1409919546078581e-003,
+			7.4250292812927973e-004, -2.1924542206832172e-004,
+			5.3015808983125091e-005, -9.8743034923598196e-006,
+			1.2650391141650221e-006, -8.4146674637474946e-008,};
+		static const int FltCountA = 11;
+		static const int FlttBaseA = 4;
+		static const double FltAttensA[ FltCountA ] = {
+			54.5176, 66.3075, 89.5271, 105.2842, 121.0063, 136.6982, 152.3572, 183.7962, 199.4768, 215.1364, 230.7526, };
+		static const double* const FltPtrsA[ FltCountA ] = {
+			HBKernel_4A, HBKernel_5A, HBKernel_6A, HBKernel_7A, HBKernel_8A, HBKernel_9A, HBKernel_10A, HBKernel_11A, HBKernel_12A, HBKernel_13A, HBKernel_14A, };
+		static const double HBKernel_2B[ 2 ] = { // -56.6007 dB, 8
+			5.7361525854329076e-001, -7.5092074924827903e-002,};
+		static const double HBKernel_3B[ 3 ] = { // -83.0295 dB, 8
+			5.9277038608066912e-001, -1.0851340190268854e-001,
+			1.5813570475513079e-002,};
+		static const double HBKernel_4B[ 4 ] = { // -123.4724 dB, 8
+			6.0140277542879617e-001, -1.2564483854574138e-001,
+			2.7446500598038322e-002, -3.2051079559057435e-003,};
+		static const double HBKernel_5B[ 5 ] = { // -152.4411 dB, 8
+			6.0818642429088932e-001, -1.3981140187175697e-001,
+			3.8489164054503623e-002, -7.6218861797853104e-003,
+			7.5772358130952392e-004,};
+		static const double HBKernel_6B[ 6 ] = { // -181.2501 dB, 8
+			6.1278392271464355e-001, -1.5000053762513338e-001,
+			4.7575323511364960e-002, -1.2320702802243476e-002,
+			2.1462442592348487e-003, -1.8425092381892940e-004,};
+		static const double HBKernel_7B[ 7 ] = { // -209.9472 dB, 8
+			6.1610372263478952e-001, -1.5767891882524138e-001,
+			5.5089691170294691e-002, -1.6895755656366061e-002,
+			3.9416643438213977e-003, -6.0603623791604668e-004,
+			4.5632602433393365e-005,};
+		static const double HBKernel_8B[ 8 ] = { // -238.5616 dB, 8
+			6.1861282914465976e-001, -1.6367179451225150e-001,
+			6.1369861342939716e-002, -2.1184466539006987e-002,
+			5.9623357510842061e-003, -1.2483098507454090e-003,
+			1.7099297537964702e-004, -1.1448313239478885e-005,};
+		static const int FltCountB = 7;
+		static const int FlttBaseB = 2;
+		static const double FltAttensB[ FltCountB ] = {
+			56.6007, 83.0295, 123.4724, 152.4411, 181.2501, 209.9472, 238.5616, };
+		static const double* const FltPtrsB[ FltCountB ] = {
+			HBKernel_2B, HBKernel_3B, HBKernel_4B, HBKernel_5B, HBKernel_6B, HBKernel_7B, HBKernel_8B, };
+		static const double HBKernel_2C[ 2 ] = { // -89.0473 dB, 16
+			5.6430278013478008e-001, -6.4338068855763375e-002,};
+		static const double HBKernel_3C[ 3 ] = { // -130.8951 dB, 16
+			5.8706402915551448e-001, -9.9362380958670449e-002,
+			1.2298637065869358e-002,};
+		static const double HBKernel_4C[ 4 ] = { // -172.3192 dB, 16
+			5.9896586134984675e-001, -1.2111680603434927e-001,
+			2.4763118076458895e-002, -2.6121758132212989e-003,};
+		static const double HBKernel_5C[ 5 ] = { // -213.4984 dB, 16
+			6.0626808285230716e-001, -1.3588224032740795e-001,
+			3.5544305238309003e-002, -6.5127022377289654e-003,
+			5.8255449565950768e-004,};
+		static const double HBKernel_6C[ 6 ] = { // -254.5186 dB, 16
+			6.1120171263351242e-001, -1.4654486853757870e-001,
+			4.4582959299131253e-002, -1.0840543858123995e-002,
+			1.7343706485509962e-003, -1.3363018567985596e-004,};
+		static const int FltCountC = 5;
+		static const int FlttBaseC = 2;
+		static const double FltAttensC[ FltCountC ] = {
+			89.0473, 130.8951, 172.3192, 213.4984, 254.5186, };
+		static const double* const FltPtrsC[ FltCountC ] = {
+			HBKernel_2C, HBKernel_3C, HBKernel_4C, HBKernel_5C, HBKernel_6C, };
+		static const double HBKernel_1D[ 1 ] = { // -54.4754 dB, 32
+			5.0188900022775451e-001,};
+		static const double HBKernel_2D[ 2 ] = { // -113.2139 dB, 32
+			5.6295152180538044e-001, -6.2953706070191726e-002,};
+		static const double HBKernel_3D[ 3 ] = { // -167.1447 dB, 32
+			5.8621968728755036e-001, -9.8080551656524531e-002,
+			1.1860868761997080e-002,};
+		static const double HBKernel_4D[ 4 ] = { // -220.6519 dB, 32
+			5.9835028657163591e-001, -1.1999986086623511e-001,
+			2.4132530854004228e-002, -2.4829565686819706e-003,};
+		static const int FltCountD = 4;
+		static const int FlttBaseD = 1;
+		static const double FltAttensD[ FltCountD ] = {
+			54.4754, 113.2139, 167.1447, 220.6519, };
+		static const double* const FltPtrsD[ FltCountD ] = {
+			HBKernel_1D, HBKernel_2D, HBKernel_3D, HBKernel_4D, };
+		static const double HBKernel_1E[ 1 ] = { // -66.5391 dB, 64
+			5.0047102586416625e-001,};
+		static const double HBKernel_2E[ 2 ] = { // -137.3173 dB, 64
+			5.6261293163933568e-001, -6.2613067826620017e-002,};
+		static const double HBKernel_3E[ 3 ] = { // -203.2997 dB, 64
+			5.8600808139396787e-001, -9.7762185880067784e-002,
+			1.1754104554493029e-002,};
+		static const double HBKernel_4E[ 4 ] = { // -268.8550 dB, 64
+			5.9819599352772002e-001, -1.1972157555011861e-001,
+			2.3977305567947922e-002, -2.4517235455853992e-003,};
+		static const int FltCountE = 4;
+		static const int FlttBaseE = 1;
+		static const double FltAttensE[ FltCountE ] = {
+			66.5391, 137.3173, 203.2997, 268.8550, };
+		static const double* const FltPtrsE[ FltCountE ] = {
+			HBKernel_1E, HBKernel_2E, HBKernel_3E, HBKernel_4E, };
+		static const double HBKernel_1F[ 1 ] = { // -82.4633 dB, 128
+			5.0007530666642896e-001,};
+		static const double HBKernel_2F[ 2 ] = { // -161.4049 dB, 128
+			5.6252823610146030e-001, -6.2528244608044792e-002,};
+		static const double HBKernel_3F[ 3 ] = { // -239.4313 dB, 128
+			5.8595514744674237e-001, -9.7682725156791952e-002,
+			1.1727577711117231e-002,};
+		static const int FltCountF = 3;
+		static const int FlttBaseF = 1;
+		static const double FltAttensF[ FltCountF ] = {
+			82.4633, 161.4049, 239.4313, };
+		static const double* const FltPtrsF[ FltCountF ] = {
+			HBKernel_1F, HBKernel_2F, HBKernel_3F, };
+		static const double HBKernel_1G[ 1 ] = { // -94.5052 dB, 256
+			5.0001882524896712e-001,};
+		static const double HBKernel_2G[ 2 ] = { // -185.4886 dB, 256
+			5.6250705922479682e-001, -6.2507059756378394e-002,};
+		static const double HBKernel_3G[ 3 ] = { // -275.5501 dB, 256
+			5.8594191201187384e-001, -9.7662868266991207e-002,
+			1.1720956255134043e-002,};
+		static const int FltCountG = 3;
+		static const int FlttBaseG = 1;
+		static const double FltAttensG[ FltCountG ] = {
+			94.5052, 185.4886, 275.5501, };
+		static const double* const FltPtrsG[ FltCountG ] = {
+			HBKernel_1G, HBKernel_2G, HBKernel_3G, };
+
+		int k = 0;
+
+		if( SteepIndex <= 0 )
+		{
+			while( k != FltCountA - 1 && FltAttensA[ k ] < ReqAtten )
+			{
+				k++;
+			}
+
+			flt = FltPtrsA[ k ];
+			fltt = FlttBaseA + k;
+			att = FltAttensA[ k ];
+		}
+		else
+		if( SteepIndex == 1 )
+		{
+			while( k != FltCountB - 1 && FltAttensB[ k ] < ReqAtten )
+			{
+				k++;
+			}
+
+			flt = FltPtrsB[ k ];
+			fltt = FlttBaseB + k;
+			att = FltAttensB[ k ];
+		}
+		else
+		if( SteepIndex == 2 )
+		{
+			while( k != FltCountC - 1 && FltAttensC[ k ] < ReqAtten )
+			{
+				k++;
+			}
+
+			flt = FltPtrsC[ k ];
+			fltt = FlttBaseC + k;
+			att = FltAttensC[ k ];
+		}
+		else
+		if( SteepIndex == 3 )
+		{
+			while( k != FltCountD - 1 && FltAttensD[ k ] < ReqAtten )
+			{
+				k++;
+			}
+
+			flt = FltPtrsD[ k ];
+			fltt = FlttBaseD + k;
+			att = FltAttensD[ k ];
+		}
+		else
+		if( SteepIndex == 4 )
+		{
+			while( k != FltCountE - 1 && FltAttensE[ k ] < ReqAtten )
+			{
+				k++;
+			}
+
+			flt = FltPtrsE[ k ];
+			fltt = FlttBaseE + k;
+			att = FltAttensE[ k ];
+		}
+		else
+		if( SteepIndex == 5 )
+		{
+			while( k != FltCountF - 1 && FltAttensF[ k ] < ReqAtten )
+			{
+				k++;
+			}
+
+			flt = FltPtrsF[ k ];
+			fltt = FlttBaseF + k;
+			att = FltAttensF[ k ];
+		}
+		else
+		{
+			while( k != FltCountG - 1 && FltAttensG[ k ] < ReqAtten )
+			{
+				k++;
+			}
+
+			flt = FltPtrsG[ k ];
+			fltt = FlttBaseG + k;
+			att = FltAttensG[ k ];
+		}
+	}
+
+	/**
+	 * Function that provides filter data for various steepness indices and
+	 * attenuations. For 1/3 resamplings.
+	 *
+	 * @param ReqAtten Required half-band filter attentuation.
+	 * @param SteepIndex Steepness index - 0=steepest. Corresponds to general
+	 * upsampling/downsampling ratio, e.g. at 4x 0 is used, at 8x 1 is used,
+	 * etc.
+	 */
+
+	static void getHBFilterThird( const double ReqAtten, const int SteepIndex,
+		const double*& flt, int& fltt, double& att )
+	{
+		static const double HBKernel_3A[ 3 ] = { // -66.3726 dB, 6
+			5.9811355069551475e-001, -1.1793396656733847e-001,
+			2.0300557211946322e-002,};
+		static const double HBKernel_4A[ 4 ] = { // -90.2546 dB, 6
+			6.0645499250612578e-001, -1.3555496505481171e-001,
+			3.4022804962365975e-002, -4.9535418595798757e-003,};
+		static const double HBKernel_5A[ 5 ] = { // -126.5507 dB, 6
+			6.1014115058940210e-001, -1.4393081816629907e-001,
+			4.1760642892852244e-002, -8.9692183234056175e-003,
+			9.9871340618342070e-004,};
+		static const double HBKernel_6A[ 6 ] = { // -150.1839 dB, 6
+			6.1439563420546972e-001, -1.5360187826905250e-001,
+			5.0840891345687034e-002, -1.4053648740561121e-002,
+			2.6771286587305727e-003, -2.5815816044823123e-004,};
+		static const double HBKernel_7A[ 7 ] = { // -173.7068 dB, 6
+			6.1747493476329918e-001, -1.6087373733313212e-001,
+			5.8263075641409430e-002, -1.8872408173431318e-002,
+			4.7421376543513687e-003, -8.0196529612267474e-004,
+			6.7964807393798996e-005,};
+		static const double HBKernel_8A[ 8 ] = { // -197.1454 dB, 6
+			6.1980610947775050e-001, -1.6654070578314714e-001,
+			6.4416567441730327e-002, -2.3307744348719822e-002,
+			6.9909157372312443e-003, -1.5871946293364403e-003,
+			2.4017727382382763e-004, -1.8125308241541697e-005,};
+		static const double HBKernel_9A[ 9 ] = { // -220.5199 dB, 6
+			6.2163188951899306e-001, -1.7108115323810941e-001,
+			6.9588370095600260e-002, -2.7339625080613838e-002,
+			9.2954469183791771e-003, -2.5537179959555429e-003,
+			5.2572290897951021e-004, -7.1813356135154921e-005,
+			4.8802382808892154e-006,};
+		static const int FltCountA = 7;
+		static const int FlttBaseA = 3;
+		static const double FltAttensA[ FltCountA ] = {
+			66.3726, 90.2546, 126.5507, 150.1839, 173.7068, 197.1454, 220.5199, };
+		static const double* const FltPtrsA[ FltCountA ] = {
+			HBKernel_3A, HBKernel_4A, HBKernel_5A, HBKernel_6A, HBKernel_7A, HBKernel_8A, HBKernel_9A, };
+		static const double HBKernel_2B[ 2 ] = { // -71.0965 dB, 12
+			5.6748544264806311e-001, -6.7764090509431732e-002,};
+		static const double HBKernel_3B[ 3 ] = { // -115.7707 dB, 12
+			5.8793612182667199e-001, -1.0070583248877293e-001,
+			1.2771337947163834e-002,};
+		static const double HBKernel_4B[ 4 ] = { // -152.1535 dB, 12
+			5.9960155600862808e-001, -1.2228154335199336e-001,
+			2.5433718917694709e-002, -2.7537562530837154e-003,};
+		static const double HBKernel_5B[ 5 ] = { // -188.2914 dB, 12
+			6.0676859170554343e-001, -1.3689667009876413e-001,
+			3.6288512631926818e-002, -6.7838855305035351e-003,
+			6.2345167677087547e-004,};
+		static const double HBKernel_6B[ 6 ] = { // -224.2705 dB, 12
+			6.1161456341904397e-001, -1.4743901958274458e-001,
+			4.5344160157313275e-002, -1.1207371780924531e-002,
+			1.8328497112594935e-003, -1.4518193006359589e-004,};
+		static const int FltCountB = 5;
+		static const int FlttBaseB = 2;
+		static const double FltAttensB[ FltCountB ] = {
+			71.0965, 115.7707, 152.1535, 188.2914, 224.2705, };
+		static const double* const FltPtrsB[ FltCountB ] = {
+			HBKernel_2B, HBKernel_3B, HBKernel_4B, HBKernel_5B, HBKernel_6B, };
+		static const double HBKernel_1C[ 1 ] = { // -49.4544 dB, 24
+			5.0336730531430562e-001,};
+		static const double HBKernel_2C[ 2 ] = { // -103.1970 dB, 24
+			5.6330232648142819e-001, -6.3309247177420452e-002,};
+		static const double HBKernel_3C[ 3 ] = { // -152.1195 dB, 24
+			5.8643891113580415e-001, -9.8411593011583087e-002,
+			1.1972706651483846e-002,};
+		static const double HBKernel_4C[ 4 ] = { // -200.6182 dB, 24
+			5.9851012363917222e-001, -1.2028885239978220e-001,
+			2.4294521083140615e-002, -2.5157924156609776e-003,};
+		static const double HBKernel_5C[ 5 ] = { // -248.8730 dB, 24
+			6.0590922882030196e-001, -1.3515953438018685e-001,
+			3.5020857107815606e-002, -6.3256196990467053e-003,
+			5.5506815147598793e-004,};
+		static const int FltCountC = 5;
+		static const int FlttBaseC = 1;
+		static const double FltAttensC[ FltCountC ] = {
+			49.4544, 103.1970, 152.1195, 200.6182, 248.8730, };
+		static const double* const FltPtrsC[ FltCountC ] = {
+			HBKernel_1C, HBKernel_2C, HBKernel_3C, HBKernel_4C, HBKernel_5C, };
+		static const double HBKernel_1D[ 1 ] = { // -61.5357 dB, 48
+			5.0083794231068057e-001,};
+		static const double HBKernel_2D[ 2 ] = { // -127.3167 dB, 48
+			5.6270074379958690e-001, -6.2701174487726344e-002,};
+		static const double HBKernel_3D[ 3 ] = { // -188.2990 dB, 48
+			5.8606296210323228e-001, -9.7844644765123029e-002,
+			1.1781683046528768e-002,};
+		static const double HBKernel_4D[ 4 ] = { // -248.8580 dB, 48
+			5.9823601243162516e-001, -1.1979368994739022e-001,
+			2.4017458606412575e-002, -2.4597810910081913e-003,};
+		static const int FltCountD = 4;
+		static const int FlttBaseD = 1;
+		static const double FltAttensD[ FltCountD ] = {
+			61.5357, 127.3167, 188.2990, 248.8580, };
+		static const double* const FltPtrsD[ FltCountD ] = {
+			HBKernel_1D, HBKernel_2D, HBKernel_3D, HBKernel_4D, };
+		static const double HBKernel_1E[ 1 ] = { // -77.4651 dB, 96
+			5.0013388897382527e-001,};
+		static const double HBKernel_2E[ 2 ] = { // -151.4084 dB, 96
+			5.6255019604317880e-001, -6.2550222932381064e-002,};
+		static const double HBKernel_3E[ 3 ] = { // -224.4365 dB, 96
+			5.8596887234201078e-001, -9.7703321113080305e-002,
+			1.1734448777069783e-002,};
+		static const int FltCountE = 3;
+		static const int FlttBaseE = 1;
+		static const double FltAttensE[ FltCountE ] = {
+			77.4651, 151.4084, 224.4365, };
+		static const double* const FltPtrsE[ FltCountE ] = {
+			HBKernel_1E, HBKernel_2E, HBKernel_3E, };
+		static const double HBKernel_1F[ 1 ] = { // -89.5075 dB, 192
+			5.0003346776264190e-001,};
+		static const double HBKernel_2F[ 2 ] = { // -175.4932 dB, 192
+			5.6251254964097952e-001, -6.2512551321105267e-002,};
+		static const double HBKernel_3F[ 3 ] = { // -260.5645 dB, 192
+			5.8594534336747051e-001, -9.7668015838639821e-002,
+			1.1722672471262996e-002,};
+		static const int FltCountF = 3;
+		static const int FlttBaseF = 1;
+		static const double FltAttensF[ FltCountF ] = {
+			89.5075, 175.4932, 260.5645, };
+		static const double* const FltPtrsF[ FltCountF ] = {
+			HBKernel_1F, HBKernel_2F, HBKernel_3F, };
+		static const double HBKernel_1G[ 1 ] = { // -101.5490 dB, 384
+			5.0000836666064941e-001,};
+		static const double HBKernel_2G[ 2 ] = { // -199.5761 dB, 384
+			5.6250313744943459e-001, -6.2503137554435345e-002,};
+		static const double HBKernel_3G[ 3 ] = { // -296.5185 dB, 384
+			5.8593945786963764e-001, -9.7659186853499613e-002,
+			1.1719728983863425e-002,};
+		static const int FltCountG = 3;
+		static const int FlttBaseG = 1;
+		static const double FltAttensG[ FltCountG ] = {
+			101.5490, 199.5761, 296.5185, };
+		static const double* const FltPtrsG[ FltCountG ] = {
+			HBKernel_1G, HBKernel_2G, HBKernel_3G, };
+
+		int k = 0;
+
+		if( SteepIndex <= 0 )
+		{
+			while( k != FltCountA - 1 && FltAttensA[ k ] < ReqAtten )
+			{
+				k++;
+			}
+
+			flt = FltPtrsA[ k ];
+			fltt = FlttBaseA + k;
+			att = FltAttensA[ k ];
+		}
+		else
+		if( SteepIndex == 1 )
+		{
+			while( k != FltCountB - 1 && FltAttensB[ k ] < ReqAtten )
+			{
+				k++;
+			}
+
+			flt = FltPtrsB[ k ];
+			fltt = FlttBaseB + k;
+			att = FltAttensB[ k ];
+		}
+		else
+		if( SteepIndex == 2 )
+		{
+			while( k != FltCountC - 1 && FltAttensC[ k ] < ReqAtten )
+			{
+				k++;
+			}
+
+			flt = FltPtrsC[ k ];
+			fltt = FlttBaseC + k;
+			att = FltAttensC[ k ];
+		}
+		else
+		if( SteepIndex == 3 )
+		{
+			while( k != FltCountD - 1 && FltAttensD[ k ] < ReqAtten )
+			{
+				k++;
+			}
+
+			flt = FltPtrsD[ k ];
+			fltt = FlttBaseD + k;
+			att = FltAttensD[ k ];
+		}
+		else
+		if( SteepIndex == 4 )
+		{
+			while( k != FltCountE - 1 && FltAttensE[ k ] < ReqAtten )
+			{
+				k++;
+			}
+
+			flt = FltPtrsE[ k ];
+			fltt = FlttBaseE + k;
+			att = FltAttensE[ k ];
+		}
+		else
+		if( SteepIndex == 5 )
+		{
+			while( k != FltCountF - 1 && FltAttensF[ k ] < ReqAtten )
+			{
+				k++;
+			}
+
+			flt = FltPtrsF[ k ];
+			fltt = FlttBaseF + k;
+			att = FltAttensF[ k ];
+		}
+		else
+		{
+			while( k != FltCountG - 1 && FltAttensG[ k ] < ReqAtten )
+			{
+				k++;
+			}
+
+			flt = FltPtrsG[ k ];
+			fltt = FlttBaseG + k;
+			att = FltAttensG[ k ];
+		}
+	}
+
+	/**
+	 * Constructor initalizes the half-band upsampler.
+	 *
+	 * @param ReqAtten Required half-band filter attentuation.
+	 * @param SteepIndex Steepness index - 0=steepest. Corresponds to general
+	 * upsampling ratio, e.g. at 4x upsampling 0 is used, at 8x upsampling 1
+	 * is used, etc.
+	 * @param IsThird "True" if 1/3 of frequency response resampling is
+	 * performed.
+	 * @param PrevLatency Latency, in samples (any value >=0), which was left
+	 * in the output signal by a previous process. Whole-number latency will
+	 * be consumed by *this object while remaining fractional latency can be
+	 * obtained via the getLatencyFrac() function.
+	 * @param aDoConsumeLatency "True" if the output latency should be
+	 * consumed. Does not apply to the fractional part of the latency (if such
+	 * part is available).
+	 */
+
+	CDSPHBUpsampler( const double ReqAtten, const int SteepIndex,
+		const bool IsThird, const double PrevLatency,
+		const bool aDoConsumeLatency = true )
+		: DoConsumeLatency( aDoConsumeLatency )
+	{
+		static const CConvolveFn FltConvFn[ 14 ] = {
+			&CDSPHBUpsampler :: convolve1, &CDSPHBUpsampler :: convolve2,
+			&CDSPHBUpsampler :: convolve3, &CDSPHBUpsampler :: convolve4,
+			&CDSPHBUpsampler :: convolve5, &CDSPHBUpsampler :: convolve6,
+			&CDSPHBUpsampler :: convolve7, &CDSPHBUpsampler :: convolve8,
+			&CDSPHBUpsampler :: convolve9, &CDSPHBUpsampler :: convolve10,
+			&CDSPHBUpsampler :: convolve11, &CDSPHBUpsampler :: convolve12,
+			&CDSPHBUpsampler :: convolve13, &CDSPHBUpsampler :: convolve14 };
+
+		const double* fltp0;
+		int fltt;
+		double att;
+
+		if( IsThird )
+		{
+			getHBFilterThird( ReqAtten, SteepIndex, fltp0, fltt, att );
+		}
+		else
+		{
+			getHBFilter( ReqAtten, SteepIndex, fltp0, fltt, att );
+		}
+
+		// Copy obtained filter to address-aligned buffer.
+
+		fltp = alignptr( FltBuf, 16 );
+		memcpy( fltp, fltp0, fltt * sizeof( fltp[ 0 ]));
+
+		convfn = FltConvFn[ fltt - 1 ];
+		fll = fltt - 1;
+		fl2 = fltt;
+		flo = fll + fl2;
+		BufRP = Buf + fll;
+
+		LatencyFrac = PrevLatency * 2.0;
+		Latency = (int) LatencyFrac;
+		LatencyFrac -= Latency;
+
+		R8BASSERT( Latency >= 0 );
+
+		if( DoConsumeLatency )
+		{
+			flb = BufLen - fll;
+		}
+		else
+		{
+			Latency += fl2 + fl2;
+			flb = BufLen - flo;
+		}
+
+		R8BCONSOLE( "CDSPHBUpsampler: sti=%i third=%i taps=%i att=%.1f "
+			"io=2/1\n", SteepIndex, (int) IsThird, fltt, att );
+
+		clear();
+	}
+
+	virtual int getLatency() const
+	{
+		return( DoConsumeLatency ? 0 : Latency );
+	}
+
+	virtual double getLatencyFrac() const
+	{
+		return( LatencyFrac );
+	}
+
+	virtual int getMaxOutLen( const int MaxInLen ) const
+	{
+		R8BASSERT( MaxInLen >= 0 );
+
+		return( MaxInLen << 1 );
+	}
+
+	virtual void clear()
+	{
+		if( DoConsumeLatency )
+		{
+			LatencyLeft = Latency;
+			BufLeft = 0;
+		}
+		else
+		{
+			LatencyLeft = 0;
+			BufLeft = fl2;
+		}
+
+		WritePos = 0;
+		ReadPos = flb; // Set "read" position to account for filter's latency.
+
+		memset( &Buf[ ReadPos ], 0, ( BufLen - flb ) * sizeof( Buf[ 0 ]));
+	}
+
+	virtual int process( double* ip, int l, double*& op0 )
+	{
+		R8BASSERT( l >= 0 );
+
+		double* op = op0;
+
+		while( l > 0 )
+		{
+			// Add new input samples to both halves of the ring buffer.
+
+			const int b = min( min( l, BufLen - WritePos ), flb - BufLeft );
+
+			double* const wp1 = Buf + WritePos;
+			memcpy( wp1, ip, b * sizeof( wp1[ 0 ]));
+
+			if( WritePos < flo )
+			{
+				const int c = min( b, flo - WritePos );
+				memcpy( wp1 + BufLen, wp1, c * sizeof( wp1[ 0 ]));
+			}
+
+			ip += b;
+			WritePos = ( WritePos + b ) & BufLenMask;
+			l -= b;
+			BufLeft += b;
+
+			// Produce output.
+
+			if( BufLeft > fl2 )
+			{
+				const int c = BufLeft - fl2;
+
+				double* const opend = op + ( c + c );
+				( *convfn )( op, opend, fltp, BufRP, ReadPos );
+
+				op = opend;
+				ReadPos = ( ReadPos + c ) & BufLenMask;
+				BufLeft -= c;
+			}
+		}
+
+		int ol = (int) ( op - op0 );
+
+		if( LatencyLeft != 0 )
+		{
+			if( LatencyLeft >= ol )
+			{
+				LatencyLeft -= ol;
+				return( 0 );
+			}
+
+			ol -= LatencyLeft;
+			op0 += LatencyLeft;
+			LatencyLeft = 0;
+		}
+
+		return( ol );
+	}
+
+private:
+	static const int BufLenBits = 9; ///< The length of the ring buffer,
+		///< expressed as Nth power of 2. This value can be reduced if it is
+		///< known that only short input buffers will be passed to the
+		///< interpolator. The minimum value of this parameter is 5, and
+		///< 1 << BufLenBits should be at least 3 times larger than the
+		///< FilterLen.
+		///<
+	static const int BufLen = 1 << BufLenBits; ///< The length of the ring
+		///< buffer. The actual length is twice as long to allow "beyond max
+		///< position" positioning.
+		///<
+	static const int BufLenMask = BufLen - 1; ///< Mask used for quick buffer
+		///< position wrapping.
+		///<
+	double Buf[ BufLen + 27 ]; ///< The ring buffer, including overrun
+		///< protection for the largest filter.
+		///<
+	double FltBuf[ 14 + 2 ]; ///< Holder for half-band filter taps, used with
+		///< 16-byte address-aligning, for SIMD use.
+		///<
+	double* fltp; ///< Half-band filter taps, points to FltBuf.
+		///<
+	int fll; ///< Input latency.
+		///<
+	int fl2; ///< Right-side filter length.
+		///<
+	int flo; ///< Overrrun length.
+		///<
+	int flb; ///< Initial read position and maximal buffer write length.
+		///<
+	const double* BufRP; ///< Offseted Buf pointer at ReadPos=0.
+		///<
+	bool DoConsumeLatency; ///< "True" if the output latency should be
+		///< consumed. Does not apply to the fractional part of the latency
+		///< (if such part is available).
+		///<
+	int Latency; ///< Initial latency that should be removed from the output.
+		///<
+	double LatencyFrac; ///< Fractional latency left on the output.
+		///<
+	int BufLeft; ///< The number of samples left in the buffer to process.
+		///< When this value is below FilterLenD2Plus1, the interpolation
+		///< cycle ends.
+		///<
+	int WritePos; ///< The current buffer write position. Incremented together
+		///< with the BufLeft variable.
+		///<
+	int ReadPos; ///< The current buffer read position.
+		///<
+	int LatencyLeft; ///< Latency left to remove.
+		///<
+	typedef void( *CConvolveFn )( double* op, double* const opend,
+		const double* const flt, const double* const rp0, int rpos ); ///<
+		///< Convolution function type.
+		///<
+	CConvolveFn convfn; ///< Convolution function in use.
+		///<
+
+#define R8BHBC1( fn ) \
+	static void fn( double* op, double* const opend, const double* const flt, \
+		const double* const rp0, int rpos ) \
+	{ \
+		while( op != opend ) \
+		{ \
+			const double* const rp = rp0 + rpos; \
+			op[ 0 ] = rp[ 0 ];
+
+#define R8BHBC2 \
+			rpos = ( rpos + 1 ) & BufLenMask; \
+			op += 2; \
+		} \
+	}
+
+#include "CDSPHBUpsampler.inc"
+
+#undef R8BHBC1
+#undef R8BHBC2
+};
+
+// ---------------------------------------------------------------------------
+
+} // namespace r8b
+
+#endif // R8B_CDSPHBUPSAMPLER_INCLUDED
diff --git a/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPHBUpsampler.inc b/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPHBUpsampler.inc
new file mode 100644
index 00000000..f9431b1e
--- /dev/null
+++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPHBUpsampler.inc
@@ -0,0 +1,711 @@
+// Auto-generated by `genhbc`, do not edit!
+
+#if defined( R8B_SSE2 )
+
+R8BHBC1( convolve1 )
+op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ]);
+R8BHBC2
+
+R8BHBC1( convolve2 )
+__m128d v1, v2, m1, s1;
+v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
+	_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = m1;
+_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
+R8BHBC2
+
+R8BHBC1( convolve3 )
+__m128d v1, v2, m1, s1;
+v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
+	_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = m1;
+_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
+op[ 1 ] += flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ]);
+R8BHBC2
+
+R8BHBC1( convolve4 )
+__m128d v1, v2, m1, s1;
+v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
+	_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = m1;
+__m128d v3, v4, m3, s3;
+v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 );
+m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ),
+	_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
+s3 = m3;
+s1 = _mm_add_pd( s1, s3 );
+_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
+R8BHBC2
+
+R8BHBC1( convolve5 )
+__m128d v1, v2, m1, s1;
+v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
+	_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = m1;
+__m128d v3, v4, m3, s3;
+v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 );
+m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ),
+	_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
+s3 = m3;
+s1 = _mm_add_pd( s1, s3 );
+_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
+op[ 1 ] += flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ]);
+R8BHBC2
+
+R8BHBC1( convolve6 )
+__m128d v1, v2, m1, s1;
+v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
+	_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = m1;
+__m128d v3, v4, m3, s3;
+v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 );
+m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ),
+	_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
+s3 = m3;
+v2 = _mm_loadu_pd( rp - 5 ); v1 = _mm_loadu_pd( rp + 5 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 4 ),
+	_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = _mm_add_pd( s1, m1 );
+s1 = _mm_add_pd( s1, s3 );
+_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
+R8BHBC2
+
+R8BHBC1( convolve7 )
+__m128d v1, v2, m1, s1;
+v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
+	_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = m1;
+__m128d v3, v4, m3, s3;
+v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 );
+m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ),
+	_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
+s3 = m3;
+v2 = _mm_loadu_pd( rp - 5 ); v1 = _mm_loadu_pd( rp + 5 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 4 ),
+	_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = _mm_add_pd( s1, m1 );
+s1 = _mm_add_pd( s1, s3 );
+_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
+op[ 1 ] += flt[ 6 ] * ( rp[ 7 ] + rp[ -6 ]);
+R8BHBC2
+
+R8BHBC1( convolve8 )
+__m128d v1, v2, m1, s1;
+v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
+	_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = m1;
+__m128d v3, v4, m3, s3;
+v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 );
+m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ),
+	_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
+s3 = m3;
+v2 = _mm_loadu_pd( rp - 5 ); v1 = _mm_loadu_pd( rp + 5 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 4 ),
+	_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = _mm_add_pd( s1, m1 );
+v4 = _mm_loadu_pd( rp - 7 ); v3 = _mm_loadu_pd( rp + 7 );
+m3 = _mm_mul_pd( _mm_load_pd( flt + 6 ),
+	_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
+s3 = _mm_add_pd( s3, m3 );
+s1 = _mm_add_pd( s1, s3 );
+_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
+R8BHBC2
+
+R8BHBC1( convolve9 )
+__m128d v1, v2, m1, s1;
+v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
+	_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = m1;
+__m128d v3, v4, m3, s3;
+v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 );
+m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ),
+	_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
+s3 = m3;
+v2 = _mm_loadu_pd( rp - 5 ); v1 = _mm_loadu_pd( rp + 5 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 4 ),
+	_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = _mm_add_pd( s1, m1 );
+v4 = _mm_loadu_pd( rp - 7 ); v3 = _mm_loadu_pd( rp + 7 );
+m3 = _mm_mul_pd( _mm_load_pd( flt + 6 ),
+	_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
+s3 = _mm_add_pd( s3, m3 );
+s1 = _mm_add_pd( s1, s3 );
+_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
+op[ 1 ] += flt[ 8 ] * ( rp[ 9 ] + rp[ -8 ]);
+R8BHBC2
+
+R8BHBC1( convolve10 )
+__m128d v1, v2, m1, s1;
+v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
+	_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = m1;
+__m128d v3, v4, m3, s3;
+v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 );
+m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ),
+	_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
+s3 = m3;
+v2 = _mm_loadu_pd( rp - 5 ); v1 = _mm_loadu_pd( rp + 5 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 4 ),
+	_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = _mm_add_pd( s1, m1 );
+v4 = _mm_loadu_pd( rp - 7 ); v3 = _mm_loadu_pd( rp + 7 );
+m3 = _mm_mul_pd( _mm_load_pd( flt + 6 ),
+	_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
+s3 = _mm_add_pd( s3, m3 );
+v2 = _mm_loadu_pd( rp - 9 ); v1 = _mm_loadu_pd( rp + 9 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 8 ),
+	_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = _mm_add_pd( s1, m1 );
+s1 = _mm_add_pd( s1, s3 );
+_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
+R8BHBC2
+
+R8BHBC1( convolve11 )
+__m128d v1, v2, m1, s1;
+v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
+	_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = m1;
+__m128d v3, v4, m3, s3;
+v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 );
+m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ),
+	_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
+s3 = m3;
+v2 = _mm_loadu_pd( rp - 5 ); v1 = _mm_loadu_pd( rp + 5 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 4 ),
+	_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = _mm_add_pd( s1, m1 );
+v4 = _mm_loadu_pd( rp - 7 ); v3 = _mm_loadu_pd( rp + 7 );
+m3 = _mm_mul_pd( _mm_load_pd( flt + 6 ),
+	_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
+s3 = _mm_add_pd( s3, m3 );
+v2 = _mm_loadu_pd( rp - 9 ); v1 = _mm_loadu_pd( rp + 9 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 8 ),
+	_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = _mm_add_pd( s1, m1 );
+s1 = _mm_add_pd( s1, s3 );
+_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
+op[ 1 ] += flt[ 10 ] * ( rp[ 11 ] + rp[ -10 ]);
+R8BHBC2
+
+R8BHBC1( convolve12 )
+__m128d v1, v2, m1, s1;
+v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
+	_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = m1;
+__m128d v3, v4, m3, s3;
+v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 );
+m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ),
+	_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
+s3 = m3;
+v2 = _mm_loadu_pd( rp - 5 ); v1 = _mm_loadu_pd( rp + 5 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 4 ),
+	_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = _mm_add_pd( s1, m1 );
+v4 = _mm_loadu_pd( rp - 7 ); v3 = _mm_loadu_pd( rp + 7 );
+m3 = _mm_mul_pd( _mm_load_pd( flt + 6 ),
+	_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
+s3 = _mm_add_pd( s3, m3 );
+v2 = _mm_loadu_pd( rp - 9 ); v1 = _mm_loadu_pd( rp + 9 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 8 ),
+	_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = _mm_add_pd( s1, m1 );
+v4 = _mm_loadu_pd( rp - 11 ); v3 = _mm_loadu_pd( rp + 11 );
+m3 = _mm_mul_pd( _mm_load_pd( flt + 10 ),
+	_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
+s3 = _mm_add_pd( s3, m3 );
+s1 = _mm_add_pd( s1, s3 );
+_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
+R8BHBC2
+
+R8BHBC1( convolve13 )
+__m128d v1, v2, m1, s1;
+v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
+	_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = m1;
+__m128d v3, v4, m3, s3;
+v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 );
+m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ),
+	_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
+s3 = m3;
+v2 = _mm_loadu_pd( rp - 5 ); v1 = _mm_loadu_pd( rp + 5 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 4 ),
+	_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = _mm_add_pd( s1, m1 );
+v4 = _mm_loadu_pd( rp - 7 ); v3 = _mm_loadu_pd( rp + 7 );
+m3 = _mm_mul_pd( _mm_load_pd( flt + 6 ),
+	_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
+s3 = _mm_add_pd( s3, m3 );
+v2 = _mm_loadu_pd( rp - 9 ); v1 = _mm_loadu_pd( rp + 9 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 8 ),
+	_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = _mm_add_pd( s1, m1 );
+v4 = _mm_loadu_pd( rp - 11 ); v3 = _mm_loadu_pd( rp + 11 );
+m3 = _mm_mul_pd( _mm_load_pd( flt + 10 ),
+	_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
+s3 = _mm_add_pd( s3, m3 );
+s1 = _mm_add_pd( s1, s3 );
+_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
+op[ 1 ] += flt[ 12 ] * ( rp[ 13 ] + rp[ -12 ]);
+R8BHBC2
+
+R8BHBC1( convolve14 )
+__m128d v1, v2, m1, s1;
+v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ),
+	_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = m1;
+__m128d v3, v4, m3, s3;
+v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 );
+m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ),
+	_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
+s3 = m3;
+v2 = _mm_loadu_pd( rp - 5 ); v1 = _mm_loadu_pd( rp + 5 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 4 ),
+	_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = _mm_add_pd( s1, m1 );
+v4 = _mm_loadu_pd( rp - 7 ); v3 = _mm_loadu_pd( rp + 7 );
+m3 = _mm_mul_pd( _mm_load_pd( flt + 6 ),
+	_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
+s3 = _mm_add_pd( s3, m3 );
+v2 = _mm_loadu_pd( rp - 9 ); v1 = _mm_loadu_pd( rp + 9 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 8 ),
+	_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = _mm_add_pd( s1, m1 );
+v4 = _mm_loadu_pd( rp - 11 ); v3 = _mm_loadu_pd( rp + 11 );
+m3 = _mm_mul_pd( _mm_load_pd( flt + 10 ),
+	_mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 )));
+s3 = _mm_add_pd( s3, m3 );
+v2 = _mm_loadu_pd( rp - 13 ); v1 = _mm_loadu_pd( rp + 13 );
+m1 = _mm_mul_pd( _mm_load_pd( flt + 12 ),
+	_mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 )));
+s1 = _mm_add_pd( s1, m1 );
+s1 = _mm_add_pd( s1, s3 );
+_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 )));
+R8BHBC2
+
+#elif defined( R8B_NEON )
+
+R8BHBC1( convolve1 )
+op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ]);
+R8BHBC2
+
+R8BHBC1( convolve2 )
+float64x2_t v1, v2, s1;
+s1 = vdupq_n_f64( 0.0 );
+v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
+	vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+op[ 1 ] = vaddvq_f64( s1 );
+R8BHBC2
+
+R8BHBC1( convolve3 )
+float64x2_t v1, v2, s1;
+s1 = vdupq_n_f64( 0.0 );
+v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
+	vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+op[ 1 ] = vaddvq_f64( s1 ) + flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ]);
+R8BHBC2
+
+R8BHBC1( convolve4 )
+float64x2_t v1, v2, s1;
+s1 = vdupq_n_f64( 0.0 );
+v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
+	vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+float64x2_t v3, v4, s3;
+s3 = vdupq_n_f64( 0.0 );
+v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 );
+s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ),
+	vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
+s1 = vaddq_f64( s1, s3 );
+op[ 1 ] = vaddvq_f64( s1 );
+R8BHBC2
+
+R8BHBC1( convolve5 )
+float64x2_t v1, v2, s1;
+s1 = vdupq_n_f64( 0.0 );
+v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
+	vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+float64x2_t v3, v4, s3;
+s3 = vdupq_n_f64( 0.0 );
+v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 );
+s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ),
+	vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
+s1 = vaddq_f64( s1, s3 );
+op[ 1 ] = vaddvq_f64( s1 ) + flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ]);
+R8BHBC2
+
+R8BHBC1( convolve6 )
+float64x2_t v1, v2, s1;
+s1 = vdupq_n_f64( 0.0 );
+v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
+	vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+float64x2_t v3, v4, s3;
+s3 = vdupq_n_f64( 0.0 );
+v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 );
+s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ),
+	vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
+v2 = vld1q_f64( rp - 5 ); v1 = vld1q_f64( rp + 5 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 4 ),
+	vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+s1 = vaddq_f64( s1, s3 );
+op[ 1 ] = vaddvq_f64( s1 );
+R8BHBC2
+
+R8BHBC1( convolve7 )
+float64x2_t v1, v2, s1;
+s1 = vdupq_n_f64( 0.0 );
+v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
+	vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+float64x2_t v3, v4, s3;
+s3 = vdupq_n_f64( 0.0 );
+v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 );
+s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ),
+	vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
+v2 = vld1q_f64( rp - 5 ); v1 = vld1q_f64( rp + 5 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 4 ),
+	vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+s1 = vaddq_f64( s1, s3 );
+op[ 1 ] = vaddvq_f64( s1 ) + flt[ 6 ] * ( rp[ 7 ] + rp[ -6 ]);
+R8BHBC2
+
+R8BHBC1( convolve8 )
+float64x2_t v1, v2, s1;
+s1 = vdupq_n_f64( 0.0 );
+v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
+	vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+float64x2_t v3, v4, s3;
+s3 = vdupq_n_f64( 0.0 );
+v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 );
+s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ),
+	vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
+v2 = vld1q_f64( rp - 5 ); v1 = vld1q_f64( rp + 5 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 4 ),
+	vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+v4 = vld1q_f64( rp - 7 ); v3 = vld1q_f64( rp + 7 );
+s3 = vmlaq_f64( s3, vld1q_f64( flt + 6 ),
+	vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
+s1 = vaddq_f64( s1, s3 );
+op[ 1 ] = vaddvq_f64( s1 );
+R8BHBC2
+
+R8BHBC1( convolve9 )
+float64x2_t v1, v2, s1;
+s1 = vdupq_n_f64( 0.0 );
+v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
+	vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+float64x2_t v3, v4, s3;
+s3 = vdupq_n_f64( 0.0 );
+v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 );
+s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ),
+	vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
+v2 = vld1q_f64( rp - 5 ); v1 = vld1q_f64( rp + 5 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 4 ),
+	vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+v4 = vld1q_f64( rp - 7 ); v3 = vld1q_f64( rp + 7 );
+s3 = vmlaq_f64( s3, vld1q_f64( flt + 6 ),
+	vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
+s1 = vaddq_f64( s1, s3 );
+op[ 1 ] = vaddvq_f64( s1 ) + flt[ 8 ] * ( rp[ 9 ] + rp[ -8 ]);
+R8BHBC2
+
+R8BHBC1( convolve10 )
+float64x2_t v1, v2, s1;
+s1 = vdupq_n_f64( 0.0 );
+v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
+	vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+float64x2_t v3, v4, s3;
+s3 = vdupq_n_f64( 0.0 );
+v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 );
+s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ),
+	vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
+v2 = vld1q_f64( rp - 5 ); v1 = vld1q_f64( rp + 5 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 4 ),
+	vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+v4 = vld1q_f64( rp - 7 ); v3 = vld1q_f64( rp + 7 );
+s3 = vmlaq_f64( s3, vld1q_f64( flt + 6 ),
+	vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
+v2 = vld1q_f64( rp - 9 ); v1 = vld1q_f64( rp + 9 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 8 ),
+	vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+s1 = vaddq_f64( s1, s3 );
+op[ 1 ] = vaddvq_f64( s1 );
+R8BHBC2
+
+R8BHBC1( convolve11 )
+float64x2_t v1, v2, s1;
+s1 = vdupq_n_f64( 0.0 );
+v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
+	vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+float64x2_t v3, v4, s3;
+s3 = vdupq_n_f64( 0.0 );
+v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 );
+s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ),
+	vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
+v2 = vld1q_f64( rp - 5 ); v1 = vld1q_f64( rp + 5 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 4 ),
+	vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+v4 = vld1q_f64( rp - 7 ); v3 = vld1q_f64( rp + 7 );
+s3 = vmlaq_f64( s3, vld1q_f64( flt + 6 ),
+	vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
+v2 = vld1q_f64( rp - 9 ); v1 = vld1q_f64( rp + 9 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 8 ),
+	vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+s1 = vaddq_f64( s1, s3 );
+op[ 1 ] = vaddvq_f64( s1 ) + flt[ 10 ] * ( rp[ 11 ] + rp[ -10 ]);
+R8BHBC2
+
+R8BHBC1( convolve12 )
+float64x2_t v1, v2, s1;
+s1 = vdupq_n_f64( 0.0 );
+v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
+	vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+float64x2_t v3, v4, s3;
+s3 = vdupq_n_f64( 0.0 );
+v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 );
+s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ),
+	vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
+v2 = vld1q_f64( rp - 5 ); v1 = vld1q_f64( rp + 5 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 4 ),
+	vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+v4 = vld1q_f64( rp - 7 ); v3 = vld1q_f64( rp + 7 );
+s3 = vmlaq_f64( s3, vld1q_f64( flt + 6 ),
+	vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
+v2 = vld1q_f64( rp - 9 ); v1 = vld1q_f64( rp + 9 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 8 ),
+	vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+v4 = vld1q_f64( rp - 11 ); v3 = vld1q_f64( rp + 11 );
+s3 = vmlaq_f64( s3, vld1q_f64( flt + 10 ),
+	vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
+s1 = vaddq_f64( s1, s3 );
+op[ 1 ] = vaddvq_f64( s1 );
+R8BHBC2
+
+R8BHBC1( convolve13 )
+float64x2_t v1, v2, s1;
+s1 = vdupq_n_f64( 0.0 );
+v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
+	vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+float64x2_t v3, v4, s3;
+s3 = vdupq_n_f64( 0.0 );
+v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 );
+s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ),
+	vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
+v2 = vld1q_f64( rp - 5 ); v1 = vld1q_f64( rp + 5 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 4 ),
+	vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+v4 = vld1q_f64( rp - 7 ); v3 = vld1q_f64( rp + 7 );
+s3 = vmlaq_f64( s3, vld1q_f64( flt + 6 ),
+	vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
+v2 = vld1q_f64( rp - 9 ); v1 = vld1q_f64( rp + 9 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 8 ),
+	vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+v4 = vld1q_f64( rp - 11 ); v3 = vld1q_f64( rp + 11 );
+s3 = vmlaq_f64( s3, vld1q_f64( flt + 10 ),
+	vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
+s1 = vaddq_f64( s1, s3 );
+op[ 1 ] = vaddvq_f64( s1 ) + flt[ 12 ] * ( rp[ 13 ] + rp[ -12 ]);
+R8BHBC2
+
+R8BHBC1( convolve14 )
+float64x2_t v1, v2, s1;
+s1 = vdupq_n_f64( 0.0 );
+v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ),
+	vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+float64x2_t v3, v4, s3;
+s3 = vdupq_n_f64( 0.0 );
+v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 );
+s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ),
+	vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
+v2 = vld1q_f64( rp - 5 ); v1 = vld1q_f64( rp + 5 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 4 ),
+	vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+v4 = vld1q_f64( rp - 7 ); v3 = vld1q_f64( rp + 7 );
+s3 = vmlaq_f64( s3, vld1q_f64( flt + 6 ),
+	vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
+v2 = vld1q_f64( rp - 9 ); v1 = vld1q_f64( rp + 9 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 8 ),
+	vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+v4 = vld1q_f64( rp - 11 ); v3 = vld1q_f64( rp + 11 );
+s3 = vmlaq_f64( s3, vld1q_f64( flt + 10 ),
+	vaddq_f64( v3, vextq_f64( v4, v4, 1 )));
+v2 = vld1q_f64( rp - 13 ); v1 = vld1q_f64( rp + 13 );
+s1 = vmlaq_f64( s1, vld1q_f64( flt + 12 ),
+	vaddq_f64( v1, vextq_f64( v2, v2, 1 )));
+s1 = vaddq_f64( s1, s3 );
+op[ 1 ] = vaddvq_f64( s1 );
+R8BHBC2
+
+#else // SIMD
+
+R8BHBC1( convolve1 )
+op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ]);
+R8BHBC2
+
+R8BHBC1( convolve2 )
+op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
+	+ flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ]);
+R8BHBC2
+
+R8BHBC1( convolve3 )
+op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
+	+ flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
+	+ flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ]);
+R8BHBC2
+
+R8BHBC1( convolve4 )
+op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
+	+ flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
+	+ flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ])
+	+ flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ]);
+R8BHBC2
+
+R8BHBC1( convolve5 )
+op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
+	+ flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
+	+ flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ])
+	+ flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ])
+	+ flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ]);
+R8BHBC2
+
+R8BHBC1( convolve6 )
+op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
+	+ flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
+	+ flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ])
+	+ flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ])
+	+ flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ])
+	+ flt[ 5 ] * ( rp[ 6 ] + rp[ -5 ]);
+R8BHBC2
+
+R8BHBC1( convolve7 )
+op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
+	+ flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
+	+ flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ])
+	+ flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ])
+	+ flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ])
+	+ flt[ 5 ] * ( rp[ 6 ] + rp[ -5 ])
+	+ flt[ 6 ] * ( rp[ 7 ] + rp[ -6 ]);
+R8BHBC2
+
+R8BHBC1( convolve8 )
+op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
+	+ flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
+	+ flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ])
+	+ flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ])
+	+ flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ])
+	+ flt[ 5 ] * ( rp[ 6 ] + rp[ -5 ])
+	+ flt[ 6 ] * ( rp[ 7 ] + rp[ -6 ])
+	+ flt[ 7 ] * ( rp[ 8 ] + rp[ -7 ]);
+R8BHBC2
+
+R8BHBC1( convolve9 )
+op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
+	+ flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
+	+ flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ])
+	+ flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ])
+	+ flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ])
+	+ flt[ 5 ] * ( rp[ 6 ] + rp[ -5 ])
+	+ flt[ 6 ] * ( rp[ 7 ] + rp[ -6 ])
+	+ flt[ 7 ] * ( rp[ 8 ] + rp[ -7 ])
+	+ flt[ 8 ] * ( rp[ 9 ] + rp[ -8 ]);
+R8BHBC2
+
+R8BHBC1( convolve10 )
+op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
+	+ flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
+	+ flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ])
+	+ flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ])
+	+ flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ])
+	+ flt[ 5 ] * ( rp[ 6 ] + rp[ -5 ])
+	+ flt[ 6 ] * ( rp[ 7 ] + rp[ -6 ])
+	+ flt[ 7 ] * ( rp[ 8 ] + rp[ -7 ])
+	+ flt[ 8 ] * ( rp[ 9 ] + rp[ -8 ])
+	+ flt[ 9 ] * ( rp[ 10 ] + rp[ -9 ]);
+R8BHBC2
+
+R8BHBC1( convolve11 )
+op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
+	+ flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
+	+ flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ])
+	+ flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ])
+	+ flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ])
+	+ flt[ 5 ] * ( rp[ 6 ] + rp[ -5 ])
+	+ flt[ 6 ] * ( rp[ 7 ] + rp[ -6 ])
+	+ flt[ 7 ] * ( rp[ 8 ] + rp[ -7 ])
+	+ flt[ 8 ] * ( rp[ 9 ] + rp[ -8 ])
+	+ flt[ 9 ] * ( rp[ 10 ] + rp[ -9 ])
+	+ flt[ 10 ] * ( rp[ 11 ] + rp[ -10 ]);
+R8BHBC2
+
+R8BHBC1( convolve12 )
+op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
+	+ flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
+	+ flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ])
+	+ flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ])
+	+ flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ])
+	+ flt[ 5 ] * ( rp[ 6 ] + rp[ -5 ])
+	+ flt[ 6 ] * ( rp[ 7 ] + rp[ -6 ])
+	+ flt[ 7 ] * ( rp[ 8 ] + rp[ -7 ])
+	+ flt[ 8 ] * ( rp[ 9 ] + rp[ -8 ])
+	+ flt[ 9 ] * ( rp[ 10 ] + rp[ -9 ])
+	+ flt[ 10 ] * ( rp[ 11 ] + rp[ -10 ])
+	+ flt[ 11 ] * ( rp[ 12 ] + rp[ -11 ]);
+R8BHBC2
+
+R8BHBC1( convolve13 )
+op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
+	+ flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
+	+ flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ])
+	+ flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ])
+	+ flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ])
+	+ flt[ 5 ] * ( rp[ 6 ] + rp[ -5 ])
+	+ flt[ 6 ] * ( rp[ 7 ] + rp[ -6 ])
+	+ flt[ 7 ] * ( rp[ 8 ] + rp[ -7 ])
+	+ flt[ 8 ] * ( rp[ 9 ] + rp[ -8 ])
+	+ flt[ 9 ] * ( rp[ 10 ] + rp[ -9 ])
+	+ flt[ 10 ] * ( rp[ 11 ] + rp[ -10 ])
+	+ flt[ 11 ] * ( rp[ 12 ] + rp[ -11 ])
+	+ flt[ 12 ] * ( rp[ 13 ] + rp[ -12 ]);
+R8BHBC2
+
+R8BHBC1( convolve14 )
+op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ])
+	+ flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ])
+	+ flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ])
+	+ flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ])
+	+ flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ])
+	+ flt[ 5 ] * ( rp[ 6 ] + rp[ -5 ])
+	+ flt[ 6 ] * ( rp[ 7 ] + rp[ -6 ])
+	+ flt[ 7 ] * ( rp[ 8 ] + rp[ -7 ])
+	+ flt[ 8 ] * ( rp[ 9 ] + rp[ -8 ])
+	+ flt[ 9 ] * ( rp[ 10 ] + rp[ -9 ])
+	+ flt[ 10 ] * ( rp[ 11 ] + rp[ -10 ])
+	+ flt[ 11 ] * ( rp[ 12 ] + rp[ -11 ])
+	+ flt[ 12 ] * ( rp[ 13 ] + rp[ -12 ])
+	+ flt[ 13 ] * ( rp[ 14 ] + rp[ -13 ]);
+R8BHBC2
+
+#endif // SIMD
diff --git a/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPProcessor.h b/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPProcessor.h
new file mode 100644
index 00000000..51c7cbc9
--- /dev/null
+++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPProcessor.h
@@ -0,0 +1,103 @@
+//$ nobt
+//$ nocpp
+
+/**
+ * @file CDSPProcessor.h
+ *
+ * @brief The base virtual class for DSP processing algorithms.
+ *
+ * This file includes the base virtual class for DSP processing algorithm
+ * classes like FIR filtering and interpolation.
+ *
+ * r8brain-free-src Copyright (c) 2013-2021 Aleksey Vaneev
+ * See the "LICENSE" file for license.
+ */
+
+#ifndef R8B_CDSPPROCESSOR_INCLUDED
+#define R8B_CDSPPROCESSOR_INCLUDED
+
+#include "r8bbase.h"
+
+namespace r8b {
+
+/**
+ * @brief The base virtual class for DSP processing algorithms.
+ *
+ * This class can be used as a base class for various DSP processing
+ * algorithms (processors). DSP processors that are derived from this class
+ * can be seamlessly integrated into various DSP processing graphs.
+ */
+
+class CDSPProcessor : public R8B_BASECLASS
+{
+	R8BNOCTOR( CDSPProcessor );
+
+public:
+	CDSPProcessor()
+	{
+	}
+
+	virtual ~CDSPProcessor()
+	{
+	}
+
+	/**
+	 * @return The latency, in samples, which is present in the output signal.
+	 * This value is usually zero if the DSP processor "consumes" the latency
+	 * automatically.
+	 */
+
+	virtual int getLatency() const = 0;
+
+	/**
+	 * @return Fractional latency, in samples, which is present in the output
+	 * signal. This value is usually zero if a linear-phase filtering is used.
+	 * With minimum-phase filters in use, this value can be non-zero even if
+	 * the getLatency() function returns zero.
+	 */
+
+	virtual double getLatencyFrac() const = 0;
+
+	/**
+	 * @param MaxInLen The number of samples planned to process at once, at
+	 * most.
+	 * @return The maximal length of the output buffer required when
+	 * processing the "MaxInLen" number of input samples.
+	 */
+
+	virtual int getMaxOutLen( const int MaxInLen ) const = 0;
+
+	/**
+	 * Function clears (resets) the state of *this object and returns it to
+	 * the state after construction. All input data accumulated in the
+	 * internal buffer so far will be discarded.
+	 */
+
+	virtual void clear() = 0;
+
+	/**
+	 * Function performs DSP processing.
+	 *
+	 * @param ip Input data pointer.
+	 * @param l0 How many samples to process.
+	 * @param[out] op0 Output data pointer. The capacity of this buffer should
+	 * be equal to the value returned by the getMaxOutLen() function for the
+	 * given "l0". This buffer can be equal to "ip" only if the
+	 * getMaxOutLen( l0 ) function returned a value lesser than "l0". This
+	 * pointer can be incremented on function's return if latency compensation
+	 * was performed by the processor. Note that on function's return, this
+	 * pointer may point to some internal buffers, including the "ip" buffer,
+	 * ignoring the originally passed value.
+	 * @return The number of output samples written to the "op0" buffer and
+	 * available after processing. This value can be smaller or larger in
+	 * comparison to the original "l0" value due to processing and filter's
+	 * latency compensation that took place, and due to resampling if it was
+	 * performed.
+	 */
+
+	virtual int process( double* ip, int l0, double*& op0 ) = 0;
+};
+
+} // namespace r8b
+
+#endif // R8B_CDSPPROCESSOR_INCLUDED
diff --git a/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPRealFFT.h b/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPRealFFT.h
new file mode 100644
index 00000000..5d88fa33
--- /dev/null
+++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPRealFFT.h
@@ -0,0 +1,820 @@
+//$ nobt
+//$ nocpp
+
+/**
+ * @file CDSPRealFFT.h
+ *
+ * @brief Real-valued FFT transform class.
+ *
+ * This file includes FFT object implementation. All created FFT objects are
+ * kept in a global list after use, for a future reusal. Such approach
+ * minimizes time necessary to initialize the FFT object of the required
+ * length.
+ *
+ * r8brain-free-src Copyright (c) 2013-2022 Aleksey Vaneev
+ * See the "LICENSE" file for license.
+ */
+
+#ifndef R8B_CDSPREALFFT_INCLUDED
+#define R8B_CDSPREALFFT_INCLUDED
+
+#include "r8bbase.h"
+
+#if !R8B_IPP && !R8B_PFFFT && !R8B_PFFFT_DOUBLE
+	#include "fft4g.h"
+#endif // !R8B_IPP && !R8B_PFFFT && !R8B_PFFFT_DOUBLE
+
+#if R8B_PFFFT
+	#include "pffft.h"
+#endif // R8B_PFFFT
+
+#if R8B_PFFFT_DOUBLE
+	#include "pffft_double/pffft_double.h"
+#endif // R8B_PFFFT_DOUBLE
+
+namespace r8b {
+
+/**
+ * @brief Real-valued FFT transform class.
+ *
+ * Class implements a wrapper for real-valued discrete fast Fourier transform
+ * functions. The object of this class can only be obtained via the
+ * CDSPRealFFTKeeper class.
+ *
+ * Uses functions from the FFT package by: Copyright(C) 1996-2001 Takuya OOURA
+ * http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html
+ *
+ * Also uses Intel IPP library functions if available (if the R8B_IPP=1 macro
+ * was defined). Note that IPP library's FFT functions are 2-3 times more
+ * efficient on the modern Intel Core i7-3770K processor than Ooura's
+ * functions. It may be worthwhile investing in IPP. Note, that FFT functions
+ * take less than 20% of the overall sample rate conversion time. However,
+ * when the "power of 2" resampling is used the performance of FFT functions
+ * becomes "everything".
+ */
+
+class CDSPRealFFT : public R8B_BASECLASS
+{
+	R8BNOCTOR( CDSPRealFFT );
+
+	friend class CDSPRealFFTKeeper;
+
+public:
+	/**
+	 * @return A multiplication constant that should be used after inverse
+	 * transform to obtain a correct value scale.
+	 */
+
+	double getInvMulConst() const
+	{
+		return( InvMulConst );
+	}
+
+	/**
+	 * @return The length (the number of real values in a transform) of *this
+	 * FFT object, expressed as Nth power of 2.
+	 */
+
+	int getLenBits() const
+	{
+		return( LenBits );
+	}
+
+	/**
+	 * @return The length (the number of real values in a transform) of *this
+	 * FFT object.
+	 */
+
+	int getLen() const
+	{
+		return( Len );
+	}
+
+	/**
+	 * Function performs in-place forward FFT.
+	 *
+	 * @param[in,out] p Pointer to data block to transform, length should be
+	 * equal to *this object's getLen().
+	 */
+
+	void forward( double* const p ) const
+	{
+	#if R8B_FLOATFFT
+
+		float* const op = (float*) p;
+		int i;
+
+		for( i = 0; i < Len; i++ )
+		{
+			op[ i ] = (float) p[ i ];
+		}
+
+	#endif // R8B_FLOATFFT
+
+	#if R8B_IPP
+
+		ippsFFTFwd_RToPerm_64f( p, p, SPtr, WorkBuffer );
+
+	#elif R8B_PFFFT
+
+		pffft_transform_ordered( setup, op, op, work, PFFFT_FORWARD );
+
+	#elif R8B_PFFFT_DOUBLE
+
+		pffftd_transform_ordered( setup, p, p, work, PFFFT_FORWARD );
+
+	#else // R8B_PFFFT_DOUBLE
+
+		ooura_fft :: rdft( Len, 1, p, wi.getPtr(), wd.getPtr() );
+
+	#endif // R8B_IPP
+	}
+
+	/**
+	 * Function performs in-place inverse FFT.
+	 *
+	 * @param[in,out] p Pointer to data block to transform, length should be
+	 * equal to *this object's getLen().
+	 */
+
+	void inverse( double* const p ) const
+	{
+	#if R8B_IPP
+
+		ippsFFTInv_PermToR_64f( p, p, SPtr, WorkBuffer );
+
+	#elif R8B_PFFFT
+
+		pffft_transform_ordered( setup, (float*) p, (float*) p, work,
+			PFFFT_BACKWARD );
+
+	#elif R8B_PFFFT_DOUBLE
+
+		pffftd_transform_ordered( setup, p, p, work, PFFFT_BACKWARD );
+
+	#else // R8B_PFFFT_DOUBLE
+
+		ooura_fft :: rdft( Len, -1, p, wi.getPtr(), wd.getPtr() );
+
+	#endif // R8B_IPP
+
+	#if R8B_FLOATFFT
+
+		const float* const ip = (const float*) p;
+		int i;
+
+		for( i = Len - 1; i >= 0; i-- )
+		{
+			p[ i ] = ip[ i ];
+		}
+
+	#endif // R8B_FLOATFFT
+	}
+
+	/**
+	 * Function multiplies two complex-valued data blocks and places result in
+	 * a new data block. Length of all data blocks should be equal to *this
+	 * object's block length. Input blocks should have been produced with the
+	 * forward() function of *this object.
+	 *
+	 * @param aip1 Input data block 1.
+	 * @param aip2 Input data block 2.
+	 * @param[out] aop Output data block, should not be equal to aip1 nor
+	 * aip2.
+	 */
+
+	void multiplyBlocks( const double* const aip1, const double* const aip2,
+		double* const aop ) const
+	{
+	#if R8B_FLOATFFT
+
+		const float* const ip1 = (const float*) aip1;
+		const float* const ip2 = (const float*) aip2;
+		float* const op = (float*) aop;
+
+	#else // R8B_FLOATFFT
+
+		const double* const ip1 = aip1;
+		const double* const ip2 = aip2;
+		double* const op = aop;
+
+	#endif // R8B_FLOATFFT
+
+	#if R8B_IPP
+
+		ippsMulPerm_64f( (Ipp64f*) ip1, (Ipp64f*) ip2, (Ipp64f*) op, Len );
+
+	#else // R8B_IPP
+
+		op[ 0 ] = ip1[ 0 ] * ip2[ 0 ];
+		op[ 1 ] = ip1[ 1 ] * ip2[ 1 ];
+
+		int i = 2;
+
+		while( i < Len )
+		{
+			op[ i ] = ip1[ i ] * ip2[ i ] - ip1[ i + 1 ] * ip2[ i + 1 ];
+			op[ i + 1 ] = ip1[ i ] * ip2[ i + 1 ] + ip1[ i + 1 ] * ip2[ i ];
+			i += 2;
+		}
+
+	#endif // R8B_IPP
+	}
+
+	/**
+	 * Function multiplies two complex-valued data blocks in-place. Length of
+	 * both data blocks should be equal to *this object's block length. Blocks
+	 * should have been produced with the forward() function of *this object.
+	 *
+	 * @param aip Input data block 1.
+	 * @param[in,out] aop Output/input data block 2.
+	 */
+
+	void multiplyBlocks( const double* const aip, double* const aop ) const
+	{
+	#if R8B_FLOATFFT
+
+		const float* const ip = (const float*) aip;
+		float* const op = (float*) aop;
+		float t;
+
+	#else // R8B_FLOATFFT
+
+		const double* const ip = aip;
+		double* const op = aop;
+
+		#if !R8B_IPP
+		double t;
+		#endif // !R8B_IPP
+
+	#endif // R8B_FLOATFFT
+
+	#if R8B_IPP
+
+		ippsMulPerm_64f( (Ipp64f*) op, (Ipp64f*) ip, (Ipp64f*) op, Len );
+
+	#else // R8B_IPP
+
+		op[ 0 ] *= ip[ 0 ];
+		op[ 1 ] *= ip[ 1 ];
+
+		int i = 2;
+
+		while( i < Len )
+		{
+			t = op[ i ] * ip[ i ] - op[ i + 1 ] * ip[ i + 1 ];
+			op[ i + 1 ] = op[ i ] * ip[ i + 1 ] + op[ i + 1 ] * ip[ i ];
+			op[ i ] = t;
+			i += 2;
+		}
+
+	#endif // R8B_IPP
+	}
+
+	/**
+	 * Function multiplies two complex-valued data blocks in-place,
+	 * considering that the "ip" block contains "zero-phase" response. Length
+	 * of both data blocks should be equal to *this object's block length.
+	 * Blocks should have been produced with the forward() function of *this
+	 * object.
+	 *
+	 * @param aip Input data block 1, "zero-phase" response. This block should
+	 * be first transformed via the convertToZP() function.
+	 * @param[in,out] aop Output/input data block 2.
+	 */
+
+	void multiplyBlocksZP( const double* const aip, double* const aop ) const
+	{
+	#if R8B_FLOATFFT
+
+		const float* const ip = (const float*) aip;
+		float* const op = (float*) aop;
+
+	#else // R8B_FLOATFFT
+
+		const double* ip = aip;
+		double* op = aop;
+
+	#endif // R8B_FLOATFFT
+
+	// SIMD implementations assume that pointers are address-aligned.
+
+	#if !R8B_FLOATFFT && defined( R8B_SSE2 )
+
+		int c8 = Len >> 3;
+
+		while( c8 != 0 )
+		{
+			const __m128d iv1 = _mm_load_pd( ip );
+			const __m128d iv2 = _mm_load_pd( ip + 2 );
+			const __m128d ov1 = _mm_load_pd( op );
+			const __m128d ov2 = _mm_load_pd( op + 2 );
+			_mm_store_pd( op, _mm_mul_pd( iv1, ov1 ));
+			_mm_store_pd( op + 2, _mm_mul_pd( iv2, ov2 ));
+
+			const __m128d iv3 = _mm_load_pd( ip + 4 );
+			const __m128d ov3 = _mm_load_pd( op + 4 );
+			const __m128d iv4 = _mm_load_pd( ip + 6 );
+			const __m128d ov4 = _mm_load_pd( op + 6 );
+			_mm_store_pd( op + 4, _mm_mul_pd( iv3, ov3 ));
+			_mm_store_pd( op + 6, _mm_mul_pd( iv4, ov4 ));
+
+			ip += 8;
+			op += 8;
+			c8--;
+		}
+
+		int c = Len & 7;
+
+		while( c != 0 )
+		{
+			*op *= *ip;
+			ip++;
+			op++;
+			c--;
+		}
+
+	#elif !R8B_FLOATFFT && defined( R8B_NEON )
+
+		int c8 = Len >> 3;
+
+		while( c8 != 0 )
+		{
+			const float64x2_t iv1 = vld1q_f64( ip );
+			const float64x2_t iv2 = vld1q_f64( ip + 2 );
+			const float64x2_t ov1 = vld1q_f64( op );
+			const float64x2_t ov2 = vld1q_f64( op + 2 );
+			vst1q_f64( op, vmulq_f64( iv1, ov1 ));
+			vst1q_f64( op + 2, vmulq_f64( iv2, ov2 ));
+
+			const float64x2_t iv3 = vld1q_f64( ip + 4 );
+			const float64x2_t iv4 = vld1q_f64( ip + 6 );
+			const float64x2_t ov3 = vld1q_f64( op + 4 );
+			const float64x2_t ov4 = vld1q_f64( op + 6 );
+			vst1q_f64( op + 4, vmulq_f64( iv3, ov3 ));
+			vst1q_f64( op + 6, vmulq_f64( iv4, ov4 ));
+
+			ip += 8;
+			op += 8;
+			c8--;
+		}
+
+		int c = Len & 7;
+
+		while( c != 0 )
+		{
+			*op *= *ip;
+			ip++;
+			op++;
+			c--;
+		}
+
+	#else // SIMD
+
+		int i;
+
+		for( i = 0; i < Len; i++ )
+		{
+			op[ i ] *= ip[ i ];
+		}
+
+	#endif // SIMD
+	}
+
+	/**
+	 * Function converts the specified forward-transformed block into
+	 * "zero-phase" form suitable for use with the multiplyBlocksZ() function.
+	 *
+	 * @param[in,out] ap Block to transform.
+	 */
+
+	void convertToZP( double* const ap ) const
+	{
+	#if R8B_FLOATFFT
+
+		float* const p = (float*) ap;
+
+	#else // R8B_FLOATFFT
+
+		double* const p = ap;
+
+	#endif // R8B_FLOATFFT
+
+		int i = 2;
+
+		while( i < Len )
+		{
+			p[ i + 1 ] = p[ i ];
+			i += 2;
+		}
+	}
+
+private:
+	int LenBits; ///< Length of FFT block (expressed as Nth power of 2).
+		///<
+	int Len; ///< Length of FFT block (number of real values).
+		///<
+	double InvMulConst; ///< Inverse FFT multiply constant.
+		///<
+	CDSPRealFFT* Next; ///< Next object in a singly-linked list.
+		///<
+
+	#if R8B_IPP
+		IppsFFTSpec_R_64f* SPtr; ///< Pointer to initialized data buffer
+			///< to be passed to IPP's FFT functions.
+			///<
+		CFixedBuffer< unsigned char > SpecBuffer; ///< Working buffer.
+			///<
+		CFixedBuffer< unsigned char > WorkBuffer; ///< Working buffer.
+			///<
+	#elif R8B_PFFFT
+		PFFFT_Setup* setup; ///< PFFFT setup object.
+			///<
+		CFixedBuffer< float > work; ///< Working buffer.
+			///<
+	#elif R8B_PFFFT_DOUBLE
+		PFFFTD_Setup* setup; ///< PFFFTD setup object.
+			///<
+		CFixedBuffer< double > work; ///< Working buffer.
+			///<
+	#else // R8B_PFFFT_DOUBLE
+		CFixedBuffer< int > wi; ///< Working buffer (ints).
+			///<
+		CFixedBuffer< double > wd; ///< Working buffer (doubles).
+			///<
+	#endif // R8B_IPP
+
+	/**
+	 * A simple class that keeps the pointer to the object and deletes it
+	 * automatically.
+	 */
+
+	class CObjKeeper
+	{
+		R8BNOCTOR( CObjKeeper );
+
+	public:
+		CObjKeeper()
+			: Object( NULL )
+		{
+		}
+
+		~CObjKeeper()
+		{
+			delete Object;
+		}
+
+		CObjKeeper& operator = ( CDSPRealFFT* const aObject )
+		{
+			Object = aObject;
+			return( *this );
+		}
+
+		operator CDSPRealFFT* () const
+		{
+			return( Object );
+		}
+
+	private:
+		CDSPRealFFT* Object; ///< FFT object being kept.
+			///<
+	};
+
+	CDSPRealFFT()
+	{
+	}
+
+	/**
+	 * Constructor initializes FFT object.
+	 *
+	 * @param aLenBits The length of FFT block (Nth power of 2), specifies the
+	 * number of real values in a block. Values from 1 to 30 inclusive are
+	 * supported.
+	 */
+
+	CDSPRealFFT( const int aLenBits )
+		: LenBits( aLenBits )
+		, Len( 1 << aLenBits )
+	#if R8B_IPP
+		, InvMulConst( 1.0 / Len )
+	#elif R8B_PFFFT
+		, InvMulConst( 1.0 / Len )
+	#elif R8B_PFFFT_DOUBLE
+		, InvMulConst( 1.0 / Len )
+	#else // R8B_PFFFT_DOUBLE
+		, InvMulConst( 2.0 / Len )
+	#endif // R8B_IPP
+	{
+	#if R8B_IPP
+
+		int SpecSize;
+		int SpecBufferSize;
+		int BufferSize;
+
+		ippsFFTGetSize_R_64f( LenBits, IPP_FFT_NODIV_BY_ANY,
+			ippAlgHintFast, &SpecSize, &SpecBufferSize, &BufferSize );
+
+		CFixedBuffer< unsigned char > InitBuffer( SpecBufferSize );
+		SpecBuffer.alloc( SpecSize );
+		WorkBuffer.alloc( BufferSize );
+
+		ippsFFTInit_R_64f( &SPtr, LenBits, IPP_FFT_NODIV_BY_ANY,
+			ippAlgHintFast, SpecBuffer, InitBuffer );
+
+	#elif R8B_PFFFT
+
+		setup = pffft_new_setup( Len, PFFFT_REAL );
+		work.alloc( Len );
+
+	#elif R8B_PFFFT_DOUBLE
+
+		setup = pffftd_new_setup( Len, PFFFT_REAL );
+		work.alloc( Len );
+
+	#else // R8B_PFFFT_DOUBLE
+
+		wi.alloc( (int) ceil( 2.0 + sqrt( (double) ( Len >> 1 ))));
+		wi[ 0 ] = 0;
+		wd.alloc( Len >> 1 );
+
+	#endif // R8B_IPP
+	}
+
+	~CDSPRealFFT()
+	{
+		#if R8B_PFFFT
+			pffft_destroy_setup( setup );
+		#elif R8B_PFFFT_DOUBLE
+			pffftd_destroy_setup( setup );
+		#endif // R8B_PFFFT_DOUBLE
+
+		delete Next;
+	}
+};
+
+/**
+ * @brief A "keeper" class for real-valued FFT transform objects.
+ *
+ * Class implements "keeper" functionality for handling CDSPRealFFT objects.
+ * The allocated FFT objects are placed on the global static list of objects
+ * for future reuse instead of deallocation.
+ */
+
+class CDSPRealFFTKeeper : public R8B_BASECLASS
+{
+	R8BNOCTOR( CDSPRealFFTKeeper );
+
+public:
+	CDSPRealFFTKeeper()
+		: Object( NULL )
+	{
+	}
+
+	/**
+	 * Function acquires FFT object with the specified block length.
+	 *
+	 * @param LenBits The length of FFT block (Nth power of 2), in the range
+	 * [1; 30] inclusive, specifies the number of real values in a FFT block.
+	 */
+
+	CDSPRealFFTKeeper( const int LenBits )
+	{
+		Object = acquire( LenBits );
+	}
+
+	~CDSPRealFFTKeeper()
+	{
+		if( Object != NULL )
+		{
+			release( Object );
+		}
+	}
+
+	/**
+	 * @return Pointer to the acquired FFT object.
+	 */
+
+	const CDSPRealFFT* operator -> () const
+	{
+		R8BASSERT( Object != NULL );
+
+		return( Object );
+	}
+
+	/**
+	 * Function acquires FFT object with the specified block length. This
+	 * function can be called any number of times.
+	 *
+	 * @param LenBits The length of FFT block (Nth power of 2), in the range
+	 * [1; 30] inclusive, specifies the number of real values in a FFT block.
+	 */
+
+	void init( const int LenBits )
+	{
+		if( Object != NULL )
+		{
+			if( Object -> LenBits == LenBits )
+			{
+				return;
+			}
+
+			release( Object );
+		}
+
+		Object = acquire( LenBits );
+	}
+
+	/**
+	 * Function releases a previously acquired FFT object.
+	 */
+
+	void reset()
+	{
+		if( Object != NULL )
+		{
+			release( Object );
+			Object = NULL;
+		}
+	}
+
+private:
+	CDSPRealFFT* Object; ///< FFT object.
+		///<
+
+	static CSyncObject StateSync; ///< FFTObjects synchronizer.
+		///<
+	static CDSPRealFFT :: CObjKeeper FFTObjects[]; ///< Pool of FFT objects of
+		///< various lengths.
+		///<
+
+	/**
+	 * Function acquires FFT object from the global pool.
+	 *
+	 * @param LenBits FFT block length (expressed as Nth power of 2).
+	 */
+
+	CDSPRealFFT* acquire( const int LenBits )
+	{
+		R8BASSERT( LenBits > 0 && LenBits <= 30 );
+
+		R8BSYNC( StateSync );
+
+		if( FFTObjects[ LenBits ] == NULL )
+		{
+			return( new CDSPRealFFT( LenBits ));
+		}
+
+		CDSPRealFFT* ffto = FFTObjects[ LenBits ];
+		FFTObjects[ LenBits ] = ffto -> Next;
+
+		return( ffto );
+	}
+
+	/**
+	 * Function releases a previously acquired FFT object.
+	 *
+	 * @param ffto FFT object to release.
+	 */
+
+	void release( CDSPRealFFT* const ffto )
+	{
+		R8BSYNC( StateSync );
+
+		ffto -> Next = FFTObjects[ ffto -> LenBits ];
+		FFTObjects[ ffto -> LenBits ] = ffto;
+	}
+};
+
+/**
+ * Function calculates the minimum-phase transform of the filter kernel, using
+ * a discrete Hilbert transform in cepstrum domain.
+ *
+ * For more details, see part III.B of
+ * http://www.hpl.hp.com/personal/Niranjan_Damera-Venkata/files/ComplexMinPhase.pdf
+ *
+ * @param[in,out] Kernel Filter kernel buffer.
+ * @param KernelLen Filter kernel's length, in samples.
+ * @param LenMult Kernel length multiplier. Used as a coefficient of the
+ * "oversampling" in the frequency domain. Such oversampling is needed to
+ * improve the precision of the minimum-phase transform. If the filter's
+ * attenuation is high, this multiplier should be increased or otherwise the
+ * required attenuation will not be reached due to "smoothing" effect of this
+ * transform.
+ * @param DoFinalMul "True" if the final multiplication after transform should
+ * be performed or not. Such multiplication returns the gain of the signal to
+ * its original value. This parameter can be set to "false" if normalization
+ * of the resulting filter kernel is planned to be used.
+ * @param[out] DCGroupDelay If not NULL, this variable receives group delay
+ * at DC offset, in samples (can be a non-integer value).
+ */
+
+inline void calcMinPhaseTransform( double* const Kernel, const int KernelLen,
+	const int LenMult = 2, const bool DoFinalMul = true,
+	double* const DCGroupDelay = NULL )
+{
+	R8BASSERT( KernelLen > 0 );
+	R8BASSERT( LenMult >= 2 );
+
+	const int LenBits = getBitOccupancy(( KernelLen * LenMult ) - 1 );
+	const int Len = 1 << LenBits;
+	const int Len2 = Len >> 1;
+	int i;
+
+	CFixedBuffer< double > ip( Len );
+	CFixedBuffer< double > ip2( Len2 + 1 );
+
+	memcpy( &ip[ 0 ], Kernel, KernelLen * sizeof( ip[ 0 ]));
+	memset( &ip[ KernelLen ], 0, ( Len - KernelLen ) * sizeof( ip[ 0 ]));
+
+	CDSPRealFFTKeeper ffto( LenBits );
+	ffto -> forward( ip );
+
+	// Create the "log |c|" spectrum while saving the original power spectrum
+	// in the "ip2" buffer.
+
+	#if R8B_FLOATFFT
+		float* const aip = (float*) &ip[ 0 ];
+		float* const aip2 = (float*) &ip2[ 0 ];
+		const float nzbias = 1e-35;
+	#else // R8B_FLOATFFT
+		double* const aip = &ip[ 0 ];
+		double* const aip2 = &ip2[ 0 ];
+		const double nzbias = 1e-300;
+	#endif // R8B_FLOATFFT
+
+	aip2[ 0 ] = aip[ 0 ];
+	aip[ 0 ] = log( fabs( aip[ 0 ]) + nzbias );
+	aip2[ Len2 ] = aip[ 1 ];
+	aip[ 1 ] = log( fabs( aip[ 1 ]) + nzbias );
+
+	for( i = 1; i < Len2; i++ )
+	{
+		aip2[ i ] = sqrt( aip[ i * 2 ] * aip[ i * 2 ] +
+			aip[ i * 2 + 1 ] * aip[ i * 2 + 1 ]);
+
+		aip[ i * 2 ] = log( aip2[ i ] + nzbias );
+		aip[ i * 2 + 1 ] = 0.0;
+	}
+
+	// Convert to cepstrum and apply discrete Hilbert transform.
+
+	ffto -> inverse( ip );
+
+	const double m1 = ffto -> getInvMulConst();
+	const double m2 = -m1;
+
+	ip[ 0 ] = 0.0;
+
+	for( i = 1; i < Len2; i++ )
+	{
+		ip[ i ] *= m1;
+	}
+
+	ip[ Len2 ] = 0.0;
+
+	for( i = Len2 + 1; i < Len; i++ )
+	{
+		ip[ i ] *= m2;
+	}
+
+	// Convert Hilbert-transformed cepstrum back to the "log |c|" spectrum and
+	// perform its exponentiation, multiplied by the power spectrum previously
+	// saved in the "ip2" buffer.
+
+	ffto -> forward( ip );
+
+	aip[ 0 ] = aip2[ 0 ];
+	aip[ 1 ] = aip2[ Len2 ];
+
+	for( i = 1; i < Len2; i++ )
+	{
+		aip[ i * 2 + 0 ] = cos( aip[ i * 2 + 1 ]) * aip2[ i ];
+		aip[ i * 2 + 1 ] = sin( aip[ i * 2 + 1 ]) * aip2[ i ];
+	}
+
+	ffto -> inverse( ip );
+
+	if( DoFinalMul )
+	{
+		for( i = 0; i < KernelLen; i++ )
+		{
+			Kernel[ i ] = ip[ i ] * m1;
+		}
+	}
+	else
+	{
+		memcpy( &Kernel[ 0 ], &ip[ 0 ], KernelLen * sizeof( Kernel[ 0 ]));
+	}
+
+	if( DCGroupDelay != NULL )
+	{
+		double tmp;
+
+		calcFIRFilterResponseAndGroupDelay( Kernel, KernelLen, 0.0,
+			tmp, tmp, *DCGroupDelay );
+	}
+}
+
+} // namespace r8b
+
+#endif // VOX_CDSPREALFFT_INCLUDED
diff --git a/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPResampler.h b/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPResampler.h
new file mode 100644
index 00000000..9eb4a43d
--- /dev/null
+++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPResampler.h
@@ -0,0 +1,769 @@
+//$ nobt
+//$ nocpp
+
+/**
+ * @file CDSPResampler.h
+ *
+ * @brief The master sample rate converter (resampler) class.
+ *
+ * This file includes the master sample rate converter (resampler) class that
+ * combines all elements of this library into a single front-end class.
+ *
+ * r8brain-free-src Copyright (c) 2013-2022 Aleksey Vaneev
+ * See the "LICENSE" file for license.
+ */
+
+#ifndef R8B_CDSPRESAMPLER_INCLUDED
+#define R8B_CDSPRESAMPLER_INCLUDED
+
+#include "CDSPHBDownsampler.h"
+#include "CDSPHBUpsampler.h"
+#include "CDSPBlockConvolver.h"
+#include "CDSPFracInterpolator.h"
+
+namespace r8b {
+
+/**
+ * @brief The master sample rate converter (resampler) class.
+ *
+ * This class can be considered the "master" sample rate converter (resampler)
+ * class since it combines all functionality of this library into a single
+ * front-end class to perform sample rate conversion to/from any sample rate,
+ * including non-integer sample rates.
+ *
+ * Note that objects of this class can be constructed on the stack as it has a
+ * small member data size. The default template parameters of this class are
+ * suited for 27-bit fixed point resampling.
+ *
+ * Use the CDSPResampler16 class for 16-bit resampling.
+ *
+ * Use the CDSPResampler16IR class for 16-bit impulse response resampling.
+ *
+ * Use the CDSPResampler24 class for 24-bit resampling (including 32-bit
+ * floating point resampling).
+ */
+
+class CDSPResampler : public CDSPProcessor
+{
+public:
+	/**
+	 * Constructor initalizes the resampler object.
+	 *
+	 * Note that increasing the transition band and decreasing attenuation
+	 * reduces the filter length, this in turn reduces the "input before
+	 * output" delay. However, the filter length has only a minor influence on
+	 * the overall resampling speed.
+	 *
+	 * It should be noted that the ReqAtten specifies the minimal difference
+	 * between the loudest input signal component and the produced aliasing
+	 * artifacts during resampling. For example, if ReqAtten=100 was specified
+	 * when performing 2x upsampling, the analysis of the resulting signal may
+	 * display high-frequency components which are quieter than the loudest
+	 * part of the input signal by only 100 decibel meaning the high-frequency
+	 * part did not become "magically" completely silent after resampling. You
+	 * have to specify a higher ReqAtten value if you need a totally clean
+	 * high-frequency content. On the other hand, it may not be reasonable to
+	 * have a high-frequency content cleaner than the input signal itself: if
+	 * the input signal is 16-bit, setting ReqAtten to 180 will make its
+	 * high-frequency content 24-bit, but the original part of the signal will
+	 * remain 16-bit.
+	 *
+	 * @param SrcSampleRate Source signal sample rate. Both sample rates can
+	 * be specified as a ratio, e.g. SrcSampleRate = 1.0, DstSampleRate = 2.0.
+	 * @param DstSampleRate Destination signal sample rate. The "power of 2"
+	 * ratios between the source and destination sample rates force resampler
+	 * to use several fast "power of 2" resampling steps, without using
+	 * fractional interpolation at all.
+	 * @param aMaxInLen The maximal planned length of the input buffer (in
+	 * samples) that will be passed to the resampler. The resampler relies on
+	 * this value as it allocates intermediate buffers. Input buffers longer
+	 * than this value should never be supplied to the resampler. Note that
+	 * upsampling produces more samples than was provided on input, so at
+	 * higher upsampling ratios it is advisable to use smaller MaxInLen
+	 * values to reduce memory footprint. When downsampling, a larger MaxInLen
+	 * is suggested in order to increase downsampling performance.
+	 * @param ReqTransBand Required transition band, in percent of the
+	 * spectral space of the input signal (or the output signal if
+	 * downsampling is performed) between filter's -3 dB point and the Nyquist
+	 * frequency. The range is from CDSPFIRFilter::getLPMinTransBand() to
+	 * CDSPFIRFilter::getLPMaxTransBand(), inclusive. When upsampling 88200 or
+	 * 96000 audio to a higher sample rates the ReqTransBand can be
+	 * considerably increased, up to 30. The selection of ReqTransBand depends
+	 * on the level of desire to preserve the high-frequency content. While
+	 * values 0.5 to 2 are extremely "greedy" settings, not necessary in most
+	 * cases, values 2 to 3 can be used in most cases. Values 3 to 4 are
+	 * relaxed settings, but they still offer a flat frequency response up to
+	 * 21kHz with 44.1k source or destination sample rate.
+	 * @param ReqAtten Required stop-band attenuation in decibel, in the
+	 * range CDSPFIRFilter::getLPMinAtten() to CDSPFIRFilter::getLPMaxAtten(),
+	 * inclusive. The actual attenuation may be 0.40-4.46 dB higher. The
+	 * general formula for selecting the ReqAtten is 6.02 * Bits + 40, where
+	 * "Bits" is the bit resolution (e.g. 16, 24), "40" is an added resolution
+	 * for dynamic signals; this value can be decreased to 20 to 10 if the
+	 * signal being resampled is non-dynamic (e.g., an impulse response or
+	 * filter, with a non-steep frequency response).
+	 * @param ReqPhase Required filter's phase response. Note that this
+	 * setting does not affect interpolator's phase response which is always
+	 * linear-phase. Also note that if the "power of 2" resampling was engaged
+	 * by the resampler together with the minimum-phase response, the audio
+	 * stream may become fractionally delayed, depending on the minimum-phase
+	 * filter's actual fractional delay. Linear-phase filters do not have
+	 * fractional delay.
+	 * @see CDSPFIRFilterCache::getLPFilter()
+	 */
+
+	CDSPResampler( const double SrcSampleRate, const double DstSampleRate,
+		const int aMaxInLen, const double ReqTransBand = 2.0,
+		const double ReqAtten = 206.91,
+		const EDSPFilterPhaseResponse ReqPhase = fprLinearPhase )
+		: StepCapacity( 0 )
+		, StepCount( 0 )
+		, MaxInLen( aMaxInLen )
+		, CurMaxOutLen( aMaxInLen )
+		, LatencyFrac( 0.0 )
+	{
+		R8BASSERT( SrcSampleRate > 0.0 );
+		R8BASSERT( DstSampleRate > 0.0 );
+		R8BASSERT( MaxInLen > 0 );
+
+		R8BCONSOLE( "* CDSPResampler: src=%.1f dst=%.1f len=%i tb=%.1f "
+			"att=%.2f ph=%i\n", SrcSampleRate, DstSampleRate, aMaxInLen,
+			ReqTransBand, ReqAtten, (int) ReqPhase );
+
+		if( SrcSampleRate == DstSampleRate )
+		{
+			return;
+		}
+
+		TmpBufCapacities[ 0 ] = 0;
+		TmpBufCapacities[ 1 ] = 0;
+		CurTmpBuf = 0;
+
+		// Try some common efficient ratios requiring only a single step.
+
+		const int CommonRatioCount = 5;
+		const int CommonRatios[ CommonRatioCount ][ 2 ] = {
+			{ 1, 2 },
+			{ 1, 3 },
+			{ 2, 3 },
+			{ 3, 2 },
+			{ 3, 4 }
+		};
+
+		int i;
+
+		for( i = 0; i < CommonRatioCount; i++ )
+		{
+			const int num = CommonRatios[ i ][ 0 ];
+			const int den = CommonRatios[ i ][ 1 ];
+
+			if( SrcSampleRate * num == DstSampleRate * den )
+			{
+				addProcessor( new CDSPBlockConvolver(
+					CDSPFIRFilterCache :: getLPFilter(
+					1.0 / ( num > den ? num : den ), ReqTransBand,
+					ReqAtten, ReqPhase, num ), num, den, LatencyFrac ));
+
+				createTmpBuffers();
+				return;
+			}
+		}
+
+		// Try whole-number power-of-2 or 3*power-of-2 upsampling.
+
+		for( i = 2; i <= 3; i++ )
+		{
+			bool WasFound = false;
+			int c = 0;
+
+			while( true )
+			{
+				const double NewSR = SrcSampleRate * ( i << c );
+
+				if( NewSR == DstSampleRate )
+				{
+					WasFound = true;
+					break;
+				}
+
+				if( NewSR > DstSampleRate )
+				{
+					break;
+				}
+
+				c++;
+			}
+
+			if( WasFound )
+			{
+				addProcessor( new CDSPBlockConvolver(
+					CDSPFIRFilterCache :: getLPFilter( 1.0 / i, ReqTransBand,
+					ReqAtten, ReqPhase, i ), i, 1, LatencyFrac ));
+
+				const bool IsThird = ( i == 3 );
+
+				for( i = 0; i < c; i++ )
+				{
+					addProcessor( new CDSPHBUpsampler( ReqAtten, i, IsThird,
+						LatencyFrac ));
+				}
+
+				createTmpBuffers();
+				return;
+			}
+		}
+
+		if( DstSampleRate * 2.0 > SrcSampleRate )
+		{
+			// Upsampling or fractional downsampling down to 2X.
+
+			const double NormFreq = ( DstSampleRate > SrcSampleRate ? 0.5 :
+				0.5 * DstSampleRate / SrcSampleRate );
+
+			addProcessor( new CDSPBlockConvolver(
+				CDSPFIRFilterCache :: getLPFilter( NormFreq, ReqTransBand,
+				ReqAtten, ReqPhase, 2.0 ), 2, 1, LatencyFrac ));
+
+			// Try intermediate interpolated resampling with subsequent 2X
+			// or 3X upsampling.
+
+			const double tbw = 0.0175; // Intermediate filter's transition
+				// band extension coefficient.
+			const double ThreshSampleRate = SrcSampleRate /
+				( 1.0 - tbw * ReqTransBand ); // Make sure intermediate
+				// filter's transition band is not steeper than ReqTransBand
+				// (this keeps the latency under control).
+
+			int c = 0;
+			int div = 1;
+
+			while( true )
+			{
+				const int ndiv = div * 2;
+
+				if( DstSampleRate < ThreshSampleRate * ndiv )
+				{
+					break;
+				}
+
+				div = ndiv;
+				c++;
+			}
+
+			int c2 = 0;
+			int div2 = 1;
+
+			while( true )
+			{
+				const int ndiv = div * ( c2 == 0 ? 3 : 2 );
+
+				if( DstSampleRate < ThreshSampleRate * ndiv )
+				{
+					break;
+				}
+
+				div2 = ndiv;
+				c2++;
+			}
+
+			const double SrcSampleRate2 = SrcSampleRate * 2.0;
+			int tmp1;
+			int tmp2;
+
+			if( c == 1 && getWholeStepping( SrcSampleRate2, DstSampleRate,
+				tmp1, tmp2 ))
+			{
+				// Do not use intermediate interpolation if whole stepping is
+				// available as it performs very fast.
+
+				c = 0;
+			}
+
+			if( c > 0 )
+			{
+				// Add steps using intermediate interpolation.
+
+				int num;
+
+				if( c2 > 0 && div2 > div )
+				{
+					div = div2;
+					c = c2;
+					num = 3;
+				}
+				else
+				{
+					num = 2;
+				}
+
+				addProcessor( new CDSPFracInterpolator( SrcSampleRate2 * div,
+					DstSampleRate, ReqAtten, false, LatencyFrac ));
+
+				double tb = ( 1.0 - SrcSampleRate * div / DstSampleRate ) /
+					tbw; // Divide TransBand by a constant that assures a
+					// linear response in the pass-band.
+
+				if( tb > CDSPFIRFilter :: getLPMaxTransBand() )
+				{
+					tb = CDSPFIRFilter :: getLPMaxTransBand();
+				}
+
+				addProcessor( new CDSPBlockConvolver(
+					CDSPFIRFilterCache :: getLPFilter( 1.0 / num, tb,
+					ReqAtten, ReqPhase, num ), num, 1, LatencyFrac ));
+
+				const bool IsThird = ( num == 3 );
+
+				for( i = 1; i < c; i++ )
+				{
+					addProcessor( new CDSPHBUpsampler( ReqAtten, i - 1,
+						IsThird, LatencyFrac ));
+				}
+			}
+			else
+			{
+				addProcessor( new CDSPFracInterpolator( SrcSampleRate2,
+					DstSampleRate, ReqAtten, false, LatencyFrac ));
+			}
+
+			createTmpBuffers();
+			return;
+		}
+
+		// Use downsampling steps, including power-of-2 downsampling.
+
+		double CheckSR = DstSampleRate * 4.0;
+		int c = 0;
+		double FinGain = 1.0;
+
+		while( CheckSR <= SrcSampleRate )
+		{
+			c++;
+			CheckSR *= 2.0;
+			FinGain *= 0.5;
+		}
+
+		const int SrcSRDiv = ( 1 << c );
+		int downf;
+		double NormFreq = 0.5;
+		bool UseInterp = true;
+		bool IsThird = false;
+
+		for( downf = 2; downf <= 3; downf++ )
+		{
+			if( DstSampleRate * SrcSRDiv * downf == SrcSampleRate )
+			{
+				NormFreq = 1.0 / downf;
+				UseInterp = false;
+				IsThird = ( downf == 3 );
+				break;
+			}
+		}
+
+		if( UseInterp )
+		{
+			downf = 1;
+			NormFreq = DstSampleRate * SrcSRDiv / SrcSampleRate;
+			IsThird = ( NormFreq * 3.0 <= 1.0 );
+		}
+
+		for( i = 0; i < c; i++ )
+		{
+			// Use a fixed very relaxed 2X downsampling filters, that at
+			// the final stage only guarantees stop-band between 0.75 and
+			// pi. 0.5-0.75 range will be aliased to 0.25-0.5 range which
+			// will then be filtered out by the final filter.
+
+			addProcessor( new CDSPHBDownsampler( ReqAtten, c - 1 - i, IsThird,
+				LatencyFrac ));
+		}
+
+		addProcessor( new CDSPBlockConvolver(
+			CDSPFIRFilterCache :: getLPFilter( NormFreq, ReqTransBand,
+			ReqAtten, ReqPhase, FinGain ), 1, downf, LatencyFrac ));
+
+		if( UseInterp )
+		{
+			addProcessor( new CDSPFracInterpolator( SrcSampleRate,
+				DstSampleRate * SrcSRDiv, ReqAtten, IsThird, LatencyFrac ));
+		}
+
+		createTmpBuffers();
+	}
+
+	virtual ~CDSPResampler()
+	{
+		int i;
+
+		for( i = 0; i < StepCount; i++ )
+		{
+			delete Steps[ i ];
+		}
+	}
+
+	virtual int getLatency() const
+	{
+		return( 0 );
+	}
+
+	virtual double getLatencyFrac() const
+	{
+		return( LatencyFrac );
+	}
+
+	/**
+	 * This function ignores the supplied parameter and returns the maximal
+	 * output buffer length that depends on the MaxInLen supplied to the
+	 * constructor.
+	 */
+
+	virtual int getMaxOutLen( const int/* MaxInLen */ ) const
+	{
+		return( CurMaxOutLen );
+	}
+
+	/**
+	 * Function clears (resets) the state of *this object and returns it to
+	 * the state after construction. All input data accumulated in the
+	 * internal buffer so far will be discarded.
+	 *
+	 * This function makes it possible to use *this object for converting
+	 * separate streams from the same source sample rate to the same
+	 * destination sample rate without reconstructing the object. It is more
+	 * efficient to clear the state of the resampler object than to destroy it
+	 * and create a new object.
+	 */
+
+	virtual void clear()
+	{
+		int i;
+
+		for( i = 0; i < StepCount; i++ )
+		{
+			Steps[ i ] -> clear();
+		}
+	}
+
+	/**
+	 * Function performs sample rate conversion.
+	 *
+	 * If the source and destination sample rates are equal, the resampler
+	 * will do nothing and will simply return the input buffer unchanged.
+	 *
+	 * You do not need to allocate an intermediate output buffer for use with
+	 * this function. If required, the resampler will allocate a suitable
+	 * intermediate output buffer itself.
+	 *
+	 * @param ip0 Input buffer. This buffer is never used as output buffer by
+	 * this function. This pointer may be returned in "op0" if no resampling
+	 * is happening (source sample rate equals destination sample rate).
+	 * @param l The number of samples available in the input buffer. Should
+	 * not exceed the MaxInLen supplied in the constructor.
+	 * @param[out] op0 This variable receives the pointer to the resampled
+	 * data. On function's return, this pointer points to *this object's
+	 * internal buffer. In real-time applications it is suggested to pass this
+	 * pointer to the next output audio block and consume any data left from
+	 * the previous output audio block first before calling the process()
+	 * function again. The buffer pointed to by the "op0" on return is owned
+	 * by the resampler, so it should not be freed by the caller.
+	 * @return The number of samples available in the "op0" output buffer. If
+	 * the data from the output buffer "op0" is going to be written to a
+	 * bigger output buffer, it is suggested to check the returned number of
+	 * samples so that no overflow of the bigger output buffer happens.
+	 */
+
+	virtual int process( double* ip0, int l, double*& op0 )
+	{
+		R8BASSERT( l >= 0 );
+
+		double* ip = ip0;
+		int i;
+
+		for( i = 0; i < StepCount; i++ )
+		{
+			double* op = TmpBufs[ i & 1 ];
+			l = Steps[ i ] -> process( ip, l, op );
+			ip = op;
+		}
+
+		op0 = ip;
+		return( l );
+	}
+
+	/**
+	 * Function performs resampling of an input sample buffer of the specified
+	 * length in the "one-shot" mode. This function can be useful when impulse
+	 * response resampling is required.
+	 *
+	 * @param ip Input buffer pointer.
+	 * @param iplen Length of the input buffer in samples.
+	 * @param[out] op Output buffer pointer.
+	 * @param oplen Length of the output buffer in samples.
+	 * @tparam Tin Input buffer's element type.
+	 * @tparam Tout Output buffer's element type.
+	 */
+
+	template< typename Tin, typename Tout >
+	void oneshot( const Tin* ip, int iplen, Tout* op, int oplen )
+	{
+		CFixedBuffer< double > Buf( MaxInLen );
+		bool IsZero = false;
+
+		while( oplen > 0 )
+		{
+			int rc;
+			double* p;
+			int i;
+
+			if( iplen == 0 )
+			{
+				rc = MaxInLen;
+				p = &Buf[ 0 ];
+
+				if( !IsZero )
+				{
+					IsZero = true;
+					memset( p, 0, MaxInLen * sizeof( p[ 0 ]));
+				}
+			}
+			else
+			{
+				rc = min( iplen, MaxInLen );
+
+				if( sizeof( Tin ) == sizeof( double ))
+				{
+					p = (double*) ip;
+				}
+				else
+				{
+					p = &Buf[ 0 ];
+
+					for( i = 0; i < rc; i++ )
+					{
+						p[ i ] = ip[ i ];
+					}
+				}
+
+				ip += rc;
+				iplen -= rc;
+			}
+
+			double* op0;
+			int wc = process( p, rc, op0 );
+			wc = min( oplen, wc );
+
+			for( i = 0; i < wc; i++ )
+			{
+				op[ i ] = (Tout) op0[ i ];
+			}
+
+			op += wc;
+			oplen -= wc;
+		}
+
+		clear();
+	}
+
+	/**
+	 * Function obtains overall input sample count required to produce first
+	 * output sample. Function works by iteratively passing 1 sample at a time
+	 * until output begins. This is a relatively CPU-consuming operation. This
+	 * function should be called after the clear() function call or after
+	 * object's construction. The function itself calls the clear() function
+	 * before return.
+	 *
+	 * Note that it is advisable to cache the value returned by this function,
+	 * for each SrcSampleRate/DstSampleRate pair, if it is called frequently.
+	 */
+
+	int getInLenBeforeOutStart()
+	{
+		int inc = 0;
+
+		while( true )
+		{
+			double ins = 0.0;
+			double* op;
+
+			if( process( &ins, 1, op ) > 0 )
+			{
+				clear();
+				return( inc );
+			}
+
+			inc++;
+		}
+	}
+
+private:
+	CFixedBuffer< CDSPProcessor* > Steps; ///< Array of processing steps.
+		///<
+	int StepCapacity; ///< The capacity of the Steps array.
+		///<
+	int StepCount; ///< The number of created processing steps.
+		///<
+	int MaxInLen; ///< Maximal input length.
+		///<
+	CFixedBuffer< double > TmpBufAll; ///< Buffer containing both temporary
+		///< buffers.
+		///<
+	double* TmpBufs[ 2 ]; ///< Temporary output buffers.
+		///<
+	int TmpBufCapacities[ 2 ]; ///< Capacities of temporary buffers, updated
+		///< during processing steps building.
+		///<
+	int CurTmpBuf; ///< Current temporary buffer.
+		///<
+	int CurMaxOutLen; ///< Current maximal output length.
+		///<
+	double LatencyFrac; ///< Current fractional latency. After object's
+		///< construction, equals to the remaining fractional latency in the
+		///< output.
+		///<
+
+	/**
+	 * Function adds processor, updates MaxOutLen variable and adjusts length
+	 * of temporary internal buffers.
+	 *
+	 * @param Proc Processor to add. This pointer is inherited and will be
+	 * destroyed on *this object's destruction.
+	 */
+
+	void addProcessor( CDSPProcessor* const Proc )
+	{
+		if( StepCount == StepCapacity )
+		{
+			// Reallocate and increase Steps array's capacity.
+
+			const int NewCapacity = StepCapacity + 8;
+			Steps.realloc( StepCapacity, NewCapacity );
+			StepCapacity = NewCapacity;
+		}
+
+		LatencyFrac = Proc -> getLatencyFrac();
+		CurMaxOutLen = Proc -> getMaxOutLen( CurMaxOutLen );
+
+		if( CurMaxOutLen > TmpBufCapacities[ CurTmpBuf ])
+		{
+			TmpBufCapacities[ CurTmpBuf ] = CurMaxOutLen;
+		}
+
+		CurTmpBuf ^= 1;
+
+		Steps[ StepCount ] = Proc;
+		StepCount++;
+	}
+
+	/**
+	 * Function creates temporary buffers.
+	 */
+
+	void createTmpBuffers()
+	{
+		const int ol = TmpBufCapacities[ 0 ] + TmpBufCapacities[ 1 ];
+
+		if( ol > 0 )
+		{
+			TmpBufAll.alloc( ol );
+			TmpBufs[ 0 ] = &TmpBufAll[ 0 ];
+			TmpBufs[ 1 ] = &TmpBufAll[ TmpBufCapacities[ 0 ]];
+		}
+
+		R8BCONSOLE( "* CDSPResampler: init done\n" );
+	}
+};
+
+/**
+ * @brief The resampler class for 16-bit resampling.
+ *
+ * This class defines resampling parameters suitable for 16-bit resampling,
+ * using linear-phase low-pass filter. See the r8b::CDSPResampler class for
+ * details.
+ */
+
+class CDSPResampler16 : public CDSPResampler
+{
+public:
+	/**
+	 * Constructor initializes the 16-bit resampler. See the
+	 * r8b::CDSPResampler class for details.
+	 *
+	 * @param SrcSampleRate Source signal sample rate.
+	 * @param DstSampleRate Destination signal sample rate.
+	 * @param aMaxInLen The maximal planned length of the input buffer (in
+	 * samples) that will be passed to the resampler.
+	 * @param ReqTransBand Required transition band, in percent.
+	 */
+
+	CDSPResampler16( const double SrcSampleRate, const double DstSampleRate,
+		const int aMaxInLen, const double ReqTransBand = 2.0 )
+		: CDSPResampler( SrcSampleRate, DstSampleRate, aMaxInLen, ReqTransBand,
+			136.45, fprLinearPhase )
+	{
+	}
+};
+
+/**
+ * @brief The resampler class for 16-bit impulse response resampling.
+ *
+ * This class defines resampling parameters suitable for 16-bit impulse
+ * response resampling, using linear-phase low-pass filter. Impulse responses
+ * are non-dynamic signals, and thus need resampler with a lesser SNR. See the
+ * r8b::CDSPResampler class for details.
+ */
+
+class CDSPResampler16IR : public CDSPResampler
+{
+public:
+	/**
+	 * Constructor initializes the 16-bit impulse response resampler. See the
+	 * r8b::CDSPResampler class for details.
+	 *
+	 * @param SrcSampleRate Source signal sample rate.
+	 * @param DstSampleRate Destination signal sample rate.
+	 * @param aMaxInLen The maximal planned length of the input buffer (in
+	 * samples) that will be passed to the resampler.
+	 * @param ReqTransBand Required transition band, in percent.
+	 */
+
+	CDSPResampler16IR( const double SrcSampleRate, const double DstSampleRate,
+		const int aMaxInLen, const double ReqTransBand = 2.0 )
+		: CDSPResampler( SrcSampleRate, DstSampleRate, aMaxInLen, ReqTransBand,
+			109.56, fprLinearPhase )
+	{
+	}
+};
+
+/**
+ * @brief The resampler class for 24-bit resampling.
+ *
+ * This class defines resampling parameters suitable for 24-bit resampling
+ * (including 32-bit floating point resampling), using linear-phase low-pass
+ * filter. See the r8b::CDSPResampler class for details.
+ */
+
+class CDSPResampler24 : public CDSPResampler
+{
+public:
+	/**
+	 * Constructor initializes the 24-bit resampler (including 32-bit floating
+	 * point). See the r8b::CDSPResampler class for details.
+	 *
+	 * @param SrcSampleRate Source signal sample rate.
+	 * @param DstSampleRate Destination signal sample rate.
+	 * @param aMaxInLen The maximal planned length of the input buffer (in
+	 * samples) that will be passed to the resampler.
+	 * @param ReqTransBand Required transition band, in percent.
+	 */
+
+	CDSPResampler24( const double SrcSampleRate, const double DstSampleRate,
+		const int aMaxInLen, const double ReqTransBand = 2.0 )
+		: CDSPResampler( SrcSampleRate, DstSampleRate, aMaxInLen, ReqTransBand,
+			180.15, fprLinearPhase )
+	{
+	}
+};
+
+} // namespace r8b
+
+#endif // R8B_CDSPRESAMPLER_INCLUDED
diff --git a/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPSincFilterGen.h b/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPSincFilterGen.h
new file mode 100644
index 00000000..32aa2e5a
--- /dev/null
+++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPSincFilterGen.h
@@ -0,0 +1,687 @@
+//$ nobt
+//$ nocpp
+
+/**
+ * @file CDSPSincFilterGen.h
+ *
+ * @brief Sinc function-based FIR filter generator class.
+ *
+ * This file includes the CDSPSincFilterGen class implementation that
+ * generates FIR filters.
+ *
+ * r8brain-free-src Copyright (c) 2013-2022 Aleksey Vaneev
+ * See the "LICENSE" file for license.
+ */
+
+#ifndef R8B_CDSPSINCFILTERGEN_INCLUDED
+#define R8B_CDSPSINCFILTERGEN_INCLUDED
+
+#include "r8bbase.h"
+
+namespace r8b {
+
+/**
+ * @brief Sinc function-based FIR filter generator class.
+ *
+ * Structure that holds state used to perform generation of sinc functions of
+ * various types, windowed by the Blackman window by default (but the window
+ * function can be changed if necessary).
+ */
+
+class CDSPSincFilterGen
+{
+public:
+	double Len2; ///< Required half filter kernel's length in samples (can be
+		///< a fractional value). Final physical kernel length will be
+		///< provided in the KernelLen variable. Len2 should be >= 2.
+		///<
+	int KernelLen; ///< Resulting length of the filter kernel, this variable
+		///< is set after the call to one of the "init" functions.
+		///<
+	int fl2; ///< Internal "half kernel length" value. This value can be used
+		///< as filter's latency in samples (taps), this variable is set after
+		///< the call to one of the "init" functions.
+		///<
+
+	union
+	{
+		struct
+		{
+			double Freq1; ///< Required corner circular frequency 1 [0; pi].
+				///< Used only in the generateBand() function.
+				///<
+			double Freq2; ///< Required corner circular frequency 2 [0; pi].
+				///< Used only in the generateBand() function. The range
+				///< [Freq1; Freq2] defines a pass band for the generateBand()
+				///< function.
+				///<
+		};
+
+		struct
+		{
+			double FracDelay; ///< Fractional delay in the range [0; 1], used
+				///< only in the generateFrac() function. Note that the
+				///< FracDelay parameter is actually inversed. At 0.0 value it
+				///< produces 1 sample delay (with the latency equal to fl2),
+				///< at 1.0 value it produces 0 sample delay (with the latency
+				///< equal to fl2 - 1).
+				///<
+		};
+	};
+
+	/**
+	 * Window function type.
+	 */
+
+	enum EWindowFunctionType
+	{
+		wftCosine, ///< Generalized cosine window function. No parameters
+			///< required. The "Power" parameter is optional.
+			///<
+		wftKaiser, ///< Kaiser window function. Requires the "Beta" parameter.
+			///< The "Power" parameter is optional.
+			///<
+		wftGaussian ///< Gaussian window function. Requires the "Sigma"
+			///< parameter. The "Power" parameter is optional.
+			///<
+	};
+
+	typedef double( CDSPSincFilterGen :: *CWindowFunc )(); ///< Window
+		///< calculation function pointer type.
+		///<
+
+	/**
+	 * Function initializes *this structure for generation of a window
+	 * function, odd-sized.
+	 *
+	 * @param WinType Window function type.
+	 * @param Params Window function's parameters. If NULL, the table values
+	 * may be used.
+	 * @param UsePower "True" if the power factor should be used to raise the
+	 * window function. If "true", the power factor should be specified as the
+	 * last value in the Params array. If Params is NULL, the table or default
+	 * value of -1.0 (off) will be used.
+	 */
+
+	void initWindow( const EWindowFunctionType WinType = wftCosine,
+		const double* const Params = NULL, const bool UsePower = false )
+	{
+		R8BASSERT( Len2 >= 2.0 );
+
+		fl2 = (int) floor( Len2 );
+		KernelLen = fl2 + fl2 + 1;
+
+		setWindow( WinType, Params, UsePower, true );
+	}
+
+	/**
+	 * Function initializes *this structure for generation of band-limited
+	 * sinc filter kernel. The generateBand() function should be used to
+	 * calculate the filter.
+	 *
+	 * @param WinType Window function type.
+	 * @param Params Window function's parameters. If NULL, the table values
+	 * may be used.
+	 * @param UsePower "True" if the power factor should be used to raise the
+	 * window function. If "true", the power factor should be specified as the
+	 * last value in the Params array. If Params is NULL, the table or default
+	 * value of -1.0 (off) will be used.
+	 */
+
+	void initBand( const EWindowFunctionType WinType = wftCosine,
+		const double* const Params = NULL, const bool UsePower = false )
+	{
+		R8BASSERT( Len2 >= 2.0 );
+
+		fl2 = (int) floor( Len2 );
+		KernelLen = fl2 + fl2 + 1;
+		f1.init( Freq1, 0.0 );
+		f2.init( Freq2, 0.0 );
+
+		setWindow( WinType, Params, UsePower, true );
+	}
+
+	/**
+	 * Function initializes *this structure for Hilbert transformation filter
+	 * calculation. Freq1 and Freq2 variables are not used.
+	 * The generateHilbert() function should be used to calculate the filter.
+	 *
+	 * @param WinType Window function type.
+	 * @param Params Window function's parameters. If NULL, the table values
+	 * may be used.
+	 * @param UsePower "True" if the power factor should be used to raise the
+	 * window function. If "true", the power factor should be specified as the
+	 * last value in the Params array. If Params is NULL, the table or default
+	 * value of -1.0 (off) will be used.
+	 */
+
+	void initHilbert( const EWindowFunctionType WinType = wftCosine,
+		const double* const Params = NULL, const bool UsePower = false )
+	{
+		R8BASSERT( Len2 >= 2.0 );
+
+		fl2 = (int) floor( Len2 );
+		KernelLen = fl2 + fl2 + 1;
+
+		setWindow( WinType, Params, UsePower, true );
+	}
+
+	/**
+	 * Function initializes *this structure for generation of full-bandwidth
+	 * fractional delay sinc filter kernel. Freq1 and Freq2 variables are not
+	 * used. The generateFrac() function should be used to calculate the
+	 * filter.
+	 *
+	 * @param WinType Window function type.
+	 * @param Params Window function's parameters. If NULL, the table values
+	 * may be used.
+	 * @param UsePower "True" if the power factor should be used to raise the
+	 * window function. If "true", the power factor should be specified as the
+	 * last value in the Params array. If Params is NULL, the table or default
+	 * value of -1.0 (off) will be used.
+	 */
+
+	void initFrac( const EWindowFunctionType WinType = wftCosine,
+		const double* const Params = NULL, const bool UsePower = false )
+	{
+		R8BASSERT( Len2 >= 2.0 );
+
+		fl2 = (int) ceil( Len2 );
+		KernelLen = fl2 + fl2;
+
+		setWindow( WinType, Params, UsePower, false, FracDelay );
+	}
+
+	/**
+	 * @return The next "Hann" window function coefficient.
+	 */
+
+	double calcWindowHann()
+	{
+		return( 0.5 + 0.5 * w1.generate() );
+	}
+
+	/**
+	 * @return The next "Hamming" window function coefficient.
+	 */
+
+	double calcWindowHamming()
+	{
+		return( 0.54 + 0.46 * w1.generate() );
+	}
+
+	/**
+	 * @return The next "Blackman" window function coefficient.
+	 */
+
+	double calcWindowBlackman()
+	{
+		return( 0.42 + 0.5 * w1.generate() + 0.08 * w2.generate() );
+	}
+
+	/**
+	 * @return The next "Nuttall" window function coefficient.
+	 */
+
+	double calcWindowNuttall()
+	{
+		return( 0.355768 + 0.487396 * w1.generate() +
+			0.144232 * w2.generate() + 0.012604 * w3.generate() );
+	}
+
+	/**
+	 * @return The next "Blackman-Nuttall" window function coefficient.
+	 */
+
+	double calcWindowBlackmanNuttall()
+	{
+		return( 0.3635819 + 0.4891775 * w1.generate() +
+			0.1365995 * w2.generate() + 0.0106411 * w3.generate() );
+	}
+
+	/**
+	 * @return The next "Kaiser" window function coefficient.
+	 */
+
+	double calcWindowKaiser()
+	{
+		const double n = 1.0 - sqr( wn / Len2 + KaiserLen2Frac );
+		wn++;
+
+		if( n < 0.0 )
+		{
+			return( 0.0 );
+		}
+
+		return( besselI0( KaiserBeta * sqrt( n )) / KaiserDiv );
+	}
+
+	/**
+	 * @return The next "Gaussian" window function coefficient.
+	 */
+
+	double calcWindowGaussian()
+	{
+		const double f = exp( -0.5 * sqr( wn / GaussianSigma +
+			GaussianSigmaFrac ));
+
+		wn++;
+
+		return( f );
+	}
+
+	/**
+	 * Function calculates window function only.
+	 *
+	 * @param[out] op Output buffer, length = KernelLen.
+	 * @param wfunc Window calculation function to use.
+	 */
+
+	void generateWindow( double* op,
+		CWindowFunc wfunc = &CDSPSincFilterGen :: calcWindowBlackman )
+	{
+		op += fl2;
+		double* op2 = op;
+
+		int l = fl2;
+
+		if( Power < 0.0 )
+		{
+			*op = ( *this.*wfunc )();
+
+			while( l > 0 )
+			{
+				const double v = ( *this.*wfunc )();
+
+				op++;
+				op2--;
+				*op = v;
+				*op2 = v;
+				l--;
+			}
+		}
+		else
+		{
+			*op = pows(( *this.*wfunc )(), Power );
+
+			while( l > 0 )
+			{
+				const double v = pows(( *this.*wfunc )(), Power );
+
+				op++;
+				op2--;
+				*op = v;
+				*op2 = v;
+				l--;
+			}
+		}
+	}
+
+	/**
+	 * Function calculates band-limited windowed sinc function-based filter
+	 * kernel.
+	 *
+	 * @param[out] op Output buffer, length = KernelLen.
+	 * @param wfunc Window calculation function to use.
+	 */
+
+	void generateBand( double* op,
+		CWindowFunc wfunc = &CDSPSincFilterGen :: calcWindowBlackman )
+	{
+		op += fl2;
+		double* op2 = op;
+		f1.generate();
+		f2.generate();
+		int t = 1;
+
+		if( Power < 0.0 )
+		{
+			*op = ( Freq2 - Freq1 ) * ( *this.*wfunc )() / R8B_PI;
+
+			while( t <= fl2 )
+			{
+				const double v = ( f2.generate() - f1.generate() ) *
+					( *this.*wfunc )() / ( t * R8B_PI );
+
+				op++;
+				op2--;
+				*op = v;
+				*op2 = v;
+				t++;
+			}
+		}
+		else
+		{
+			*op = ( Freq2 - Freq1 ) * pows(( *this.*wfunc )(), Power ) /
+				R8B_PI;
+
+			while( t <= fl2 )
+			{
+				const double v = ( f2.generate() - f1.generate() ) *
+					pows(( *this.*wfunc )(), Power ) / ( t * R8B_PI );
+
+				op++;
+				op2--;
+				*op = v;
+				*op2 = v;
+				t++;
+			}
+		}
+	}
+
+	/**
+	 * Function calculates windowed Hilbert transformer filter kernel.
+	 *
+	 * @param[out] op Output buffer, length = KernelLen.
+	 * @param wfunc Window calculation function to use.
+	 */
+
+	void generateHilbert( double* op,
+		CWindowFunc wfunc = &CDSPSincFilterGen :: calcWindowBlackman )
+	{
+		static const double fvalues[ 2 ] = { 0.0, 2.0 / R8B_PI };
+		op += fl2;
+		double* op2 = op;
+
+		( *this.*wfunc )();
+		*op = 0.0;
+
+		int t = 1;
+
+		if( Power < 0.0 )
+		{
+			while( t <= fl2 )
+			{
+				const double v = fvalues[ t & 1 ] * ( *this.*wfunc )() / t;
+				op++;
+				op2--;
+				*op = v;
+				*op2 = -v;
+				t++;
+			}
+		}
+		else
+		{
+			while( t <= fl2 )
+			{
+				const double v = fvalues[ t & 1 ] *
+					pows( ( *this.*wfunc )(), Power ) / t;
+
+				op++;
+				op2--;
+				*op = v;
+				*op2 = -v;
+				t++;
+			}
+		}
+	}
+
+	/**
+	 * Function calculates windowed fractional delay filter kernel.
+	 *
+	 * @param[out] op Output buffer, length = KernelLen.
+	 * @param wfunc Window calculation function to use.
+	 * @param opinc Output buffer increment, in "op" elements.
+	 */
+
+	void generateFrac( double* op,
+		CWindowFunc wfunc = &CDSPSincFilterGen :: calcWindowBlackman,
+		const int opinc = 1 )
+	{
+		R8BASSERT( opinc != 0 );
+
+		double f[ 2 ];
+		f[ 0 ] = sin( FracDelay * R8B_PI ) / R8B_PI;
+		f[ 1 ] = -f[ 0 ];
+
+		int t = -fl2;
+
+		if( t + FracDelay < -Len2 )
+		{
+			( *this.*wfunc )();
+			*op = 0.0;
+			op += opinc;
+			t++;
+		}
+
+		int IsZeroX = ( fabs( FracDelay - 1.0 ) < 0x1p-42 );
+		int mt = 0 - IsZeroX;
+		IsZeroX = ( IsZeroX || fabs( FracDelay ) < 0x1p-42 );
+
+		if( Power < 0.0 )
+		{
+			while( t < mt )
+			{
+				*op = f[ t & 1 ] * ( *this.*wfunc )() / ( t + FracDelay );
+				op += opinc;
+				t++;
+			}
+
+			if( IsZeroX ) // t+FracDelay==0
+			{
+				*op = ( *this.*wfunc )();
+			}
+			else
+			{
+				*op = f[ t & 1 ] * ( *this.*wfunc )() / FracDelay; // t==0
+			}
+
+			mt = fl2 - 2;
+
+			while( t < mt )
+			{
+				op += opinc;
+				t++;
+				*op = f[ t & 1 ] * ( *this.*wfunc )() / ( t + FracDelay );
+			}
+
+			op += opinc;
+			t++;
+			const double ut = t + FracDelay;
+			*op = ( ut > Len2 ? 0.0 : f[ t & 1 ] * ( *this.*wfunc )() / ut );
+		}
+		else
+		{
+			while( t < mt )
+			{
+				*op = f[ t & 1 ] * pows( ( *this.*wfunc )(), Power ) /
+					( t + FracDelay );
+
+				op += opinc;
+				t++;
+			}
+
+			if( IsZeroX ) // t+FracDelay==0
+			{
+				*op = pows( ( *this.*wfunc )(), Power );
+			}
+			else
+			{
+				*op = f[ t & 1 ] * pows( ( *this.*wfunc )(), Power ) /
+					FracDelay; // t==0
+			}
+
+			mt = fl2 - 2;
+
+			while( t < mt )
+			{
+				op += opinc;
+				t++;
+				*op = f[ t & 1 ] * pows( ( *this.*wfunc )(), Power ) /
+					( t + FracDelay );
+			}
+
+			op += opinc;
+			t++;
+			const double ut = t + FracDelay;
+			*op = ( ut > Len2 ? 0.0 : f[ t & 1 ] *
+				pows( ( *this.*wfunc )(), Power ) / ut );
+		}
+	}
+
+private:
+	double Power; ///< The power factor used to raise the window function.
+		///< Equals a negative value if the power factor should not be used.
+		///<
+	CSineGen f1; ///< Sine function 1. Used in the generateBand() function.
+		///<
+	CSineGen f2; ///< Sine function 2. Used in the generateBand() function.
+		///<
+	int wn; ///< Window function integer position. 0 - center of the window
+		///< function. This variable may not be used by some window functions.
+		///<
+	CSineGen w1; ///< Cosine wave 1 for window function.
+		///<
+	CSineGen w2; ///< Cosine wave 2 for window function.
+		///<
+	CSineGen w3; ///< Cosine wave 3 for window function.
+		///<
+
+	union
+	{
+		struct
+		{
+			double KaiserBeta; ///< Kaiser window function's "Beta"
+				///< coefficient.
+				///<
+			double KaiserDiv; ///< Kaiser window function's divisor.
+				///<
+			double KaiserLen2Frac; ///< Equals FracDelay / Len2.
+				///<
+		};
+
+		struct
+		{
+			double GaussianSigma; ///< Gaussian window function's "Sigma"
+				///< coefficient.
+				///<
+			double GaussianSigmaFrac; ///< Equals FracDelay / GaussianSigma.
+				///<
+		};
+	};
+
+	/**
+	 * Function initializes Kaiser window function calculation. The FracDelay
+	 * variable should be initialized when using this window function.
+	 *
+	 * @param Params Function parameters. If NULL, the default values will be
+	 * used. If not NULL, the first parameter should specify the "Beta" value.
+	 * @param UsePower "True" if the power factor should be used to raise the
+	 * window function.
+	 * @param IsCentered "True" if centered window should be used. This
+	 * parameter usually equals to "false" for fractional delay filters only.
+	 */
+
+	void setWindowKaiser( const double* Params, const bool UsePower,
+		const bool IsCentered )
+	{
+		wn = ( IsCentered ? 0 : -fl2 );
+
+		if( Params == NULL )
+		{
+			KaiserBeta = 9.5945013206755156;
+			Power = ( UsePower ? 1.9718457932433306 : -1.0 );
+		}
+		else
+		{
+			KaiserBeta = clampr( Params[ 0 ], 1.0, 350.0 );
+			Power = ( UsePower ? fabs( Params[ 1 ]) : -1.0 );
+		}
+
+		KaiserDiv = besselI0( KaiserBeta );
+		KaiserLen2Frac = FracDelay / Len2;
+	}
+
+	/**
+	 * Function initializes Gaussian window function calculation. The FracDelay
+	 * variable should be initialized when using this window function.
+	 *
+	 * @param Params Function parameters. If NULL, the table values will be
+	 * used. If not NULL, the first parameter should specify the "Sigma"
+	 * value.
+	 * @param UsePower "True" if the power factor should be used to raise the
+	 * window function.
+	 * @param IsCentered "True" if centered window should be used. This
+	 * parameter usually equals to "false" for fractional delay filters only.
+	 */
+
+	void setWindowGaussian( const double* Params, const bool UsePower,
+		const bool IsCentered )
+	{
+		wn = ( IsCentered ? 0 : -fl2 );
+
+		if( Params == NULL )
+		{
+			GaussianSigma = 1.0;
+			Power = -1.0;
+		}
+		else
+		{
+			GaussianSigma = clampr( fabs( Params[ 0 ]), 1e-1, 100.0 );
+			Power = ( UsePower ? fabs( Params[ 1 ]) : -1.0 );
+		}
+
+		GaussianSigma *= Len2;
+		GaussianSigmaFrac = FracDelay / GaussianSigma;
+	}
+
+	/**
+	 * Function initializes calculation of window function of the specified
+	 * type.
+	 *
+	 * @param WinType Window function type.
+	 * @param Params Window function's parameters. If NULL, the table values
+	 * may be used.
+	 * @param UsePower "True" if the power factor should be used to raise the
+	 * window function. If "true", the power factor should be specified as the
+	 * last value in the Params array. If Params is NULL, the table or default
+	 * value of -1.0 (off) will be used.
+	 * @param IsCentered "True" if centered window should be used. This
+	 * parameter usually equals to "false" for fractional delay filters only.
+	 * @param UseFracDelay Fractional delay to use.
+	 */
+
+	void setWindow( const EWindowFunctionType WinType,
+		const double* const Params, const bool UsePower,
+		const bool IsCentered, const double UseFracDelay = 0.0 )
+	{
+		FracDelay = UseFracDelay;
+
+		if( WinType == wftCosine )
+		{
+			if( IsCentered )
+			{
+				w1.init( R8B_PI / Len2, R8B_PId2 );
+				w2.init( R8B_2PI / Len2, R8B_PId2 );
+				w3.init( R8B_3PI / Len2, R8B_PId2 );
+			}
+			else
+			{
+				const double step1 = R8B_PI / Len2;
+				w1.init( step1, R8B_PId2 - step1 * fl2 + step1 * FracDelay );
+
+				const double step2 = R8B_2PI / Len2;
+				w2.init( step2, R8B_PId2 - step2 * fl2 + step2 * FracDelay );
+
+				const double step3 = R8B_3PI / Len2;
+				w3.init( step3, R8B_PId2 - step3 * fl2 + step3 * FracDelay );
+			}
+
+			Power = ( UsePower && Params != NULL ? Params[ 0 ] : -1.0 );
+		}
+		else
+		if( WinType == wftKaiser )
+		{
+			setWindowKaiser( Params, UsePower, IsCentered );
+		}
+		else
+		if( WinType == wftGaussian )
+		{
+			setWindowGaussian( Params, UsePower, IsCentered );
+		}
+	}
+};
+
+} // namespace r8b
+
+#endif // R8B_CDSPSINCFILTERGEN_INCLUDED
diff --git a/Src/external_dependencies/openmpt-trunk/include/r8brain/LICENSE b/Src/external_dependencies/openmpt-trunk/include/r8brain/LICENSE
new file mode 100644
index 00000000..669a7839
--- /dev/null
+++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2013-2022 Aleksey Vaneev
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/Src/external_dependencies/openmpt-trunk/include/r8brain/OpenMPT.txt b/Src/external_dependencies/openmpt-trunk/include/r8brain/OpenMPT.txt
new file mode 100644
index 00000000..2fcb3b9d
--- /dev/null
+++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/OpenMPT.txt
@@ -0,0 +1,6 @@
+r8brain-free resampling library from https://github.com/avaneev/r8brain-free-src
+version 5.6.
+The example program, DLL and "other" folder, PFFFT sources and logo file have been removed.
+No further local changes have been made.
+For building, premake is used to generate Visual Studio project files.
+See ../build/premake/ for details.
diff --git a/Src/external_dependencies/openmpt-trunk/include/r8brain/README.md b/Src/external_dependencies/openmpt-trunk/include/r8brain/README.md
new file mode 100644
index 00000000..af0412c1
--- /dev/null
+++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/README.md
@@ -0,0 +1,408 @@
+# r8brain-free-src - High-Quality Resampler #
+
+## Introduction ##
+
+Open source (under the MIT license) high-quality professional audio sample
+rate converter (SRC) / resampler C++ library.  Features routines for SRC,
+both up- and downsampling, to/from any sample rate, including non-integer
+sample rates: it can be also used for conversion to/from SACD/DSD sample
+rates, and even go beyond that.  SRC routines were implemented in a
+multi-platform C++ code, and have a high level of optimality. Also suitable
+for fast general-purpose 1D time-series resampling / interpolation (with
+relaxed filter parameters).
+
+The structure of this library's objects is such that they can be frequently
+created and destroyed in large applications with a minimal performance impact
+due to a high level of reusability of its most "initialization-expensive"
+objects: the fast Fourier transform and FIR filter objects.
+
+The SRC algorithm at first produces 2X oversampled (relative to the source
+sample rate, or the destination sample rate if the downsampling is performed)
+signal, then performs interpolation using a bank of short (8 to 30 taps,
+depending on the required precision) polynomial-interpolated sinc
+function-based fractional delay filters.  This puts the algorithm into the
+league of the fastest among the most precise SRC algorithms.  The more precise
+alternative being only the whole number-factored SRC, which can be slower.
+
+P.S. Please credit the creator of this library in your documentation in the
+following way: "Sample rate converter designed by Aleksey Vaneev of Voxengo".
+
+## Requirements ##
+
+C++ compiler and system with the "double" floating point type (53-bit
+mantissa) support.  No explicit code for the "float" type is present in this
+library, because as practice has shown the "float"-based code performs
+considerably slower on a modern processor, at least in this library.  This
+library does not have dependencies beside the standard C library, the
+"windows.h" on Windows and the "pthread.h" on macOS and Linux.
+
+## Links ##
+
+* [Documentation](https://www.voxengo.com/public/r8brain-free-src/Documentation/)
+* [Discussion](https://www.kvraudio.com/forum/viewtopic.php?t=389711)
+
+## Usage Information ##
+
+The sample rate converter (resampler) is represented by the
+**r8b::CDSPResampler** class, which is a single front-end class for the
+whole library.  You do not basically need to use nor understand any other
+classes beside this class.  Several derived classes that have varying levels
+of precision are also available (for full-resolution 16-bit and 24-bit
+resampling).
+
+The code of the library resides in the "r8b" C++ namespace, effectively
+isolating it from all other code.  The code is thread-safe.  A separate
+resampler object should be created for each audio channel or stream being
+processed concurrently.
+
+Note that you will need to compile the "r8bbase.cpp" source file and include
+the resulting object file into your application build.  This source file
+includes definitions of several global static objects used by the library.
+You may also need to include to your project: the "Kernel32" library (on
+Windows) and the "pthread" library on macOS and Linux.
+
+The library is able to process signal of any scale and loudness: it is not
+limited to just a "usual" -1.0 to 1.0 range.
+
+By defining the `R8B_IPP` configuration macro it is possible to enable Intel
+IPP back-end for FFT functions, instead of the default Ooura FFT.  IPP FFT
+makes sample rate conversion faster by 23% on average.
+
+    #define R8B_IPP 1
+
+If a larger initial processing delay and a very minor sample timing error are
+not an issue, for the most efficiency you can define these macros at
+the beginning of the `r8bconf.h` file, or during compilation:
+
+    #define R8B_IPP 1
+    #define R8B_FASTTIMING 1
+    #define R8B_EXTFFT 1
+
+If you do not have access to the Intel IPP then you may consider enabling the
+PFFFT which is only slightly slower than Intel IPP FFT in performance.  There
+are two macros available: `R8B_PFFFT` and `R8B_PFFFT_DOUBLE`.  The first macro
+enables PFFFT that works in single-precision resolution, thus limiting the
+overall resampler's precision to 24-bit sample rate conversions (for
+mission-critical professional audio applications, using the `R8B_PFFFT` macro
+is not recommended as its peak error is quite large).  The second macro
+enables PFFFT implementation that works in double-precision resolution, making
+use of SSE2, AVX, and NEON intrinsics, yielding precision that is equal to
+both Intel IPP and Ooura FFT implementations.
+
+To use the PFFFT, define the `R8B_PFFFT` or `R8B_PFFFT_DOUBLE` macro, compile
+and include the supplied `pffft.cpp` or `pffft_double/pffft_double.c` file to
+your project build.
+
+    #define R8B_PFFFT 1
+
+    or
+
+    #define R8B_PFFFT_DOUBLE 1
+
+The code of this library was commented in the [Doxygen](http://www.doxygen.org/)
+style.  To generate the documentation locally you may run the
+`doxygen ./other/r8bdoxy.txt` command from the library's folder.
+
+Preliminary tests show that the r8b::CDSPResampler24 resampler class achieves
+`31.8*n_cores` Mrops (`44*n_cores` for Intel IPP FFT) when converting 1
+channel of 24-bit audio from 44100 to 96000 sample rate (2% transition band),
+on a Ryzen 3700X processor-based 64-bit system.  This approximately translates
+to a real-time resampling of `720*n_cores` (`1000*n_cores`) audio streams, at
+100% CPU load.  Speed performance when converting to other sample rates may
+vary greatly.  When comparing performance of this resampler library to another
+library make sure that the competing library is also tuned to produce a fully
+linear-phase response, has similar stop-band characteristics and similar
+sample timing precision.
+
+## Dynamic Link Library ##
+
+The functions of this SRC library are also accessible in simplified form via
+the DLL file on Windows, requiring a processor with SSE2 support (Win64
+version includes AVX2 auto-dispatch code).  Delphi Pascal interface unit file
+for the DLL file is available.  DLL and C LIB files are distributed in the DLL
+folder on the project's homepage.  On non-Windows systems it is preferrable
+to use the C++ library directly.  Note that the DLL was compiled with the
+Intel IPP enabled.
+
+## Real-Time Applications ##
+
+The resampler class of this library was designed as an asynchronous processor:
+it may produce any number of output samples, depending on the input sample
+data length and the resampling parameters.  The resampler must be fed with the
+input sample data until enough output sample data were produced, with any
+excess output samples used before feeding the resampler with more input data.
+A "relief" factor here is that the resampler removes the initial processing
+latency automatically, and that after initial moments of processing the output
+becomes steady, with only minor output sample data length fluctuations.
+
+So, while for an off-line resampling a "push" method can be used,
+demonstrated in the `example.cpp` file, for a real-time resampling a "pull"
+method should be used which calls the resampling process until the output
+buffer is filled.
+
+## Notes ##
+
+When using the **r8b::CDSPResampler** class directly, you may select the
+transition band/steepness of the low-pass (reconstruction) filter, expressed
+as a percentage of the full spectral bandwidth of the input signal (or the
+output signal if the downsampling is performed), and the desired stop-band
+attenuation in decibel.
+
+The transition band is specified as the normalized spectral space of the input
+signal (or the output signal if the downsampling is performed) between the
+low-pass filter's -3 dB point and the Nyquist frequency, and ranges from 0.5%
+to 45%.  Stop-band attenuation can be specified in the range from 49 to 218
+decibel.  Both the transition band and stop-band attenuation affect
+resampler's overall speed performance and initial output delay.  For your
+information, transition frequency range spans 175% of the specified transition
+band, which means that for 2% transition band, frequency response below
+0.965\*Nyquist is linear.
+
+This SRC library also implements a much faster "power of 2" resampling (e.g.
+2X, 4X, 8X, 16X, 3X, 3\*2X, 3\*4X, 3\*8X, etc. upsampling and downsampling),
+which is engaged automatically if the resampling parameters permit.
+
+This library was tested for compatibility with [GNU C++](https://gcc.gnu.org/),
+[Microsoft Visual C++](https://visualstudio.microsoft.com/),
+[LLVM](https://llvm.org/) and [Intel C++](https://software.intel.com/en-us/c-compilers)
+compilers, on 32- and 64-bit Windows, macOS, and CentOS Linux.
+
+Most code is "inline", without the need to compile many source files. The
+memory footprint is quite modest.
+
+## Acknowledgements ##
+
+r8brain-free-src is bundled with the following code:
+
+* FFT routines Copyright (c) 1996-2001 Takuya OOURA.
+[Homepage](http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html)
+* PFFFT Copyright (c) 2013 Julien Pommier.
+[Homepage](https://bitbucket.org/jpommier/pffft)
+* PFFFT DOUBLE Copyright (c) 2020 Hayati Ayguen, Dario Mambro.
+[Homepage](https://github.com/unevens/pffft)
+
+## Users ##
+
+This library is used by:
+
+* [REAPER](https://reaper.fm/)
+* [AUDIRVANA](https://audirvana.com/)
+* [Red Dead Redemption 2](https://www.rockstargames.com/reddeadredemption2/credits)
+* [Mini Piano Lite](https://play.google.com/store/apps/details?id=umito.android.minipiano)
+* [OpenMPT](https://openmpt.org/)
+* [Boogex Guitar Amp audio plugin](https://www.voxengo.com/product/boogex/)
+* [r8brain free sample rate converter](https://www.voxengo.com/product/r8brain/)
+* [Voice Aloud Reader](https://play.google.com/store/apps/details?id=com.hyperionics.avarLic)
+* [Zynewave Podium](https://zynewave.com/podium/)
+* [Phonometrica](http://www.phonometrica-ling.org/index.html)
+* [Ripcord](https://cancel.fm/ripcord/)
+* [TensorVox](https://github.com/ZDisket/TensorVox)
+* [Curvessor](https://github.com/unevens/Curvessor)
+
+Please send me a note via aleksey.vaneev@gmail.com and I will include a link
+to your software product to this list of users. This list is important in
+maintaining confidence in this library among the interested parties. The
+inclusion into this list is not mandatory.
+
+## Change Log ##
+
+Version 5.6:
+
+* Added SSE and NEON implementations to `CDSPHBUpsampler` yielding 15%
+performance improvement of power-of-2 upsampling.
+* Added SSE and NEON implementations to the `CDSPRealFFT::multiplyBlocksZP`
+function, for 2-3% performance improvement.
+* Added intermediate interpolator's transition band limitation, for logical
+hardness (not practically needed).
+* Added the `aDoConsumeLatency` parameter to `CDSPHBUpsampler` constructor,
+for "inline" DSP uses of the class.
+* Made various minor changes across the codebase.
+
+Version 5.5:
+
+* Hardened positional logic of fractional filter calculation, removed
+redundant multiplications.
+* Removed unnecessary function templating from the `CDSPSincFilterGen` class.
+* Added the `__ARM_NEON` macro to NEON availability detection.
+
+Version 5.4:
+
+* Added compiler specializations to previously optimized inner loops.
+"Shuffled" SIMD interpolation code is not efficient on Apple M1. Intel C++
+Compiler vectorizes "whole stepping" interpolation as good as a
+manually-written SSE.
+* Reorganized SIMD instructions for a slightly better performance.
+* Changed internal buffer sizes of half-band resamplers (1-2% performance
+boost).
+* Fixed compiler warnings in PFFFT code.
+* Added several asserts to the code.
+
+Version 5.3:
+
+* Optimized inner loops of the fractional interpolator, added SSE2 and NEON
+intrinsics, resulting in a measurable (8-25%) performance gain.
+* Optimized filter calculation functions: changed some divisions by a constant
+to multiplications.
+* Renamed M_PI macros to R8B_PI, to avoid macro collisions.
+* Removed redundant code and macros.
+
+Version 5.2:
+
+* Modified `PFFFT` and `PFFFT DOUBLE` conditional pre-processor directives to
+always enable NEON on `aarch64`/`arm64` (this includes code built for
+Apple M1).
+
+Version 5.1:
+
+* Changed alignment in the `CFixedBuffer` class to 64 bytes. This improves AVX
+performance of the `PFFFT DOUBLE` implementation by a few percent.
+* Removed redundant files from the `pffft_double` folder, integrated the
+`pffft_common.c` file into the `pffft_double.c` file.
+
+Version 5.0:
+
+* Removed a long-outdated macros from the `r8bconf.h` file.
+* Changed a conditional pre-processor directive in the `pf_sse2_double.h` file
+as per PFFFT DOUBLE author's suggestion, to allow SSE2 intrinsics in most
+compilers.
+* Fixed "License.txt" misnaming in the source files to "LICENSE".
+
+Version 4.10:
+
+* Added the `PFFFT DOUBLE` implementation support. Now available via the
+`R8B_PFFFT_DOUBLE` definition macro.
+
+Version 4.9:
+
+* Reoptimized half-band and fractional interpolation filters with a stricter
+frequency response linearity constraints. This did not impact the average
+speed performance.
+
+Version 4.8:
+
+* Added a limit to the intermediate filter's transition band, to keep the
+latency under control at any resampling ratio.
+
+Version 4.7:
+
+* Added `#ifndef _USE_MATH_DEFINES` to `pffft.cpp`.
+* Moved `#include "pffft.h"` to `CDSPRealFFT.h`.
+
+Version 4.6:
+
+* Removed the `MaxInLen` parameter from the `oneshot()` function.
+* Decreased intermediate low-pass filter's transition band slightly, for more
+stable quality.
+
+Version 4.5:
+
+* Fixed VS2017 compiler warnings.
+
+Version 4.4:
+
+* Fixed the "Declaration hides a member" Intel C++ compiler warnings.
+
+Version 4.3:
+
+* Added //$ markers for internal debugging purposes.
+
+Version 4.2:
+
+* Backed-off max transition band to 45 and MinAtten to 49.
+* Implemented Wave64 and AIFF file input in the `r8bfreesrc` bench tool. The
+tool is now compiled with the `R8B_IPP 1` and `R8B_EXTFFT 1` macros to
+demonstrate the maximal achievable performance.
+
+Version 4.1:
+
+* Updated allowed ReqAtten range to 52-218, ReqTransBand 0.5-56. It is
+possible to specify filter parameters slightly beyond these values, but the
+resulting filter will be slightly out of specification as well.
+* Optimized static filter banks allocation.
+
+Version 4.0:
+
+* A major overhaul of interpolation classes: now templated parameters are not
+used, all required parameters are calculated at runtime. Static filter bank
+object is not used, it is created when necessary, and then cached.
+* Implemented one-third interpolation filters, however, this did not
+measurably increase resampler's speed.
+
+Version 3.7:
+
+* Used ippsMul_64f_I() in the CDSPRealFFT::multiplyBlockZ() function for a
+minor conversion speed increase in Intel IPP mode.
+
+Version 3.6:
+
+* Added memory alignment to allocated buffers which boosts performance by 1.5%
+when Intel IPP FFT is in use.
+* Implemented PFFFT support.
+
+Version 3.5:
+
+* Improved resampling speed very slightly.
+* Updated the `r8bfreesrc` benchmark tool to support RF64 WAV files.
+
+Version 3.4:
+
+* Added a more efficient half-band filters for >= 256 resampling ratios.
+
+Version 3.3:
+
+* Made minor fix to downsampling for some use cases of CDSPBlockConvolver,
+did not affect resampler.
+* Converted CDSPHBUpsampler and CDSPHBDownsampler's inner functions to
+static functions, which boosted high-ratio resampling performance measurably.
+
+Version 3.2:
+
+* Minor fix to the latency consumption mechanism.
+
+Version 3.1:
+
+* Reoptimized fractional delay filter's windowing function.
+
+Version 3.0:
+
+* Implemented a new variant of the getInLenBeforeOutStart() function.
+* Reimplemented oneshot() function to support `float` buffer types.
+* Considerably improved downsampling performance at high resampling ratios.
+* Implemented intermediate interpolation technique which boosted upsampling
+performance for most resampling ratios considerably.
+* Removed the ConvCount constant - now resampler supports virtually any
+resampling ratios.
+* Removed the UsePower2 parameter from the resampler constructor.
+* Now resampler's process() function always returns pointer to the internal
+buffer, input buffer is returned only if no resampling happens.
+* Resampler's getMaxOutLen() function can now be used to obtain the maximal
+output length that can be produced by the resampler in a single call.
+* Added a more efficient "one-third" filters to half-band upsampler and
+downsampler.
+
+Version 2.1:
+
+* Optimized 2X half-band downsampler.
+
+Version 2.0:
+
+* Optimized power-of-2 upsampling.
+
+Version 1.9:
+
+* Optimized half-band downsampling filter.
+* Implemented whole-number stepping resampling.
+* Added `R8B_EXTFFT` configuration option.
+* Fixed initial sub-sample offseting on downsampling.
+
+Version 1.8:
+
+* Added `R8B_FASTTIMING` configuration option.
+
+Version 1.7:
+
+* Improved sample timing precision.
+* Increased CDSPResampler :: ConvCountMax to 28 to support a lot wider
+resampling ratios.
+* Added `bench` tools.
+* Removed getInLenBeforeOutStart() due to incorrect calculation.
diff --git a/Src/external_dependencies/openmpt-trunk/include/r8brain/fft4g.h b/Src/external_dependencies/openmpt-trunk/include/r8brain/fft4g.h
new file mode 100644
index 00000000..f6f2bc2a
--- /dev/null
+++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/fft4g.h
@@ -0,0 +1,1316 @@
+//$ nobt
+//$ nocpp
+
+/**
+ * @file fft4g.h
+ *
+ * @brief Wrapper class for Takuya OOURA's FFT functions.
+ *
+ * Functions from the FFT package by: Copyright(C) 1996-2001 Takuya OOURA
+ * http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html
+ *
+ * Modified and used with permission granted by the license.
+ *
+ * Here, the original "fft4g.c" file was wrapped into the "ooura_fft" class.
+ */
+
+#ifndef R8B_FFT4G_INCLUDED
+#define R8B_FFT4G_INCLUDED
+
+/*
+Fast Fourier/Cosine/Sine Transform
+    dimension   :one
+    data length :power of 2
+    decimation  :frequency
+    radix       :4, 2
+    data        :inplace
+    table       :use
+functions
+    cdft: Complex Discrete Fourier Transform
+    rdft: Real Discrete Fourier Transform
+    ddct: Discrete Cosine Transform
+    ddst: Discrete Sine Transform
+    dfct: Cosine Transform of RDFT (Real Symmetric DFT)
+    dfst: Sine Transform of RDFT (Real Anti-symmetric DFT)
+function prototypes
+    void cdft(int, int, FPType *, int *, FPType *);
+    void rdft(int, int, FPType *, int *, FPType *);
+    void ddct(int, int, FPType *, int *, FPType *);
+    void ddst(int, int, FPType *, int *, FPType *);
+    void dfct(int, FPType *, FPType *, int *, FPType *);
+    void dfst(int, FPType *, FPType *, int *, FPType *);
+
+
+-------- Complex DFT (Discrete Fourier Transform) --------
+    [definition]
+        <case1>
+            X[k] = sum_j=0^n-1 x[j]*exp(2*pi*i*j*k/n), 0<=k<n
+        <case2>
+            X[k] = sum_j=0^n-1 x[j]*exp(-2*pi*i*j*k/n), 0<=k<n
+        (notes: sum_j=0^n-1 is a summation from j=0 to n-1)
+    [usage]
+        <case1>
+            ip[0] = 0; // first time only
+            cdft(2*n, 1, a, ip, w);
+        <case2>
+            ip[0] = 0; // first time only
+            cdft(2*n, -1, a, ip, w);
+    [parameters]
+        2*n            :data length (int)
+                        n >= 1, n = power of 2
+        a[0...2*n-1]   :input/output data (FPType *)
+                        input data
+                            a[2*j] = Re(x[j]),
+                            a[2*j+1] = Im(x[j]), 0<=j<n
+                        output data
+                            a[2*k] = Re(X[k]),
+                            a[2*k+1] = Im(X[k]), 0<=k<n
+        ip[0...*]      :work area for bit reversal (int *)
+                        length of ip >= 2+sqrt(n)
+                        strictly,
+                        length of ip >=
+                            2+(1<<(int)(log(n+0.5)/log(2))/2).
+                        ip[0],ip[1] are pointers of the cos/sin table.
+        w[0...n/2-1]   :cos/sin table (FPType *)
+                        w[],ip[] are initialized if ip[0] == 0.
+    [remark]
+        Inverse of
+            cdft(2*n, -1, a, ip, w);
+        is
+            cdft(2*n, 1, a, ip, w);
+            for (j = 0; j <= 2 * n - 1; j++) {
+                a[j] *= 1.0 / n;
+            }
+        .
+
+
+-------- Real DFT / Inverse of Real DFT --------
+    [definition]
+        <case1> RDFT
+            R[k] = sum_j=0^n-1 a[j]*cos(2*pi*j*k/n), 0<=k<=n/2
+            I[k] = sum_j=0^n-1 a[j]*sin(2*pi*j*k/n), 0<k<n/2
+        <case2> IRDFT (excluding scale)
+            a[k] = (R[0] + R[n/2]*cos(pi*k))/2 +
+                   sum_j=1^n/2-1 R[j]*cos(2*pi*j*k/n) +
+                   sum_j=1^n/2-1 I[j]*sin(2*pi*j*k/n), 0<=k<n
+    [usage]
+        <case1>
+            ip[0] = 0; // first time only
+            rdft(n, 1, a, ip, w);
+        <case2>
+            ip[0] = 0; // first time only
+            rdft(n, -1, a, ip, w);
+    [parameters]
+        n              :data length (int)
+                        n >= 2, n = power of 2
+        a[0...n-1]     :input/output data (FPType *)
+                        <case1>
+                            output data
+                                a[2*k] = R[k], 0<=k<n/2
+                                a[2*k+1] = I[k], 0<k<n/2
+                                a[1] = R[n/2]
+                        <case2>
+                            input data
+                                a[2*j] = R[j], 0<=j<n/2
+                                a[2*j+1] = I[j], 0<j<n/2
+                                a[1] = R[n/2]
+        ip[0...*]      :work area for bit reversal (int *)
+                        length of ip >= 2+sqrt(n/2)
+                        strictly,
+                        length of ip >=
+                            2+(1<<(int)(log(n/2+0.5)/log(2))/2).
+                        ip[0],ip[1] are pointers of the cos/sin table.
+        w[0...n/2-1]   :cos/sin table (FPType *)
+                        w[],ip[] are initialized if ip[0] == 0.
+    [remark]
+        Inverse of
+            rdft(n, 1, a, ip, w);
+        is
+            rdft(n, -1, a, ip, w);
+            for (j = 0; j <= n - 1; j++) {
+                a[j] *= 2.0 / n;
+            }
+        .
+
+
+-------- DCT (Discrete Cosine Transform) / Inverse of DCT --------
+    [definition]
+        <case1> IDCT (excluding scale)
+            C[k] = sum_j=0^n-1 a[j]*cos(pi*j*(k+1/2)/n), 0<=k<n
+        <case2> DCT
+            C[k] = sum_j=0^n-1 a[j]*cos(pi*(j+1/2)*k/n), 0<=k<n
+    [usage]
+        <case1>
+            ip[0] = 0; // first time only
+            ddct(n, 1, a, ip, w);
+        <case2>
+            ip[0] = 0; // first time only
+            ddct(n, -1, a, ip, w);
+    [parameters]
+        n              :data length (int)
+                        n >= 2, n = power of 2
+        a[0...n-1]     :input/output data (FPType *)
+                        output data
+                            a[k] = C[k], 0<=k<n
+        ip[0...*]      :work area for bit reversal (int *)
+                        length of ip >= 2+sqrt(n/2)
+                        strictly,
+                        length of ip >=
+                            2+(1<<(int)(log(n/2+0.5)/log(2))/2).
+                        ip[0],ip[1] are pointers of the cos/sin table.
+        w[0...n*5/4-1] :cos/sin table (FPType *)
+                        w[],ip[] are initialized if ip[0] == 0.
+    [remark]
+        Inverse of
+            ddct(n, -1, a, ip, w);
+        is
+            a[0] *= 0.5;
+            ddct(n, 1, a, ip, w);
+            for (j = 0; j <= n - 1; j++) {
+                a[j] *= 2.0 / n;
+            }
+        .
+
+
+-------- DST (Discrete Sine Transform) / Inverse of DST --------
+    [definition]
+        <case1> IDST (excluding scale)
+            S[k] = sum_j=1^n A[j]*sin(pi*j*(k+1/2)/n), 0<=k<n
+        <case2> DST
+            S[k] = sum_j=0^n-1 a[j]*sin(pi*(j+1/2)*k/n), 0<k<=n
+    [usage]
+        <case1>
+            ip[0] = 0; // first time only
+            ddst(n, 1, a, ip, w);
+        <case2>
+            ip[0] = 0; // first time only
+            ddst(n, -1, a, ip, w);
+    [parameters]
+        n              :data length (int)
+                        n >= 2, n = power of 2
+        a[0...n-1]     :input/output data (FPType *)
+                        <case1>
+                            input data
+                                a[j] = A[j], 0<j<n
+                                a[0] = A[n]
+                            output data
+                                a[k] = S[k], 0<=k<n
+                        <case2>
+                            output data
+                                a[k] = S[k], 0<k<n
+                                a[0] = S[n]
+        ip[0...*]      :work area for bit reversal (int *)
+                        length of ip >= 2+sqrt(n/2)
+                        strictly,
+                        length of ip >=
+                            2+(1<<(int)(log(n/2+0.5)/log(2))/2).
+                        ip[0],ip[1] are pointers of the cos/sin table.
+        w[0...n*5/4-1] :cos/sin table (FPType *)
+                        w[],ip[] are initialized if ip[0] == 0.
+    [remark]
+        Inverse of
+            ddst(n, -1, a, ip, w);
+        is
+            a[0] *= 0.5;
+            ddst(n, 1, a, ip, w);
+            for (j = 0; j <= n - 1; j++) {
+                a[j] *= 2.0 / n;
+            }
+        .
+
+
+-------- Cosine Transform of RDFT (Real Symmetric DFT) --------
+    [definition]
+        C[k] = sum_j=0^n a[j]*cos(pi*j*k/n), 0<=k<=n
+    [usage]
+        ip[0] = 0; // first time only
+        dfct(n, a, t, ip, w);
+    [parameters]
+        n              :data length - 1 (int)
+                        n >= 2, n = power of 2
+        a[0...n]       :input/output data (FPType *)
+                        output data
+                            a[k] = C[k], 0<=k<=n
+        t[0...n/2]     :work area (FPType *)
+        ip[0...*]      :work area for bit reversal (int *)
+                        length of ip >= 2+sqrt(n/4)
+                        strictly,
+                        length of ip >=
+                            2+(1<<(int)(log(n/4+0.5)/log(2))/2).
+                        ip[0],ip[1] are pointers of the cos/sin table.
+        w[0...n*5/8-1] :cos/sin table (FPType *)
+                        w[],ip[] are initialized if ip[0] == 0.
+    [remark]
+        Inverse of
+            a[0] *= 0.5;
+            a[n] *= 0.5;
+            dfct(n, a, t, ip, w);
+        is
+            a[0] *= 0.5;
+            a[n] *= 0.5;
+            dfct(n, a, t, ip, w);
+            for (j = 0; j <= n; j++) {
+                a[j] *= 2.0 / n;
+            }
+        .
+
+
+-------- Sine Transform of RDFT (Real Anti-symmetric DFT) --------
+    [definition]
+        S[k] = sum_j=1^n-1 a[j]*sin(pi*j*k/n), 0<k<n
+    [usage]
+        ip[0] = 0; // first time only
+        dfst(n, a, t, ip, w);
+    [parameters]
+        n              :data length + 1 (int)
+                        n >= 2, n = power of 2
+        a[0...n-1]     :input/output data (FPType *)
+                        output data
+                            a[k] = S[k], 0<k<n
+                        (a[0] is used for work area)
+        t[0...n/2-1]   :work area (FPType *)
+        ip[0...*]      :work area for bit reversal (int *)
+                        length of ip >= 2+sqrt(n/4)
+                        strictly,
+                        length of ip >=
+                            2+(1<<(int)(log(n/4+0.5)/log(2))/2).
+                        ip[0],ip[1] are pointers of the cos/sin table.
+        w[0...n*5/8-1] :cos/sin table (FPType *)
+                        w[],ip[] are initialized if ip[0] == 0.
+    [remark]
+        Inverse of
+            dfst(n, a, t, ip, w);
+        is
+            dfst(n, a, t, ip, w);
+            for (j = 1; j <= n - 1; j++) {
+                a[j] *= 2.0 / n;
+            }
+        .
+
+
+Appendix :
+    The cos/sin table is recalculated when the larger table required.
+    w[] and ip[] are compatible with all routines.
+*/
+
+namespace r8b {
+
+/**
+ * @brief A wrapper class around Takuya OOURA's FFT functions.
+ *
+ * A wrapper class around fft4g.c file's FFT functions by Takuya OOURA.
+ * Provides static private functions for use by the CDSPRealFFT class.
+ */
+
+class ooura_fft
+{
+friend class CDSPRealFFT;
+
+private:
+typedef double FPType;
+
+static void cdft(int n, int isgn, FPType *a, int *ip, FPType *w)
+{
+    if (n > (ip[0] << 2)) {
+        makewt(n >> 2, ip, w);
+    }
+    if (n > 4) {
+        if (isgn >= 0) {
+            bitrv2(n, ip + 2, a);
+            cftfsub(n, a, w);
+        } else {
+            bitrv2conj(n, ip + 2, a);
+            cftbsub(n, a, w);
+        }
+    } else if (n == 4) {
+        cftfsub(n, a, w);
+    }
+}
+
+static void rdft(int n, int isgn, FPType *a, int *ip, FPType *w)
+{
+    int nw, nc;
+    double xi;
+
+    nw = ip[0];
+    if (n > (nw << 2)) {
+        nw = n >> 2;
+        makewt(nw, ip, w);
+    }
+    nc = ip[1];
+    if (n > (nc << 2)) {
+        nc = n >> 2;
+        makect(nc, ip, w + nw);
+    }
+    if (isgn >= 0) {
+        if (n > 4) {
+            bitrv2(n, ip + 2, a);
+            cftfsub(n, a, w);
+            rftfsub(n, a, nc, w + nw);
+        } else if (n == 4) {
+            cftfsub(n, a, w);
+        }
+        xi = a[0] - a[1];
+        a[0] += a[1];
+        a[1] = xi;
+    } else {
+        a[1] = 0.5 * (a[0] - a[1]);
+        a[0] -= a[1];
+        if (n > 4) {
+            rftbsub(n, a, nc, w + nw);
+            bitrv2(n, ip + 2, a);
+            cftbsub(n, a, w);
+        } else if (n == 4) {
+            cftfsub(n, a, w);
+        }
+    }
+}
+
+static void ddct(int n, int isgn, FPType *a, int *ip, FPType *w)
+{
+    int j, nw, nc;
+    double xr;
+
+    nw = ip[0];
+    if (n > (nw << 2)) {
+        nw = n >> 2;
+        makewt(nw, ip, w);
+    }
+    nc = ip[1];
+    if (n > nc) {
+        nc = n;
+        makect(nc, ip, w + nw);
+    }
+    if (isgn < 0) {
+        xr = a[n - 1];
+        for (j = n - 2; j >= 2; j -= 2) {
+            a[j + 1] = a[j] - a[j - 1];
+            a[j] += a[j - 1];
+        }
+        a[1] = a[0] - xr;
+        a[0] += xr;
+        if (n > 4) {
+            rftbsub(n, a, nc, w + nw);
+            bitrv2(n, ip + 2, a);
+            cftbsub(n, a, w);
+        } else if (n == 4) {
+            cftfsub(n, a, w);
+        }
+    }
+    dctsub(n, a, nc, w + nw);
+    if (isgn >= 0) {
+        if (n > 4) {
+            bitrv2(n, ip + 2, a);
+            cftfsub(n, a, w);
+            rftfsub(n, a, nc, w + nw);
+        } else if (n == 4) {
+            cftfsub(n, a, w);
+        }
+        xr = a[0] - a[1];
+        a[0] += a[1];
+        for (j = 2; j < n; j += 2) {
+            a[j - 1] = a[j] - a[j + 1];
+            a[j] += a[j + 1];
+        }
+        a[n - 1] = xr;
+    }
+}
+
+static void ddst(int n, int isgn, FPType *a, int *ip, FPType *w)
+{
+    int j, nw, nc;
+    double xr;
+
+    nw = ip[0];
+    if (n > (nw << 2)) {
+        nw = n >> 2;
+        makewt(nw, ip, w);
+    }
+    nc = ip[1];
+    if (n > nc) {
+        nc = n;
+        makect(nc, ip, w + nw);
+    }
+    if (isgn < 0) {
+        xr = a[n - 1];
+        for (j = n - 2; j >= 2; j -= 2) {
+            a[j + 1] = -a[j] - a[j - 1];
+            a[j] -= a[j - 1];
+        }
+        a[1] = a[0] + xr;
+        a[0] -= xr;
+        if (n > 4) {
+            rftbsub(n, a, nc, w + nw);
+            bitrv2(n, ip + 2, a);
+            cftbsub(n, a, w);
+        } else if (n == 4) {
+            cftfsub(n, a, w);
+        }
+    }
+    dstsub(n, a, nc, w + nw);
+    if (isgn >= 0) {
+        if (n > 4) {
+            bitrv2(n, ip + 2, a);
+            cftfsub(n, a, w);
+            rftfsub(n, a, nc, w + nw);
+        } else if (n == 4) {
+            cftfsub(n, a, w);
+        }
+        xr = a[0] - a[1];
+        a[0] += a[1];
+        for (j = 2; j < n; j += 2) {
+            a[j - 1] = -a[j] - a[j + 1];
+            a[j] -= a[j + 1];
+        }
+        a[n - 1] = -xr;
+    }
+}
+
+static void dfct(int n, FPType *a, FPType *t, int *ip, FPType *w)
+{
+    int j, k, l, m, mh, nw, nc;
+    double xr, xi, yr, yi;
+
+    nw = ip[0];
+    if (n > (nw << 3)) {
+        nw = n >> 3;
+        makewt(nw, ip, w);
+    }
+    nc = ip[1];
+    if (n > (nc << 1)) {
+        nc = n >> 1;
+        makect(nc, ip, w + nw);
+    }
+    m = n >> 1;
+    yi = a[m];
+    xi = a[0] + a[n];
+    a[0] -= a[n];
+    t[0] = xi - yi;
+    t[m] = xi + yi;
+    if (n > 2) {
+        mh = m >> 1;
+        for (j = 1; j < mh; j++) {
+            k = m - j;
+            xr = a[j] - a[n - j];
+            xi = a[j] + a[n - j];
+            yr = a[k] - a[n - k];
+            yi = a[k] + a[n - k];
+            a[j] = xr;
+            a[k] = yr;
+            t[j] = xi - yi;
+            t[k] = xi + yi;
+        }
+        t[mh] = a[mh] + a[n - mh];
+        a[mh] -= a[n - mh];
+        dctsub(m, a, nc, w + nw);
+        if (m > 4) {
+            bitrv2(m, ip + 2, a);
+            cftfsub(m, a, w);
+            rftfsub(m, a, nc, w + nw);
+        } else if (m == 4) {
+            cftfsub(m, a, w);
+        }
+        a[n - 1] = a[0] - a[1];
+        a[1] = a[0] + a[1];
+        for (j = m - 2; j >= 2; j -= 2) {
+            a[2 * j + 1] = a[j] + a[j + 1];
+            a[2 * j - 1] = a[j] - a[j + 1];
+        }
+        l = 2;
+        m = mh;
+        while (m >= 2) {
+            dctsub(m, t, nc, w + nw);
+            if (m > 4) {
+                bitrv2(m, ip + 2, t);
+                cftfsub(m, t, w);
+                rftfsub(m, t, nc, w + nw);
+            } else if (m == 4) {
+                cftfsub(m, t, w);
+            }
+            a[n - l] = t[0] - t[1];
+            a[l] = t[0] + t[1];
+            k = 0;
+            for (j = 2; j < m; j += 2) {
+                k += l << 2;
+                a[k - l] = t[j] - t[j + 1];
+                a[k + l] = t[j] + t[j + 1];
+            }
+            l <<= 1;
+            mh = m >> 1;
+            for (j = 0; j < mh; j++) {
+                k = m - j;
+                t[j] = t[m + k] - t[m + j];
+                t[k] = t[m + k] + t[m + j];
+            }
+            t[mh] = t[m + mh];
+            m = mh;
+        }
+        a[l] = t[0];
+        a[n] = t[2] - t[1];
+        a[0] = t[2] + t[1];
+    } else {
+        a[1] = a[0];
+        a[2] = t[0];
+        a[0] = t[1];
+    }
+}
+
+static void dfst(int n, FPType *a, FPType *t, int *ip, FPType *w)
+{
+    int j, k, l, m, mh, nw, nc;
+    double xr, xi, yr, yi;
+
+    nw = ip[0];
+    if (n > (nw << 3)) {
+        nw = n >> 3;
+        makewt(nw, ip, w);
+    }
+    nc = ip[1];
+    if (n > (nc << 1)) {
+        nc = n >> 1;
+        makect(nc, ip, w + nw);
+    }
+    if (n > 2) {
+        m = n >> 1;
+        mh = m >> 1;
+        for (j = 1; j < mh; j++) {
+            k = m - j;
+            xr = a[j] + a[n - j];
+            xi = a[j] - a[n - j];
+            yr = a[k] + a[n - k];
+            yi = a[k] - a[n - k];
+            a[j] = xr;
+            a[k] = yr;
+            t[j] = xi + yi;
+            t[k] = xi - yi;
+        }
+        t[0] = a[mh] - a[n - mh];
+        a[mh] += a[n - mh];
+        a[0] = a[m];
+        dstsub(m, a, nc, w + nw);
+        if (m > 4) {
+            bitrv2(m, ip + 2, a);
+            cftfsub(m, a, w);
+            rftfsub(m, a, nc, w + nw);
+        } else if (m == 4) {
+            cftfsub(m, a, w);
+        }
+        a[n - 1] = a[1] - a[0];
+        a[1] = a[0] + a[1];
+        for (j = m - 2; j >= 2; j -= 2) {
+            a[2 * j + 1] = a[j] - a[j + 1];
+            a[2 * j - 1] = -a[j] - a[j + 1];
+        }
+        l = 2;
+        m = mh;
+        while (m >= 2) {
+            dstsub(m, t, nc, w + nw);
+            if (m > 4) {
+                bitrv2(m, ip + 2, t);
+                cftfsub(m, t, w);
+                rftfsub(m, t, nc, w + nw);
+            } else if (m == 4) {
+                cftfsub(m, t, w);
+            }
+            a[n - l] = t[1] - t[0];
+            a[l] = t[0] + t[1];
+            k = 0;
+            for (j = 2; j < m; j += 2) {
+                k += l << 2;
+                a[k - l] = -t[j] - t[j + 1];
+                a[k + l] = t[j] - t[j + 1];
+            }
+            l <<= 1;
+            mh = m >> 1;
+            for (j = 1; j < mh; j++) {
+                k = m - j;
+                t[j] = t[m + k] + t[m + j];
+                t[k] = t[m + k] - t[m + j];
+            }
+            t[0] = t[m + mh];
+            m = mh;
+        }
+        a[l] = t[0];
+    }
+    a[0] = 0;
+}
+
+/* -------- initializing routines -------- */
+
+static void makewt(int nw, int *ip, FPType *w)
+{
+    int j, nwh;
+    double delta, x, y;
+
+    ip[0] = nw;
+    ip[1] = 1;
+    if (nw > 2) {
+        nwh = nw >> 1;
+        delta = atan(1.0) / nwh;
+        w[0] = 1;
+        w[1] = 0;
+        w[nwh] = cos(delta * nwh);
+        w[nwh + 1] = w[nwh];
+        if (nwh > 2) {
+            for (j = 2; j < nwh; j += 2) {
+                x = cos(delta * j);
+                y = sin(delta * j);
+                w[j] = x;
+                w[j + 1] = y;
+                w[nw - j] = y;
+                w[nw - j + 1] = x;
+            }
+            bitrv2(nw, ip + 2, w);
+        }
+    }
+}
+
+static void makect(int nc, int *ip, FPType *c)
+{
+    int j, nch;
+    double delta;
+
+    ip[1] = nc;
+    if (nc > 1) {
+        nch = nc >> 1;
+        delta = atan(1.0) / nch;
+        c[0] = cos(delta * nch);
+        c[nch] = 0.5 * c[0];
+        for (j = 1; j < nch; j++) {
+            c[j] = 0.5 * cos(delta * j);
+            c[nc - j] = 0.5 * sin(delta * j);
+        }
+    }
+}
+
+/* -------- child routines -------- */
+
+static void bitrv2(int n, int *ip, FPType *a)
+{
+    int j, j1, k, k1, l, m, m2;
+    double xr, xi, yr, yi;
+
+    ip[0] = 0;
+    l = n;
+    m = 1;
+    while ((m << 3) < l) {
+        l >>= 1;
+        for (j = 0; j < m; j++) {
+            ip[m + j] = ip[j] + l;
+        }
+        m <<= 1;
+    }
+    m2 = 2 * m;
+    if ((m << 3) == l) {
+        for (k = 0; k < m; k++) {
+            for (j = 0; j < k; j++) {
+                j1 = 2 * j + ip[k];
+                k1 = 2 * k + ip[j];
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += m2;
+                k1 += 2 * m2;
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += m2;
+                k1 -= m2;
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += m2;
+                k1 += 2 * m2;
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+            }
+            j1 = 2 * k + m2 + ip[k];
+            k1 = j1 + m2;
+            xr = a[j1];
+            xi = a[j1 + 1];
+            yr = a[k1];
+            yi = a[k1 + 1];
+            a[j1] = yr;
+            a[j1 + 1] = yi;
+            a[k1] = xr;
+            a[k1 + 1] = xi;
+        }
+    } else {
+        for (k = 1; k < m; k++) {
+            for (j = 0; j < k; j++) {
+                j1 = 2 * j + ip[k];
+                k1 = 2 * k + ip[j];
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += m2;
+                k1 += m2;
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+            }
+        }
+    }
+}
+
+static void bitrv2conj(int n, int *ip, FPType *a)
+{
+    int j, j1, k, k1, l, m, m2;
+    double xr, xi, yr, yi;
+
+    ip[0] = 0;
+    l = n;
+    m = 1;
+    while ((m << 3) < l) {
+        l >>= 1;
+        for (j = 0; j < m; j++) {
+            ip[m + j] = ip[j] + l;
+        }
+        m <<= 1;
+    }
+    m2 = 2 * m;
+    if ((m << 3) == l) {
+        for (k = 0; k < m; k++) {
+            for (j = 0; j < k; j++) {
+                j1 = 2 * j + ip[k];
+                k1 = 2 * k + ip[j];
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += m2;
+                k1 += 2 * m2;
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += m2;
+                k1 -= m2;
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += m2;
+                k1 += 2 * m2;
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+            }
+            k1 = 2 * k + ip[k];
+            a[k1 + 1] = -a[k1 + 1];
+            j1 = k1 + m2;
+            k1 = j1 + m2;
+            xr = a[j1];
+            xi = -a[j1 + 1];
+            yr = a[k1];
+            yi = -a[k1 + 1];
+            a[j1] = yr;
+            a[j1 + 1] = yi;
+            a[k1] = xr;
+            a[k1 + 1] = xi;
+            k1 += m2;
+            a[k1 + 1] = -a[k1 + 1];
+        }
+    } else {
+        a[1] = -a[1];
+        a[m2 + 1] = -a[m2 + 1];
+        for (k = 1; k < m; k++) {
+            for (j = 0; j < k; j++) {
+                j1 = 2 * j + ip[k];
+                k1 = 2 * k + ip[j];
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += m2;
+                k1 += m2;
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+            }
+            k1 = 2 * k + ip[k];
+            a[k1 + 1] = -a[k1 + 1];
+            a[k1 + m2 + 1] = -a[k1 + m2 + 1];
+        }
+    }
+}
+
+static void cftfsub(int n, FPType *a, const FPType *w)
+{
+    int j, j1, j2, j3, l;
+    double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+
+    l = 2;
+    if (n > 8) {
+        cft1st(n, a, w);
+        l = 8;
+        while ((l << 2) < n) {
+            cftmdl(n, l, a, w);
+            l <<= 2;
+        }
+    }
+    if ((l << 2) == n) {
+        for (j = 0; j < l; j += 2) {
+            j1 = j + l;
+            j2 = j1 + l;
+            j3 = j2 + l;
+            x0r = a[j] + a[j1];
+            x0i = a[j + 1] + a[j1 + 1];
+            x1r = a[j] - a[j1];
+            x1i = a[j + 1] - a[j1 + 1];
+            x2r = a[j2] + a[j3];
+            x2i = a[j2 + 1] + a[j3 + 1];
+            x3r = a[j2] - a[j3];
+            x3i = a[j2 + 1] - a[j3 + 1];
+            a[j] = x0r + x2r;
+            a[j + 1] = x0i + x2i;
+            a[j2] = x0r - x2r;
+            a[j2 + 1] = x0i - x2i;
+            a[j1] = x1r - x3i;
+            a[j1 + 1] = x1i + x3r;
+            a[j3] = x1r + x3i;
+            a[j3 + 1] = x1i - x3r;
+        }
+    } else {
+        for (j = 0; j < l; j += 2) {
+            j1 = j + l;
+            x0r = a[j] - a[j1];
+            x0i = a[j + 1] - a[j1 + 1];
+            a[j] += a[j1];
+            a[j + 1] += a[j1 + 1];
+            a[j1] = x0r;
+            a[j1 + 1] = x0i;
+        }
+    }
+}
+
+static void cftbsub(int n, FPType *a, const FPType *w)
+{
+    int j, j1, j2, j3, l;
+    double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+
+    l = 2;
+    if (n > 8) {
+        cft1st(n, a, w);
+        l = 8;
+        while ((l << 2) < n) {
+            cftmdl(n, l, a, w);
+            l <<= 2;
+        }
+    }
+    if ((l << 2) == n) {
+        for (j = 0; j < l; j += 2) {
+            j1 = j + l;
+            j2 = j1 + l;
+            j3 = j2 + l;
+            x0r = a[j] + a[j1];
+            x0i = -a[j + 1] - a[j1 + 1];
+            x1r = a[j] - a[j1];
+            x1i = -a[j + 1] + a[j1 + 1];
+            x2r = a[j2] + a[j3];
+            x2i = a[j2 + 1] + a[j3 + 1];
+            x3r = a[j2] - a[j3];
+            x3i = a[j2 + 1] - a[j3 + 1];
+            a[j] = x0r + x2r;
+            a[j + 1] = x0i - x2i;
+            a[j2] = x0r - x2r;
+            a[j2 + 1] = x0i + x2i;
+            a[j1] = x1r - x3i;
+            a[j1 + 1] = x1i - x3r;
+            a[j3] = x1r + x3i;
+            a[j3 + 1] = x1i + x3r;
+        }
+    } else {
+        for (j = 0; j < l; j += 2) {
+            j1 = j + l;
+            x0r = a[j] - a[j1];
+            x0i = -a[j + 1] + a[j1 + 1];
+            a[j] += a[j1];
+            a[j + 1] = -a[j + 1] - a[j1 + 1];
+            a[j1] = x0r;
+            a[j1 + 1] = x0i;
+        }
+    }
+}
+
+static void cft1st(int n, FPType *a, const FPType *w)
+{
+    int j, k1, k2;
+    double wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
+    double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+
+    x0r = a[0] + a[2];
+    x0i = a[1] + a[3];
+    x1r = a[0] - a[2];
+    x1i = a[1] - a[3];
+    x2r = a[4] + a[6];
+    x2i = a[5] + a[7];
+    x3r = a[4] - a[6];
+    x3i = a[5] - a[7];
+    a[0] = x0r + x2r;
+    a[1] = x0i + x2i;
+    a[4] = x0r - x2r;
+    a[5] = x0i - x2i;
+    a[2] = x1r - x3i;
+    a[3] = x1i + x3r;
+    a[6] = x1r + x3i;
+    a[7] = x1i - x3r;
+    wk1r = w[2];
+    x0r = a[8] + a[10];
+    x0i = a[9] + a[11];
+    x1r = a[8] - a[10];
+    x1i = a[9] - a[11];
+    x2r = a[12] + a[14];
+    x2i = a[13] + a[15];
+    x3r = a[12] - a[14];
+    x3i = a[13] - a[15];
+    a[8] = x0r + x2r;
+    a[9] = x0i + x2i;
+    a[12] = x2i - x0i;
+    a[13] = x0r - x2r;
+    x0r = x1r - x3i;
+    x0i = x1i + x3r;
+    a[10] = wk1r * (x0r - x0i);
+    a[11] = wk1r * (x0r + x0i);
+    x0r = x3i + x1r;
+    x0i = x3r - x1i;
+    a[14] = wk1r * (x0i - x0r);
+    a[15] = wk1r * (x0i + x0r);
+    k1 = 0;
+    for (j = 16; j < n; j += 16) {
+        k1 += 2;
+        k2 = 2 * k1;
+        wk2r = w[k1];
+        wk2i = w[k1 + 1];
+        wk1r = w[k2];
+        wk1i = w[k2 + 1];
+        wk3r = wk1r - 2 * wk2i * wk1i;
+        wk3i = 2 * wk2i * wk1r - wk1i;
+        x0r = a[j] + a[j + 2];
+        x0i = a[j + 1] + a[j + 3];
+        x1r = a[j] - a[j + 2];
+        x1i = a[j + 1] - a[j + 3];
+        x2r = a[j + 4] + a[j + 6];
+        x2i = a[j + 5] + a[j + 7];
+        x3r = a[j + 4] - a[j + 6];
+        x3i = a[j + 5] - a[j + 7];
+        a[j] = x0r + x2r;
+        a[j + 1] = x0i + x2i;
+        x0r -= x2r;
+        x0i -= x2i;
+        a[j + 4] = wk2r * x0r - wk2i * x0i;
+        a[j + 5] = wk2r * x0i + wk2i * x0r;
+        x0r = x1r - x3i;
+        x0i = x1i + x3r;
+        a[j + 2] = wk1r * x0r - wk1i * x0i;
+        a[j + 3] = wk1r * x0i + wk1i * x0r;
+        x0r = x1r + x3i;
+        x0i = x1i - x3r;
+        a[j + 6] = wk3r * x0r - wk3i * x0i;
+        a[j + 7] = wk3r * x0i + wk3i * x0r;
+        wk1r = w[k2 + 2];
+        wk1i = w[k2 + 3];
+        wk3r = wk1r - 2 * wk2r * wk1i;
+        wk3i = 2 * wk2r * wk1r - wk1i;
+        x0r = a[j + 8] + a[j + 10];
+        x0i = a[j + 9] + a[j + 11];
+        x1r = a[j + 8] - a[j + 10];
+        x1i = a[j + 9] - a[j + 11];
+        x2r = a[j + 12] + a[j + 14];
+        x2i = a[j + 13] + a[j + 15];
+        x3r = a[j + 12] - a[j + 14];
+        x3i = a[j + 13] - a[j + 15];
+        a[j + 8] = x0r + x2r;
+        a[j + 9] = x0i + x2i;
+        x0r -= x2r;
+        x0i -= x2i;
+        a[j + 12] = -wk2i * x0r - wk2r * x0i;
+        a[j + 13] = -wk2i * x0i + wk2r * x0r;
+        x0r = x1r - x3i;
+        x0i = x1i + x3r;
+        a[j + 10] = wk1r * x0r - wk1i * x0i;
+        a[j + 11] = wk1r * x0i + wk1i * x0r;
+        x0r = x1r + x3i;
+        x0i = x1i - x3r;
+        a[j + 14] = wk3r * x0r - wk3i * x0i;
+        a[j + 15] = wk3r * x0i + wk3i * x0r;
+    }
+}
+
+static void cftmdl(int n, int l, FPType *a, const FPType *w)
+{
+    int j, j1, j2, j3, k, k1, k2, m, m2;
+    double wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
+    double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+
+    m = l << 2;
+    for (j = 0; j < l; j += 2) {
+        j1 = j + l;
+        j2 = j1 + l;
+        j3 = j2 + l;
+        x0r = a[j] + a[j1];
+        x0i = a[j + 1] + a[j1 + 1];
+        x1r = a[j] - a[j1];
+        x1i = a[j + 1] - a[j1 + 1];
+        x2r = a[j2] + a[j3];
+        x2i = a[j2 + 1] + a[j3 + 1];
+        x3r = a[j2] - a[j3];
+        x3i = a[j2 + 1] - a[j3 + 1];
+        a[j] = x0r + x2r;
+        a[j + 1] = x0i + x2i;
+        a[j2] = x0r - x2r;
+        a[j2 + 1] = x0i - x2i;
+        a[j1] = x1r - x3i;
+        a[j1 + 1] = x1i + x3r;
+        a[j3] = x1r + x3i;
+        a[j3 + 1] = x1i - x3r;
+    }
+    wk1r = w[2];
+    for (j = m; j < l + m; j += 2) {
+        j1 = j + l;
+        j2 = j1 + l;
+        j3 = j2 + l;
+        x0r = a[j] + a[j1];
+        x0i = a[j + 1] + a[j1 + 1];
+        x1r = a[j] - a[j1];
+        x1i = a[j + 1] - a[j1 + 1];
+        x2r = a[j2] + a[j3];
+        x2i = a[j2 + 1] + a[j3 + 1];
+        x3r = a[j2] - a[j3];
+        x3i = a[j2 + 1] - a[j3 + 1];
+        a[j] = x0r + x2r;
+        a[j + 1] = x0i + x2i;
+        a[j2] = x2i - x0i;
+        a[j2 + 1] = x0r - x2r;
+        x0r = x1r - x3i;
+        x0i = x1i + x3r;
+        a[j1] = wk1r * (x0r - x0i);
+        a[j1 + 1] = wk1r * (x0r + x0i);
+        x0r = x3i + x1r;
+        x0i = x3r - x1i;
+        a[j3] = wk1r * (x0i - x0r);
+        a[j3 + 1] = wk1r * (x0i + x0r);
+    }
+    k1 = 0;
+    m2 = 2 * m;
+    for (k = m2; k < n; k += m2) {
+        k1 += 2;
+        k2 = 2 * k1;
+        wk2r = w[k1];
+        wk2i = w[k1 + 1];
+        wk1r = w[k2];
+        wk1i = w[k2 + 1];
+        wk3r = wk1r - 2 * wk2i * wk1i;
+        wk3i = 2 * wk2i * wk1r - wk1i;
+        for (j = k; j < l + k; j += 2) {
+            j1 = j + l;
+            j2 = j1 + l;
+            j3 = j2 + l;
+            x0r = a[j] + a[j1];
+            x0i = a[j + 1] + a[j1 + 1];
+            x1r = a[j] - a[j1];
+            x1i = a[j + 1] - a[j1 + 1];
+            x2r = a[j2] + a[j3];
+            x2i = a[j2 + 1] + a[j3 + 1];
+            x3r = a[j2] - a[j3];
+            x3i = a[j2 + 1] - a[j3 + 1];
+            a[j] = x0r + x2r;
+            a[j + 1] = x0i + x2i;
+            x0r -= x2r;
+            x0i -= x2i;
+            a[j2] = wk2r * x0r - wk2i * x0i;
+            a[j2 + 1] = wk2r * x0i + wk2i * x0r;
+            x0r = x1r - x3i;
+            x0i = x1i + x3r;
+            a[j1] = wk1r * x0r - wk1i * x0i;
+            a[j1 + 1] = wk1r * x0i + wk1i * x0r;
+            x0r = x1r + x3i;
+            x0i = x1i - x3r;
+            a[j3] = wk3r * x0r - wk3i * x0i;
+            a[j3 + 1] = wk3r * x0i + wk3i * x0r;
+        }
+        wk1r = w[k2 + 2];
+        wk1i = w[k2 + 3];
+        wk3r = wk1r - 2 * wk2r * wk1i;
+        wk3i = 2 * wk2r * wk1r - wk1i;
+        for (j = k + m; j < l + (k + m); j += 2) {
+            j1 = j + l;
+            j2 = j1 + l;
+            j3 = j2 + l;
+            x0r = a[j] + a[j1];
+            x0i = a[j + 1] + a[j1 + 1];
+            x1r = a[j] - a[j1];
+            x1i = a[j + 1] - a[j1 + 1];
+            x2r = a[j2] + a[j3];
+            x2i = a[j2 + 1] + a[j3 + 1];
+            x3r = a[j2] - a[j3];
+            x3i = a[j2 + 1] - a[j3 + 1];
+            a[j] = x0r + x2r;
+            a[j + 1] = x0i + x2i;
+            x0r -= x2r;
+            x0i -= x2i;
+            a[j2] = -wk2i * x0r - wk2r * x0i;
+            a[j2 + 1] = -wk2i * x0i + wk2r * x0r;
+            x0r = x1r - x3i;
+            x0i = x1i + x3r;
+            a[j1] = wk1r * x0r - wk1i * x0i;
+            a[j1 + 1] = wk1r * x0i + wk1i * x0r;
+            x0r = x1r + x3i;
+            x0i = x1i - x3r;
+            a[j3] = wk3r * x0r - wk3i * x0i;
+            a[j3 + 1] = wk3r * x0i + wk3i * x0r;
+        }
+    }
+}
+
+static void rftfsub(int n, FPType *a, int nc, const FPType *c)
+{
+    int j, k, kk, ks, m;
+    double wkr, wki, xr, xi, yr, yi;
+
+    m = n >> 1;
+    ks = 2 * nc / m;
+    kk = 0;
+    for (j = 2; j < m; j += 2) {
+        k = n - j;
+        kk += ks;
+        wkr = 0.5 - c[nc - kk];
+        wki = c[kk];
+        xr = a[j] - a[k];
+        xi = a[j + 1] + a[k + 1];
+        yr = wkr * xr - wki * xi;
+        yi = wkr * xi + wki * xr;
+        a[j] -= yr;
+        a[j + 1] -= yi;
+        a[k] += yr;
+        a[k + 1] -= yi;
+    }
+}
+
+static void rftbsub(int n, FPType *a, int nc, const FPType *c)
+{
+    int j, k, kk, ks, m;
+    double wkr, wki, xr, xi, yr, yi;
+
+    a[1] = -a[1];
+    m = n >> 1;
+    ks = 2 * nc / m;
+    kk = 0;
+    for (j = 2; j < m; j += 2) {
+        k = n - j;
+        kk += ks;
+        wkr = 0.5 - c[nc - kk];
+        wki = c[kk];
+        xr = a[j] - a[k];
+        xi = a[j + 1] + a[k + 1];
+        yr = wkr * xr + wki * xi;
+        yi = wkr * xi - wki * xr;
+        a[j] -= yr;
+        a[j + 1] = yi - a[j + 1];
+        a[k] += yr;
+        a[k + 1] = yi - a[k + 1];
+    }
+    a[m + 1] = -a[m + 1];
+}
+
+static void dctsub(int n, FPType *a, int nc, const FPType *c)
+{
+    int j, k, kk, ks, m;
+    double wkr, wki, xr;
+
+    m = n >> 1;
+    ks = nc / n;
+    kk = 0;
+    for (j = 1; j < m; j++) {
+        k = n - j;
+        kk += ks;
+        wkr = c[kk] - c[nc - kk];
+        wki = c[kk] + c[nc - kk];
+        xr = wki * a[j] - wkr * a[k];
+        a[j] = wkr * a[j] + wki * a[k];
+        a[k] = xr;
+    }
+    a[m] *= c[0];
+}
+
+static void dstsub(int n, FPType *a, int nc, const FPType *c)
+{
+    int j, k, kk, ks, m;
+    double wkr, wki, xr;
+
+    m = n >> 1;
+    ks = nc / n;
+    kk = 0;
+    for (j = 1; j < m; j++) {
+        k = n - j;
+        kk += ks;
+        wkr = c[kk] - c[nc - kk];
+        wki = c[kk] + c[nc - kk];
+        xr = wki * a[k] - wkr * a[j];
+        a[k] = wkr * a[k] + wki * a[j];
+        a[j] = xr;
+    }
+    a[m] *= c[0];
+}
+};
+
+} // namespace r8b
+
+#endif // R8B_FFT4G_INCLUDED
diff --git a/Src/external_dependencies/openmpt-trunk/include/r8brain/r8bbase.cpp b/Src/external_dependencies/openmpt-trunk/include/r8brain/r8bbase.cpp
new file mode 100644
index 00000000..f877862e
--- /dev/null
+++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/r8bbase.cpp
@@ -0,0 +1,38 @@
+/**
+ * @file r8bbase.cpp
+ *
+ * @brief C++ file that should be compiled and included into your application.
+ *
+ * This is a single library file that should be compiled and included into the
+ * project that uses the "r8brain-free-src" sample rate converter. This file
+ * defines several global static objects used by the library.
+ *
+ * You may also need to include to your project: the "Kernel32" library
+ * (on Windows) and the "pthread" library on macOS and Linux.
+ *
+ * r8brain-free-src Copyright (c) 2013-2021 Aleksey Vaneev
+ * See the "LICENSE" file for license.
+ */
+
+#include "CDSPFIRFilter.h"
+#include "CDSPFracInterpolator.h"
+
+namespace r8b {
+
+#if R8B_FLTTEST
+	int InterpFilterFracs = -1;
+#endif // R8B_FLTTEST
+
+CSyncObject CDSPRealFFTKeeper :: StateSync;
+CDSPRealFFT :: CObjKeeper CDSPRealFFTKeeper :: FFTObjects[ 31 ];
+
+CSyncObject CDSPFIRFilterCache :: StateSync;
+CPtrKeeper< CDSPFIRFilter* > CDSPFIRFilterCache :: Objects;
+int CDSPFIRFilterCache :: ObjCount = 0;
+
+CSyncObject CDSPFracDelayFilterBankCache :: StateSync;
+CPtrKeeper< CDSPFracDelayFilterBank* > CDSPFracDelayFilterBankCache :: Objects;
+CPtrKeeper< CDSPFracDelayFilterBank* > CDSPFracDelayFilterBankCache :: StaticObjects;
+int CDSPFracDelayFilterBankCache :: ObjCount = 0;
+
+} // namespace r8b
diff --git a/Src/external_dependencies/openmpt-trunk/include/r8brain/r8bbase.h b/Src/external_dependencies/openmpt-trunk/include/r8brain/r8bbase.h
new file mode 100644
index 00000000..32054965
--- /dev/null
+++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/r8bbase.h
@@ -0,0 +1,1206 @@
+//$ nobt
+
+/**
+ * @file r8bbase.h
+ *
+ * @brief The "base" inclusion file with basic classes and functions.
+ *
+ * This is the "base" inclusion file for the "r8brain-free-src" sample rate
+ * converter. This inclusion file contains implementations of several small
+ * utility classes and functions used by the library.
+ *
+ * @mainpage
+ *
+ * @section intro_sec Introduction
+ *
+ * Open source (under the MIT license) high-quality professional audio sample
+ * rate converter (SRC) (resampling) library. Features routines for SRC, both
+ * up- and downsampling, to/from any sample rate, including non-integer sample
+ * rates: it can be also used for conversion to/from SACD sample rate and even
+ * go beyond that. SRC routines were implemented in multi-platform C++ code,
+ * and have a high level of optimality.
+ *
+ * For more information, please visit
+ * https://github.com/avaneev/r8brain-free-src
+ *
+ * @section license License
+ *
+ * The MIT License (MIT)
+ * 
+ * r8brain-free-src Copyright (c) 2013-2022 Aleksey Vaneev
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ * 
+ * Please credit the creator of this library in your documentation in the
+ * following way: "Sample rate converter designed by Aleksey Vaneev of
+ * Voxengo"
+ *
+ * @version 5.6
+ */
+
+#ifndef R8BBASE_INCLUDED
+#define R8BBASE_INCLUDED
+
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <math.h>
+#include "r8bconf.h"
+
+#if defined( _WIN32 )
+	#include <windows.h>
+#else // defined( _WIN32 )
+	#include <pthread.h>
+#endif // defined( _WIN32 )
+
+#if defined( __SSE4_2__ ) || defined( __SSE4_1__ ) || \
+	defined( __SSSE3__ ) || defined( __SSE3__ ) || defined( __SSE2__ ) || \
+	defined( __x86_64__ ) || defined( _M_AMD64 ) || defined( _M_X64 ) || \
+	defined( __amd64 )
+
+	#include <immintrin.h>
+
+	#define R8B_SSE2
+	#define R8B_SIMD_ISH
+
+#elif defined( __aarch64__ ) || defined( __arm64__ ) || defined( __ARM_NEON )
+
+	#include <arm_neon.h>
+
+	#define R8B_NEON
+
+	#if !defined( __APPLE__ )
+		#define R8B_SIMD_ISH // Shuffled interpolation is inefficient on M1.
+	#endif // !defined( __APPLE__ )
+
+#endif // ARM64
+
+/**
+ * @brief The "r8brain-free-src" library namespace.
+ *
+ * The "r8brain-free-src" sample rate converter library namespace.
+ */
+
+namespace r8b {
+
+/**
+ * Macro defines r8brain-free-src version string.
+ */
+
+#define R8B_VERSION "5.6"
+
+/**
+ * The macro equals to "pi" constant, fits 53-bit floating point mantissa.
+ */
+
+#define R8B_PI 3.14159265358979324
+
+/**
+ * The R8B_2PI macro equals to "2 * pi" constant, fits 53-bit floating point
+ * mantissa.
+ */
+
+#define R8B_2PI 6.28318530717958648
+
+/**
+ * The R8B_3PI macro equals to "3 * pi" constant, fits 53-bit floating point
+ * mantissa.
+ */
+
+#define R8B_3PI 9.42477796076937972
+
+/**
+ * The R8B_PId2 macro equals to "pi divided by 2" constant, fits 53-bit
+ * floating point mantissa.
+ */
+
+#define R8B_PId2 1.57079632679489662
+
+/**
+ * A special macro that defines empty copy-constructor and copy operator with
+ * the "private:" prefix. This macro should be used in classes that cannot be
+ * copied in a standard C++ way.
+ *
+ * This macro does not need to be defined in classes derived from a class
+ * where such macro was already used.
+ *
+ * @param ClassName The name of the class which uses this macro.
+ */
+
+#define R8BNOCTOR( ClassName ) \
+	private: \
+		ClassName( const ClassName& ) { } \
+		ClassName& operator = ( const ClassName& ) { return( *this ); }
+
+/**
+ * @brief The default base class for objects created on heap.
+ *
+ * Class that implements "new" and "delete" operators that use standard
+ * malloc() and free() functions.
+ */
+
+class CStdClassAllocator
+{
+public:
+	/**
+	 * @param n The size of the object, in bytes.
+	 * @param p Pointer to object's pre-allocated memory block.
+	 * @return Pointer to object.
+	 */
+
+	void* operator new( size_t, void* p )
+	{
+		return( p );
+	}
+
+	/**
+	 * @param n The size of the object, in bytes.
+	 * @return Pointer to the allocated memory block for the object.
+	 */
+
+	void* operator new( size_t n )
+	{
+		return( :: malloc( n ));
+	}
+
+	/**
+	 * @param n The size of the object, in bytes.
+	 * @return Pointer to the allocated memory block for the object.
+	 */
+
+	void* operator new[]( size_t n )
+	{
+		return( :: malloc( n ));
+	}
+
+	/**
+	 * Operator frees a previously allocated memory block for the object.
+	 *
+	 * @param p Pointer to the allocated memory block for the object.
+	 */
+
+	void operator delete( void* p )
+	{
+		:: free( p );
+	}
+
+	/**
+	 * Operator frees a previously allocated memory block for the object.
+	 *
+	 * @param p Pointer to the allocated memory block for the object.
+	 */
+
+	void operator delete[]( void* p )
+	{
+		:: free( p );
+	}
+};
+
+/**
+ * @brief The default base class for objects that allocate blocks of memory.
+ *
+ * Memory buffer allocator that uses "stdlib" standard memory functions.
+ */
+
+class CStdMemAllocator : public CStdClassAllocator
+{
+public:
+	/**
+	 * Function allocates memory block.
+	 *
+	 * @param Size The size of the block, in bytes.
+	 * @result The pointer to the allocated block.
+	 */
+
+	static void* allocmem( const size_t Size )
+	{
+		return( :: malloc( Size ));
+	}
+
+	/**
+	 * Function reallocates a previously allocated memory block.
+	 *
+	 * @param p Pointer to the allocated block, can be NULL.
+	 * @param Size The new size of the block, in bytes.
+	 * @result The pointer to the (re)allocated block.
+	 */
+
+	static void* reallocmem( void* p, const size_t Size )
+	{
+		return( :: realloc( p, Size ));
+	}
+
+	/**
+	 * Function frees a previously allocated memory block.
+	 *
+	 * @param p Pointer to the allocated block, can be NULL.
+	 */
+
+	static void freemem( void* p )
+	{
+		:: free( p );
+	}
+};
+
+/**
+ * This function forces the provided "ptr" pointer to be aligned to
+ * "align" bytes. Works with power-of-2 alignments only.
+ *
+ * @param ptr Pointer to align.
+ * @param align Alignment, in bytes, power-of-2.
+ * @tparam T Pointer's element type.
+ */
+
+template< typename T >
+inline T* alignptr( T* const ptr, const uintptr_t align )
+{
+	return( (T*) (( (uintptr_t) ptr + align - 1 ) & ~( align - 1 )));
+}
+
+/**
+ * @brief Templated memory buffer class for element buffers of fixed capacity.
+ *
+ * Fixed memory buffer object. Supports allocation of a fixed amount of
+ * memory. Does not store buffer's capacity - the user should know the actual
+ * capacity of the buffer. Does not feature "internal" storage, memory is
+ * always allocated via the R8B_MEMALLOCCLASS class's functions. Thus the
+ * object of this class can be moved in memory.
+ *
+ * This class manages memory space only - it does not perform element class
+ * construction nor destruction operations.
+ *
+ * This class applies 64-byte memory address alignment to the allocated data
+ * block.
+ *
+ * @tparam T The type of the stored elements (e.g. "double").
+ */
+
+template< typename T >
+class CFixedBuffer : public R8B_MEMALLOCCLASS
+{
+	R8BNOCTOR( CFixedBuffer );
+
+public:
+	CFixedBuffer()
+		: Data0( NULL )
+		, Data( NULL )
+	{
+	}
+
+	/**
+	 * Constructor allocates memory so that the specified number of elements
+	 * of type T can be stored in *this buffer object.
+	 *
+	 * @param Capacity Storage for this number of elements to allocate.
+	 */
+
+	CFixedBuffer( const int Capacity )
+	{
+		R8BASSERT( Capacity > 0 || Capacity == 0 );
+
+		Data0 = allocmem( Capacity * sizeof( T ) + Alignment );
+		Data = (T*) alignptr( Data0, Alignment );
+
+		R8BASSERT( Data0 != NULL || Capacity == 0 );
+	}
+
+	~CFixedBuffer()
+	{
+		freemem( Data0 );
+	}
+
+	/**
+	 * Function allocates memory so that the specified number of elements of
+	 * type T can be stored in *this buffer object.
+	 *
+	 * @param Capacity Storage for this number of elements to allocate.
+	 */
+
+	void alloc( const int Capacity )
+	{
+		R8BASSERT( Capacity > 0 || Capacity == 0 );
+
+		freemem( Data0 );
+		Data0 = allocmem( Capacity * sizeof( T ) + Alignment );
+		Data = (T*) alignptr( Data0, Alignment );
+
+		R8BASSERT( Data0 != NULL || Capacity == 0 );
+	}
+
+	/**
+	 * Function reallocates memory so that the specified number of elements of
+	 * type T can be stored in *this buffer object. Previously allocated data
+	 * is copied to the new memory buffer.
+	 *
+	 * @param PrevCapacity Previous capacity of *this buffer.
+	 * @param NewCapacity Storage for this number of elements to allocate.
+	 */
+
+	void realloc( const int PrevCapacity, const int NewCapacity )
+	{
+		R8BASSERT( PrevCapacity >= 0 );
+		R8BASSERT( NewCapacity >= 0 );
+
+		void* const NewData0 = allocmem( NewCapacity * sizeof( T ) +
+			Alignment );
+
+		T* const NewData = (T*) alignptr( NewData0, Alignment );
+		const size_t CopySize = ( PrevCapacity > NewCapacity ?
+			NewCapacity : PrevCapacity ) * sizeof( T );
+
+		if( CopySize > 0 )
+		{
+			memcpy( NewData, Data, CopySize );
+		}
+
+		freemem( Data0 );
+		Data0 = NewData0;
+		Data = NewData;
+
+		R8BASSERT( Data0 != NULL || NewCapacity == 0 );
+	}
+
+	/**
+	 * Function deallocates a previously allocated buffer.
+	 */
+
+	void free()
+	{
+		freemem( Data0 );
+		Data0 = NULL;
+		Data = NULL;
+	}
+
+	/**
+	 * @return Pointer to the first element of the allocated buffer, NULL if
+	 * not allocated.
+	 */
+
+	T* getPtr() const
+	{
+		return( Data );
+	}
+
+	/**
+	 * @return Pointer to the first element of the allocated buffer, NULL if
+	 * not allocated.
+	 */
+
+	operator T* () const
+	{
+		return( Data );
+	}
+
+private:
+	static const size_t Alignment = 64; ///< Buffer address alignment, in
+		///< bytes.
+		///<
+	void* Data0; ///< Buffer pointer, original unaligned.
+		///<
+	T* Data; ///< Element buffer pointer, aligned.
+		///<
+};
+
+/**
+ * @brief Pointer-to-object "keeper" class with automatic deletion.
+ *
+ * An auxiliary class that can be used for keeping a pointer to object that
+ * should be deleted together with the "keeper" by calling object's "delete"
+ * operator.
+ *
+ * @tparam T Pointer type to operate with, must include the asterisk (e.g.
+ * "CDSPFIRFilter*").
+ */
+
+template< class T >
+class CPtrKeeper
+{
+	R8BNOCTOR( CPtrKeeper );
+
+public:
+	CPtrKeeper()
+		: Object( NULL )
+	{
+	}
+
+	/**
+	 * Constructor assigns a pointer to object to *this keeper.
+	 *
+	 * @param aObject Pointer to object to keep, can be NULL.
+	 * @tparam T2 Object's pointer type.
+	 */
+
+	template< class T2 >
+	CPtrKeeper( T2 const aObject )
+		: Object( aObject )
+	{
+	}
+
+	~CPtrKeeper()
+	{
+		delete Object;
+	}
+
+	/**
+	 * Function assigns a pointer to object to *this keeper. A previously
+	 * keeped pointer will be reset and object deleted.
+	 *
+	 * @param aObject Pointer to object to keep, can be NULL.
+	 * @tparam T2 Object's pointer type.
+	 */
+
+	template< class T2 >
+	void operator = ( T2 const aObject )
+	{
+		reset();
+		Object = aObject;
+	}
+
+	/**
+	 * @return Pointer to keeped object, NULL if no object is being kept.
+	 */
+
+	T operator -> () const
+	{
+		return( Object );
+	}
+
+	/**
+	 * @return Pointer to keeped object, NULL if no object is being kept.
+	 */
+
+	operator T () const
+	{
+		return( Object );
+	}
+
+	/**
+	 * Function resets the keeped pointer and deletes the keeped object.
+	 */
+
+	void reset()
+	{
+		T DelObj = Object;
+		Object = NULL;
+		delete DelObj;
+	}
+
+	/**
+	 * @return Function returns the keeped pointer and resets it in *this
+	 * keeper without object deletion.
+	 */
+
+	T unkeep()
+	{
+		T ResObject = Object;
+		Object = NULL;
+		return( ResObject );
+	}
+
+private:
+	T Object; ///< Pointer to keeped object.
+		///<
+};
+
+/**
+ * @brief Multi-threaded synchronization object class.
+ *
+ * This class uses standard OS thread-locking (mutex) mechanism which is
+ * fairly efficient in most cases.
+ *
+ * The acquire() function can be called recursively, in the same thread, for
+ * this kind of thread-locking mechanism. This will not produce a dead-lock.
+ */
+
+class CSyncObject
+{
+	R8BNOCTOR( CSyncObject );
+
+public:
+	CSyncObject()
+	{
+		#if defined( _WIN32 )
+			InitializeCriticalSectionAndSpinCount( &CritSec, 4000 );
+		#else // defined( _WIN32 )
+			pthread_mutexattr_t MutexAttrs;
+			pthread_mutexattr_init( &MutexAttrs );
+			pthread_mutexattr_settype( &MutexAttrs, PTHREAD_MUTEX_RECURSIVE );
+			pthread_mutex_init( &Mutex, &MutexAttrs );
+			pthread_mutexattr_destroy( &MutexAttrs );
+		#endif // defined( _WIN32 )
+	}
+
+	~CSyncObject()
+	{
+		#if defined( _WIN32 )
+			DeleteCriticalSection( &CritSec );
+		#else // defined( _WIN32 )
+			pthread_mutex_destroy( &Mutex );
+		#endif // defined( _WIN32 )
+	}
+
+	/**
+	 * Function "acquires" *this thread synchronizer object immediately or
+	 * waits until another thread releases it.
+	 */
+
+	void acquire()
+	{
+		#if defined( _WIN32 )
+			EnterCriticalSection( &CritSec );
+		#else // defined( _WIN32 )
+			pthread_mutex_lock( &Mutex );
+		#endif // defined( _WIN32 )
+	}
+
+	/**
+	 * Function "releases" *this previously acquired thread synchronizer
+	 * object.
+	 */
+
+	void release()
+	{
+		#if defined( _WIN32 )
+			LeaveCriticalSection( &CritSec );
+		#else // defined( _WIN32 )
+			pthread_mutex_unlock( &Mutex );
+		#endif // defined( _WIN32 )
+	}
+
+private:
+	#if defined( _WIN32 )
+		CRITICAL_SECTION CritSec; ///< Standard Windows critical section
+			///< structure.
+			///<
+	#else // defined( _WIN32 )
+		pthread_mutex_t Mutex; ///< pthread.h mutex object.
+			///<
+	#endif // defined( _WIN32 )
+};
+
+/**
+ * @brief A "keeper" class for CSyncObject-based synchronization.
+ *
+ * Sync keeper class. The object of this class can be used as auto-init and
+ * auto-deinit object for calling the acquire() and release() functions of an
+ * object of the CSyncObject class. This "keeper" object is best used in
+ * functions as an "automatic" object allocated on the stack, possibly via the
+ * R8BSYNC() macro.
+ */
+
+class CSyncKeeper
+{
+	R8BNOCTOR( CSyncKeeper );
+
+public:
+	CSyncKeeper()
+		: SyncObj( NULL )
+	{
+	}
+
+	/**
+	 * @param aSyncObj Pointer to the sync object which should be used for
+	 * sync'ing, can be NULL.
+	 */
+
+	CSyncKeeper( CSyncObject* const aSyncObj )
+		: SyncObj( aSyncObj )
+	{
+		if( SyncObj != NULL )
+		{
+			SyncObj -> acquire();
+		}
+	}
+
+	/**
+	 * @param aSyncObj Reference to the sync object which should be used for
+	 * sync'ing.
+	 */
+
+	CSyncKeeper( CSyncObject& aSyncObj )
+		: SyncObj( &aSyncObj )
+	{
+		SyncObj -> acquire();
+	}
+
+	~CSyncKeeper()
+	{
+		if( SyncObj != NULL )
+		{
+			SyncObj -> release();
+		}
+	}
+
+protected:
+	CSyncObject* SyncObj; ///< Sync object in use (can be NULL).
+		///<
+};
+
+/**
+ * The synchronization macro. The R8BSYNC( obj ) macro, which creates and
+ * object of the r8b::CSyncKeeper class on stack, should be put before
+ * sections of the code that may potentially change data asynchronously with
+ * other threads at the same time. The R8BSYNC( obj ) macro "acquires" the
+ * synchronization object thus blocking execution of other threads that also
+ * use the same R8BSYNC( obj ) macro. The blocked section begins with the
+ * R8BSYNC( obj ) macro and finishes at the end of the current C++ code block.
+ * Multiple R8BSYNC() macros may be defined from within the same code block.
+ *
+ * @param SyncObject An object of the CSyncObject type that is used for
+ * synchronization.
+ */
+
+#define R8BSYNC( SyncObject ) R8BSYNC_( SyncObject, __LINE__ )
+#define R8BSYNC_( SyncObject, id ) R8BSYNC__( SyncObject, id )
+#define R8BSYNC__( SyncObject, id ) CSyncKeeper SyncKeeper##id( SyncObject )
+
+/**
+ * @brief Sine signal generator class.
+ *
+ * Class implements sine signal generator without biasing.
+ */
+
+class CSineGen
+{
+public:
+	CSineGen()
+	{
+	}
+
+	/**
+	 * Constructor initializes *this sine signal generator, with unity gain
+	 * output.
+	 *
+	 * @param si Sine function increment, in radians.
+	 * @param ph Starting phase, in radians. Add R8B_PId2 for cosine function.
+	 */
+
+	CSineGen( const double si, const double ph )
+		: svalue1( sin( ph ))
+		, svalue2( sin( ph - si ))
+		, sincr( 2.0 * cos( si ))
+	{
+	}
+
+	/**
+	 * Constructor initializes *this sine signal generator.
+	 *
+	 * @param si Sine function increment, in radians.
+	 * @param ph Starting phase, in radians. Add R8B_PId2 for cosine function.
+	 * @param g The overall gain factor, 1.0 for unity gain (-1.0 to 1.0
+	 * amplitude).
+	 */
+
+	CSineGen( const double si, const double ph, const double g )
+		: svalue1( sin( ph ) * g )
+		, svalue2( sin( ph - si ) * g )
+		, sincr( 2.0 * cos( si ))
+	{
+	}
+
+	/**
+	 * Function initializes *this sine signal generator, with unity gain
+	 * output.
+	 *
+	 * @param si Sine function increment, in radians.
+	 * @param ph Starting phase, in radians. Add R8B_PId2 for cosine function.
+	 */
+
+	void init( const double si, const double ph )
+	{
+		svalue1 = sin( ph );
+		svalue2 = sin( ph - si );
+		sincr = 2.0 * cos( si );
+	}
+
+	/**
+	 * Function initializes *this sine signal generator.
+	 *
+	 * @param si Sine function increment, in radians.
+	 * @param ph Starting phase, in radians. Add R8B_PId2 for cosine function.
+	 * @param g The overall gain factor, 1.0 for unity gain (-1.0 to 1.0
+	 * amplitude).
+	 */
+
+	void init( const double si, const double ph, const double g )
+	{
+		svalue1 = sin( ph ) * g;
+		svalue2 = sin( ph - si ) * g;
+		sincr = 2.0 * cos( si );
+	}
+
+	/**
+	 * @return Next value of the sine function, without biasing.
+	 */
+
+	double generate()
+	{
+		const double res = svalue1;
+
+		svalue1 = sincr * res - svalue2;
+		svalue2 = res;
+
+		return( res );
+	}
+
+private:
+	double svalue1; ///< Current sine value.
+		///<
+	double svalue2; ///< Previous sine value.
+		///<
+	double sincr; ///< Sine value increment.
+		///<
+};
+
+/**
+ * @param v Input value.
+ * @return Calculated bit occupancy of the specified input value. Bit
+ * occupancy means how many significant lower bits are necessary to store a
+ * specified value. Function treats the input value as unsigned.
+ */
+
+inline int getBitOccupancy( const int v )
+{
+	static const uint8_t OccupancyTable[] =
+	{
+		1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
+		5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+		6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+		6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+		7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+		7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+		7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+		7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+		8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+		8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+		8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+		8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+		8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+		8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+		8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+		8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8
+	};
+
+	const int tt = v >> 16;
+
+	if( tt != 0 )
+	{
+		const int t = v >> 24;
+
+		return( t != 0 ? 24 + OccupancyTable[ t & 0xFF ] :
+			16 + OccupancyTable[ tt ]);
+	}
+	else
+	{
+		const int t = v >> 8;
+
+		return( t != 0 ? 8 + OccupancyTable[ t ] : OccupancyTable[ v ]);
+	}
+}
+
+/**
+ * Function calculates frequency response of the specified FIR filter at the
+ * specified circular frequency. Phase can be calculated as atan2( im, re ).
+ *
+ * @param flt FIR filter's coefficients.
+ * @param fltlen Number of coefficients (taps) in the filter.
+ * @param th Circular frequency [0; pi].
+ * @param[out] re0 Resulting real part of the complex frequency response.
+ * @param[out] im0 Resulting imaginary part of the complex frequency response.
+ * @param fltlat Filter's latency, in samples.
+ */
+
+inline void calcFIRFilterResponse( const double* flt, int fltlen,
+	const double th, double& re0, double& im0, const int fltlat = 0 )
+{
+	const double sincr = 2.0 * cos( th );
+	double cvalue1;
+	double svalue1;
+
+	if( fltlat == 0 )
+	{
+		cvalue1 = 1.0;
+		svalue1 = 0.0;
+	}
+	else
+	{
+		cvalue1 = cos( -fltlat * th );
+		svalue1 = sin( -fltlat * th );
+	}
+
+	double cvalue2 = cos( -( fltlat + 1 ) * th );
+	double svalue2 = sin( -( fltlat + 1 ) * th );
+
+	double re = 0.0;
+	double im = 0.0;
+
+	while( fltlen > 0 )
+	{
+		re += cvalue1 * flt[ 0 ];
+		im += svalue1 * flt[ 0 ];
+		flt++;
+		fltlen--;
+
+		double tmp = cvalue1;
+		cvalue1 = sincr * cvalue1 - cvalue2;
+		cvalue2 = tmp;
+
+		tmp = svalue1;
+		svalue1 = sincr * svalue1 - svalue2;
+		svalue2 = tmp;
+	}
+
+	re0 = re;
+	im0 = im;
+}
+
+/**
+ * Function calculates frequency response and group delay of the specified FIR
+ * filter at the specified circular frequency. The group delay is calculated
+ * by evaluating the filter's response at close side-band frequencies of "th".
+ *
+ * @param flt FIR filter's coefficients.
+ * @param fltlen Number of coefficients (taps) in the filter.
+ * @param th Circular frequency [0; pi].
+ * @param[out] re Resulting real part of the complex frequency response.
+ * @param[out] im Resulting imaginary part of the complex frequency response.
+ * @param[out] gd Resulting group delay at the specified frequency, in
+ * samples.
+ */
+
+inline void calcFIRFilterResponseAndGroupDelay( const double* const flt,
+	const int fltlen, const double th, double& re, double& im, double& gd )
+{
+	// Calculate response at "th".
+
+	calcFIRFilterResponse( flt, fltlen, th, re, im );
+
+	// Calculate response at close sideband frequencies.
+
+	const int Count = 2;
+	const double thd2 = 1e-9;
+	double ths[ Count ] = { th - thd2, th + thd2 };
+
+	if( ths[ 0 ] < 0.0 )
+	{
+		ths[ 0 ] = 0.0;
+	}
+
+	if( ths[ 1 ] > R8B_PI )
+	{
+		ths[ 1 ] = R8B_PI;
+	}
+
+	double ph1[ Count ];
+	int i;
+
+	for( i = 0; i < Count; i++ )
+	{
+		double re1;
+		double im1;
+
+		calcFIRFilterResponse( flt, fltlen, ths[ i ], re1, im1 );
+		ph1[ i ] = atan2( im1, re1 );
+	}
+
+	if( fabs( ph1[ 1 ] - ph1[ 0 ]) > R8B_PI )
+	{
+		if( ph1[ 1 ] > ph1[ 0 ])
+		{
+			ph1[ 1 ] -= R8B_2PI;
+		}
+		else
+		{
+			ph1[ 1 ] += R8B_2PI;
+		}
+	}
+
+	const double thd = ths[ 1 ] - ths[ 0 ];
+	gd = ( ph1[ 1 ] - ph1[ 0 ]) / thd;
+}
+
+/**
+ * Function normalizes FIR filter so that its frequency response at DC is
+ * equal to DCGain.
+ *
+ * @param[in,out] p Filter coefficients.
+ * @param l Filter length.
+ * @param DCGain Filter's gain at DC (linear, non-decibel value).
+ * @param pstep "p" array step.
+ */
+
+inline void normalizeFIRFilter( double* const p, const int l,
+	const double DCGain, const int pstep = 1 )
+{
+	R8BASSERT( l > 0 );
+	R8BASSERT( pstep != 0 );
+
+	double s = 0.0;
+	double* pp = p;
+	int i = l;
+
+	while( i > 0 )
+	{
+		s += *pp;
+		pp += pstep;
+		i--;
+	}
+
+	s = DCGain / s;
+	pp = p;
+	i = l;
+
+	while( i > 0 )
+	{
+		*pp *= s;
+		pp += pstep;
+		i--;
+	}
+}
+
+/**
+ * Function calculates coefficients used to calculate 3rd order spline
+ * (polynomial) on the equidistant lattice, using 8 points.
+ *
+ * @param[out] c Output coefficients buffer, length = 4.
+ * @param xm3 Point at x-3 position.
+ * @param xm2 Point at x-2 position.
+ * @param xm1 Point at x-1 position.
+ * @param x0 Point at x position.
+ * @param x1 Point at x+1 position.
+ * @param x2 Point at x+2 position.
+ * @param x3 Point at x+3 position.
+ * @param x4 Point at x+4 position.
+ */
+
+inline void calcSpline3p8Coeffs( double* const c, const double xm3,
+	const double xm2, const double xm1, const double x0, const double x1,
+	const double x2, const double x3, const double x4 )
+{
+	c[ 0 ] = x0;
+	c[ 1 ] = ( 61.0 * ( x1 - xm1 ) + 16.0 * ( xm2 - x2 ) +
+		3.0 * ( x3 - xm3 )) / 76.0;
+
+	c[ 2 ] = ( 106.0 * ( xm1 + x1 ) + 10.0 * x3 + 6.0 * xm3 - 3.0 * x4 -
+		29.0 * ( xm2 + x2 ) - 167.0 * x0 ) / 76.0;
+
+	c[ 3 ] = ( 91.0 * ( x0 - x1 ) + 45.0 * ( x2 - xm1 ) +
+		13.0 * ( xm2 - x3 ) + 3.0 * ( x4 - xm3 )) / 76.0;
+}
+
+/**
+ * Function calculates coefficients used to calculate 2rd order spline
+ * (polynomial) on the equidistant lattice, using 8 points. This function is
+ * based on the calcSpline3p8Coeffs() function, but without the 3rd order
+ * coefficient.
+ *
+ * @param[out] c Output coefficients buffer, length = 3.
+ * @param xm3 Point at x-3 position.
+ * @param xm2 Point at x-2 position.
+ * @param xm1 Point at x-1 position.
+ * @param x0 Point at x position.
+ * @param x1 Point at x+1 position.
+ * @param x2 Point at x+2 position.
+ * @param x3 Point at x+3 position.
+ * @param x4 Point at x+4 position.
+ */
+
+inline void calcSpline2p8Coeffs( double* const c, const double xm3,
+	const double xm2, const double xm1, const double x0, const double x1,
+	const double x2, const double x3, const double x4 )
+{
+	c[ 0 ] = x0;
+	c[ 1 ] = ( 61.0 * ( x1 - xm1 ) + 16.0 * ( xm2 - x2 ) +
+		3.0 * ( x3 - xm3 )) / 76.0;
+
+	c[ 2 ] = ( 106.0 * ( xm1 + x1 ) + 10.0 * x3 + 6.0 * xm3 - 3.0 * x4 -
+		29.0 * ( xm2 + x2 ) - 167.0 * x0 ) / 76.0;
+}
+
+/**
+ * Function calculates coefficients used to calculate 3rd order segment
+ * interpolation polynomial on the equidistant lattice, using 4 points.
+ *
+ * @param[out] c Output coefficients buffer, length = 4.
+ * @param[in] y Equidistant point values. Value at offset 1 corresponds to
+ * x=0 point.
+ */
+
+inline void calcSpline3p4Coeffs( double* const c, const double* const y )
+{
+	c[ 0 ] = y[ 1 ];
+	c[ 1 ] = 0.5 * ( y[ 2 ] - y[ 0 ]);
+	c[ 2 ] = y[ 0 ] - 2.5 * y[ 1 ] + y[ 2 ] + y[ 2 ] - 0.5 * y[ 3 ];
+	c[ 3 ] = 0.5 * ( y[ 3 ] - y[ 0 ] ) + 1.5 * ( y[ 1 ] - y[ 2 ]);
+}
+
+/**
+ * Function calculates coefficients used to calculate 3rd order segment
+ * interpolation polynomial on the equidistant lattice, using 6 points.
+ *
+ * @param[out] c Output coefficients buffer, length = 4.
+ * @param[in] y Equidistant point values. Value at offset 2 corresponds to
+ * x=0 point.
+ */
+
+inline void calcSpline3p6Coeffs( double* const c, const double* const y )
+{
+	c[ 0 ] = y[ 2 ];
+	c[ 1 ] = ( 11.0 * ( y[ 3 ] - y[ 1 ]) + 2.0 * ( y[ 0 ] - y[ 4 ])) / 14.0;
+	c[ 2 ] = ( 20.0 * ( y[ 1 ] + y[ 3 ]) + 2.0 * y[ 5 ] - 4.0 * y[ 0 ] -
+		7.0 * y[ 4 ] - 31.0 * y[ 2 ]) / 14.0;
+
+	c[ 3 ] = ( 17.0 * ( y[ 2 ] - y[ 3 ]) + 9.0 * ( y[ 4 ] - y[ 1 ]) +
+		2.0 * ( y[ 0 ] - y[ 5 ])) / 14.0;
+}
+
+#if !defined( min )
+
+/**
+ * @param v1 Value 1.
+ * @param v2 Value 2.
+ * @tparam T Values' type.
+ * @return The minimum of 2 values.
+ */
+
+template< typename T >
+inline T min( const T& v1, const T& v2 )
+{
+	return( v1 < v2 ? v1 : v2 );
+}
+
+#endif // min
+
+#if !defined( max )
+
+/**
+ * @param v1 Value 1.
+ * @param v2 Value 2.
+ * @tparam T Values' type.
+ * @return The maximum of 2 values.
+ */
+
+template< typename T >
+inline T max( const T& v1, const T& v2 )
+{
+	return( v1 > v2 ? v1 : v2 );
+}
+
+#endif // max
+
+/**
+ * Function "clamps" (clips) the specified value so that it is not lesser than
+ * "minv", and not greater than "maxv".
+ *
+ * @param Value Value to clamp.
+ * @param minv Minimal allowed value.
+ * @param maxv Maximal allowed value.
+ * @return "Clamped" value.
+ */
+
+inline double clampr( const double Value, const double minv,
+	const double maxv )
+{
+	if( Value < minv )
+	{
+		return( minv );
+	}
+	else
+	if( Value > maxv )
+	{
+		return( maxv );
+	}
+	else
+	{
+		return( Value );
+	}
+}
+
+/**
+ * @param x Value to square.
+ * @return Squared value of the argument.
+ */
+
+inline double sqr( const double x )
+{
+	return( x * x );
+}
+
+/**
+ * @param v Input value.
+ * @param p Power factor.
+ * @return Returns pow() function's value with input value's sign check.
+ */
+
+inline double pows( const double v, const double p )
+{
+	return( v < 0.0 ? -pow( -v, p ) : pow( v, p ));
+}
+
+/**
+ * @param v Input value.
+ * @return Calculated single-argument Gaussian function of the input value.
+ */
+
+inline double gauss( const double v )
+{
+	return( exp( -( v * v )));
+}
+
+/**
+ * @param v Input value.
+ * @return Calculated inverse hyperbolic sine of the input value.
+ */
+
+inline double asinh( const double v )
+{
+	return( log( v + sqrt( v * v + 1.0 )));
+}
+
+/**
+ * @param x Input value.
+ * @return Calculated zero-th order modified Bessel function of the first kind
+ * of the input value. Approximate value.
+ */
+
+inline double besselI0( const double x )
+{
+	const double ax = fabs( x );
+	double y;
+
+	if( ax < 3.75 )
+	{
+		y = x / 3.75;
+		y *= y;
+
+		return( 1.0 + y * ( 3.5156229 + y * ( 3.0899424 + y * ( 1.2067492 +
+			y * ( 0.2659732 + y * ( 0.360768e-1 + y * 0.45813e-2 ))))));
+	}
+
+	y = 3.75 / ax;
+
+	return( exp( ax ) / sqrt( ax ) * ( 0.39894228 + y * ( 0.1328592e-1 +
+		y * ( 0.225319e-2 + y * ( -0.157565e-2 + y * ( 0.916281e-2 +
+		y * ( -0.2057706e-1 + y * ( 0.2635537e-1 + y * ( -0.1647633e-1 +
+		y * 0.392377e-2 )))))))));
+}
+
+} // namespace r8b
+
+#endif // R8BBASE_INCLUDED
diff --git a/Src/external_dependencies/openmpt-trunk/include/r8brain/r8bconf.h b/Src/external_dependencies/openmpt-trunk/include/r8brain/r8bconf.h
new file mode 100644
index 00000000..ef08b16b
--- /dev/null
+++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/r8bconf.h
@@ -0,0 +1,176 @@
+//$ nobt
+//$ nocpp
+
+/**
+ * @file r8bconf.h
+ *
+ * @brief The "configuration" inclusion file you can modify.
+ *
+ * This is the "configuration" inclusion file for the "r8brain-free-src"
+ * sample rate converter. You may redefine the macros here as you see fit.
+ *
+ * r8brain-free-src Copyright (c) 2013-2021 Aleksey Vaneev
+ * See the "LICENSE" file for license.
+ */
+
+#ifndef R8BCONF_INCLUDED
+#define R8BCONF_INCLUDED
+
+#if !defined( R8B_IPP )
+	/**
+	 * Set the R8B_IPP macro definition to 1 to enable the use of Intel IPP's
+	 * fast Fourier transform functions. Also uncomment and correct the IPP
+	 * header inclusion macros.
+	 *
+	 * Do not forget to call the ippInit() function at the start of the
+	 * application, before using this library's functions.
+	 */
+
+	#define R8B_IPP 0
+
+//	#include <ippcore.h>
+//	#include <ipps.h>
+#endif // !defined( R8B_IPP )
+
+#if !defined( R8BASSERT )
+	/**
+	 * Assertion macro used to check for certain run-time conditions. By
+	 * default no action is taken if assertion fails.
+	 *
+	 * @param e Expression to check.
+	 */
+
+	#define R8BASSERT( e )
+#endif // !defined( R8BASSERT )
+
+#if !defined( R8BCONSOLE )
+	/**
+	 * Console output macro, used to output various resampler status strings,
+	 * including filter design parameters, convolver parameters.
+	 *
+	 * @param e Expression to send to the console, usually consists of a
+	 * standard "printf" format string followed by several parameters
+	 * (__VA_ARGS__).
+	 */
+
+	#define R8BCONSOLE( ... )
+#endif // !defined( R8BCONSOLE )
+
+#if !defined( R8B_BASECLASS )
+	/**
+	 * Macro defines the name of the class from which all classes that are
+	 * designed to be created on heap are derived. The default
+	 * r8b::CStdClassAllocator class uses "stdlib" memory allocation
+	 * functions.
+	 *
+	 * The classes that are best placed on stack or as class members are not
+	 * derived from any class.
+	 */
+
+	#define R8B_BASECLASS :: r8b :: CStdClassAllocator
+#endif // !defined( R8B_BASECLASS )
+
+#if !defined( R8B_MEMALLOCCLASS )
+	/**
+	 * Macro defines the name of the class that implements raw memory
+	 * allocation functions, see the r8b::CStdMemAllocator class for details.
+	 */
+
+	#define R8B_MEMALLOCCLASS :: r8b :: CStdMemAllocator
+#endif // !defined( R8B_MEMALLOCCLASS )
+
+#if !defined( R8B_FILTER_CACHE_MAX )
+	/**
+	 * This macro specifies the number of filters kept in the cache at most.
+	 * The actual number can be higher if many different filters are in use at
+	 * the same time.
+	 */
+
+	#define R8B_FILTER_CACHE_MAX 96
+#endif // !defined( R8B_FILTER_CACHE_MAX )
+
+#if !defined( R8B_FRACBANK_CACHE_MAX )
+	/**
+	 * This macro specifies the number of whole-number stepping fractional
+	 * delay filter banks kept in the cache at most. The actual number can be
+	 * higher if many different filter banks are in use at the same time. As
+	 * filter banks are usually big objects, it is advisable to keep this
+	 * cache size small.
+	 */
+
+	#define R8B_FRACBANK_CACHE_MAX 12
+#endif // !defined( R8B_FRACBANK_CACHE_MAX )
+
+#if !defined( R8B_FLTTEST )
+	/**
+	 * This macro, when equal to 1, enables fractional delay filter bank
+	 * testing: in this mode the filter bank becomes a dynamic member of the
+	 * CDSPFracInterpolator object instead of being a global static object.
+	 */
+
+	#define R8B_FLTTEST 0
+#endif // !defined( R8B_FLTTEST )
+
+#if !defined( R8B_FASTTIMING )
+	/**
+	 * This macro, when equal to 1, enables a fast interpolation sample
+	 * timing technique. This technique improves interpolation performance
+	 * (by around 10%) at the expense of a minor sample timing drift which is
+	 * on the order of 1e-6 samples per 10 billion output samples. This
+	 * setting does not apply to whole-number stepping, if it is in use, as
+	 * this stepping provides zero timing error without performance impact.
+	 * Also does not apply to the cases when a whole-numbered (2X, 3X, etc.)
+	 * resampling is in the actual use.
+	 */
+
+	#define R8B_FASTTIMING 0
+#endif // !defined( R8B_FASTTIMING )
+
+#if !defined( R8B_EXTFFT )
+	/**
+	 * This macro, when equal to 1, extends length of low-pass filters' FFT
+	 * block by a factor of 2, by zero-padding it. This usually improves the
+	 * overall time performance of the resampler at the expense of a higher
+	 * overall latency (initial processing delay). If such delay is not an
+	 * issue, setting this macro to 1 is preferrable. This macro can only have
+	 * a value of 0 or 1.
+	 */
+
+	#define R8B_EXTFFT 0
+#endif // !defined( R8B_EXTFFT )
+
+#if !defined( R8B_PFFFT )
+	/**
+	 * When defined as 1, enables PFFFT routines which are fast, but which
+	 * are limited to 24-bit precision. May be a good choice for time-series
+	 * interpolation, when stop-band attenuation higher than 120 dB is not
+	 * required.
+	 */
+
+	#define R8B_PFFFT 0
+#endif // !defined( R8B_PFFFT )
+
+#if R8B_PFFFT
+	#define R8B_FLOATFFT 1
+#endif // R8B_PFFFT
+
+#if !defined( R8B_PFFFT_DOUBLE )
+	/**
+	 * When defined as 1, enables PFFFT "double" routines which are fast, and
+	 * which provide the highest precision.
+	 */
+
+	#define R8B_PFFFT_DOUBLE 0
+#endif // !defined( R8B_PFFFT_DOUBLE )
+
+#if !defined( R8B_FLOATFFT )
+	/**
+	 * The R8B_FLOATFFT definition enables double-to-float buffer conversions
+	 * for FFT operations, for algorithms that work with "float" values. This
+	 * macro should not be changed from the default "0" here.
+	 */
+
+	#define R8B_FLOATFFT 0
+#endif // !defined( R8B_FLOATFFT )
+
+#endif // R8BCONF_INCLUDED
diff --git a/Src/external_dependencies/openmpt-trunk/include/r8brain/r8butil.h b/Src/external_dependencies/openmpt-trunk/include/r8brain/r8butil.h
new file mode 100644
index 00000000..2f967b4b
--- /dev/null
+++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/r8butil.h
@@ -0,0 +1,306 @@
+//$ nobt
+//$ nocpp
+
+/**
+ * @file r8butil.h
+ *
+ * @brief The inclusion file with several utility functions.
+ *
+ * This file includes several utility functions used by various utility
+ * programs like "calcErrorTable.cpp".
+ *
+ * r8brain-free-src Copyright (c) 2013-2021 Aleksey Vaneev
+ * See the "LICENSE" file for license.
+ */
+
+#ifndef R8BUTIL_INCLUDED
+#define R8BUTIL_INCLUDED
+
+#include "r8bbase.h"
+
+namespace r8b {
+
+/**
+ * @param re Real part of the frequency response.
+ * @param im Imaginary part of the frequency response.
+ * @return A magnitude response value converted from the linear scale to the
+ * logarithmic scale.
+ */
+
+inline double convertResponseToLog( const double re, const double im )
+{
+	return( 4.34294481903251828 * log( re * re + im * im + 1e-100 ));
+}
+
+/**
+ * An utility function that performs frequency response scanning step update
+ * based on the current magnitude response's slope.
+ *
+ * @param[in,out] step The current scanning step. Will be updated on
+ * function's return. Must be a positive value.
+ * @param curg Squared magnitude response at the current frequency point.
+ * @param[in,out] prevg_log Previous magnitude response, log scale. Will be
+ * updated on function's return.
+ * @param prec Precision multiplier, affects the size of the step.
+ * @param maxstep The maximal allowed step.
+ * @param minstep The minimal allowed step.
+ */
+
+inline void updateScanStep( double& step, const double curg,
+	double& prevg_log, const double prec, const double maxstep,
+	const double minstep = 1e-11 )
+{
+	double curg_log = 4.34294481903251828 * log( curg + 1e-100 );
+	curg_log += ( prevg_log - curg_log ) * 0.7;
+
+	const double slope = fabs( curg_log - prevg_log );
+	prevg_log = curg_log;
+
+	if( slope > 0.0 )
+	{
+		step /= prec * slope;
+		step = max( min( step, maxstep ), minstep );
+	}
+}
+
+/**
+ * Function locates normalized frequency at which the minimum filter gain
+ * is reached. The scanning is performed from lower (left) to higher
+ * (right) frequencies, the whole range is scanned.
+ *
+ * Function expects that the magnitude response is always reducing from lower
+ * to high frequencies, starting at "minth".
+ *
+ * @param flt Filter response.
+ * @param fltlen Filter response's length in samples (taps).
+ * @param[out] ming The current minimal gain (squared). On function's return
+ * will contain the minimal gain value found (squared).
+ * @param[out] minth The normalized frequency where the minimal gain is
+ * currently at. On function's return will point to the normalized frequency
+ * where the new minimum was found.
+ * @param thend The ending frequency, inclusive.
+ */
+
+inline void findFIRFilterResponseMinLtoR( const double* const flt,
+	const int fltlen, double& ming, double& minth, const double thend )
+{
+	const double maxstep = minth * 2e-3;
+	double curth = minth;
+	double re;
+	double im;
+	calcFIRFilterResponse( flt, fltlen, R8B_PI * curth, re, im );
+	double prevg_log = convertResponseToLog( re, im );
+	double step = 1e-11;
+
+	while( true )
+	{
+		curth += step;
+
+		if( curth > thend )
+		{
+			break;
+		}
+
+		calcFIRFilterResponse( flt, fltlen, R8B_PI * curth, re, im );
+		const double curg = re * re + im * im;
+
+		if( curg > ming )
+		{
+			ming = curg;
+			minth = curth;
+			break;
+		}
+
+		ming = curg;
+		minth = curth;
+
+		updateScanStep( step, curg, prevg_log, 0.31, maxstep );
+	}
+}
+
+/**
+ * Function locates normalized frequency at which the maximal filter gain
+ * is reached. The scanning is performed from lower (left) to higher
+ * (right) frequencies, the whole range is scanned.
+ *
+ * Note: this function may "stall" in very rare cases if the magnitude
+ * response happens to be "saw-tooth" like, requiring a very small stepping to
+ * be used. If this happens, it may take dozens of seconds to complete.
+ *
+ * @param flt Filter response.
+ * @param fltlen Filter response's length in samples (taps).
+ * @param[out] maxg The current maximal gain (squared). On function's return
+ * will contain the maximal gain value (squared).
+ * @param[out] maxth The normalized frequency where the maximal gain is
+ * currently at. On function's return will point to the normalized frequency
+ * where the maximum was reached.
+ * @param thend The ending frequency, inclusive.
+ */
+
+inline void findFIRFilterResponseMaxLtoR( const double* const flt,
+	const int fltlen, double& maxg, double& maxth, const double thend )
+{
+	const double maxstep = maxth * 1e-4;
+	double premaxth = maxth;
+	double premaxg = maxg;
+	double postmaxth = maxth;
+	double postmaxg = maxg;
+
+	double prevth = maxth;
+	double prevg = maxg;
+	double curth = maxth;
+	double re;
+	double im;
+	calcFIRFilterResponse( flt, fltlen, R8B_PI * curth, re, im );
+	double prevg_log = convertResponseToLog( re, im );
+	double step = 1e-11;
+
+	bool WasPeak = false;
+	int AfterPeakCount = 0;
+
+	while( true )
+	{
+		curth += step;
+
+		if( curth > thend )
+		{
+			break;
+		}
+
+		calcFIRFilterResponse( flt, fltlen, R8B_PI * curth, re, im );
+		const double curg = re * re + im * im;
+
+		if( curg > maxg )
+		{
+			premaxth = prevth;
+			premaxg = prevg;
+			maxg = curg;
+			maxth = curth;
+			WasPeak = true;
+			AfterPeakCount = 0;
+		}
+		else
+		if( WasPeak )
+		{
+			if( AfterPeakCount == 0 )
+			{
+				postmaxth = curth;
+				postmaxg = curg;
+			}
+
+			if( AfterPeakCount == 5 )
+			{
+				// Perform 2 approximate binary searches.
+
+				int k;
+
+				for( k = 0; k < 2; k++ )
+				{
+					double l = ( k == 0 ? premaxth : maxth );
+					double curgl = ( k == 0 ? premaxg : maxg );
+					double r = ( k == 0 ? maxth : postmaxth );
+					double curgr = ( k == 0 ? maxg : postmaxg );
+
+					while( true )
+					{
+						const double c = ( l + r ) * 0.5;
+						calcFIRFilterResponse( flt, fltlen, R8B_PI * c,
+							re, im );
+
+						const double curg = re * re + im * im;
+
+						if( curgl > curgr )
+						{
+							r = c;
+							curgr = curg;
+						}
+						else
+						{
+							l = c;
+							curgl = curg;
+						}
+
+						if( r - l < 1e-11 )
+						{
+							if( curgl > curgr )
+							{
+								maxth = l;
+								maxg = curgl;
+							}
+							else
+							{
+								maxth = r;
+								maxg = curgr;
+							}
+
+							break;
+						}
+					}
+				}
+
+				break;
+			}
+
+			AfterPeakCount++;
+		}
+
+		prevth = curth;
+		prevg = curg;
+
+		updateScanStep( step, curg, prevg_log, 1.0, maxstep );
+	}
+}
+
+/**
+ * Function locates normalized frequency at which the specified maximum
+ * filter gain is reached. The scanning is performed from higher (right)
+ * to lower (left) frequencies, scanning stops when the required gain
+ * value was crossed. Function uses an extremely efficient binary search and
+ * thus expects that the magnitude response has the "main lobe" form produced
+ * by windowing, with a minimal pass-band ripple.
+ *
+ * @param flt Filter response.
+ * @param fltlen Filter response's length in samples (taps).
+ * @param maxg Maximal gain (squared).
+ * @param[out] th The current normalized frequency. On function's return will
+ * point to the normalized frequency where "maxg" is reached.
+ * @param thend The leftmost frequency to scan, inclusive.
+ */
+
+inline void findFIRFilterResponseLevelRtoL( const double* const flt,
+	const int fltlen, const double maxg, double& th, const double thend )
+{
+	// Perform exact binary search.
+
+	double l = thend;
+	double r = th;
+
+	while( true )
+	{
+		const double c = ( l + r ) * 0.5;
+
+		if( r - l < 1e-14 )
+		{
+			th = c;
+			break;
+		}
+
+		double re;
+		double im;
+		calcFIRFilterResponse( flt, fltlen, R8B_PI * c, re, im );
+		const double curg = re * re + im * im;
+
+		if( curg > maxg )
+		{
+			l = c;
+		}
+		else
+		{
+			r = c;
+		}
+	}
+}
+
+} // namespace r8b
+
+#endif // R8BUTIL_INCLUDED
author	Jef <jef@targetspot.com>	2024-09-24 08:54:57 -0400
committer	Jef <jef@targetspot.com>	2024-09-24 08:54:57 -0400
commit	20d28e80a5c861a9d5f449ea911ab75b4f37ad0d (patch)
tree	12f17f78986871dd2cfb0a56e5e93b545c1ae0d0 /Src/external_dependencies/openmpt-trunk/include/r8brain
parent	537bcbc86291b32fc04ae4133ce4d7cac8ebe9a7 (diff)
download	winamp-20d28e80a5c861a9d5f449ea911ab75b4f37ad0d.tar.gz