diff options
author | Jef <jef@targetspot.com> | 2024-09-24 08:54:57 -0400 |
---|---|---|
committer | Jef <jef@targetspot.com> | 2024-09-24 08:54:57 -0400 |
commit | 20d28e80a5c861a9d5f449ea911ab75b4f37ad0d (patch) | |
tree | 12f17f78986871dd2cfb0a56e5e93b545c1ae0d0 /Src/external_dependencies/openmpt-trunk/include/r8brain | |
parent | 537bcbc86291b32fc04ae4133ce4d7cac8ebe9a7 (diff) | |
download | winamp-20d28e80a5c861a9d5f449ea911ab75b4f37ad0d.tar.gz |
Initial community commit
Diffstat (limited to 'Src/external_dependencies/openmpt-trunk/include/r8brain')
18 files changed, 10313 insertions, 0 deletions
diff --git a/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPBlockConvolver.h b/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPBlockConvolver.h new file mode 100644 index 00000000..98297b65 --- /dev/null +++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPBlockConvolver.h @@ -0,0 +1,642 @@ +//$ nobt +//$ nocpp + +/** + * @file CDSPBlockConvolver.h + * + * @brief Single-block overlap-save convolution processor class. + * + * This file includes single-block overlap-save convolution processor class. + * + * r8brain-free-src Copyright (c) 2013-2022 Aleksey Vaneev + * See the "LICENSE" file for license. + */ + +#ifndef R8B_CDSPBLOCKCONVOLVER_INCLUDED +#define R8B_CDSPBLOCKCONVOLVER_INCLUDED + +#include "CDSPFIRFilter.h" +#include "CDSPProcessor.h" + +namespace r8b { + +/** + * @brief Single-block overlap-save convolution processing class. + * + * Class that implements single-block overlap-save convolution processing. The + * length of a single FFT block used depends on the length of the filter + * kernel. + * + * The rationale behind "single-block" processing is that increasing the FFT + * block length by 2 is more efficient than performing convolution at the same + * FFT block length but using two blocks. + * + * This class also implements a built-in resampling by any whole-number + * factor, which simplifies the overall resampling objects topology. + */ + +class CDSPBlockConvolver : public CDSPProcessor +{ +public: + /** + * Constructor initializes internal variables and constants of *this + * object. + * + * @param aFilter Pre-calculated filter data. Reference to this object is + * inhertied by *this object, and the object will be released when *this + * object is destroyed. If upsampling is used, filter's gain should be + * equal to the upsampling factor. + * @param aUpFactor The upsampling factor, positive value. E.g. value of 2 + * means 2x upsampling should be performed over the input data. + * @param aDownFactor The downsampling factor, positive value. E.g. value + * of 2 means 2x downsampling should be performed over the output data. + * @param PrevLatency Latency, in samples (any value >=0), which was left + * in the output signal by a previous process. This value is usually + * non-zero if the minimum-phase filters are in use. This value is always + * zero if the linear-phase filters are in use. + * @param aDoConsumeLatency "True" if the output latency should be + * consumed. Does not apply to the fractional part of the latency (if such + * part is available). + */ + + CDSPBlockConvolver( CDSPFIRFilter& aFilter, const int aUpFactor, + const int aDownFactor, const double PrevLatency = 0.0, + const bool aDoConsumeLatency = true ) + : Filter( &aFilter ) + , UpFactor( aUpFactor ) + , DownFactor( aDownFactor ) + , DoConsumeLatency( aDoConsumeLatency ) + , BlockLen2( 2 << Filter -> getBlockLenBits() ) + { + R8BASSERT( UpFactor > 0 ); + R8BASSERT( DownFactor > 0 ); + R8BASSERT( PrevLatency >= 0.0 ); + + int fftinBits; + UpShift = getBitOccupancy( UpFactor ) - 1; + + if(( 1 << UpShift ) == UpFactor ) + { + fftinBits = Filter -> getBlockLenBits() + 1 - UpShift; + PrevInputLen = ( Filter -> getKernelLen() - 1 + UpFactor - 1 ) / + UpFactor; + + InputLen = BlockLen2 - PrevInputLen * UpFactor; + } + else + { + UpShift = -1; + fftinBits = Filter -> getBlockLenBits() + 1; + PrevInputLen = Filter -> getKernelLen() - 1; + InputLen = BlockLen2 - PrevInputLen; + } + + OutOffset = ( Filter -> isZeroPhase() ? Filter -> getLatency() : 0 ); + LatencyFrac = Filter -> getLatencyFrac() + PrevLatency * UpFactor; + Latency = (int) LatencyFrac; + const int InLatency = Latency + Filter -> getLatency() - OutOffset; + LatencyFrac -= Latency; + LatencyFrac /= DownFactor; + + Latency += InputLen + Filter -> getLatency(); + + int fftoutBits; + InputDelay = 0; + DownSkipInit = 0; + DownShift = getBitOccupancy( DownFactor ) - 1; + + if(( 1 << DownShift ) == DownFactor ) + { + fftoutBits = Filter -> getBlockLenBits() + 1 - DownShift; + + if( DownFactor > 1 ) + { + if( UpShift > 0 ) + { + // This case never happens in practice due to mutual + // exclusion of "power of 2" DownFactor and UpFactor + // values. + + R8BASSERT( UpShift == 0 ); + } + else + { + // Make sure InputLen is divisible by DownFactor. + + const int ilc = InputLen & ( DownFactor - 1 ); + PrevInputLen += ilc; + InputLen -= ilc; + Latency -= ilc; + + // Correct InputDelay for input and filter's latency. + + const int lc = InLatency & ( DownFactor - 1 ); + + if( lc > 0 ) + { + InputDelay = DownFactor - lc; + } + + if( !DoConsumeLatency ) + { + Latency /= DownFactor; + } + } + } + } + else + { + fftoutBits = Filter -> getBlockLenBits() + 1; + DownShift = -1; + + if( !DoConsumeLatency && DownFactor > 1 ) + { + DownSkipInit = Latency % DownFactor; + Latency /= DownFactor; + } + } + + R8BASSERT( Latency >= 0 ); + + fftin = new CDSPRealFFTKeeper( fftinBits ); + + if( fftoutBits == fftinBits ) + { + fftout = fftin; + } + else + { + ffto2 = new CDSPRealFFTKeeper( fftoutBits ); + fftout = ffto2; + } + + WorkBlocks.alloc( BlockLen2 * 2 + PrevInputLen ); + CurInput = &WorkBlocks[ 0 ]; + CurOutput = &WorkBlocks[ BlockLen2 ]; // CurInput and + // CurOutput are address-aligned. + PrevInput = &WorkBlocks[ BlockLen2 * 2 ]; + + clear(); + + R8BCONSOLE( "CDSPBlockConvolver: flt_len=%i in_len=%i io=%i/%i " + "fft=%i/%i latency=%i\n", Filter -> getKernelLen(), InputLen, + UpFactor, DownFactor, (*fftin) -> getLen(), (*fftout) -> getLen(), + getLatency() ); + } + + virtual ~CDSPBlockConvolver() + { + Filter -> unref(); + } + + virtual int getLatency() const + { + return( DoConsumeLatency ? 0 : Latency ); + } + + virtual double getLatencyFrac() const + { + return( LatencyFrac ); + } + + virtual int getMaxOutLen( const int MaxInLen ) const + { + R8BASSERT( MaxInLen >= 0 ); + + return(( MaxInLen * UpFactor + DownFactor - 1 ) / DownFactor ); + } + + virtual void clear() + { + memset( &PrevInput[ 0 ], 0, PrevInputLen * sizeof( PrevInput[ 0 ])); + + if( DoConsumeLatency ) + { + LatencyLeft = Latency; + } + else + { + LatencyLeft = 0; + + if( DownShift > 0 ) + { + memset( &CurOutput[ 0 ], 0, ( BlockLen2 >> DownShift ) * + sizeof( CurOutput[ 0 ])); + } + else + { + memset( &CurOutput[ BlockLen2 - OutOffset ], 0, OutOffset * + sizeof( CurOutput[ 0 ])); + + memset( &CurOutput[ 0 ], 0, ( InputLen - OutOffset ) * + sizeof( CurOutput[ 0 ])); + } + } + + memset( CurInput, 0, InputDelay * sizeof( CurInput[ 0 ])); + + InDataLeft = InputLen - InputDelay; + UpSkip = 0; + DownSkip = DownSkipInit; + } + + virtual int process( double* ip, int l0, double*& op0 ) + { + R8BASSERT( l0 >= 0 ); + R8BASSERT( UpFactor / DownFactor <= 1 || ip != op0 || l0 == 0 ); + + double* op = op0; + int l = l0 * UpFactor; + l0 = 0; + + while( l > 0 ) + { + const int Offs = InputLen - InDataLeft; + + if( l < InDataLeft ) + { + InDataLeft -= l; + + if( UpShift >= 0 ) + { + memcpy( &CurInput[ Offs >> UpShift ], ip, + ( l >> UpShift ) * sizeof( CurInput[ 0 ])); + } + else + { + copyUpsample( ip, &CurInput[ Offs ], l ); + } + + copyToOutput( Offs - OutOffset, op, l, l0 ); + break; + } + + const int b = InDataLeft; + l -= b; + InDataLeft = InputLen; + int ilu; + + if( UpShift >= 0 ) + { + const int bu = b >> UpShift; + memcpy( &CurInput[ Offs >> UpShift ], ip, + bu * sizeof( CurInput[ 0 ])); + + ip += bu; + ilu = InputLen >> UpShift; + } + else + { + copyUpsample( ip, &CurInput[ Offs ], b ); + ilu = InputLen; + } + + const size_t pil = PrevInputLen * sizeof( CurInput[ 0 ]); + memcpy( &CurInput[ ilu ], PrevInput, pil ); + memcpy( PrevInput, &CurInput[ ilu - PrevInputLen ], pil ); + + (*fftin) -> forward( CurInput ); + + if( UpShift > 0 ) + { + #if R8B_FLOATFFT + mirrorInputSpectrum( (float*) CurInput ); + #else // R8B_FLOATFFT + mirrorInputSpectrum( CurInput ); + #endif // R8B_FLOATFFT + } + + if( Filter -> isZeroPhase() ) + { + (*fftout) -> multiplyBlocksZP( Filter -> getKernelBlock(), + CurInput ); + } + else + { + (*fftout) -> multiplyBlocks( Filter -> getKernelBlock(), + CurInput ); + } + + if( DownShift > 0 ) + { + const int z = BlockLen2 >> DownShift; + + #if R8B_FLOATFFT + float* const kb = (float*) Filter -> getKernelBlock(); + float* const p = (float*) CurInput; + #else // R8B_FLOATFFT + const double* const kb = Filter -> getKernelBlock(); + double* const p = CurInput; + #endif // R8B_FLOATFFT + + p[ 1 ] = kb[ z ] * p[ z ] - kb[ z + 1 ] * p[ z + 1 ]; + } + + (*fftout) -> inverse( CurInput ); + + copyToOutput( Offs - OutOffset, op, b, l0 ); + + double* const tmp = CurInput; + CurInput = CurOutput; + CurOutput = tmp; + } + + return( l0 ); + } + +private: + CDSPFIRFilter* Filter; ///< Filter in use. + ///< + CPtrKeeper< CDSPRealFFTKeeper* > fftin; ///< FFT object 1, used to produce + ///< the input spectrum (can embed the "power of 2" upsampling). + ///< + CPtrKeeper< CDSPRealFFTKeeper* > ffto2; ///< FFT object 2 (can be NULL). + ///< + CDSPRealFFTKeeper* fftout; ///< FFT object used to produce the output + ///< signal (can embed the "power of 2" downsampling), may point to + ///< either "fftin" or "ffto2". + ///< + int UpFactor; ///< Upsampling factor. + ///< + int DownFactor; ///< Downsampling factor. + ///< + bool DoConsumeLatency; ///< "True" if the output latency should be + ///< consumed. Does not apply to the fractional part of the latency + ///< (if such part is available). + ///< + int BlockLen2; ///< Equals block length * 2. + ///< + int OutOffset; ///< Output offset, depends on filter's introduced latency. + ///< + int PrevInputLen; ///< The length of previous input data saved, used for + ///< overlap. + ///< + int InputLen; ///< The number of input samples that should be accumulated + ///< before the input block is processed. + ///< + int Latency; ///< Processing latency, in samples. + ///< + double LatencyFrac; ///< Fractional latency, in samples, that is left in + ///< the output signal. + ///< + int UpShift; ///< "Power of 2" upsampling shift. Equals -1 if UpFactor is + ///< not a "power of 2" value. Equals 0 if UpFactor equals 1. + ///< + int DownShift; ///< "Power of 2" downsampling shift. Equals -1 if + ///< DownFactor is not a "power of 2". Equals 0 if DownFactor equals + ///< 1. + ///< + int InputDelay; ///< Additional input delay, in samples. Used to make the + ///< output delay divisible by DownShift. Used only if UpShift <= 0 + ///< and DownShift > 0. + ///< + CFixedBuffer< double > WorkBlocks; ///< Previous input data, input and + ///< output data blocks, overall capacity = BlockLen2 * 2 + + ///< PrevInputLen. Used in the flip-flop manner. + ///< + double* PrevInput; ///< Previous input data buffer, capacity = BlockLen. + ///< + double* CurInput; ///< Input data buffer, capacity = BlockLen2. + ///< + double* CurOutput; ///< Output data buffer, capacity = BlockLen2. + ///< + int InDataLeft; ///< Samples left before processing input and output FFT + ///< blocks. Initialized to InputLen on clear. + ///< + int LatencyLeft; ///< Latency in samples left to skip. + ///< + int UpSkip; ///< The current upsampling sample skip (value in the range + ///< 0 to UpFactor - 1). + ///< + int DownSkip; ///< The current downsampling sample skip (value in the + ///< range 0 to DownFactor - 1). Not used if DownShift > 0. + ///< + int DownSkipInit; ///< The initial DownSkip value after clear(). + ///< + + /** + * Function copies samples from the input buffer to the output buffer + * while inserting zeros inbetween them to perform the whole-numbered + * upsampling. + * + * @param[in,out] ip0 Input buffer. Will be advanced on function's return. + * @param[out] op Output buffer. + * @param l0 The number of samples to fill in the output buffer, including + * both input samples and interpolation (zero) samples. + */ + + void copyUpsample( double*& ip0, double* op, int l0 ) + { + int b = min( UpSkip, l0 ); + + if( b != 0 ) + { + UpSkip -= b; + l0 -= b; + + *op = 0.0; + op++; + + while( --b != 0 ) + { + *op = 0.0; + op++; + } + } + + double* ip = ip0; + const int upf = UpFactor; + int l = l0 / upf; + int lz = l0 - l * upf; + + if( upf == 3 ) + { + while( l != 0 ) + { + op[ 0 ] = *ip; + op[ 1 ] = 0.0; + op[ 2 ] = 0.0; + ip++; + op += upf; + l--; + } + } + else + if( upf == 5 ) + { + while( l != 0 ) + { + op[ 0 ] = *ip; + op[ 1 ] = 0.0; + op[ 2 ] = 0.0; + op[ 3 ] = 0.0; + op[ 4 ] = 0.0; + ip++; + op += upf; + l--; + } + } + else + { + const size_t zc = ( upf - 1 ) * sizeof( op[ 0 ]); + + while( l != 0 ) + { + *op = *ip; + ip++; + + memset( op + 1, 0, zc ); + op += upf; + l--; + } + } + + if( lz != 0 ) + { + *op = *ip; + ip++; + op++; + + UpSkip = upf - lz; + + while( --lz != 0 ) + { + *op = 0.0; + op++; + } + } + + ip0 = ip; + } + + /** + * Function copies sample data from the CurOutput buffer to the specified + * output buffer and advances its position. If necessary, this function + * "consumes" latency and performs downsampling. + * + * @param Offs CurOutput buffer offset, can be negative. + * @param[out] op0 Output buffer pointer, will be advanced. + * @param b The number of output samples available, including those which + * are discarded during whole-number downsampling. + * @param l0 The overall output sample count, will be increased. + */ + + void copyToOutput( int Offs, double*& op0, int b, int& l0 ) + { + if( Offs < 0 ) + { + if( Offs + b <= 0 ) + { + Offs += BlockLen2; + } + else + { + copyToOutput( Offs + BlockLen2, op0, -Offs, l0 ); + b += Offs; + Offs = 0; + } + } + + if( LatencyLeft != 0 ) + { + if( LatencyLeft >= b ) + { + LatencyLeft -= b; + return; + } + + Offs += LatencyLeft; + b -= LatencyLeft; + LatencyLeft = 0; + } + + const int df = DownFactor; + + if( DownShift > 0 ) + { + int Skip = Offs & ( df - 1 ); + + if( Skip > 0 ) + { + Skip = df - Skip; + b -= Skip; + Offs += Skip; + } + + if( b > 0 ) + { + b = ( b + df - 1 ) >> DownShift; + memcpy( op0, &CurOutput[ Offs >> DownShift ], + b * sizeof( op0[ 0 ])); + + op0 += b; + l0 += b; + } + } + else + { + if( df > 1 ) + { + const double* ip = &CurOutput[ Offs + DownSkip ]; + int l = ( b + df - 1 - DownSkip ) / df; + DownSkip += l * df - b; + + double* op = op0; + l0 += l; + op0 += l; + + while( l > 0 ) + { + *op = *ip; + ip += df; + op++; + l--; + } + } + else + { + memcpy( op0, &CurOutput[ Offs ], b * sizeof( op0[ 0 ])); + op0 += b; + l0 += b; + } + } + } + + /** + * Function performs input spectrum mirroring which is used to perform a + * fast "power of 2" upsampling. Such mirroring is equivalent to insertion + * of zeros into the input signal. + * + * @param p Spectrum data block to mirror. + * @tparam T Buffer's element type. + */ + + template< typename T > + void mirrorInputSpectrum( T* const p ) + { + const int bl1 = BlockLen2 >> UpShift; + const int bl2 = bl1 + bl1; + int i; + + for( i = bl1 + 2; i < bl2; i += 2 ) + { + p[ i ] = p[ bl2 - i ]; + p[ i + 1 ] = -p[ bl2 - i + 1 ]; + } + + p[ bl1 ] = p[ 1 ]; + p[ bl1 + 1 ] = 0.0; + p[ 1 ] = p[ 0 ]; + + for( i = 1; i < UpShift; i++ ) + { + const int z = bl1 << i; + memcpy( &p[ z ], p, z * sizeof( p[ 0 ])); + p[ z + 1 ] = 0.0; + } + } +}; + +} // namespace r8b + +#endif // R8B_CDSPBLOCKCONVOLVER_INCLUDED diff --git a/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPFIRFilter.h b/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPFIRFilter.h new file mode 100644 index 00000000..e2a23768 --- /dev/null +++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPFIRFilter.h @@ -0,0 +1,719 @@ +//$ nobt +//$ nocpp + +/** + * @file CDSPFIRFilter.h + * + * @brief FIR filter generator and filter cache classes. + * + * This file includes low-pass FIR filter generator and filter cache. + * + * r8brain-free-src Copyright (c) 2013-2022 Aleksey Vaneev + * See the "LICENSE" file for license. + */ + +#ifndef R8B_CDSPFIRFILTER_INCLUDED +#define R8B_CDSPFIRFILTER_INCLUDED + +#include "CDSPSincFilterGen.h" +#include "CDSPRealFFT.h" + +namespace r8b { + +/** + * Enumeration of filter's phase responses. + */ + +enum EDSPFilterPhaseResponse +{ + fprLinearPhase = 0, ///< Linear-phase response. Features a linear-phase + ///< high-latency response, with the latency expressed as integer + ///< value. + ///< + fprMinPhase ///< Minimum-phase response. Features a minimal latency + ///< response, but the response's phase is non-linear. The latency is + ///< usually expressed as non-integer value, and usually is small, but + ///< is never equal to zero. The minimum-phase filter is transformed + ///< from the linear-phase filter. The transformation has precision + ///< limits which may skew both the -3 dB point and attenuation of the + ///< filter being transformed: as it was measured, the skew happens + ///< purely at random, and in most cases it is within tolerable range. + ///< In a small (1%) random subset of cases the skew is bigger and + ///< cannot be predicted. Minimum-phase transform requires 64-bit + ///< floating point FFT precision, results with 32-bit float FFT are + ///< far from optimal. + ///< +}; + +/** + * @brief Calculation and storage class for FIR filters. + * + * Class that implements calculation and storing of a FIR filter (currently + * contains low-pass filter calculation routine designed for sample rate + * conversion). Objects of this class cannot be created directly, but can be + * obtained via the CDSPFilterCache::getLPFilter() static function. + */ + +class CDSPFIRFilter : public R8B_BASECLASS +{ + R8BNOCTOR( CDSPFIRFilter ); + + friend class CDSPFIRFilterCache; + +public: + ~CDSPFIRFilter() + { + R8BASSERT( RefCount == 0 ); + + delete Next; + } + + /** + * @return The minimal allowed low-pass filter's transition band, in + * percent. + */ + + static double getLPMinTransBand() + { + return( 0.5 ); + } + + /** + * @return The maximal allowed low-pass filter's transition band, in + * percent. + */ + + static double getLPMaxTransBand() + { + return( 45.0 ); + } + + /** + * @return The minimal allowed low-pass filter's stop-band attenuation, in + * decibel. + */ + + static double getLPMinAtten() + { + return( 49.0 ); + } + + /** + * @return The maximal allowed low-pass filter's stop-band attenuation, in + * decibel. + */ + + static double getLPMaxAtten() + { + return( 218.0 ); + } + + /** + * @return "True" if kernel block of *this filter has zero-phase response. + */ + + bool isZeroPhase() const + { + return( IsZeroPhase ); + } + + /** + * @return Filter's latency, in samples (integer part). + */ + + int getLatency() const + { + return( Latency ); + } + + /** + * @return Filter's latency, in samples (fractional part). Always zero for + * linear-phase filters. + */ + + double getLatencyFrac() const + { + return( LatencyFrac ); + } + + /** + * @return Filter kernel length, in samples. Not to be confused with the + * block length. + */ + + int getKernelLen() const + { + return( KernelLen ); + } + + /** + * @return Filter's block length, espressed as Nth power of 2. The actual + * length is twice as large due to zero-padding. + */ + + int getBlockLenBits() const + { + return( BlockLenBits ); + } + + /** + * @return Filter's kernel block, in complex-numbered form obtained via + * the CDSPRealFFT::forward() function call, zero-padded, gain-adjusted + * with the CDSPRealFFT::getInvMulConst() * ReqGain constant, immediately + * suitable for convolution. Kernel block may have "zero-phase" response, + * depending on the isZeroPhase() function's result. + */ + + const double* getKernelBlock() const + { + return( KernelBlock ); + } + + /** + * This function should be called when the filter obtained via the + * filter cache is no longer needed. + */ + + void unref(); + +private: + double ReqNormFreq; ///< Required normalized frequency, 0 to 1 inclusive. + ///< + double ReqTransBand; ///< Required transition band in percent, as passed + ///< by the user. + ///< + double ReqAtten; ///< Required stop-band attenuation in decibel, as passed + ///< by the user (positive value). + ///< + EDSPFilterPhaseResponse ReqPhase; ///< Required filter's phase response. + ///< + double ReqGain; ///< Required overall filter's gain. + ///< + CDSPFIRFilter* Next; ///< Next FIR filter in cache's list. + ///< + int RefCount; ///< The number of references made to *this FIR filter. + ///< + bool IsZeroPhase; ///< "True" if kernel block of *this filter has + ///< zero-phase response. + ///< + int Latency; ///< Filter's latency in samples (integer part). + ///< + double LatencyFrac; ///< Filter's latency in samples (fractional part). + ///< + int KernelLen; ///< Filter kernel length, in samples. + ///< + int BlockLenBits; ///< Block length used to store *this FIR filter, + ///< expressed as Nth power of 2. This value is used directly by the + ///< convolver. + ///< + CFixedBuffer< double > KernelBlock; ///< FIR filter buffer, capacity + ///< equals to 1 << ( BlockLenBits + 1 ). Second part of the buffer + ///< contains zero-padding to allow alias-free convolution. + ///< Address-aligned. + ///< + + CDSPFIRFilter() + : RefCount( 1 ) + { + } + + /** + * Function builds filter kernel based on the "Req" parameters. + * + * @param ExtAttenCorrs External attentuation correction table, for + * internal use. + */ + + void buildLPFilter( const double* const ExtAttenCorrs ) + { + const double tb = ReqTransBand * 0.01; + double pwr; + double fo1; + double hl; + double atten = -ReqAtten; + + if( tb >= 0.25 ) + { + if( ReqAtten >= 117.0 ) + { + atten -= 1.60; + } + else + if( ReqAtten >= 60.0 ) + { + atten -= 1.91; + } + else + { + atten -= 2.25; + } + } + else + if( tb >= 0.10 ) + { + if( ReqAtten >= 117.0 ) + { + atten -= 0.69; + } + else + if( ReqAtten >= 60.0 ) + { + atten -= 0.73; + } + else + { + atten -= 1.13; + } + } + else + { + if( ReqAtten >= 117.0 ) + { + atten -= 0.21; + } + else + if( ReqAtten >= 60.0 ) + { + atten -= 0.25; + } + else + { + atten -= 0.36; + } + } + + static const int AttenCorrCount = 264; + static const double AttenCorrMin = 49.0; + static const double AttenCorrDiff = 176.25; + int AttenCorr = (int) floor(( -atten - AttenCorrMin ) * + AttenCorrCount / AttenCorrDiff + 0.5 ); + + AttenCorr = min( AttenCorrCount, max( 0, AttenCorr )); + + if( ExtAttenCorrs != NULL ) + { + atten -= ExtAttenCorrs[ AttenCorr ]; + } + else + if( tb >= 0.25 ) + { + static const double AttenCorrScale = 101.0; + static const signed char AttenCorrs[] = { + -127, -127, -125, -125, -122, -119, -115, -110, -104, -97, + -91, -82, -75, -24, -16, -6, 4, 14, 24, 29, 30, 32, 37, 44, + 51, 57, 63, 67, 65, 50, 53, 56, 58, 60, 63, 64, 66, 68, 74, + 77, 78, 78, 78, 79, 79, 60, 60, 60, 61, 59, 52, 47, 41, 36, + 30, 24, 17, 9, 0, -8, -10, -11, -14, -13, -18, -25, -31, -38, + -44, -50, -57, -63, -68, -74, -81, -89, -96, -101, -104, -107, + -109, -110, -86, -84, -85, -82, -80, -77, -73, -67, -62, -55, + -48, -42, -35, -30, -20, -11, -2, 5, 6, 6, 7, 11, 16, 21, 26, + 34, 41, 46, 49, 52, 55, 56, 48, 49, 51, 51, 52, 52, 52, 52, + 52, 51, 51, 50, 47, 47, 50, 48, 46, 42, 38, 35, 31, 27, 24, + 20, 16, 12, 11, 12, 10, 8, 4, -1, -6, -11, -16, -19, -17, -21, + -24, -27, -32, -34, -37, -38, -40, -41, -40, -40, -42, -41, + -44, -45, -43, -41, -34, -31, -28, -24, -21, -18, -14, -10, + -5, -1, 2, 5, 8, 7, 4, 3, 2, 2, 4, 6, 8, 9, 9, 10, 10, 10, 10, + 9, 8, 9, 11, 14, 13, 12, 11, 10, 8, 7, 6, 5, 3, 2, 2, -1, -1, + -3, -3, -4, -4, -5, -4, -6, -7, -9, -5, -1, -1, 0, 1, 0, -2, + -3, -4, -5, -5, -8, -13, -13, -13, -12, -13, -12, -11, -11, + -9, -8, -7, -5, -3, -1, 2, 4, 6, 9, 10, 11, 14, 18, 21, 24, + 27, 30, 34, 37, 37, 39, 40 }; + + atten -= AttenCorrs[ AttenCorr ] / AttenCorrScale; + } + else + if( tb >= 0.10 ) + { + static const double AttenCorrScale = 210.0; + static const signed char AttenCorrs[] = { + -113, -118, -122, -125, -126, -97, -95, -92, -92, -89, -82, + -75, -69, -48, -42, -36, -30, -22, -14, -5, -2, 1, 6, 13, 22, + 28, 35, 41, 48, 55, 56, 56, 61, 65, 71, 77, 81, 83, 85, 85, + 74, 74, 73, 72, 71, 70, 68, 64, 59, 56, 49, 52, 46, 42, 36, + 32, 26, 20, 13, 7, -2, -6, -10, -15, -20, -27, -33, -38, -44, + -43, -48, -53, -57, -63, -69, -73, -75, -79, -81, -74, -76, + -77, -77, -78, -81, -80, -80, -78, -76, -65, -62, -59, -56, + -51, -48, -44, -38, -33, -25, -19, -13, -5, -1, 2, 7, 13, 17, + 21, 25, 30, 35, 40, 45, 50, 53, 56, 57, 55, 58, 59, 62, 64, + 67, 67, 68, 68, 62, 61, 61, 59, 59, 57, 57, 55, 52, 48, 42, + 38, 35, 31, 26, 20, 15, 13, 10, 7, 3, -2, -8, -13, -17, -23, + -28, -34, -37, -40, -41, -45, -48, -50, -53, -57, -59, -62, + -63, -63, -57, -57, -56, -56, -54, -54, -53, -49, -48, -41, + -38, -33, -31, -26, -23, -18, -12, -9, -7, -7, -3, 0, 5, 9, + 14, 16, 20, 22, 21, 23, 25, 27, 28, 29, 34, 33, 35, 33, 31, + 30, 29, 29, 26, 26, 25, 24, 20, 19, 15, 10, 8, 4, 1, -2, -6, + -10, -16, -19, -23, -26, -27, -30, -34, -39, -43, -47, -51, + -52, -54, -56, -58, -59, -62, -63, -66, -65, -65, -64, -59, + -57, -54, -52, -48, -44, -42, -37, -32, -22, -17, -10, -3, 5, + 13, 22, 30, 40, 50, 60, 72 }; + + atten -= AttenCorrs[ AttenCorr ] / AttenCorrScale; + } + else + { + static const double AttenCorrScale = 196.0; + static const signed char AttenCorrs[] = { + -15, -17, -20, -20, -20, -21, -20, -16, -17, -18, -17, -13, + -12, -11, -9, -7, -5, -4, -1, 1, 3, 4, 5, 6, 7, 9, 9, 10, 10, + 10, 11, 11, 11, 12, 12, 12, 10, 11, 10, 10, 8, 10, 11, 10, 11, + 11, 13, 14, 15, 19, 27, 26, 23, 18, 14, 8, 4, -2, -6, -12, + -17, -23, -28, -33, -37, -42, -46, -49, -53, -57, -60, -61, + -64, -65, -67, -66, -66, -66, -65, -64, -61, -59, -56, -52, + -48, -42, -38, -31, -27, -19, -13, -7, -1, 8, 14, 22, 29, 37, + 45, 52, 59, 66, 73, 80, 86, 91, 96, 100, 104, 108, 111, 114, + 115, 117, 118, 120, 120, 118, 117, 114, 113, 111, 107, 103, + 99, 95, 89, 84, 78, 72, 66, 60, 52, 44, 37, 30, 21, 14, 6, -3, + -11, -18, -26, -34, -43, -51, -58, -65, -73, -78, -85, -90, + -97, -102, -107, -113, -115, -118, -121, -125, -125, -126, + -126, -126, -125, -124, -121, -119, -115, -111, -109, -101, + -102, -95, -88, -81, -73, -67, -63, -54, -47, -40, -33, -26, + -18, -11, -5, 2, 8, 14, 19, 25, 31, 36, 37, 43, 47, 49, 51, + 52, 57, 57, 56, 57, 58, 58, 58, 57, 56, 52, 52, 50, 48, 44, + 41, 39, 37, 33, 31, 26, 24, 21, 18, 14, 11, 8, 4, 2, -2, -5, + -7, -9, -11, -13, -15, -16, -18, -19, -20, -23, -24, -24, -25, + -27, -26, -27, -29, -30, -31, -32, -35, -36, -39, -40, -44, + -46, -51, -54, -59, -63, -69, -76, -83, -91, -98 }; + + atten -= AttenCorrs[ AttenCorr ] / AttenCorrScale; + } + + pwr = 7.43932822146293e-8 * sqr( atten ) + 0.000102747434588003 * + cos( 0.00785021930010397 * atten ) * cos( 0.633854318781239 + + 0.103208573657699 * atten ) - 0.00798132247867036 - + 0.000903555213543865 * atten - 0.0969365532127236 * exp( + 0.0779275237937911 * atten ) - 1.37304948662012e-5 * atten * cos( + 0.00785021930010397 * atten ); + + if( pwr <= 0.067665322581 ) + { + if( tb >= 0.25 ) + { + hl = 2.6778150875894 / tb + 300.547590563091 * atan( atan( + 2.68959772209918 * pwr )) / ( 5.5099277187035 * tb - tb * + tanh( cos( asinh( atten )))); + + fo1 = 0.987205355829873 * tb + 1.00011788929851 * atan2( + -0.321432067051302 - 6.19131357321578 * sqrt( pwr ), + hl + -1.14861472207245 / ( hl - 14.1821147585957 ) + pow( + 0.9521145021664, pow( atan2( 1.12018764830637, tb ), + 2.10988901686912 * hl - 20.9691278378345 ))); + } + else + if( tb >= 0.10 ) + { + hl = ( 1.56688617018066 + 142.064321294568 * pwr + + 0.00419441117131136 * cos( 243.633511747297 * pwr ) - + 0.022953443903576 * atten - 0.026629568860284 * cos( + 127.715550622571 * pwr )) / tb; + + fo1 = 0.982299356642411 * tb + 0.999441744774215 * asinh(( + -0.361783054039583 - 5.80540593623676 * sqrt( pwr )) / + hl ); + } + else + { + hl = ( 2.45739657014937 + 269.183679500541 * pwr * cos( + 5.73225668178813 + atan2( cosh( 0.988861169868941 - + 17.2201556280744 * pwr ), 1.08340138240431 * pwr ))) / tb; + + fo1 = 2.291956939 * tb + 0.01942450693 * sqr( tb ) * hl - + 4.67538973161837 * pwr * tb - 1.668433124 * tb * + pow( pwr, pwr ); + } + } + else + { + if( tb >= 0.25 ) + { + hl = ( 1.50258368698213 + 158.556968859477 * asinh( pwr ) * + tanh( 57.9466246871383 * tanh( pwr )) - + 0.0105440479814834 * atten ) / tb; + + fo1 = 0.994024401639321 * tb + ( -0.236282717577215 - + 6.8724924545387 * sqrt( sin( pwr ))) / hl; + } + else + if( tb >= 0.10 ) + { + hl = ( 1.50277377248945 + 158.222625721046 * asinh( pwr ) * + tanh( 1.02875299001715 + 42.072277322604 * pwr ) - + 0.0108380943845632 * atten ) / tb; + + fo1 = 0.992539376734551 * tb + ( -0.251747813037178 - + 6.74159892452584 * sqrt( tanh( tanh( tan( pwr ))))) / hl; + } + else + { + hl = ( 1.15990238966306 * pwr - 5.02124037125213 * sqr( + pwr ) - 0.158676856669827 * atten * cos( 1.1609073390614 * + pwr - 6.33932586197475 * pwr * sqr( pwr ))) / tb; + + fo1 = 0.867344453126885 * tb + 0.052693817907757 * tb * log( + pwr ) + 0.0895511178735932 * tb * atan( 59.7538527741309 * + pwr ) - 0.0745653568081453 * pwr * tb; + } + } + + double WinParams[ 2 ]; + WinParams[ 0 ] = 125.0; + WinParams[ 1 ] = pwr; + + CDSPSincFilterGen sinc; + sinc.Len2 = 0.25 * hl / ReqNormFreq; + sinc.Freq1 = 0.0; + sinc.Freq2 = R8B_PI * ( 1.0 - fo1 ) * ReqNormFreq; + sinc.initBand( CDSPSincFilterGen :: wftKaiser, WinParams, true ); + + KernelLen = sinc.KernelLen; + BlockLenBits = getBitOccupancy( KernelLen - 1 ) + R8B_EXTFFT; + const int BlockLen = 1 << BlockLenBits; + + KernelBlock.alloc( BlockLen * 2 ); + sinc.generateBand( &KernelBlock[ 0 ], + &CDSPSincFilterGen :: calcWindowKaiser ); + + if( ReqPhase == fprLinearPhase ) + { + IsZeroPhase = true; + Latency = sinc.fl2; + LatencyFrac = 0.0; + } + else + { + IsZeroPhase = false; + double DCGroupDelay; + + calcMinPhaseTransform( &KernelBlock[ 0 ], KernelLen, 16, false, + &DCGroupDelay ); + + Latency = (int) DCGroupDelay; + LatencyFrac = DCGroupDelay - Latency; + } + + CDSPRealFFTKeeper ffto( BlockLenBits + 1 ); + + if( IsZeroPhase ) + { + // Calculate DC gain. + + double s = 0.0; + int i; + + for( i = 0; i < KernelLen; i++ ) + { + s += KernelBlock[ i ]; + } + + s = ffto -> getInvMulConst() * ReqGain / s; + + // Time-shift the filter so that zero-phase response is produced. + // Simultaneously multiply by "s". + + for( i = 0; i <= sinc.fl2; i++ ) + { + KernelBlock[ i ] = KernelBlock[ sinc.fl2 + i ] * s; + } + + for( i = 1; i <= sinc.fl2; i++ ) + { + KernelBlock[ BlockLen * 2 - i ] = KernelBlock[ i ]; + } + + memset( &KernelBlock[ sinc.fl2 + 1 ], 0, + ( BlockLen * 2 - KernelLen ) * sizeof( KernelBlock[ 0 ])); + + ffto -> forward( KernelBlock ); + ffto -> convertToZP( KernelBlock ); + } + else + { + normalizeFIRFilter( &KernelBlock[ 0 ], KernelLen, + ffto -> getInvMulConst() * ReqGain ); + + memset( &KernelBlock[ KernelLen ], 0, + ( BlockLen * 2 - KernelLen ) * sizeof( KernelBlock[ 0 ])); + + ffto -> forward( KernelBlock ); + } + + R8BCONSOLE( "CDSPFIRFilter: flt_len=%i latency=%i nfreq=%.4f " + "tb=%.1f att=%.1f gain=%.3f\n", KernelLen, Latency, + ReqNormFreq, ReqTransBand, ReqAtten, ReqGain ); + } +}; + +/** + * @brief FIR filter cache class. + * + * Class that implements cache for calculated FIR filters. The required FIR + * filter should be obtained via the getLPFilter() static function. + */ + +class CDSPFIRFilterCache : public R8B_BASECLASS +{ + R8BNOCTOR( CDSPFIRFilterCache ); + + friend class CDSPFIRFilter; + +public: + /** + * @return The number of filters present in the cache now. This value can + * be monitored for debugging "forgotten" filters. + */ + + static int getObjCount() + { + R8BSYNC( StateSync ); + + return( ObjCount ); + } + + /** + * Function calculates or returns reference to a previously calculated + * (cached) low-pass FIR filter. Note that the real transition band and + * attenuation achieved by the filter varies with the magnitude of the + * required attenuation, and are never 100% exact. + * + * @param ReqNormFreq Required normalized frequency, in the range 0 to 1, + * inclusive. This is the point after which the stop-band spans. + * @param ReqTransBand Required transition band, in percent of the + * 0 to ReqNormFreq spectral bandwidth, in the range + * CDSPFIRFilter::getLPMinTransBand() to + * CDSPFIRFilter::getLPMaxTransBand(), inclusive. The transition band + * specifies the part of the spectrum between the -3 dB and ReqNormFreq + * points. The real resulting -3 dB point varies in the range from -3.00 + * to -3.05 dB, but is generally very close to -3 dB. + * @param ReqAtten Required stop-band attenuation in decibel, in the range + * CDSPFIRFilter::getLPMinAtten() to CDSPFIRFilter::getLPMaxAtten(), + * inclusive. Note that the actual stop-band attenuation of the resulting + * filter may be 0.40-4.46 dB higher. + * @param ReqPhase Required filter's phase response. + * @param ReqGain Required overall filter's gain (1.0 for unity gain). + * @param AttenCorrs Attentuation correction table, to pass to the filter + * generation function. For internal use. + * @return A reference to a new or a previously calculated low-pass FIR + * filter object with the required characteristics. A reference count is + * incremented in the returned filter object which should be released + * after use via the CDSPFIRFilter::unref() function. + */ + + static CDSPFIRFilter& getLPFilter( const double ReqNormFreq, + const double ReqTransBand, const double ReqAtten, + const EDSPFilterPhaseResponse ReqPhase, const double ReqGain, + const double* const AttenCorrs = NULL ) + { + R8BASSERT( ReqNormFreq > 0.0 && ReqNormFreq <= 1.0 ); + R8BASSERT( ReqTransBand >= CDSPFIRFilter :: getLPMinTransBand() ); + R8BASSERT( ReqTransBand <= CDSPFIRFilter :: getLPMaxTransBand() ); + R8BASSERT( ReqAtten >= CDSPFIRFilter :: getLPMinAtten() ); + R8BASSERT( ReqAtten <= CDSPFIRFilter :: getLPMaxAtten() ); + R8BASSERT( ReqGain > 0.0 ); + + R8BSYNC( StateSync ); + + CDSPFIRFilter* PrevObj = NULL; + CDSPFIRFilter* CurObj = Objects; + + while( CurObj != NULL ) + { + if( CurObj -> ReqNormFreq == ReqNormFreq && + CurObj -> ReqTransBand == ReqTransBand && + CurObj -> ReqAtten == ReqAtten && + CurObj -> ReqPhase == ReqPhase && + CurObj -> ReqGain == ReqGain ) + { + break; + } + + if( CurObj -> Next == NULL && ObjCount >= R8B_FILTER_CACHE_MAX ) + { + if( CurObj -> RefCount == 0 ) + { + // Delete the last filter which is not used. + + PrevObj -> Next = NULL; + delete CurObj; + ObjCount--; + } + else + { + // Move the last filter to the top of the list since it + // seems to be in use for a long time. + + PrevObj -> Next = NULL; + CurObj -> Next = Objects.unkeep(); + Objects = CurObj; + } + + CurObj = NULL; + break; + } + + PrevObj = CurObj; + CurObj = CurObj -> Next; + } + + if( CurObj != NULL ) + { + CurObj -> RefCount++; + + if( PrevObj == NULL ) + { + return( *CurObj ); + } + + // Remove the filter from the list temporarily. + + PrevObj -> Next = CurObj -> Next; + } + else + { + // Create a new filter object (with RefCount == 1) and build the + // filter kernel. + + CurObj = new CDSPFIRFilter(); + CurObj -> ReqNormFreq = ReqNormFreq; + CurObj -> ReqTransBand = ReqTransBand; + CurObj -> ReqAtten = ReqAtten; + CurObj -> ReqPhase = ReqPhase; + CurObj -> ReqGain = ReqGain; + ObjCount++; + + CurObj -> buildLPFilter( AttenCorrs ); + } + + // Insert the filter at the start of the list. + + CurObj -> Next = Objects.unkeep(); + Objects = CurObj; + + return( *CurObj ); + } + +private: + static CSyncObject StateSync; ///< Cache state synchronizer. + ///< + static CPtrKeeper< CDSPFIRFilter* > Objects; ///< The chain of cached + ///< objects. + ///< + static int ObjCount; ///< The number of objects currently preset in the + ///< cache. + ///< +}; + +// --------------------------------------------------------------------------- +// CDSPFIRFilter PUBLIC +// --------------------------------------------------------------------------- + +inline void CDSPFIRFilter :: unref() +{ + R8BSYNC( CDSPFIRFilterCache :: StateSync ); + + RefCount--; +} + +// --------------------------------------------------------------------------- + +} // namespace r8b + +#endif // R8B_CDSPFIRFILTER_INCLUDED diff --git a/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPFracInterpolator.h b/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPFracInterpolator.h new file mode 100644 index 00000000..9fb770c1 --- /dev/null +++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPFracInterpolator.h @@ -0,0 +1,1180 @@ +//$ nobt +//$ nocpp + +/** + * @file CDSPFracInterpolator.h + * + * @brief Fractional delay interpolator and filter bank classes. + * + * This file includes fractional delay interpolator class. + * + * r8brain-free-src Copyright (c) 2013-2022 Aleksey Vaneev + * See the "LICENSE" file for license. + */ + +#ifndef R8B_CDSPFRACINTERPOLATOR_INCLUDED +#define R8B_CDSPFRACINTERPOLATOR_INCLUDED + +#include "CDSPSincFilterGen.h" +#include "CDSPProcessor.h" + +namespace r8b { + +#if R8B_FLTTEST + extern int InterpFilterFracs; ///< Force this number of fractional filter + ///< positions. -1 - use default. + ///< +#endif // R8B_FLTTEST + +/** + * @brief Sinc function-based fractional delay filter bank class. + * + * Class implements storage and initialization of a bank of sinc-based + * fractional delay filters, expressed as 0th, 1st, 2nd or 3rd order + * polynomial interpolation coefficients. The filters are windowed by the + * "Kaiser" power-raised window function. + */ + +class CDSPFracDelayFilterBank : public R8B_BASECLASS +{ + R8BNOCTOR( CDSPFracDelayFilterBank ); + + friend class CDSPFracDelayFilterBankCache; + +public: + /** + * Constructor. + * + * @param aFilterFracs The number of fractional delay positions to sample, + * -1 - use default. + * @param aElementSize The size of each filter's tap, in "double" values. + * This parameter corresponds to the complexity of interpolation. 4 should + * be set for 3rd order, 3 for 2nd order, 2 for linear interpolation, 1 + * for whole-numbered stepping. + * @param aInterpPoints The number of points the interpolation is based + * on. This value should not be confused with the ElementSize. Set to 2 + * for linear or no interpolation. + * @param aReqAtten Required filter attentuation. + * @param aIsThird "True" if one-third filter is required. + */ + + CDSPFracDelayFilterBank( const int aFilterFracs, const int aElementSize, + const int aInterpPoints, const double aReqAtten, const bool aIsThird ) + : InitFilterFracs( aFilterFracs ) + , ElementSize( aElementSize ) + , InterpPoints( aInterpPoints ) + , ReqAtten( aReqAtten ) + , IsThird( aIsThird ) + , Next( NULL ) + , RefCount( 1 ) + { + R8BASSERT( ElementSize >= 1 && ElementSize <= 4 ); + + // Kaiser window function Params, for half and third-band. + + const double* const Params = getWinParams( ReqAtten, IsThird, + FilterLen ); + + FilterSize = FilterLen * ElementSize; + + if( InitFilterFracs == -1 ) + { + FilterFracs = (int) ceil( pow( 6.4, ReqAtten / 50.0 )); + + #if R8B_FLTTEST + + if( InterpFilterFracs != -1 ) + { + FilterFracs = InterpFilterFracs; + } + + #endif // R8B_FLTTEST + } + else + { + FilterFracs = InitFilterFracs; + } + + Table.alloc( FilterSize * ( FilterFracs + InterpPoints )); + + CDSPSincFilterGen sinc; + sinc.Len2 = FilterLen / 2; + + double* p = Table; + const int pc2 = InterpPoints / 2; + int i; + + for( i = -pc2 + 1; i <= FilterFracs + pc2; i++ ) + { + sinc.FracDelay = (double) ( FilterFracs - i ) / FilterFracs; + sinc.initFrac( CDSPSincFilterGen :: wftKaiser, Params, true ); + sinc.generateFrac( p, &CDSPSincFilterGen :: calcWindowKaiser, + ElementSize ); + + normalizeFIRFilter( p, FilterLen, 1.0, ElementSize ); + p += FilterSize; + } + + const int TablePos2 = FilterSize; + const int TablePos3 = FilterSize * 2; + const int TablePos4 = FilterSize * 3; + const int TablePos5 = FilterSize * 4; + const int TablePos6 = FilterSize * 5; + const int TablePos7 = FilterSize * 6; + const int TablePos8 = FilterSize * 7; + double* const TableEnd = Table + ( FilterFracs + 1 ) * FilterSize; + p = Table; + + if( InterpPoints == 8 ) + { + if( ElementSize == 3 ) + { + // Calculate 2nd order spline (polynomial) interpolation + // coefficients using 8 points. + + while( p < TableEnd ) + { + calcSpline2p8Coeffs( p, p[ 0 ], p[ TablePos2 ], + p[ TablePos3 ], p[ TablePos4 ], p[ TablePos5 ], + p[ TablePos6 ], p[ TablePos7 ], p[ TablePos8 ]); + + p += ElementSize; + } + + #if defined( R8B_SIMD_ISH ) + shuffle2_3( Table, TableEnd ); + #endif // SIMD + } + else + if( ElementSize == 4 ) + { + // Calculate 3rd order spline (polynomial) interpolation + // coefficients using 8 points. + + while( p < TableEnd ) + { + calcSpline3p8Coeffs( p, p[ 0 ], p[ TablePos2 ], + p[ TablePos3 ], p[ TablePos4 ], p[ TablePos5 ], + p[ TablePos6 ], p[ TablePos7 ], p[ TablePos8 ]); + + p += ElementSize; + } + + #if defined( R8B_SIMD_ISH ) + shuffle2_4( Table, TableEnd ); + #endif // SIMD + } + } + else + { + if( ElementSize == 2 ) + { + // Calculate linear interpolation coefficients. + + while( p < TableEnd ) + { + p[ 1 ] = p[ TablePos2 ] - p[ 0 ]; + p += ElementSize; + } + + #if defined( R8B_SIMD_ISH ) + shuffle2_2( Table, TableEnd ); + #endif // SIMD + } + } + + R8BCONSOLE( "CDSPFracDelayFilterBank: fracs=%i order=%i taps=%i " + "att=%.1f third=%i\n", FilterFracs, ElementSize - 1, FilterLen, + ReqAtten, (int) IsThird ); + } + + ~CDSPFracDelayFilterBank() + { + delete Next; + } + + /** + * Function "rounds" the specified attenuation to the nearest effective + * value. + * + * @param[in,out] att Required filter attentuation. Will be rounded to the + * nearest value. + * @param aIsThird "True" if one-third filter is required. + */ + + static void roundReqAtten( double& att, const bool aIsThird ) + { + int tmp; + getWinParams( att, aIsThird, tmp ); + } + + /** + * Function returns the length of the filter. Always an even number, not + * less than 6. + */ + + int getFilterLen() const + { + return( FilterLen ); + } + + /** + * Function returns the number of fractional positions sampled by the + * bank. + */ + + int getFilterFracs() const + { + return( FilterFracs ); + } + + /** + * @param i Filter index, in the range 0 to FilterFracs, inclusive. + * @return Reference to the filter. + */ + + const double& operator []( const int i ) const + { + R8BASSERT( i >= 0 && i <= FilterFracs ); + + return( Table[ i * FilterSize ]); + } + + /** + * This function should be called when the filter bank obtained via the + * filter bank cache is no longer needed. + */ + + void unref(); + +private: + int FilterLen; ///< Filter length. Always an even number, not less than 6. + ///< + int FilterFracs; ///< Fractional position count. + ///< + int InitFilterFracs; ///< Fractional position count as supplied to the + ///< constructor, may equal -1. + ///< + int ElementSize; ///< Filter element size. + ///< + int InterpPoints; ///< Interpolation points to use. + ///< + double ReqAtten; ///< Filter's attentuation. + ///< + bool IsThird; ///< "True" if one-third filter is in use. + ///< + int FilterSize; ///< This constant specifies the "size" of a single filter + ///< in "double" elements. + ///< + CFixedBuffer< double > Table; ///< The table of fractional delay filters + ///< for all discrete fractional x = 0..1 sample positions, and + ///< interpolation coefficients. + ///< + CDSPFracDelayFilterBank* Next; ///< Next filter bank in cache's list. + ///< + int RefCount; ///< The number of references made to *this filter bank. + ///< Not considered for "static" filter bank objects. + ///< + + /** + * Function returns windowing function parameters for the specified + * attenuation and filter type. + * + * @param[in,out] att Required filter attentuation. Will be rounded to the + * nearest value. + * @param aIsThird "True" if one-third filter is required. + * @param[out] fltlen Resulting filter length. + */ + + static const double* getWinParams( double& att, const bool aIsThird, + int& fltlen ) + { + static const int Coeffs2Base = 8; + static const int Coeffs2Count = 12; + static const double Coeffs2[ Coeffs2Count ][ 3 ] = { + { 4.1308468534586913, 1.1752580009977263, 55.5446 }, // 0.0256 + { 4.4241520324148826, 1.8004881791443044, 81.4191 }, // 0.0886 + { 5.2615232289173663, 1.8133318236025469, 96.3392 }, // 0.0481 + { 5.9433751227216174, 1.8730186391986436, 111.1315 }, // 0.0264 + { 6.8308658290513815, 1.8549555110340281, 125.4653 }, // 0.0146 + { 7.6648458290312904, 1.8565766090828464, 139.7379 }, // 0.0081 + { 8.2038728664307605, 1.9269521820570166, 154.0532 }, // 0.0045 + { 8.7865150946655142, 1.9775307667441668, 168.2101 }, // 0.0025 + { 9.5945017884101773, 1.9718456992078597, 182.1076 }, // 0.0014 + { 10.5163141145985240, 1.9504067820201083, 195.5668 }, // 0.0008 + { 10.2382465206362470, 2.1608923446870087, 209.0610 }, // 0.0004 + { 10.9976060250714000, 2.1536533525688935, 222.5010 }, // 0.0003 + }; + + static const int Coeffs3Base = 6; + static const int Coeffs3Count = 10; + static const double Coeffs3[ Coeffs3Count ][ 3 ] = { + { 3.9888564562781847, 1.5869927184268915, 66.5701 }, // 0.0467 + { 4.6986694038145007, 1.8086068597928262, 86.4715 }, // 0.0136 + { 5.5995071329337822, 1.8930163360942349, 106.1195 }, // 0.0040 + { 6.3627287800257228, 1.9945748322093975, 125.2307 }, // 0.0012 + { 7.4299550711428308, 1.9893400572347544, 144.3469 }, // 0.0004 + { 8.0667715944075642, 2.0928201458699909, 163.4099 }, // 0.0001 + { 8.7469970226288822, 2.1640279784268355, 181.0694 }, // 0.0000 + { 10.0823430069835230, 2.0896678025321922, 199.2880 }, // 0.0000 + { 10.9222206090489510, 2.1221681162186004, 216.6865 }, // 0.0000 + { 21.2017743894772010, 1.1856768080118900, 233.9188 }, // 0.0000 + }; + + const double* Params; + int i = 0; + + if( aIsThird ) + { + while( i != Coeffs3Count - 1 && Coeffs3[ i ][ 2 ] < att ) + { + i++; + } + + Params = &Coeffs3[ i ][ 0 ]; + att = Coeffs3[ i ][ 2 ]; + fltlen = Coeffs3Base + i * 2; + } + else + { + while( i != Coeffs2Count - 1 && Coeffs2[ i ][ 2 ] < att ) + { + i++; + } + + Params = &Coeffs2[ i ][ 0 ]; + att = Coeffs2[ i ][ 2 ]; + fltlen = Coeffs2Base + i * 2; + } + + return( Params ); + } + + /** + * Function shuffles 2 order-2 filter points for SIMD operation. + * + * @param p Filter table start pointer. + * @param pe Filter table end pointer. + */ + + static void shuffle2_2( double* p, double* const pe ) + { + while( p != pe ) + { + const double t = p[ 2 ]; + p[ 2 ] = p[ 1 ]; + p[ 1 ] = t; + + p += 4; + } + } + + /** + * Function shuffles 2 order-3 filter points for SIMD operation. + * + * @param p Filter table start pointer. + * @param pe Filter table end pointer. + */ + + static void shuffle2_3( double* p, double* const pe ) + { + while( p != pe ) + { + const double t1 = p[ 1 ]; + const double t2 = p[ 2 ]; + const double t3 = p[ 3 ]; + const double t4 = p[ 4 ]; + p[ 1 ] = t3; + p[ 2 ] = t1; + p[ 3 ] = t4; + p[ 4 ] = t2; + + p += 6; + } + } + + /** + * Function shuffles 2 order-4 filter points for SIMD operation. + * + * @param p Filter table start pointer. + * @param pe Filter table end pointer. + */ + + static void shuffle2_4( double* p, double* const pe ) + { + while( p != pe ) + { + const double t1 = p[ 1 ]; + const double t2 = p[ 2 ]; + const double t3 = p[ 3 ]; + const double t4 = p[ 4 ]; + const double t5 = p[ 5 ]; + const double t6 = p[ 6 ]; + p[ 1 ] = t4; + p[ 2 ] = t1; + p[ 3 ] = t5; + p[ 4 ] = t2; + p[ 5 ] = t6; + p[ 6 ] = t3; + + p += 8; + } + } +}; + +/** + * @brief Fractional delay filter cache class. + * + * Class implements cache storage of fractional delay filter banks. + */ + +class CDSPFracDelayFilterBankCache : public R8B_BASECLASS +{ + R8BNOCTOR( CDSPFracDelayFilterBankCache ); + + friend class CDSPFracDelayFilterBank; + +public: + /** + * @return The number of filters present in the cache now. This value can + * be monitored for debugging "forgotten" filters. + */ + + static int getObjCount() + { + R8BSYNC( StateSync ); + + return( ObjCount ); + } + + /** + * Function calculates or returns reference to a previously calculated + * (cached) fractional delay filter bank. + * + * @param aFilterFracs The number of fractional delay positions to sample, + * -1 - use default. + * @param aElementSize The size of each filter's tap, in "double" values. + * @param aInterpPoints The number of points the interpolation is based + * on. + * @param ReqAtten Required filter attentuation. + * @param IsThird "True" if one-third filter is required. + * @param IsStatic "True" if a permanent static filter should be returned + * that is never removed from the cache until application terminates. + */ + + static CDSPFracDelayFilterBank& getFilterBank( const int aFilterFracs, + const int aElementSize, const int aInterpPoints, + double ReqAtten, const bool IsThird, const bool IsStatic ) + { + CDSPFracDelayFilterBank :: roundReqAtten( ReqAtten, IsThird ); + + R8BSYNC( StateSync ); + + if( IsStatic ) + { + CDSPFracDelayFilterBank* CurObj = StaticObjects; + + while( CurObj != NULL ) + { + if( CurObj -> InitFilterFracs == aFilterFracs && + CurObj -> ElementSize == aElementSize && + CurObj -> InterpPoints == aInterpPoints && + CurObj -> ReqAtten == ReqAtten && + CurObj -> IsThird == IsThird ) + { + return( *CurObj ); + } + + CurObj = CurObj -> Next; + } + + // Create a new filter bank and build it. + + CurObj = new CDSPFracDelayFilterBank( aFilterFracs, aElementSize, + aInterpPoints, ReqAtten, IsThird ); + + // Insert the bank at the start of the list. + + CurObj -> Next = StaticObjects.unkeep(); + StaticObjects = CurObj; + + return( *CurObj ); + } + + CDSPFracDelayFilterBank* PrevObj = NULL; + CDSPFracDelayFilterBank* CurObj = Objects; + + while( CurObj != NULL ) + { + if( CurObj -> InitFilterFracs == aFilterFracs && + CurObj -> ElementSize == aElementSize && + CurObj -> InterpPoints == aInterpPoints && + CurObj -> ReqAtten == ReqAtten && + CurObj -> IsThird == IsThird ) + { + break; + } + + if( CurObj -> Next == NULL && ObjCount >= R8B_FRACBANK_CACHE_MAX ) + { + if( CurObj -> RefCount == 0 ) + { + // Delete the last bank which is not used. + + PrevObj -> Next = NULL; + delete CurObj; + ObjCount--; + } + else + { + // Move the last bank to the top of the list since it + // seems to be in use for a long time. + + PrevObj -> Next = NULL; + CurObj -> Next = Objects.unkeep(); + Objects = CurObj; + } + + CurObj = NULL; + break; + } + + PrevObj = CurObj; + CurObj = CurObj -> Next; + } + + if( CurObj != NULL ) + { + CurObj -> RefCount++; + + if( PrevObj == NULL ) + { + return( *CurObj ); + } + + // Remove the bank from the list temporarily. + + PrevObj -> Next = CurObj -> Next; + } + else + { + // Create a new filter bank (with RefCount == 1) and build it. + + CurObj = new CDSPFracDelayFilterBank( aFilterFracs, aElementSize, + aInterpPoints, ReqAtten, IsThird ); + + ObjCount++; + } + + // Insert the bank at the start of the list. + + CurObj -> Next = Objects.unkeep(); + Objects = CurObj; + + return( *CurObj ); + } + +private: + static CSyncObject StateSync; ///< Cache state synchronizer. + ///< + static CPtrKeeper< CDSPFracDelayFilterBank* > Objects; ///< The chain of + ///< cached objects. + ///< + static CPtrKeeper< CDSPFracDelayFilterBank* > StaticObjects; ///< The + ///< chain of static objects. + ///< + static int ObjCount; ///< The number of objects currently preset in the + ///< Objects cache. + ///< +}; + +// --------------------------------------------------------------------------- +// CDSPFracDelayFilterBank PUBLIC +// --------------------------------------------------------------------------- + +inline void CDSPFracDelayFilterBank :: unref() +{ + R8BSYNC( CDSPFracDelayFilterBankCache :: StateSync ); + + RefCount--; +} + +/** + * @param l Number 1. + * @param s Number 2. + * @param[out] GCD Resulting GCD. + * @return "True" if the greatest common denominator of 2 numbers was + * found. + */ + +inline bool findGCD( double l, double s, double& GCD ) +{ + int it = 0; + + while( it < 50 ) + { + if( s <= 0.0 ) + { + GCD = l; + return( true ); + } + + const double r = l - s; + l = s; + s = ( r < 0.0 ? -r : r ); + it++; + } + + return( false ); +} + +/** + * Function evaluates source and destination sample rate ratio and returns + * the required input and output stepping. Function returns "false" if + * whole stepping cannot be used to perform interpolation using these sample + * rates. + * + * @param SSampleRate Source sample rate. + * @param DSampleRate Destination sample rate. + * @param[out] ResInStep Resulting input step. + * @param[out] ResOutStep Resulting output step. + * @return "True" if stepping was acquired. + */ + +inline bool getWholeStepping( const double SSampleRate, + const double DSampleRate, int& ResInStep, int& ResOutStep ) +{ + double GCD; + + if( !findGCD( SSampleRate, DSampleRate, GCD ) || GCD < 1.0 ) + { + return( false ); + } + + const double InStep0 = SSampleRate / GCD; + ResInStep = (int) InStep0; + const double OutStep0 = DSampleRate / GCD; + ResOutStep = (int) OutStep0; + + if( InStep0 != ResInStep || OutStep0 != ResOutStep ) + { + return( false ); + } + + if( ResOutStep > 1500 ) + { + // Do not allow large output stepping due to low cache + // performance of large filter banks. + + return( false ); + } + + return( true ); +} + +/** + * @brief Fractional delay filter bank-based interpolator class. + * + * Class implements the fractional delay interpolator. This implementation at + * first puts the input signal into a ring buffer and then performs + * interpolation. The interpolation is performed using sinc-based fractional + * delay filters. These filters are contained in a bank, and for higher + * precision they are interpolated between adjacent filters. + * + * To increase sample timing precision, this class uses "resettable counter" + * approach. This gives zero overall sample timing error. With the + * R8B_FASTTIMING configuration option enabled, the sample timing experiences + * a very minor drift. + */ + +class CDSPFracInterpolator : public CDSPProcessor +{ +public: + /** + * Constructor initalizes the interpolator. It is important to call the + * getMaxOutLen() function afterwards to obtain the optimal output buffer + * length. + * + * @param aSrcSampleRate Source sample rate. + * @param aDstSampleRate Destination sample rate. + * @param ReqAtten Required filter attentuation. + * @param IsThird "True" if one-third filter is required. + * @param PrevLatency Latency, in samples (any value >=0), which was left + * in the output signal by a previous process. This latency will be + * consumed completely. + */ + + CDSPFracInterpolator( const double aSrcSampleRate, + const double aDstSampleRate, const double ReqAtten, + const bool IsThird, const double PrevLatency ) + : SrcSampleRate( aSrcSampleRate ) + , DstSampleRate( aDstSampleRate ) + #if R8B_FASTTIMING + , FracStep( aSrcSampleRate / aDstSampleRate ) + #endif // R8B_FASTTIMING + { + R8BASSERT( SrcSampleRate > 0.0 ); + R8BASSERT( DstSampleRate > 0.0 ); + R8BASSERT( PrevLatency >= 0.0 ); + R8BASSERT( BufLenBits >= 5 ); + + InitFracPos = PrevLatency; + Latency = (int) InitFracPos; + InitFracPos -= Latency; + + R8BASSERT( Latency >= 0 ); + + #if R8B_FLTTEST + + IsWhole = false; + LatencyFrac = 0.0; + FilterBank = new CDSPFracDelayFilterBank( -1, 3, 8, ReqAtten, + IsThird ); + + #else // R8B_FLTTEST + + IsWhole = getWholeStepping( SrcSampleRate, DstSampleRate, InStep, + OutStep ); + + if( IsWhole ) + { + InitFracPosW = (int) ( InitFracPos * OutStep ); + LatencyFrac = InitFracPos - (double) InitFracPosW / OutStep; + FilterBank = &CDSPFracDelayFilterBankCache :: getFilterBank( + OutStep, 1, 2, ReqAtten, IsThird, false ); + } + else + { + LatencyFrac = 0.0; + FilterBank = &CDSPFracDelayFilterBankCache :: getFilterBank( + -1, 3, 8, ReqAtten, IsThird, true ); + } + + #endif // R8B_FLTTEST + + FilterLen = FilterBank -> getFilterLen(); + fl2 = FilterLen >> 1; + fll = fl2 - 1; + flo = fll + fl2; + flb = BufLen - fll; + + R8BASSERT(( 1 << BufLenBits ) >= FilterLen * 3 ); + + static const CConvolveFn FltConvFn0[ 13 ] = { + &CDSPFracInterpolator :: convolve0< 6 >, + &CDSPFracInterpolator :: convolve0< 8 >, + &CDSPFracInterpolator :: convolve0< 10 >, + &CDSPFracInterpolator :: convolve0< 12 >, + &CDSPFracInterpolator :: convolve0< 14 >, + &CDSPFracInterpolator :: convolve0< 16 >, + &CDSPFracInterpolator :: convolve0< 18 >, + &CDSPFracInterpolator :: convolve0< 20 >, + &CDSPFracInterpolator :: convolve0< 22 >, + &CDSPFracInterpolator :: convolve0< 24 >, + &CDSPFracInterpolator :: convolve0< 26 >, + &CDSPFracInterpolator :: convolve0< 28 >, + &CDSPFracInterpolator :: convolve0< 30 > + }; + + convfn = ( IsWhole ? FltConvFn0[ fl2 - 3 ] : + &CDSPFracInterpolator :: convolve2 ); + + R8BCONSOLE( "CDSPFracInterpolator: src=%.2f dst=%.2f taps=%i " + "fracs=%i whole=%i third=%i step=%.6f\n", SrcSampleRate, + DstSampleRate, FilterLen, ( IsWhole ? OutStep : + FilterBank -> getFilterFracs() ), (int) IsWhole, (int) IsThird, + aSrcSampleRate / aDstSampleRate ); + + clear(); + } + + virtual ~CDSPFracInterpolator() + { + #if R8B_FLTTEST + + delete FilterBank; + + #else // R8B_FLTTEST + + FilterBank -> unref(); + + #endif // R8B_FLTTEST + } + + virtual int getLatency() const + { + return( 0 ); + } + + virtual double getLatencyFrac() const + { + return( LatencyFrac ); + } + + virtual int getMaxOutLen( const int MaxInLen ) const + { + R8BASSERT( MaxInLen >= 0 ); + + return( (int) ceil( MaxInLen * DstSampleRate / SrcSampleRate ) + 1 ); + } + + virtual void clear() + { + LatencyLeft = Latency; + BufLeft = 0; + WritePos = 0; + ReadPos = flb; // Set "read" position to account for filter's + // latency at zero fractional delay. + + memset( &Buf[ ReadPos ], 0, ( BufLen - flb ) * sizeof( Buf[ 0 ])); + + if( IsWhole ) + { + InPosFracW = InitFracPosW; + } + else + { + InPosFrac = InitFracPos; + + #if !R8B_FASTTIMING + InCounter = 0; + InPosInt = 0; + InPosShift = InitFracPos * DstSampleRate / SrcSampleRate; + #endif // !R8B_FASTTIMING + } + } + + virtual int process( double* ip, int l, double*& op0 ) + { + R8BASSERT( l >= 0 ); + R8BASSERT( ip != op0 || l == 0 || SrcSampleRate > DstSampleRate ); + + if( LatencyLeft != 0 ) + { + if( LatencyLeft >= l ) + { + LatencyLeft -= l; + return( 0 ); + } + + l -= LatencyLeft; + ip += LatencyLeft; + LatencyLeft = 0; + } + + double* op = op0; + + while( l > 0 ) + { + // Add new input samples to both halves of the ring buffer. + + const int b = min( min( l, BufLen - WritePos ), flb - BufLeft ); + + double* const wp1 = Buf + WritePos; + memcpy( wp1, ip, b * sizeof( wp1[ 0 ])); + + if( WritePos < flo ) + { + const int c = min( b, flo - WritePos ); + memcpy( wp1 + BufLen, wp1, c * sizeof( wp1[ 0 ])); + } + + ip += b; + WritePos = ( WritePos + b ) & BufLenMask; + l -= b; + BufLeft += b; + + // Produce as many output samples as possible. + + op = ( *this.*convfn )( op ); + } + + #if !R8B_FASTTIMING + + if( !IsWhole && InCounter > 1000 ) + { + // Reset the interpolation position counter to achieve a + // higher sample timing precision. + + InCounter = 0; + InPosInt = 0; + InPosShift = InPosFrac * DstSampleRate / SrcSampleRate; + } + + #endif // !R8B_FASTTIMING + + return( (int) ( op - op0 )); + } + +private: + static const int BufLenBits = 8; ///< The length of the ring buffer, + ///< expressed as Nth power of 2. This value can be reduced if it is + ///< known that only short input buffers will be passed to the + ///< interpolator. The minimum value of this parameter is 5, and + ///< 1 << BufLenBits should be at least 3 times larger than the + ///< FilterLen. However, this condition can be easily met if the input + ///< signal is suitably downsampled first before the interpolation is + ///< performed. + ///< + static const int BufLen = 1 << BufLenBits; ///< The length of the ring + ///< buffer. The actual length is twice as long to allow "beyond max + ///< position" positioning. + ///< + static const int BufLenMask = BufLen - 1; ///< Mask used for quick buffer + ///< position wrapping. + ///< + int FilterLen; ///< Filter length, in taps. Even value. + ///< + int fl2; ///< Right-side (half) filter length. + ///< + int fll; ///< Input latency. + ///< + int flo; ///< Overrun length. + ///< + int flb; ///< Initial read position and maximal buffer write length. + ///< + double Buf[ BufLen + 29 ]; ///< The ring buffer, including overrun + ///< protection for maximal filter length. + ///< + double SrcSampleRate; ///< Source sample rate. + ///< + double DstSampleRate; ///< Destination sample rate. + ///< + bool IsWhole; ///< "True" if whole-number stepping is in use. + ///< + int InStep; ///< Input whole-number stepping. + ///< + int OutStep; ///< Output whole-number stepping (corresponds to filter bank + ///< size). + ///< + double InitFracPos; ///< Initial fractional position, in samples, in the + ///< range [0; 1). + ///< + int InitFracPosW; ///< Initial fractional position for whole-number + ///< stepping. + ///< + int Latency; ///< Initial latency that should be removed from the input. + ///< + double LatencyFrac; ///< Left-over fractional latency. + ///< + int BufLeft; ///< The number of samples left in the buffer to process. + ///< + int WritePos; ///< The current buffer write position. Incremented together + ///< with the BufLeft variable. + ///< + int ReadPos; ///< The current buffer read position. + ///< + int LatencyLeft; ///< Input latency left to remove. + ///< + double InPosFrac; ///< Interpolation position (fractional part). + ///< + int InPosFracW; ///< Interpolation position (fractional part) for + ///< whole-number stepping. Corresponds to the index into the filter + ///< bank. + ///< + CDSPFracDelayFilterBank* FilterBank; ///< Filter bank in use, may be + ///< whole-number stepping filter bank or static bank. + ///< +#if R8B_FASTTIMING + double FracStep; ///< Fractional sample timing step. +#else // R8B_FASTTIMING + int InCounter; ///< Interpolation step counter. + ///< + int InPosInt; ///< Interpolation position (integer part). + ///< + double InPosShift; ///< Interpolation position fractional shift. + ///< +#endif // R8B_FASTTIMING + + typedef double*( CDSPFracInterpolator :: *CConvolveFn )( double* op ); ///< + ///< Convolution function type. + ///< + CConvolveFn convfn; ///< Convolution function in use. + ///< + + /** + * Convolution function for 0th order resampling. + * + * @param[out] op Output buffer. + * @return Advanced "op" value. + * @tparam fltlen Filter length, in taps. + */ + + template< int fltlen > + double* convolve0( double* op ) + { + while( BufLeft > fl2 ) + { + const double* const ftp = &(*FilterBank)[ InPosFracW ]; + const double* const rp = Buf + ReadPos; + int i; + + #if defined( R8B_SSE2 ) && !defined( __INTEL_COMPILER ) + + __m128d s = _mm_setzero_pd(); + + for( i = 0; i < fltlen; i += 2 ) + { + const __m128d m = _mm_mul_pd( _mm_load_pd( ftp + i ), + _mm_loadu_pd( rp + i )); + + s = _mm_add_pd( s, m ); + } + + _mm_storel_pd( op, _mm_add_pd( s, _mm_shuffle_pd( s, s, 1 ))); + + #elif defined( R8B_NEON ) + + float64x2_t s = vdupq_n_f64( 0.0 ); + + for( i = 0; i < fltlen; i += 2 ) + { + s = vmlaq_f64( s, vld1q_f64( ftp + i ), vld1q_f64( rp + i )); + } + + *op = vaddvq_f64( s ); + + #else // SIMD + + double s = 0.0; + + for( i = 0; i < fltlen; i++ ) + { + s += ftp[ i ] * rp[ i ]; + } + + *op = s; + + #endif // SIMD + + op++; + + InPosFracW += InStep; + const int PosIncr = InPosFracW / OutStep; + InPosFracW -= PosIncr * OutStep; + + ReadPos = ( ReadPos + PosIncr ) & BufLenMask; + BufLeft -= PosIncr; + } + + return( op ); + } + + /** + * Convolution function for 2nd order resampling. + * + * @param[out] op Output buffer. + * @return Advanced "op" value. + */ + + double* convolve2( double* op ) + { + const CDSPFracDelayFilterBank& fb = *FilterBank; + const int fltlen = FilterLen; + + while( BufLeft > fl2 ) + { + double x = InPosFrac * fb.getFilterFracs(); + const int fti = (int) x; // Function table index. + x -= fti; // Coefficient for interpolation between + // adjacent fractional delay filters. + const double x2d = x * x; + const double* ftp = &fb[ fti ]; + const double* const rp = Buf + ReadPos; + int i; + + #if defined( R8B_SSE2 ) && defined( R8B_SIMD_ISH ) + + const __m128d x1 = _mm_set1_pd( x ); + const __m128d x2 = _mm_set1_pd( x2d ); + __m128d s = _mm_setzero_pd(); + + for( i = 0; i < fltlen; i += 2 ) + { + const __m128d ftp2 = _mm_load_pd( ftp + 2 ); + const __m128d xx1 = _mm_mul_pd( ftp2, x1 ); + const __m128d ftp4 = _mm_load_pd( ftp + 4 ); + const __m128d xx2 = _mm_mul_pd( ftp4, x2 ); + const __m128d ftp0 = _mm_load_pd( ftp ); + ftp += 6; + + const __m128d rpi = _mm_loadu_pd( rp + i ); + const __m128d xxs = _mm_add_pd( ftp0, _mm_add_pd( xx1, xx2 )); + + s = _mm_add_pd( s, _mm_mul_pd( rpi, xxs )); + } + + _mm_storel_pd( op, _mm_add_pd( s, _mm_shuffle_pd( s, s, 1 ))); + + #elif defined( R8B_NEON ) && defined( R8B_SIMD_ISH ) + + const float64x2_t x1 = vdupq_n_f64( x ); + const float64x2_t x2 = vdupq_n_f64( x2d ); + float64x2_t s = vdupq_n_f64( 0.0 ); + + for( i = 0; i < fltlen; i += 2 ) + { + const float64x2_t ftp2 = vld1q_f64( ftp + 2 ); + const float64x2_t xx1 = vmulq_f64( ftp2, x1 ); + const float64x2_t ftp4 = vld1q_f64( ftp + 4 ); + const float64x2_t xx2 = vmulq_f64( ftp4, x2 ); + const float64x2_t ftp0 = vld1q_f64( ftp ); + ftp += 6; + + const float64x2_t rpi = vld1q_f64( rp + i ); + const float64x2_t xxs = vaddq_f64( ftp0, + vaddq_f64( xx1, xx2 )); + + s = vmlaq_f64( s, rpi, xxs ); + } + + *op = vaddvq_f64( s ); + + #else // SIMD + + double s = 0.0; + + for( i = 0; i < fltlen; i++ ) + { + s += ( ftp[ 0 ] + ftp[ 1 ] * x + ftp[ 2 ] * x2d ) * rp[ i ]; + ftp += 3; + } + + *op = s; + + #endif // SIMD + + op++; + + #if R8B_FASTTIMING + + InPosFrac += FracStep; + const int PosIncr = (int) InPosFrac; + InPosFrac -= PosIncr; + + #else // R8B_FASTTIMING + + InCounter++; + const double NextInPos = ( InCounter + InPosShift ) * + SrcSampleRate / DstSampleRate; + + const int NextInPosInt = (int) NextInPos; + const int PosIncr = NextInPosInt - InPosInt; + InPosInt = NextInPosInt; + InPosFrac = NextInPos - NextInPosInt; + + #endif // R8B_FASTTIMING + + ReadPos = ( ReadPos + PosIncr ) & BufLenMask; + BufLeft -= PosIncr; + } + + return( op ); + } +}; + +// --------------------------------------------------------------------------- + +} // namespace r8b + +#endif // R8B_CDSPFRACINTERPOLATOR_INCLUDED diff --git a/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPHBDownsampler.h b/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPHBDownsampler.h new file mode 100644 index 00000000..2a10f421 --- /dev/null +++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPHBDownsampler.h @@ -0,0 +1,401 @@ +//$ nobt +//$ nocpp + +/** + * @file CDSPHBDownsampler.h + * + * @brief Half-band downsampling convolver class. + * + * This file includes half-band downsampling convolver class. + * + * r8brain-free-src Copyright (c) 2013-2022 Aleksey Vaneev + * See the "LICENSE" file for license. + */ + +#ifndef R8B_CDSPHBDOWNSAMPLER_INCLUDED +#define R8B_CDSPHBDOWNSAMPLER_INCLUDED + +#include "CDSPHBUpsampler.h" + +namespace r8b { + +/** + * @brief Half-band downsampler class. + * + * Class implements brute-force half-band 2X downsampling that uses small + * sparse symmetric FIR filters. The output has 2.0 gain. + */ + +class CDSPHBDownsampler : public CDSPProcessor +{ +public: + /** + * Constructor initalizes the half-band downsampler. + * + * @param ReqAtten Required half-band filter attentuation. + * @param SteepIndex Steepness index - 0=steepest. Corresponds to general + * downsampling ratio, e.g. at 4x downsampling 0 is used, at 8x + * downsampling 1 is used, etc. + * @param IsThird "True" if 1/3 resampling is performed. + * @param PrevLatency Latency, in samples (any value >=0), which was left + * in the output signal by a previous process. Whole-number latency will + * be consumed by *this object while remaining fractional latency can be + * obtained via the getLatencyFrac() function. + */ + + CDSPHBDownsampler( const double ReqAtten, const int SteepIndex, + const bool IsThird, const double PrevLatency ) + { + static const CConvolveFn FltConvFn[ 14 ] = { + &CDSPHBDownsampler :: convolve1, &CDSPHBDownsampler :: convolve2, + &CDSPHBDownsampler :: convolve3, &CDSPHBDownsampler :: convolve4, + &CDSPHBDownsampler :: convolve5, &CDSPHBDownsampler :: convolve6, + &CDSPHBDownsampler :: convolve7, &CDSPHBDownsampler :: convolve8, + &CDSPHBDownsampler :: convolve9, &CDSPHBDownsampler :: convolve10, + &CDSPHBDownsampler :: convolve11, &CDSPHBDownsampler :: convolve12, + &CDSPHBDownsampler :: convolve13, + &CDSPHBDownsampler :: convolve14 }; + + int fltt; + double att; + + if( IsThird ) + { + CDSPHBUpsampler :: getHBFilterThird( ReqAtten, SteepIndex, fltp, + fltt, att ); + } + else + { + CDSPHBUpsampler :: getHBFilter( ReqAtten, SteepIndex, fltp, fltt, + att ); + } + + convfn = FltConvFn[ fltt - 1 ]; + fll = fltt * 2 - 1; + fl2 = fll; + flo = fll + fl2; + flb = BufLen - fll; + BufRP = Buf + fll; + + LatencyFrac = PrevLatency * 0.5; + Latency = (int) LatencyFrac; + LatencyFrac -= Latency; + + R8BASSERT( Latency >= 0 ); + + R8BCONSOLE( "CDSPHBDownsampler: taps=%i third=%i att=%.1f io=1/2\n", + fltt, (int) IsThird, att ); + + clear(); + } + + virtual int getLatency() const + { + return( 0 ); + } + + virtual double getLatencyFrac() const + { + return( LatencyFrac ); + } + + virtual int getMaxOutLen( const int MaxInLen ) const + { + R8BASSERT( MaxInLen >= 0 ); + + return(( MaxInLen + 1 ) >> 1 ); + } + + virtual void clear() + { + LatencyLeft = Latency; + BufLeft = 0; + WritePos = 0; + ReadPos = flb; // Set "read" position to account for filter's latency. + + memset( &Buf[ ReadPos ], 0, ( BufLen - flb ) * sizeof( Buf[ 0 ])); + } + + virtual int process( double* ip, int l, double*& op0 ) + { + R8BASSERT( l >= 0 ); + + double* op = op0; + + while( l > 0 ) + { + // Add new input samples to both halves of the ring buffer. + + const int b = min( min( l, BufLen - WritePos ), flb - BufLeft ); + + double* const wp1 = Buf + WritePos; + memcpy( wp1, ip, b * sizeof( wp1[ 0 ])); + + if( WritePos < flo ) + { + const int c = min( b, flo - WritePos ); + memcpy( wp1 + BufLen, wp1, c * sizeof( wp1[ 0 ])); + } + + ip += b; + WritePos = ( WritePos + b ) & BufLenMask; + l -= b; + BufLeft += b; + + // Produce output. + + if( BufLeft > fl2 ) + { + const int c = ( BufLeft - fl2 + 1 ) >> 1; + + double* const opend = op + c; + ( *convfn )( op, opend, fltp, BufRP, ReadPos ); + + op = opend; + const int c2 = c + c; + ReadPos = ( ReadPos + c2 ) & BufLenMask; + BufLeft -= c2; + } + } + + int ol = (int) ( op - op0 ); + + if( LatencyLeft != 0 ) + { + if( LatencyLeft >= ol ) + { + LatencyLeft -= ol; + return( 0 ); + } + + ol -= LatencyLeft; + op0 += LatencyLeft; + LatencyLeft = 0; + } + + return( ol ); + } + +private: + static const int BufLenBits = 10; ///< The length of the ring buffer, + ///< expressed as Nth power of 2. This value can be reduced if it is + ///< known that only short input buffers will be passed to the + ///< interpolator. The minimum value of this parameter is 5, and + ///< 1 << BufLenBits should be at least 3 times larger than the + ///< FilterLen. + ///< + static const int BufLen = 1 << BufLenBits; ///< The length of the ring + ///< buffer. The actual length is twice as long to allow "beyond max + ///< position" positioning. + ///< + static const int BufLenMask = BufLen - 1; ///< Mask used for quick buffer + ///< position wrapping. + ///< + double Buf[ BufLen + 54 ]; ///< The ring buffer, including overrun + ///< protection for the largest filter. + ///< + const double* fltp; ///< Half-band filter taps. + ///< + int fll; ///< Input latency. + ///< + int fl2; ///< Right-side filter length. + ///< + int flo; ///< Overrun length. + ///< + int flb; ///< Initial read position and maximal buffer write length. + ///< + const double* BufRP; ///< Offseted Buf pointer at ReadPos=0. + ///< + int Latency; ///< Initial latency that should be removed from the output. + ///< + double LatencyFrac; ///< Fractional latency left on the output. + ///< + int BufLeft; ///< The number of samples left in the buffer to process. + ///< When this value is below FilterLenD2Plus1, the interpolation + ///< cycle ends. + ///< + int WritePos; ///< The current buffer write position. Incremented together + ///< with the BufLeft variable. + ///< + int ReadPos; ///< The current buffer read position. + ///< + int LatencyLeft; ///< Latency left to remove. + ///< + typedef void( *CConvolveFn )( double* op, double* const opend, + const double* const flt, const double* const rp0, int rpos ); ///< + ///< Convolution function type. + ///< + CConvolveFn convfn; ///< Convolution function in use. + ///< + +#define R8BHBC1( fn ) \ + static void fn( double* op, double* const opend, const double* const flt, \ + const double* const rp0, int rpos ) \ + { \ + while( op != opend ) \ + { \ + const double* const rp = rp0 + rpos; \ + *op = rp[ 0 ] + + +#define R8BHBC2 \ + rpos = ( rpos + 2 ) & BufLenMask; \ + op++; \ + } \ + } + + R8BHBC1( convolve1 ) + flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]); + R8BHBC2 + + R8BHBC1( convolve2 ) + flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) + + flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]); + R8BHBC2 + + R8BHBC1( convolve3 ) + flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) + + flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) + + flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]); + R8BHBC2 + + R8BHBC1( convolve4 ) + flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) + + flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) + + flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]) + + flt[ 3 ] * ( rp[ 7 ] + rp[ -7 ]); + R8BHBC2 + + R8BHBC1( convolve5 ) + flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) + + flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) + + flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]) + + flt[ 3 ] * ( rp[ 7 ] + rp[ -7 ]) + + flt[ 4 ] * ( rp[ 9 ] + rp[ -9 ]); + R8BHBC2 + + R8BHBC1( convolve6 ) + flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) + + flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) + + flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]) + + flt[ 3 ] * ( rp[ 7 ] + rp[ -7 ]) + + flt[ 4 ] * ( rp[ 9 ] + rp[ -9 ]) + + flt[ 5 ] * ( rp[ 11 ] + rp[ -11 ]); + R8BHBC2 + + R8BHBC1( convolve7 ) + flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) + + flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) + + flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]) + + flt[ 3 ] * ( rp[ 7 ] + rp[ -7 ]) + + flt[ 4 ] * ( rp[ 9 ] + rp[ -9 ]) + + flt[ 5 ] * ( rp[ 11 ] + rp[ -11 ]) + + flt[ 6 ] * ( rp[ 13 ] + rp[ -13 ]); + R8BHBC2 + + R8BHBC1( convolve8 ) + flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) + + flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) + + flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]) + + flt[ 3 ] * ( rp[ 7 ] + rp[ -7 ]) + + flt[ 4 ] * ( rp[ 9 ] + rp[ -9 ]) + + flt[ 5 ] * ( rp[ 11 ] + rp[ -11 ]) + + flt[ 6 ] * ( rp[ 13 ] + rp[ -13 ]) + + flt[ 7 ] * ( rp[ 15 ] + rp[ -15 ]); + R8BHBC2 + + R8BHBC1( convolve9 ) + flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) + + flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) + + flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]) + + flt[ 3 ] * ( rp[ 7 ] + rp[ -7 ]) + + flt[ 4 ] * ( rp[ 9 ] + rp[ -9 ]) + + flt[ 5 ] * ( rp[ 11 ] + rp[ -11 ]) + + flt[ 6 ] * ( rp[ 13 ] + rp[ -13 ]) + + flt[ 7 ] * ( rp[ 15 ] + rp[ -15 ]) + + flt[ 8 ] * ( rp[ 17 ] + rp[ -17 ]); + R8BHBC2 + + R8BHBC1( convolve10 ) + flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) + + flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) + + flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]) + + flt[ 3 ] * ( rp[ 7 ] + rp[ -7 ]) + + flt[ 4 ] * ( rp[ 9 ] + rp[ -9 ]) + + flt[ 5 ] * ( rp[ 11 ] + rp[ -11 ]) + + flt[ 6 ] * ( rp[ 13 ] + rp[ -13 ]) + + flt[ 7 ] * ( rp[ 15 ] + rp[ -15 ]) + + flt[ 8 ] * ( rp[ 17 ] + rp[ -17 ]) + + flt[ 9 ] * ( rp[ 19 ] + rp[ -19 ]); + R8BHBC2 + + R8BHBC1( convolve11 ) + flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) + + flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) + + flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]) + + flt[ 3 ] * ( rp[ 7 ] + rp[ -7 ]) + + flt[ 4 ] * ( rp[ 9 ] + rp[ -9 ]) + + flt[ 5 ] * ( rp[ 11 ] + rp[ -11 ]) + + flt[ 6 ] * ( rp[ 13 ] + rp[ -13 ]) + + flt[ 7 ] * ( rp[ 15 ] + rp[ -15 ]) + + flt[ 8 ] * ( rp[ 17 ] + rp[ -17 ]) + + flt[ 9 ] * ( rp[ 19 ] + rp[ -19 ]) + + flt[ 10 ] * ( rp[ 21 ] + rp[ -21 ]); + R8BHBC2 + + R8BHBC1( convolve12 ) + flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) + + flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) + + flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]) + + flt[ 3 ] * ( rp[ 7 ] + rp[ -7 ]) + + flt[ 4 ] * ( rp[ 9 ] + rp[ -9 ]) + + flt[ 5 ] * ( rp[ 11 ] + rp[ -11 ]) + + flt[ 6 ] * ( rp[ 13 ] + rp[ -13 ]) + + flt[ 7 ] * ( rp[ 15 ] + rp[ -15 ]) + + flt[ 8 ] * ( rp[ 17 ] + rp[ -17 ]) + + flt[ 9 ] * ( rp[ 19 ] + rp[ -19 ]) + + flt[ 10 ] * ( rp[ 21 ] + rp[ -21 ]) + + flt[ 11 ] * ( rp[ 23 ] + rp[ -23 ]); + R8BHBC2 + + R8BHBC1( convolve13 ) + flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) + + flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) + + flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]) + + flt[ 3 ] * ( rp[ 7 ] + rp[ -7 ]) + + flt[ 4 ] * ( rp[ 9 ] + rp[ -9 ]) + + flt[ 5 ] * ( rp[ 11 ] + rp[ -11 ]) + + flt[ 6 ] * ( rp[ 13 ] + rp[ -13 ]) + + flt[ 7 ] * ( rp[ 15 ] + rp[ -15 ]) + + flt[ 8 ] * ( rp[ 17 ] + rp[ -17 ]) + + flt[ 9 ] * ( rp[ 19 ] + rp[ -19 ]) + + flt[ 10 ] * ( rp[ 21 ] + rp[ -21 ]) + + flt[ 11 ] * ( rp[ 23 ] + rp[ -23 ]) + + flt[ 12 ] * ( rp[ 25 ] + rp[ -25 ]); + R8BHBC2 + + R8BHBC1( convolve14 ) + flt[ 0 ] * ( rp[ 1 ] + rp[ -1 ]) + + flt[ 1 ] * ( rp[ 3 ] + rp[ -3 ]) + + flt[ 2 ] * ( rp[ 5 ] + rp[ -5 ]) + + flt[ 3 ] * ( rp[ 7 ] + rp[ -7 ]) + + flt[ 4 ] * ( rp[ 9 ] + rp[ -9 ]) + + flt[ 5 ] * ( rp[ 11 ] + rp[ -11 ]) + + flt[ 6 ] * ( rp[ 13 ] + rp[ -13 ]) + + flt[ 7 ] * ( rp[ 15 ] + rp[ -15 ]) + + flt[ 8 ] * ( rp[ 17 ] + rp[ -17 ]) + + flt[ 9 ] * ( rp[ 19 ] + rp[ -19 ]) + + flt[ 10 ] * ( rp[ 21 ] + rp[ -21 ]) + + flt[ 11 ] * ( rp[ 23 ] + rp[ -23 ]) + + flt[ 12 ] * ( rp[ 25 ] + rp[ -25 ]) + + flt[ 13 ] * ( rp[ 27 ] + rp[ -27 ]); + R8BHBC2 + +#undef R8BHBC1 +#undef R8BHBC2 +}; + +// --------------------------------------------------------------------------- + +} // namespace r8b + +#endif // R8B_CDSPHBDOWNSAMPLER_INCLUDED diff --git a/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPHBUpsampler.h b/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPHBUpsampler.h new file mode 100644 index 00000000..e9b03c70 --- /dev/null +++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPHBUpsampler.h @@ -0,0 +1,804 @@ +//$ nobt +//$ nocpp + +/** + * @file CDSPHBUpsampler.h + * + * @brief Half-band upsampling class. + * + * This file includes half-band upsampling class. + * + * r8brain-free-src Copyright (c) 2013-2022 Aleksey Vaneev + * See the "LICENSE" file for license. + */ + +#ifndef R8B_CDSPHBUPSAMPLER_INCLUDED +#define R8B_CDSPHBUPSAMPLER_INCLUDED + +#include "CDSPProcessor.h" + +namespace r8b { + +/** + * @brief Half-band upsampling class. + * + * Class implements brute-force half-band 2X upsampling that uses small + * sparse symmetric FIR filters. It is very efficient and should be used at + * latter upsampling steps after initial steep 2X upsampling. + */ + +class CDSPHBUpsampler : public CDSPProcessor +{ +public: + /** + * Function that provides filter data for various steepness indices and + * attenuations. + * + * @param ReqAtten Required half-band filter attentuation. + * @param SteepIndex Steepness index - 0=steepest. Corresponds to general + * upsampling/downsampling ratio, e.g. at 4x 0 is used, at 8x 1 is used, + * etc. + */ + + static void getHBFilter( const double ReqAtten, const int SteepIndex, + const double*& flt, int& fltt, double& att ) + { + static const double HBKernel_4A[ 4 ] = { // -54.5176 dB, 4 + 6.1729335650971517e-001, -1.5963945620743250e-001, + 5.5073370934086312e-002, -1.4603578989932850e-002,}; + static const double HBKernel_5A[ 5 ] = { // -66.3075 dB, 4 + 6.2068807424902472e-001, -1.6827573634467302e-001, + 6.5263016720721170e-002, -2.2483331611592005e-002, + 5.2917326684281110e-003,}; + static const double HBKernel_6A[ 6 ] = { // -89.5271 dB, 4 + 6.2187202340480707e-001, -1.7132842113816371e-001, + 6.9019169178765674e-002, -2.5799728312695277e-002, + 7.4880112525741666e-003, -1.2844465869952567e-003,}; + static const double HBKernel_7A[ 7 ] = { // -105.2842 dB, 4 + 6.2354494135775851e-001, -1.7571220703702045e-001, + 7.4529843603968457e-002, -3.0701736822442153e-002, + 1.0716755639039573e-002, -2.7833422930759735e-003, + 4.1118797093875510e-004,}; + static const double HBKernel_8A[ 8 ] = { // -121.0063 dB, 4 + 6.2488363107953926e-001, -1.7924942606514119e-001, + 7.9068155655640557e-002, -3.4907523415495731e-002, + 1.3710256799907897e-002, -4.3991142586987933e-003, + 1.0259190163889602e-003, -1.3278941979339359e-004,}; + static const double HBKernel_9A[ 9 ] = { // -136.6982 dB, 4 + 6.2597763804021977e-001, -1.8216414325139055e-001, + 8.2879104876726728e-002, -3.8563442248249404e-002, + 1.6471530499739394e-002, -6.0489108881335227e-003, + 1.7805283804140392e-003, -3.7533200112729561e-004, + 4.3172840558735476e-005,}; + static const double HBKernel_10A[ 10 ] = { // -152.3572 dB, 4 + 6.2688767582974092e-001, -1.8460766807559420e-001, + 8.6128943000481864e-002, -4.1774474147006607e-002, + 1.9014801985747346e-002, -7.6870397465866507e-003, + 2.6264590175341853e-003, -7.1106660285478562e-004, + 1.3645852036179345e-004, -1.4113888783332969e-005,}; + static const double HBKernel_11A[ 11 ] = { // -183.7962 dB, 4 + 6.2667167706948146e-001, -1.8407153342635879e-001, + 8.5529995610836046e-002, -4.1346831462361310e-002, + 1.8844831691322637e-002, -7.7125170365394992e-003, + 2.7268674860562087e-003, -7.9745028501057233e-004, + 1.8116344606360795e-004, -2.8569149754241848e-005, + 2.3667022010173616e-006,}; + static const double HBKernel_12A[ 12 ] = { // -199.4768 dB, 4 + 6.2747849730367999e-001, -1.8623616784506747e-001, + 8.8409755898467945e-002, -4.4207468821462342e-002, + 2.1149175945115381e-002, -9.2551508371115209e-003, + 3.5871562170822330e-003, -1.1923167653750219e-003, + 3.2627812189920129e-004, -6.9106902511490413e-005, + 1.0122897863125124e-005, -7.7531878906846174e-007,}; + static const double HBKernel_13A[ 13 ] = { // -215.1364 dB, 4 + 6.2816416252367324e-001, -1.8809076955230414e-001, + 9.0918539867353029e-002, -4.6765502683599310e-002, + 2.3287520498995663e-002, -1.0760627245014184e-002, + 4.4853922948425683e-003, -1.6438775426910800e-003, + 5.1441312354764978e-004, -1.3211725685765050e-004, + 2.6191319837779187e-005, -3.5802430606313093e-006, + 2.5491278270628601e-007,}; + static const double HBKernel_14A[ 14 ] = { // -230.7526 dB, 4 + 6.2875473120929948e-001, -1.8969941936903847e-001, + 9.3126094480960403e-002, -4.9067251179869126e-002, + 2.5273008851199916e-002, -1.2218646153393291e-002, + 5.4048942085580280e-003, -2.1409919546078581e-003, + 7.4250292812927973e-004, -2.1924542206832172e-004, + 5.3015808983125091e-005, -9.8743034923598196e-006, + 1.2650391141650221e-006, -8.4146674637474946e-008,}; + static const int FltCountA = 11; + static const int FlttBaseA = 4; + static const double FltAttensA[ FltCountA ] = { + 54.5176, 66.3075, 89.5271, 105.2842, 121.0063, 136.6982, 152.3572, 183.7962, 199.4768, 215.1364, 230.7526, }; + static const double* const FltPtrsA[ FltCountA ] = { + HBKernel_4A, HBKernel_5A, HBKernel_6A, HBKernel_7A, HBKernel_8A, HBKernel_9A, HBKernel_10A, HBKernel_11A, HBKernel_12A, HBKernel_13A, HBKernel_14A, }; + static const double HBKernel_2B[ 2 ] = { // -56.6007 dB, 8 + 5.7361525854329076e-001, -7.5092074924827903e-002,}; + static const double HBKernel_3B[ 3 ] = { // -83.0295 dB, 8 + 5.9277038608066912e-001, -1.0851340190268854e-001, + 1.5813570475513079e-002,}; + static const double HBKernel_4B[ 4 ] = { // -123.4724 dB, 8 + 6.0140277542879617e-001, -1.2564483854574138e-001, + 2.7446500598038322e-002, -3.2051079559057435e-003,}; + static const double HBKernel_5B[ 5 ] = { // -152.4411 dB, 8 + 6.0818642429088932e-001, -1.3981140187175697e-001, + 3.8489164054503623e-002, -7.6218861797853104e-003, + 7.5772358130952392e-004,}; + static const double HBKernel_6B[ 6 ] = { // -181.2501 dB, 8 + 6.1278392271464355e-001, -1.5000053762513338e-001, + 4.7575323511364960e-002, -1.2320702802243476e-002, + 2.1462442592348487e-003, -1.8425092381892940e-004,}; + static const double HBKernel_7B[ 7 ] = { // -209.9472 dB, 8 + 6.1610372263478952e-001, -1.5767891882524138e-001, + 5.5089691170294691e-002, -1.6895755656366061e-002, + 3.9416643438213977e-003, -6.0603623791604668e-004, + 4.5632602433393365e-005,}; + static const double HBKernel_8B[ 8 ] = { // -238.5616 dB, 8 + 6.1861282914465976e-001, -1.6367179451225150e-001, + 6.1369861342939716e-002, -2.1184466539006987e-002, + 5.9623357510842061e-003, -1.2483098507454090e-003, + 1.7099297537964702e-004, -1.1448313239478885e-005,}; + static const int FltCountB = 7; + static const int FlttBaseB = 2; + static const double FltAttensB[ FltCountB ] = { + 56.6007, 83.0295, 123.4724, 152.4411, 181.2501, 209.9472, 238.5616, }; + static const double* const FltPtrsB[ FltCountB ] = { + HBKernel_2B, HBKernel_3B, HBKernel_4B, HBKernel_5B, HBKernel_6B, HBKernel_7B, HBKernel_8B, }; + static const double HBKernel_2C[ 2 ] = { // -89.0473 dB, 16 + 5.6430278013478008e-001, -6.4338068855763375e-002,}; + static const double HBKernel_3C[ 3 ] = { // -130.8951 dB, 16 + 5.8706402915551448e-001, -9.9362380958670449e-002, + 1.2298637065869358e-002,}; + static const double HBKernel_4C[ 4 ] = { // -172.3192 dB, 16 + 5.9896586134984675e-001, -1.2111680603434927e-001, + 2.4763118076458895e-002, -2.6121758132212989e-003,}; + static const double HBKernel_5C[ 5 ] = { // -213.4984 dB, 16 + 6.0626808285230716e-001, -1.3588224032740795e-001, + 3.5544305238309003e-002, -6.5127022377289654e-003, + 5.8255449565950768e-004,}; + static const double HBKernel_6C[ 6 ] = { // -254.5186 dB, 16 + 6.1120171263351242e-001, -1.4654486853757870e-001, + 4.4582959299131253e-002, -1.0840543858123995e-002, + 1.7343706485509962e-003, -1.3363018567985596e-004,}; + static const int FltCountC = 5; + static const int FlttBaseC = 2; + static const double FltAttensC[ FltCountC ] = { + 89.0473, 130.8951, 172.3192, 213.4984, 254.5186, }; + static const double* const FltPtrsC[ FltCountC ] = { + HBKernel_2C, HBKernel_3C, HBKernel_4C, HBKernel_5C, HBKernel_6C, }; + static const double HBKernel_1D[ 1 ] = { // -54.4754 dB, 32 + 5.0188900022775451e-001,}; + static const double HBKernel_2D[ 2 ] = { // -113.2139 dB, 32 + 5.6295152180538044e-001, -6.2953706070191726e-002,}; + static const double HBKernel_3D[ 3 ] = { // -167.1447 dB, 32 + 5.8621968728755036e-001, -9.8080551656524531e-002, + 1.1860868761997080e-002,}; + static const double HBKernel_4D[ 4 ] = { // -220.6519 dB, 32 + 5.9835028657163591e-001, -1.1999986086623511e-001, + 2.4132530854004228e-002, -2.4829565686819706e-003,}; + static const int FltCountD = 4; + static const int FlttBaseD = 1; + static const double FltAttensD[ FltCountD ] = { + 54.4754, 113.2139, 167.1447, 220.6519, }; + static const double* const FltPtrsD[ FltCountD ] = { + HBKernel_1D, HBKernel_2D, HBKernel_3D, HBKernel_4D, }; + static const double HBKernel_1E[ 1 ] = { // -66.5391 dB, 64 + 5.0047102586416625e-001,}; + static const double HBKernel_2E[ 2 ] = { // -137.3173 dB, 64 + 5.6261293163933568e-001, -6.2613067826620017e-002,}; + static const double HBKernel_3E[ 3 ] = { // -203.2997 dB, 64 + 5.8600808139396787e-001, -9.7762185880067784e-002, + 1.1754104554493029e-002,}; + static const double HBKernel_4E[ 4 ] = { // -268.8550 dB, 64 + 5.9819599352772002e-001, -1.1972157555011861e-001, + 2.3977305567947922e-002, -2.4517235455853992e-003,}; + static const int FltCountE = 4; + static const int FlttBaseE = 1; + static const double FltAttensE[ FltCountE ] = { + 66.5391, 137.3173, 203.2997, 268.8550, }; + static const double* const FltPtrsE[ FltCountE ] = { + HBKernel_1E, HBKernel_2E, HBKernel_3E, HBKernel_4E, }; + static const double HBKernel_1F[ 1 ] = { // -82.4633 dB, 128 + 5.0007530666642896e-001,}; + static const double HBKernel_2F[ 2 ] = { // -161.4049 dB, 128 + 5.6252823610146030e-001, -6.2528244608044792e-002,}; + static const double HBKernel_3F[ 3 ] = { // -239.4313 dB, 128 + 5.8595514744674237e-001, -9.7682725156791952e-002, + 1.1727577711117231e-002,}; + static const int FltCountF = 3; + static const int FlttBaseF = 1; + static const double FltAttensF[ FltCountF ] = { + 82.4633, 161.4049, 239.4313, }; + static const double* const FltPtrsF[ FltCountF ] = { + HBKernel_1F, HBKernel_2F, HBKernel_3F, }; + static const double HBKernel_1G[ 1 ] = { // -94.5052 dB, 256 + 5.0001882524896712e-001,}; + static const double HBKernel_2G[ 2 ] = { // -185.4886 dB, 256 + 5.6250705922479682e-001, -6.2507059756378394e-002,}; + static const double HBKernel_3G[ 3 ] = { // -275.5501 dB, 256 + 5.8594191201187384e-001, -9.7662868266991207e-002, + 1.1720956255134043e-002,}; + static const int FltCountG = 3; + static const int FlttBaseG = 1; + static const double FltAttensG[ FltCountG ] = { + 94.5052, 185.4886, 275.5501, }; + static const double* const FltPtrsG[ FltCountG ] = { + HBKernel_1G, HBKernel_2G, HBKernel_3G, }; + + int k = 0; + + if( SteepIndex <= 0 ) + { + while( k != FltCountA - 1 && FltAttensA[ k ] < ReqAtten ) + { + k++; + } + + flt = FltPtrsA[ k ]; + fltt = FlttBaseA + k; + att = FltAttensA[ k ]; + } + else + if( SteepIndex == 1 ) + { + while( k != FltCountB - 1 && FltAttensB[ k ] < ReqAtten ) + { + k++; + } + + flt = FltPtrsB[ k ]; + fltt = FlttBaseB + k; + att = FltAttensB[ k ]; + } + else + if( SteepIndex == 2 ) + { + while( k != FltCountC - 1 && FltAttensC[ k ] < ReqAtten ) + { + k++; + } + + flt = FltPtrsC[ k ]; + fltt = FlttBaseC + k; + att = FltAttensC[ k ]; + } + else + if( SteepIndex == 3 ) + { + while( k != FltCountD - 1 && FltAttensD[ k ] < ReqAtten ) + { + k++; + } + + flt = FltPtrsD[ k ]; + fltt = FlttBaseD + k; + att = FltAttensD[ k ]; + } + else + if( SteepIndex == 4 ) + { + while( k != FltCountE - 1 && FltAttensE[ k ] < ReqAtten ) + { + k++; + } + + flt = FltPtrsE[ k ]; + fltt = FlttBaseE + k; + att = FltAttensE[ k ]; + } + else + if( SteepIndex == 5 ) + { + while( k != FltCountF - 1 && FltAttensF[ k ] < ReqAtten ) + { + k++; + } + + flt = FltPtrsF[ k ]; + fltt = FlttBaseF + k; + att = FltAttensF[ k ]; + } + else + { + while( k != FltCountG - 1 && FltAttensG[ k ] < ReqAtten ) + { + k++; + } + + flt = FltPtrsG[ k ]; + fltt = FlttBaseG + k; + att = FltAttensG[ k ]; + } + } + + /** + * Function that provides filter data for various steepness indices and + * attenuations. For 1/3 resamplings. + * + * @param ReqAtten Required half-band filter attentuation. + * @param SteepIndex Steepness index - 0=steepest. Corresponds to general + * upsampling/downsampling ratio, e.g. at 4x 0 is used, at 8x 1 is used, + * etc. + */ + + static void getHBFilterThird( const double ReqAtten, const int SteepIndex, + const double*& flt, int& fltt, double& att ) + { + static const double HBKernel_3A[ 3 ] = { // -66.3726 dB, 6 + 5.9811355069551475e-001, -1.1793396656733847e-001, + 2.0300557211946322e-002,}; + static const double HBKernel_4A[ 4 ] = { // -90.2546 dB, 6 + 6.0645499250612578e-001, -1.3555496505481171e-001, + 3.4022804962365975e-002, -4.9535418595798757e-003,}; + static const double HBKernel_5A[ 5 ] = { // -126.5507 dB, 6 + 6.1014115058940210e-001, -1.4393081816629907e-001, + 4.1760642892852244e-002, -8.9692183234056175e-003, + 9.9871340618342070e-004,}; + static const double HBKernel_6A[ 6 ] = { // -150.1839 dB, 6 + 6.1439563420546972e-001, -1.5360187826905250e-001, + 5.0840891345687034e-002, -1.4053648740561121e-002, + 2.6771286587305727e-003, -2.5815816044823123e-004,}; + static const double HBKernel_7A[ 7 ] = { // -173.7068 dB, 6 + 6.1747493476329918e-001, -1.6087373733313212e-001, + 5.8263075641409430e-002, -1.8872408173431318e-002, + 4.7421376543513687e-003, -8.0196529612267474e-004, + 6.7964807393798996e-005,}; + static const double HBKernel_8A[ 8 ] = { // -197.1454 dB, 6 + 6.1980610947775050e-001, -1.6654070578314714e-001, + 6.4416567441730327e-002, -2.3307744348719822e-002, + 6.9909157372312443e-003, -1.5871946293364403e-003, + 2.4017727382382763e-004, -1.8125308241541697e-005,}; + static const double HBKernel_9A[ 9 ] = { // -220.5199 dB, 6 + 6.2163188951899306e-001, -1.7108115323810941e-001, + 6.9588370095600260e-002, -2.7339625080613838e-002, + 9.2954469183791771e-003, -2.5537179959555429e-003, + 5.2572290897951021e-004, -7.1813356135154921e-005, + 4.8802382808892154e-006,}; + static const int FltCountA = 7; + static const int FlttBaseA = 3; + static const double FltAttensA[ FltCountA ] = { + 66.3726, 90.2546, 126.5507, 150.1839, 173.7068, 197.1454, 220.5199, }; + static const double* const FltPtrsA[ FltCountA ] = { + HBKernel_3A, HBKernel_4A, HBKernel_5A, HBKernel_6A, HBKernel_7A, HBKernel_8A, HBKernel_9A, }; + static const double HBKernel_2B[ 2 ] = { // -71.0965 dB, 12 + 5.6748544264806311e-001, -6.7764090509431732e-002,}; + static const double HBKernel_3B[ 3 ] = { // -115.7707 dB, 12 + 5.8793612182667199e-001, -1.0070583248877293e-001, + 1.2771337947163834e-002,}; + static const double HBKernel_4B[ 4 ] = { // -152.1535 dB, 12 + 5.9960155600862808e-001, -1.2228154335199336e-001, + 2.5433718917694709e-002, -2.7537562530837154e-003,}; + static const double HBKernel_5B[ 5 ] = { // -188.2914 dB, 12 + 6.0676859170554343e-001, -1.3689667009876413e-001, + 3.6288512631926818e-002, -6.7838855305035351e-003, + 6.2345167677087547e-004,}; + static const double HBKernel_6B[ 6 ] = { // -224.2705 dB, 12 + 6.1161456341904397e-001, -1.4743901958274458e-001, + 4.5344160157313275e-002, -1.1207371780924531e-002, + 1.8328497112594935e-003, -1.4518193006359589e-004,}; + static const int FltCountB = 5; + static const int FlttBaseB = 2; + static const double FltAttensB[ FltCountB ] = { + 71.0965, 115.7707, 152.1535, 188.2914, 224.2705, }; + static const double* const FltPtrsB[ FltCountB ] = { + HBKernel_2B, HBKernel_3B, HBKernel_4B, HBKernel_5B, HBKernel_6B, }; + static const double HBKernel_1C[ 1 ] = { // -49.4544 dB, 24 + 5.0336730531430562e-001,}; + static const double HBKernel_2C[ 2 ] = { // -103.1970 dB, 24 + 5.6330232648142819e-001, -6.3309247177420452e-002,}; + static const double HBKernel_3C[ 3 ] = { // -152.1195 dB, 24 + 5.8643891113580415e-001, -9.8411593011583087e-002, + 1.1972706651483846e-002,}; + static const double HBKernel_4C[ 4 ] = { // -200.6182 dB, 24 + 5.9851012363917222e-001, -1.2028885239978220e-001, + 2.4294521083140615e-002, -2.5157924156609776e-003,}; + static const double HBKernel_5C[ 5 ] = { // -248.8730 dB, 24 + 6.0590922882030196e-001, -1.3515953438018685e-001, + 3.5020857107815606e-002, -6.3256196990467053e-003, + 5.5506815147598793e-004,}; + static const int FltCountC = 5; + static const int FlttBaseC = 1; + static const double FltAttensC[ FltCountC ] = { + 49.4544, 103.1970, 152.1195, 200.6182, 248.8730, }; + static const double* const FltPtrsC[ FltCountC ] = { + HBKernel_1C, HBKernel_2C, HBKernel_3C, HBKernel_4C, HBKernel_5C, }; + static const double HBKernel_1D[ 1 ] = { // -61.5357 dB, 48 + 5.0083794231068057e-001,}; + static const double HBKernel_2D[ 2 ] = { // -127.3167 dB, 48 + 5.6270074379958690e-001, -6.2701174487726344e-002,}; + static const double HBKernel_3D[ 3 ] = { // -188.2990 dB, 48 + 5.8606296210323228e-001, -9.7844644765123029e-002, + 1.1781683046528768e-002,}; + static const double HBKernel_4D[ 4 ] = { // -248.8580 dB, 48 + 5.9823601243162516e-001, -1.1979368994739022e-001, + 2.4017458606412575e-002, -2.4597810910081913e-003,}; + static const int FltCountD = 4; + static const int FlttBaseD = 1; + static const double FltAttensD[ FltCountD ] = { + 61.5357, 127.3167, 188.2990, 248.8580, }; + static const double* const FltPtrsD[ FltCountD ] = { + HBKernel_1D, HBKernel_2D, HBKernel_3D, HBKernel_4D, }; + static const double HBKernel_1E[ 1 ] = { // -77.4651 dB, 96 + 5.0013388897382527e-001,}; + static const double HBKernel_2E[ 2 ] = { // -151.4084 dB, 96 + 5.6255019604317880e-001, -6.2550222932381064e-002,}; + static const double HBKernel_3E[ 3 ] = { // -224.4365 dB, 96 + 5.8596887234201078e-001, -9.7703321113080305e-002, + 1.1734448777069783e-002,}; + static const int FltCountE = 3; + static const int FlttBaseE = 1; + static const double FltAttensE[ FltCountE ] = { + 77.4651, 151.4084, 224.4365, }; + static const double* const FltPtrsE[ FltCountE ] = { + HBKernel_1E, HBKernel_2E, HBKernel_3E, }; + static const double HBKernel_1F[ 1 ] = { // -89.5075 dB, 192 + 5.0003346776264190e-001,}; + static const double HBKernel_2F[ 2 ] = { // -175.4932 dB, 192 + 5.6251254964097952e-001, -6.2512551321105267e-002,}; + static const double HBKernel_3F[ 3 ] = { // -260.5645 dB, 192 + 5.8594534336747051e-001, -9.7668015838639821e-002, + 1.1722672471262996e-002,}; + static const int FltCountF = 3; + static const int FlttBaseF = 1; + static const double FltAttensF[ FltCountF ] = { + 89.5075, 175.4932, 260.5645, }; + static const double* const FltPtrsF[ FltCountF ] = { + HBKernel_1F, HBKernel_2F, HBKernel_3F, }; + static const double HBKernel_1G[ 1 ] = { // -101.5490 dB, 384 + 5.0000836666064941e-001,}; + static const double HBKernel_2G[ 2 ] = { // -199.5761 dB, 384 + 5.6250313744943459e-001, -6.2503137554435345e-002,}; + static const double HBKernel_3G[ 3 ] = { // -296.5185 dB, 384 + 5.8593945786963764e-001, -9.7659186853499613e-002, + 1.1719728983863425e-002,}; + static const int FltCountG = 3; + static const int FlttBaseG = 1; + static const double FltAttensG[ FltCountG ] = { + 101.5490, 199.5761, 296.5185, }; + static const double* const FltPtrsG[ FltCountG ] = { + HBKernel_1G, HBKernel_2G, HBKernel_3G, }; + + int k = 0; + + if( SteepIndex <= 0 ) + { + while( k != FltCountA - 1 && FltAttensA[ k ] < ReqAtten ) + { + k++; + } + + flt = FltPtrsA[ k ]; + fltt = FlttBaseA + k; + att = FltAttensA[ k ]; + } + else + if( SteepIndex == 1 ) + { + while( k != FltCountB - 1 && FltAttensB[ k ] < ReqAtten ) + { + k++; + } + + flt = FltPtrsB[ k ]; + fltt = FlttBaseB + k; + att = FltAttensB[ k ]; + } + else + if( SteepIndex == 2 ) + { + while( k != FltCountC - 1 && FltAttensC[ k ] < ReqAtten ) + { + k++; + } + + flt = FltPtrsC[ k ]; + fltt = FlttBaseC + k; + att = FltAttensC[ k ]; + } + else + if( SteepIndex == 3 ) + { + while( k != FltCountD - 1 && FltAttensD[ k ] < ReqAtten ) + { + k++; + } + + flt = FltPtrsD[ k ]; + fltt = FlttBaseD + k; + att = FltAttensD[ k ]; + } + else + if( SteepIndex == 4 ) + { + while( k != FltCountE - 1 && FltAttensE[ k ] < ReqAtten ) + { + k++; + } + + flt = FltPtrsE[ k ]; + fltt = FlttBaseE + k; + att = FltAttensE[ k ]; + } + else + if( SteepIndex == 5 ) + { + while( k != FltCountF - 1 && FltAttensF[ k ] < ReqAtten ) + { + k++; + } + + flt = FltPtrsF[ k ]; + fltt = FlttBaseF + k; + att = FltAttensF[ k ]; + } + else + { + while( k != FltCountG - 1 && FltAttensG[ k ] < ReqAtten ) + { + k++; + } + + flt = FltPtrsG[ k ]; + fltt = FlttBaseG + k; + att = FltAttensG[ k ]; + } + } + + /** + * Constructor initalizes the half-band upsampler. + * + * @param ReqAtten Required half-band filter attentuation. + * @param SteepIndex Steepness index - 0=steepest. Corresponds to general + * upsampling ratio, e.g. at 4x upsampling 0 is used, at 8x upsampling 1 + * is used, etc. + * @param IsThird "True" if 1/3 of frequency response resampling is + * performed. + * @param PrevLatency Latency, in samples (any value >=0), which was left + * in the output signal by a previous process. Whole-number latency will + * be consumed by *this object while remaining fractional latency can be + * obtained via the getLatencyFrac() function. + * @param aDoConsumeLatency "True" if the output latency should be + * consumed. Does not apply to the fractional part of the latency (if such + * part is available). + */ + + CDSPHBUpsampler( const double ReqAtten, const int SteepIndex, + const bool IsThird, const double PrevLatency, + const bool aDoConsumeLatency = true ) + : DoConsumeLatency( aDoConsumeLatency ) + { + static const CConvolveFn FltConvFn[ 14 ] = { + &CDSPHBUpsampler :: convolve1, &CDSPHBUpsampler :: convolve2, + &CDSPHBUpsampler :: convolve3, &CDSPHBUpsampler :: convolve4, + &CDSPHBUpsampler :: convolve5, &CDSPHBUpsampler :: convolve6, + &CDSPHBUpsampler :: convolve7, &CDSPHBUpsampler :: convolve8, + &CDSPHBUpsampler :: convolve9, &CDSPHBUpsampler :: convolve10, + &CDSPHBUpsampler :: convolve11, &CDSPHBUpsampler :: convolve12, + &CDSPHBUpsampler :: convolve13, &CDSPHBUpsampler :: convolve14 }; + + const double* fltp0; + int fltt; + double att; + + if( IsThird ) + { + getHBFilterThird( ReqAtten, SteepIndex, fltp0, fltt, att ); + } + else + { + getHBFilter( ReqAtten, SteepIndex, fltp0, fltt, att ); + } + + // Copy obtained filter to address-aligned buffer. + + fltp = alignptr( FltBuf, 16 ); + memcpy( fltp, fltp0, fltt * sizeof( fltp[ 0 ])); + + convfn = FltConvFn[ fltt - 1 ]; + fll = fltt - 1; + fl2 = fltt; + flo = fll + fl2; + BufRP = Buf + fll; + + LatencyFrac = PrevLatency * 2.0; + Latency = (int) LatencyFrac; + LatencyFrac -= Latency; + + R8BASSERT( Latency >= 0 ); + + if( DoConsumeLatency ) + { + flb = BufLen - fll; + } + else + { + Latency += fl2 + fl2; + flb = BufLen - flo; + } + + R8BCONSOLE( "CDSPHBUpsampler: sti=%i third=%i taps=%i att=%.1f " + "io=2/1\n", SteepIndex, (int) IsThird, fltt, att ); + + clear(); + } + + virtual int getLatency() const + { + return( DoConsumeLatency ? 0 : Latency ); + } + + virtual double getLatencyFrac() const + { + return( LatencyFrac ); + } + + virtual int getMaxOutLen( const int MaxInLen ) const + { + R8BASSERT( MaxInLen >= 0 ); + + return( MaxInLen << 1 ); + } + + virtual void clear() + { + if( DoConsumeLatency ) + { + LatencyLeft = Latency; + BufLeft = 0; + } + else + { + LatencyLeft = 0; + BufLeft = fl2; + } + + WritePos = 0; + ReadPos = flb; // Set "read" position to account for filter's latency. + + memset( &Buf[ ReadPos ], 0, ( BufLen - flb ) * sizeof( Buf[ 0 ])); + } + + virtual int process( double* ip, int l, double*& op0 ) + { + R8BASSERT( l >= 0 ); + + double* op = op0; + + while( l > 0 ) + { + // Add new input samples to both halves of the ring buffer. + + const int b = min( min( l, BufLen - WritePos ), flb - BufLeft ); + + double* const wp1 = Buf + WritePos; + memcpy( wp1, ip, b * sizeof( wp1[ 0 ])); + + if( WritePos < flo ) + { + const int c = min( b, flo - WritePos ); + memcpy( wp1 + BufLen, wp1, c * sizeof( wp1[ 0 ])); + } + + ip += b; + WritePos = ( WritePos + b ) & BufLenMask; + l -= b; + BufLeft += b; + + // Produce output. + + if( BufLeft > fl2 ) + { + const int c = BufLeft - fl2; + + double* const opend = op + ( c + c ); + ( *convfn )( op, opend, fltp, BufRP, ReadPos ); + + op = opend; + ReadPos = ( ReadPos + c ) & BufLenMask; + BufLeft -= c; + } + } + + int ol = (int) ( op - op0 ); + + if( LatencyLeft != 0 ) + { + if( LatencyLeft >= ol ) + { + LatencyLeft -= ol; + return( 0 ); + } + + ol -= LatencyLeft; + op0 += LatencyLeft; + LatencyLeft = 0; + } + + return( ol ); + } + +private: + static const int BufLenBits = 9; ///< The length of the ring buffer, + ///< expressed as Nth power of 2. This value can be reduced if it is + ///< known that only short input buffers will be passed to the + ///< interpolator. The minimum value of this parameter is 5, and + ///< 1 << BufLenBits should be at least 3 times larger than the + ///< FilterLen. + ///< + static const int BufLen = 1 << BufLenBits; ///< The length of the ring + ///< buffer. The actual length is twice as long to allow "beyond max + ///< position" positioning. + ///< + static const int BufLenMask = BufLen - 1; ///< Mask used for quick buffer + ///< position wrapping. + ///< + double Buf[ BufLen + 27 ]; ///< The ring buffer, including overrun + ///< protection for the largest filter. + ///< + double FltBuf[ 14 + 2 ]; ///< Holder for half-band filter taps, used with + ///< 16-byte address-aligning, for SIMD use. + ///< + double* fltp; ///< Half-band filter taps, points to FltBuf. + ///< + int fll; ///< Input latency. + ///< + int fl2; ///< Right-side filter length. + ///< + int flo; ///< Overrrun length. + ///< + int flb; ///< Initial read position and maximal buffer write length. + ///< + const double* BufRP; ///< Offseted Buf pointer at ReadPos=0. + ///< + bool DoConsumeLatency; ///< "True" if the output latency should be + ///< consumed. Does not apply to the fractional part of the latency + ///< (if such part is available). + ///< + int Latency; ///< Initial latency that should be removed from the output. + ///< + double LatencyFrac; ///< Fractional latency left on the output. + ///< + int BufLeft; ///< The number of samples left in the buffer to process. + ///< When this value is below FilterLenD2Plus1, the interpolation + ///< cycle ends. + ///< + int WritePos; ///< The current buffer write position. Incremented together + ///< with the BufLeft variable. + ///< + int ReadPos; ///< The current buffer read position. + ///< + int LatencyLeft; ///< Latency left to remove. + ///< + typedef void( *CConvolveFn )( double* op, double* const opend, + const double* const flt, const double* const rp0, int rpos ); ///< + ///< Convolution function type. + ///< + CConvolveFn convfn; ///< Convolution function in use. + ///< + +#define R8BHBC1( fn ) \ + static void fn( double* op, double* const opend, const double* const flt, \ + const double* const rp0, int rpos ) \ + { \ + while( op != opend ) \ + { \ + const double* const rp = rp0 + rpos; \ + op[ 0 ] = rp[ 0 ]; + +#define R8BHBC2 \ + rpos = ( rpos + 1 ) & BufLenMask; \ + op += 2; \ + } \ + } + +#include "CDSPHBUpsampler.inc" + +#undef R8BHBC1 +#undef R8BHBC2 +}; + +// --------------------------------------------------------------------------- + +} // namespace r8b + +#endif // R8B_CDSPHBUPSAMPLER_INCLUDED diff --git a/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPHBUpsampler.inc b/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPHBUpsampler.inc new file mode 100644 index 00000000..f9431b1e --- /dev/null +++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPHBUpsampler.inc @@ -0,0 +1,711 @@ +// Auto-generated by `genhbc`, do not edit! + +#if defined( R8B_SSE2 ) + +R8BHBC1( convolve1 ) +op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ]); +R8BHBC2 + +R8BHBC1( convolve2 ) +__m128d v1, v2, m1, s1; +v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 ); +m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ), + _mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 ))); +s1 = m1; +_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 ))); +R8BHBC2 + +R8BHBC1( convolve3 ) +__m128d v1, v2, m1, s1; +v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 ); +m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ), + _mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 ))); +s1 = m1; +_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 ))); +op[ 1 ] += flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ]); +R8BHBC2 + +R8BHBC1( convolve4 ) +__m128d v1, v2, m1, s1; +v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 ); +m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ), + _mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 ))); +s1 = m1; +__m128d v3, v4, m3, s3; +v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 ); +m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ), + _mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 ))); +s3 = m3; +s1 = _mm_add_pd( s1, s3 ); +_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 ))); +R8BHBC2 + +R8BHBC1( convolve5 ) +__m128d v1, v2, m1, s1; +v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 ); +m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ), + _mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 ))); +s1 = m1; +__m128d v3, v4, m3, s3; +v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 ); +m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ), + _mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 ))); +s3 = m3; +s1 = _mm_add_pd( s1, s3 ); +_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 ))); +op[ 1 ] += flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ]); +R8BHBC2 + +R8BHBC1( convolve6 ) +__m128d v1, v2, m1, s1; +v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 ); +m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ), + _mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 ))); +s1 = m1; +__m128d v3, v4, m3, s3; +v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 ); +m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ), + _mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 ))); +s3 = m3; +v2 = _mm_loadu_pd( rp - 5 ); v1 = _mm_loadu_pd( rp + 5 ); +m1 = _mm_mul_pd( _mm_load_pd( flt + 4 ), + _mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 ))); +s1 = _mm_add_pd( s1, m1 ); +s1 = _mm_add_pd( s1, s3 ); +_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 ))); +R8BHBC2 + +R8BHBC1( convolve7 ) +__m128d v1, v2, m1, s1; +v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 ); +m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ), + _mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 ))); +s1 = m1; +__m128d v3, v4, m3, s3; +v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 ); +m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ), + _mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 ))); +s3 = m3; +v2 = _mm_loadu_pd( rp - 5 ); v1 = _mm_loadu_pd( rp + 5 ); +m1 = _mm_mul_pd( _mm_load_pd( flt + 4 ), + _mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 ))); +s1 = _mm_add_pd( s1, m1 ); +s1 = _mm_add_pd( s1, s3 ); +_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 ))); +op[ 1 ] += flt[ 6 ] * ( rp[ 7 ] + rp[ -6 ]); +R8BHBC2 + +R8BHBC1( convolve8 ) +__m128d v1, v2, m1, s1; +v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 ); +m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ), + _mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 ))); +s1 = m1; +__m128d v3, v4, m3, s3; +v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 ); +m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ), + _mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 ))); +s3 = m3; +v2 = _mm_loadu_pd( rp - 5 ); v1 = _mm_loadu_pd( rp + 5 ); +m1 = _mm_mul_pd( _mm_load_pd( flt + 4 ), + _mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 ))); +s1 = _mm_add_pd( s1, m1 ); +v4 = _mm_loadu_pd( rp - 7 ); v3 = _mm_loadu_pd( rp + 7 ); +m3 = _mm_mul_pd( _mm_load_pd( flt + 6 ), + _mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 ))); +s3 = _mm_add_pd( s3, m3 ); +s1 = _mm_add_pd( s1, s3 ); +_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 ))); +R8BHBC2 + +R8BHBC1( convolve9 ) +__m128d v1, v2, m1, s1; +v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 ); +m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ), + _mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 ))); +s1 = m1; +__m128d v3, v4, m3, s3; +v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 ); +m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ), + _mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 ))); +s3 = m3; +v2 = _mm_loadu_pd( rp - 5 ); v1 = _mm_loadu_pd( rp + 5 ); +m1 = _mm_mul_pd( _mm_load_pd( flt + 4 ), + _mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 ))); +s1 = _mm_add_pd( s1, m1 ); +v4 = _mm_loadu_pd( rp - 7 ); v3 = _mm_loadu_pd( rp + 7 ); +m3 = _mm_mul_pd( _mm_load_pd( flt + 6 ), + _mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 ))); +s3 = _mm_add_pd( s3, m3 ); +s1 = _mm_add_pd( s1, s3 ); +_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 ))); +op[ 1 ] += flt[ 8 ] * ( rp[ 9 ] + rp[ -8 ]); +R8BHBC2 + +R8BHBC1( convolve10 ) +__m128d v1, v2, m1, s1; +v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 ); +m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ), + _mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 ))); +s1 = m1; +__m128d v3, v4, m3, s3; +v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 ); +m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ), + _mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 ))); +s3 = m3; +v2 = _mm_loadu_pd( rp - 5 ); v1 = _mm_loadu_pd( rp + 5 ); +m1 = _mm_mul_pd( _mm_load_pd( flt + 4 ), + _mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 ))); +s1 = _mm_add_pd( s1, m1 ); +v4 = _mm_loadu_pd( rp - 7 ); v3 = _mm_loadu_pd( rp + 7 ); +m3 = _mm_mul_pd( _mm_load_pd( flt + 6 ), + _mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 ))); +s3 = _mm_add_pd( s3, m3 ); +v2 = _mm_loadu_pd( rp - 9 ); v1 = _mm_loadu_pd( rp + 9 ); +m1 = _mm_mul_pd( _mm_load_pd( flt + 8 ), + _mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 ))); +s1 = _mm_add_pd( s1, m1 ); +s1 = _mm_add_pd( s1, s3 ); +_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 ))); +R8BHBC2 + +R8BHBC1( convolve11 ) +__m128d v1, v2, m1, s1; +v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 ); +m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ), + _mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 ))); +s1 = m1; +__m128d v3, v4, m3, s3; +v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 ); +m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ), + _mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 ))); +s3 = m3; +v2 = _mm_loadu_pd( rp - 5 ); v1 = _mm_loadu_pd( rp + 5 ); +m1 = _mm_mul_pd( _mm_load_pd( flt + 4 ), + _mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 ))); +s1 = _mm_add_pd( s1, m1 ); +v4 = _mm_loadu_pd( rp - 7 ); v3 = _mm_loadu_pd( rp + 7 ); +m3 = _mm_mul_pd( _mm_load_pd( flt + 6 ), + _mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 ))); +s3 = _mm_add_pd( s3, m3 ); +v2 = _mm_loadu_pd( rp - 9 ); v1 = _mm_loadu_pd( rp + 9 ); +m1 = _mm_mul_pd( _mm_load_pd( flt + 8 ), + _mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 ))); +s1 = _mm_add_pd( s1, m1 ); +s1 = _mm_add_pd( s1, s3 ); +_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 ))); +op[ 1 ] += flt[ 10 ] * ( rp[ 11 ] + rp[ -10 ]); +R8BHBC2 + +R8BHBC1( convolve12 ) +__m128d v1, v2, m1, s1; +v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 ); +m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ), + _mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 ))); +s1 = m1; +__m128d v3, v4, m3, s3; +v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 ); +m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ), + _mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 ))); +s3 = m3; +v2 = _mm_loadu_pd( rp - 5 ); v1 = _mm_loadu_pd( rp + 5 ); +m1 = _mm_mul_pd( _mm_load_pd( flt + 4 ), + _mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 ))); +s1 = _mm_add_pd( s1, m1 ); +v4 = _mm_loadu_pd( rp - 7 ); v3 = _mm_loadu_pd( rp + 7 ); +m3 = _mm_mul_pd( _mm_load_pd( flt + 6 ), + _mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 ))); +s3 = _mm_add_pd( s3, m3 ); +v2 = _mm_loadu_pd( rp - 9 ); v1 = _mm_loadu_pd( rp + 9 ); +m1 = _mm_mul_pd( _mm_load_pd( flt + 8 ), + _mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 ))); +s1 = _mm_add_pd( s1, m1 ); +v4 = _mm_loadu_pd( rp - 11 ); v3 = _mm_loadu_pd( rp + 11 ); +m3 = _mm_mul_pd( _mm_load_pd( flt + 10 ), + _mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 ))); +s3 = _mm_add_pd( s3, m3 ); +s1 = _mm_add_pd( s1, s3 ); +_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 ))); +R8BHBC2 + +R8BHBC1( convolve13 ) +__m128d v1, v2, m1, s1; +v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 ); +m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ), + _mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 ))); +s1 = m1; +__m128d v3, v4, m3, s3; +v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 ); +m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ), + _mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 ))); +s3 = m3; +v2 = _mm_loadu_pd( rp - 5 ); v1 = _mm_loadu_pd( rp + 5 ); +m1 = _mm_mul_pd( _mm_load_pd( flt + 4 ), + _mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 ))); +s1 = _mm_add_pd( s1, m1 ); +v4 = _mm_loadu_pd( rp - 7 ); v3 = _mm_loadu_pd( rp + 7 ); +m3 = _mm_mul_pd( _mm_load_pd( flt + 6 ), + _mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 ))); +s3 = _mm_add_pd( s3, m3 ); +v2 = _mm_loadu_pd( rp - 9 ); v1 = _mm_loadu_pd( rp + 9 ); +m1 = _mm_mul_pd( _mm_load_pd( flt + 8 ), + _mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 ))); +s1 = _mm_add_pd( s1, m1 ); +v4 = _mm_loadu_pd( rp - 11 ); v3 = _mm_loadu_pd( rp + 11 ); +m3 = _mm_mul_pd( _mm_load_pd( flt + 10 ), + _mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 ))); +s3 = _mm_add_pd( s3, m3 ); +s1 = _mm_add_pd( s1, s3 ); +_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 ))); +op[ 1 ] += flt[ 12 ] * ( rp[ 13 ] + rp[ -12 ]); +R8BHBC2 + +R8BHBC1( convolve14 ) +__m128d v1, v2, m1, s1; +v2 = _mm_loadu_pd( rp - 1 ); v1 = _mm_loadu_pd( rp + 1 ); +m1 = _mm_mul_pd( _mm_load_pd( flt + 0 ), + _mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 ))); +s1 = m1; +__m128d v3, v4, m3, s3; +v4 = _mm_loadu_pd( rp - 3 ); v3 = _mm_loadu_pd( rp + 3 ); +m3 = _mm_mul_pd( _mm_load_pd( flt + 2 ), + _mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 ))); +s3 = m3; +v2 = _mm_loadu_pd( rp - 5 ); v1 = _mm_loadu_pd( rp + 5 ); +m1 = _mm_mul_pd( _mm_load_pd( flt + 4 ), + _mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 ))); +s1 = _mm_add_pd( s1, m1 ); +v4 = _mm_loadu_pd( rp - 7 ); v3 = _mm_loadu_pd( rp + 7 ); +m3 = _mm_mul_pd( _mm_load_pd( flt + 6 ), + _mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 ))); +s3 = _mm_add_pd( s3, m3 ); +v2 = _mm_loadu_pd( rp - 9 ); v1 = _mm_loadu_pd( rp + 9 ); +m1 = _mm_mul_pd( _mm_load_pd( flt + 8 ), + _mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 ))); +s1 = _mm_add_pd( s1, m1 ); +v4 = _mm_loadu_pd( rp - 11 ); v3 = _mm_loadu_pd( rp + 11 ); +m3 = _mm_mul_pd( _mm_load_pd( flt + 10 ), + _mm_add_pd( v3, _mm_shuffle_pd( v4, v4, 1 ))); +s3 = _mm_add_pd( s3, m3 ); +v2 = _mm_loadu_pd( rp - 13 ); v1 = _mm_loadu_pd( rp + 13 ); +m1 = _mm_mul_pd( _mm_load_pd( flt + 12 ), + _mm_add_pd( v1, _mm_shuffle_pd( v2, v2, 1 ))); +s1 = _mm_add_pd( s1, m1 ); +s1 = _mm_add_pd( s1, s3 ); +_mm_storel_pd( op + 1, _mm_add_pd( s1, _mm_shuffle_pd( s1, s1, 1 ))); +R8BHBC2 + +#elif defined( R8B_NEON ) + +R8BHBC1( convolve1 ) +op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ]); +R8BHBC2 + +R8BHBC1( convolve2 ) +float64x2_t v1, v2, s1; +s1 = vdupq_n_f64( 0.0 ); +v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 ); +s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ), + vaddq_f64( v1, vextq_f64( v2, v2, 1 ))); +op[ 1 ] = vaddvq_f64( s1 ); +R8BHBC2 + +R8BHBC1( convolve3 ) +float64x2_t v1, v2, s1; +s1 = vdupq_n_f64( 0.0 ); +v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 ); +s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ), + vaddq_f64( v1, vextq_f64( v2, v2, 1 ))); +op[ 1 ] = vaddvq_f64( s1 ) + flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ]); +R8BHBC2 + +R8BHBC1( convolve4 ) +float64x2_t v1, v2, s1; +s1 = vdupq_n_f64( 0.0 ); +v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 ); +s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ), + vaddq_f64( v1, vextq_f64( v2, v2, 1 ))); +float64x2_t v3, v4, s3; +s3 = vdupq_n_f64( 0.0 ); +v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 ); +s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ), + vaddq_f64( v3, vextq_f64( v4, v4, 1 ))); +s1 = vaddq_f64( s1, s3 ); +op[ 1 ] = vaddvq_f64( s1 ); +R8BHBC2 + +R8BHBC1( convolve5 ) +float64x2_t v1, v2, s1; +s1 = vdupq_n_f64( 0.0 ); +v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 ); +s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ), + vaddq_f64( v1, vextq_f64( v2, v2, 1 ))); +float64x2_t v3, v4, s3; +s3 = vdupq_n_f64( 0.0 ); +v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 ); +s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ), + vaddq_f64( v3, vextq_f64( v4, v4, 1 ))); +s1 = vaddq_f64( s1, s3 ); +op[ 1 ] = vaddvq_f64( s1 ) + flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ]); +R8BHBC2 + +R8BHBC1( convolve6 ) +float64x2_t v1, v2, s1; +s1 = vdupq_n_f64( 0.0 ); +v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 ); +s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ), + vaddq_f64( v1, vextq_f64( v2, v2, 1 ))); +float64x2_t v3, v4, s3; +s3 = vdupq_n_f64( 0.0 ); +v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 ); +s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ), + vaddq_f64( v3, vextq_f64( v4, v4, 1 ))); +v2 = vld1q_f64( rp - 5 ); v1 = vld1q_f64( rp + 5 ); +s1 = vmlaq_f64( s1, vld1q_f64( flt + 4 ), + vaddq_f64( v1, vextq_f64( v2, v2, 1 ))); +s1 = vaddq_f64( s1, s3 ); +op[ 1 ] = vaddvq_f64( s1 ); +R8BHBC2 + +R8BHBC1( convolve7 ) +float64x2_t v1, v2, s1; +s1 = vdupq_n_f64( 0.0 ); +v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 ); +s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ), + vaddq_f64( v1, vextq_f64( v2, v2, 1 ))); +float64x2_t v3, v4, s3; +s3 = vdupq_n_f64( 0.0 ); +v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 ); +s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ), + vaddq_f64( v3, vextq_f64( v4, v4, 1 ))); +v2 = vld1q_f64( rp - 5 ); v1 = vld1q_f64( rp + 5 ); +s1 = vmlaq_f64( s1, vld1q_f64( flt + 4 ), + vaddq_f64( v1, vextq_f64( v2, v2, 1 ))); +s1 = vaddq_f64( s1, s3 ); +op[ 1 ] = vaddvq_f64( s1 ) + flt[ 6 ] * ( rp[ 7 ] + rp[ -6 ]); +R8BHBC2 + +R8BHBC1( convolve8 ) +float64x2_t v1, v2, s1; +s1 = vdupq_n_f64( 0.0 ); +v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 ); +s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ), + vaddq_f64( v1, vextq_f64( v2, v2, 1 ))); +float64x2_t v3, v4, s3; +s3 = vdupq_n_f64( 0.0 ); +v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 ); +s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ), + vaddq_f64( v3, vextq_f64( v4, v4, 1 ))); +v2 = vld1q_f64( rp - 5 ); v1 = vld1q_f64( rp + 5 ); +s1 = vmlaq_f64( s1, vld1q_f64( flt + 4 ), + vaddq_f64( v1, vextq_f64( v2, v2, 1 ))); +v4 = vld1q_f64( rp - 7 ); v3 = vld1q_f64( rp + 7 ); +s3 = vmlaq_f64( s3, vld1q_f64( flt + 6 ), + vaddq_f64( v3, vextq_f64( v4, v4, 1 ))); +s1 = vaddq_f64( s1, s3 ); +op[ 1 ] = vaddvq_f64( s1 ); +R8BHBC2 + +R8BHBC1( convolve9 ) +float64x2_t v1, v2, s1; +s1 = vdupq_n_f64( 0.0 ); +v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 ); +s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ), + vaddq_f64( v1, vextq_f64( v2, v2, 1 ))); +float64x2_t v3, v4, s3; +s3 = vdupq_n_f64( 0.0 ); +v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 ); +s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ), + vaddq_f64( v3, vextq_f64( v4, v4, 1 ))); +v2 = vld1q_f64( rp - 5 ); v1 = vld1q_f64( rp + 5 ); +s1 = vmlaq_f64( s1, vld1q_f64( flt + 4 ), + vaddq_f64( v1, vextq_f64( v2, v2, 1 ))); +v4 = vld1q_f64( rp - 7 ); v3 = vld1q_f64( rp + 7 ); +s3 = vmlaq_f64( s3, vld1q_f64( flt + 6 ), + vaddq_f64( v3, vextq_f64( v4, v4, 1 ))); +s1 = vaddq_f64( s1, s3 ); +op[ 1 ] = vaddvq_f64( s1 ) + flt[ 8 ] * ( rp[ 9 ] + rp[ -8 ]); +R8BHBC2 + +R8BHBC1( convolve10 ) +float64x2_t v1, v2, s1; +s1 = vdupq_n_f64( 0.0 ); +v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 ); +s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ), + vaddq_f64( v1, vextq_f64( v2, v2, 1 ))); +float64x2_t v3, v4, s3; +s3 = vdupq_n_f64( 0.0 ); +v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 ); +s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ), + vaddq_f64( v3, vextq_f64( v4, v4, 1 ))); +v2 = vld1q_f64( rp - 5 ); v1 = vld1q_f64( rp + 5 ); +s1 = vmlaq_f64( s1, vld1q_f64( flt + 4 ), + vaddq_f64( v1, vextq_f64( v2, v2, 1 ))); +v4 = vld1q_f64( rp - 7 ); v3 = vld1q_f64( rp + 7 ); +s3 = vmlaq_f64( s3, vld1q_f64( flt + 6 ), + vaddq_f64( v3, vextq_f64( v4, v4, 1 ))); +v2 = vld1q_f64( rp - 9 ); v1 = vld1q_f64( rp + 9 ); +s1 = vmlaq_f64( s1, vld1q_f64( flt + 8 ), + vaddq_f64( v1, vextq_f64( v2, v2, 1 ))); +s1 = vaddq_f64( s1, s3 ); +op[ 1 ] = vaddvq_f64( s1 ); +R8BHBC2 + +R8BHBC1( convolve11 ) +float64x2_t v1, v2, s1; +s1 = vdupq_n_f64( 0.0 ); +v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 ); +s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ), + vaddq_f64( v1, vextq_f64( v2, v2, 1 ))); +float64x2_t v3, v4, s3; +s3 = vdupq_n_f64( 0.0 ); +v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 ); +s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ), + vaddq_f64( v3, vextq_f64( v4, v4, 1 ))); +v2 = vld1q_f64( rp - 5 ); v1 = vld1q_f64( rp + 5 ); +s1 = vmlaq_f64( s1, vld1q_f64( flt + 4 ), + vaddq_f64( v1, vextq_f64( v2, v2, 1 ))); +v4 = vld1q_f64( rp - 7 ); v3 = vld1q_f64( rp + 7 ); +s3 = vmlaq_f64( s3, vld1q_f64( flt + 6 ), + vaddq_f64( v3, vextq_f64( v4, v4, 1 ))); +v2 = vld1q_f64( rp - 9 ); v1 = vld1q_f64( rp + 9 ); +s1 = vmlaq_f64( s1, vld1q_f64( flt + 8 ), + vaddq_f64( v1, vextq_f64( v2, v2, 1 ))); +s1 = vaddq_f64( s1, s3 ); +op[ 1 ] = vaddvq_f64( s1 ) + flt[ 10 ] * ( rp[ 11 ] + rp[ -10 ]); +R8BHBC2 + +R8BHBC1( convolve12 ) +float64x2_t v1, v2, s1; +s1 = vdupq_n_f64( 0.0 ); +v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 ); +s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ), + vaddq_f64( v1, vextq_f64( v2, v2, 1 ))); +float64x2_t v3, v4, s3; +s3 = vdupq_n_f64( 0.0 ); +v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 ); +s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ), + vaddq_f64( v3, vextq_f64( v4, v4, 1 ))); +v2 = vld1q_f64( rp - 5 ); v1 = vld1q_f64( rp + 5 ); +s1 = vmlaq_f64( s1, vld1q_f64( flt + 4 ), + vaddq_f64( v1, vextq_f64( v2, v2, 1 ))); +v4 = vld1q_f64( rp - 7 ); v3 = vld1q_f64( rp + 7 ); +s3 = vmlaq_f64( s3, vld1q_f64( flt + 6 ), + vaddq_f64( v3, vextq_f64( v4, v4, 1 ))); +v2 = vld1q_f64( rp - 9 ); v1 = vld1q_f64( rp + 9 ); +s1 = vmlaq_f64( s1, vld1q_f64( flt + 8 ), + vaddq_f64( v1, vextq_f64( v2, v2, 1 ))); +v4 = vld1q_f64( rp - 11 ); v3 = vld1q_f64( rp + 11 ); +s3 = vmlaq_f64( s3, vld1q_f64( flt + 10 ), + vaddq_f64( v3, vextq_f64( v4, v4, 1 ))); +s1 = vaddq_f64( s1, s3 ); +op[ 1 ] = vaddvq_f64( s1 ); +R8BHBC2 + +R8BHBC1( convolve13 ) +float64x2_t v1, v2, s1; +s1 = vdupq_n_f64( 0.0 ); +v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 ); +s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ), + vaddq_f64( v1, vextq_f64( v2, v2, 1 ))); +float64x2_t v3, v4, s3; +s3 = vdupq_n_f64( 0.0 ); +v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 ); +s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ), + vaddq_f64( v3, vextq_f64( v4, v4, 1 ))); +v2 = vld1q_f64( rp - 5 ); v1 = vld1q_f64( rp + 5 ); +s1 = vmlaq_f64( s1, vld1q_f64( flt + 4 ), + vaddq_f64( v1, vextq_f64( v2, v2, 1 ))); +v4 = vld1q_f64( rp - 7 ); v3 = vld1q_f64( rp + 7 ); +s3 = vmlaq_f64( s3, vld1q_f64( flt + 6 ), + vaddq_f64( v3, vextq_f64( v4, v4, 1 ))); +v2 = vld1q_f64( rp - 9 ); v1 = vld1q_f64( rp + 9 ); +s1 = vmlaq_f64( s1, vld1q_f64( flt + 8 ), + vaddq_f64( v1, vextq_f64( v2, v2, 1 ))); +v4 = vld1q_f64( rp - 11 ); v3 = vld1q_f64( rp + 11 ); +s3 = vmlaq_f64( s3, vld1q_f64( flt + 10 ), + vaddq_f64( v3, vextq_f64( v4, v4, 1 ))); +s1 = vaddq_f64( s1, s3 ); +op[ 1 ] = vaddvq_f64( s1 ) + flt[ 12 ] * ( rp[ 13 ] + rp[ -12 ]); +R8BHBC2 + +R8BHBC1( convolve14 ) +float64x2_t v1, v2, s1; +s1 = vdupq_n_f64( 0.0 ); +v2 = vld1q_f64( rp - 1 ); v1 = vld1q_f64( rp + 1 ); +s1 = vmlaq_f64( s1, vld1q_f64( flt + 0 ), + vaddq_f64( v1, vextq_f64( v2, v2, 1 ))); +float64x2_t v3, v4, s3; +s3 = vdupq_n_f64( 0.0 ); +v4 = vld1q_f64( rp - 3 ); v3 = vld1q_f64( rp + 3 ); +s3 = vmlaq_f64( s3, vld1q_f64( flt + 2 ), + vaddq_f64( v3, vextq_f64( v4, v4, 1 ))); +v2 = vld1q_f64( rp - 5 ); v1 = vld1q_f64( rp + 5 ); +s1 = vmlaq_f64( s1, vld1q_f64( flt + 4 ), + vaddq_f64( v1, vextq_f64( v2, v2, 1 ))); +v4 = vld1q_f64( rp - 7 ); v3 = vld1q_f64( rp + 7 ); +s3 = vmlaq_f64( s3, vld1q_f64( flt + 6 ), + vaddq_f64( v3, vextq_f64( v4, v4, 1 ))); +v2 = vld1q_f64( rp - 9 ); v1 = vld1q_f64( rp + 9 ); +s1 = vmlaq_f64( s1, vld1q_f64( flt + 8 ), + vaddq_f64( v1, vextq_f64( v2, v2, 1 ))); +v4 = vld1q_f64( rp - 11 ); v3 = vld1q_f64( rp + 11 ); +s3 = vmlaq_f64( s3, vld1q_f64( flt + 10 ), + vaddq_f64( v3, vextq_f64( v4, v4, 1 ))); +v2 = vld1q_f64( rp - 13 ); v1 = vld1q_f64( rp + 13 ); +s1 = vmlaq_f64( s1, vld1q_f64( flt + 12 ), + vaddq_f64( v1, vextq_f64( v2, v2, 1 ))); +s1 = vaddq_f64( s1, s3 ); +op[ 1 ] = vaddvq_f64( s1 ); +R8BHBC2 + +#else // SIMD + +R8BHBC1( convolve1 ) +op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ]); +R8BHBC2 + +R8BHBC1( convolve2 ) +op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ]) + + flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ]); +R8BHBC2 + +R8BHBC1( convolve3 ) +op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ]) + + flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ]) + + flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ]); +R8BHBC2 + +R8BHBC1( convolve4 ) +op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ]) + + flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ]) + + flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ]) + + flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ]); +R8BHBC2 + +R8BHBC1( convolve5 ) +op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ]) + + flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ]) + + flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ]) + + flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ]) + + flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ]); +R8BHBC2 + +R8BHBC1( convolve6 ) +op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ]) + + flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ]) + + flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ]) + + flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ]) + + flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ]) + + flt[ 5 ] * ( rp[ 6 ] + rp[ -5 ]); +R8BHBC2 + +R8BHBC1( convolve7 ) +op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ]) + + flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ]) + + flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ]) + + flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ]) + + flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ]) + + flt[ 5 ] * ( rp[ 6 ] + rp[ -5 ]) + + flt[ 6 ] * ( rp[ 7 ] + rp[ -6 ]); +R8BHBC2 + +R8BHBC1( convolve8 ) +op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ]) + + flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ]) + + flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ]) + + flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ]) + + flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ]) + + flt[ 5 ] * ( rp[ 6 ] + rp[ -5 ]) + + flt[ 6 ] * ( rp[ 7 ] + rp[ -6 ]) + + flt[ 7 ] * ( rp[ 8 ] + rp[ -7 ]); +R8BHBC2 + +R8BHBC1( convolve9 ) +op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ]) + + flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ]) + + flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ]) + + flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ]) + + flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ]) + + flt[ 5 ] * ( rp[ 6 ] + rp[ -5 ]) + + flt[ 6 ] * ( rp[ 7 ] + rp[ -6 ]) + + flt[ 7 ] * ( rp[ 8 ] + rp[ -7 ]) + + flt[ 8 ] * ( rp[ 9 ] + rp[ -8 ]); +R8BHBC2 + +R8BHBC1( convolve10 ) +op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ]) + + flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ]) + + flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ]) + + flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ]) + + flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ]) + + flt[ 5 ] * ( rp[ 6 ] + rp[ -5 ]) + + flt[ 6 ] * ( rp[ 7 ] + rp[ -6 ]) + + flt[ 7 ] * ( rp[ 8 ] + rp[ -7 ]) + + flt[ 8 ] * ( rp[ 9 ] + rp[ -8 ]) + + flt[ 9 ] * ( rp[ 10 ] + rp[ -9 ]); +R8BHBC2 + +R8BHBC1( convolve11 ) +op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ]) + + flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ]) + + flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ]) + + flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ]) + + flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ]) + + flt[ 5 ] * ( rp[ 6 ] + rp[ -5 ]) + + flt[ 6 ] * ( rp[ 7 ] + rp[ -6 ]) + + flt[ 7 ] * ( rp[ 8 ] + rp[ -7 ]) + + flt[ 8 ] * ( rp[ 9 ] + rp[ -8 ]) + + flt[ 9 ] * ( rp[ 10 ] + rp[ -9 ]) + + flt[ 10 ] * ( rp[ 11 ] + rp[ -10 ]); +R8BHBC2 + +R8BHBC1( convolve12 ) +op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ]) + + flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ]) + + flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ]) + + flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ]) + + flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ]) + + flt[ 5 ] * ( rp[ 6 ] + rp[ -5 ]) + + flt[ 6 ] * ( rp[ 7 ] + rp[ -6 ]) + + flt[ 7 ] * ( rp[ 8 ] + rp[ -7 ]) + + flt[ 8 ] * ( rp[ 9 ] + rp[ -8 ]) + + flt[ 9 ] * ( rp[ 10 ] + rp[ -9 ]) + + flt[ 10 ] * ( rp[ 11 ] + rp[ -10 ]) + + flt[ 11 ] * ( rp[ 12 ] + rp[ -11 ]); +R8BHBC2 + +R8BHBC1( convolve13 ) +op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ]) + + flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ]) + + flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ]) + + flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ]) + + flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ]) + + flt[ 5 ] * ( rp[ 6 ] + rp[ -5 ]) + + flt[ 6 ] * ( rp[ 7 ] + rp[ -6 ]) + + flt[ 7 ] * ( rp[ 8 ] + rp[ -7 ]) + + flt[ 8 ] * ( rp[ 9 ] + rp[ -8 ]) + + flt[ 9 ] * ( rp[ 10 ] + rp[ -9 ]) + + flt[ 10 ] * ( rp[ 11 ] + rp[ -10 ]) + + flt[ 11 ] * ( rp[ 12 ] + rp[ -11 ]) + + flt[ 12 ] * ( rp[ 13 ] + rp[ -12 ]); +R8BHBC2 + +R8BHBC1( convolve14 ) +op[ 1 ] = flt[ 0 ] * ( rp[ 1 ] + rp[ 0 ]) + + flt[ 1 ] * ( rp[ 2 ] + rp[ -1 ]) + + flt[ 2 ] * ( rp[ 3 ] + rp[ -2 ]) + + flt[ 3 ] * ( rp[ 4 ] + rp[ -3 ]) + + flt[ 4 ] * ( rp[ 5 ] + rp[ -4 ]) + + flt[ 5 ] * ( rp[ 6 ] + rp[ -5 ]) + + flt[ 6 ] * ( rp[ 7 ] + rp[ -6 ]) + + flt[ 7 ] * ( rp[ 8 ] + rp[ -7 ]) + + flt[ 8 ] * ( rp[ 9 ] + rp[ -8 ]) + + flt[ 9 ] * ( rp[ 10 ] + rp[ -9 ]) + + flt[ 10 ] * ( rp[ 11 ] + rp[ -10 ]) + + flt[ 11 ] * ( rp[ 12 ] + rp[ -11 ]) + + flt[ 12 ] * ( rp[ 13 ] + rp[ -12 ]) + + flt[ 13 ] * ( rp[ 14 ] + rp[ -13 ]); +R8BHBC2 + +#endif // SIMD diff --git a/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPProcessor.h b/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPProcessor.h new file mode 100644 index 00000000..51c7cbc9 --- /dev/null +++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPProcessor.h @@ -0,0 +1,103 @@ +//$ nobt +//$ nocpp + +/** + * @file CDSPProcessor.h + * + * @brief The base virtual class for DSP processing algorithms. + * + * This file includes the base virtual class for DSP processing algorithm + * classes like FIR filtering and interpolation. + * + * r8brain-free-src Copyright (c) 2013-2021 Aleksey Vaneev + * See the "LICENSE" file for license. + */ + +#ifndef R8B_CDSPPROCESSOR_INCLUDED +#define R8B_CDSPPROCESSOR_INCLUDED + +#include "r8bbase.h" + +namespace r8b { + +/** + * @brief The base virtual class for DSP processing algorithms. + * + * This class can be used as a base class for various DSP processing + * algorithms (processors). DSP processors that are derived from this class + * can be seamlessly integrated into various DSP processing graphs. + */ + +class CDSPProcessor : public R8B_BASECLASS +{ + R8BNOCTOR( CDSPProcessor ); + +public: + CDSPProcessor() + { + } + + virtual ~CDSPProcessor() + { + } + + /** + * @return The latency, in samples, which is present in the output signal. + * This value is usually zero if the DSP processor "consumes" the latency + * automatically. + */ + + virtual int getLatency() const = 0; + + /** + * @return Fractional latency, in samples, which is present in the output + * signal. This value is usually zero if a linear-phase filtering is used. + * With minimum-phase filters in use, this value can be non-zero even if + * the getLatency() function returns zero. + */ + + virtual double getLatencyFrac() const = 0; + + /** + * @param MaxInLen The number of samples planned to process at once, at + * most. + * @return The maximal length of the output buffer required when + * processing the "MaxInLen" number of input samples. + */ + + virtual int getMaxOutLen( const int MaxInLen ) const = 0; + + /** + * Function clears (resets) the state of *this object and returns it to + * the state after construction. All input data accumulated in the + * internal buffer so far will be discarded. + */ + + virtual void clear() = 0; + + /** + * Function performs DSP processing. + * + * @param ip Input data pointer. + * @param l0 How many samples to process. + * @param[out] op0 Output data pointer. The capacity of this buffer should + * be equal to the value returned by the getMaxOutLen() function for the + * given "l0". This buffer can be equal to "ip" only if the + * getMaxOutLen( l0 ) function returned a value lesser than "l0". This + * pointer can be incremented on function's return if latency compensation + * was performed by the processor. Note that on function's return, this + * pointer may point to some internal buffers, including the "ip" buffer, + * ignoring the originally passed value. + * @return The number of output samples written to the "op0" buffer and + * available after processing. This value can be smaller or larger in + * comparison to the original "l0" value due to processing and filter's + * latency compensation that took place, and due to resampling if it was + * performed. + */ + + virtual int process( double* ip, int l0, double*& op0 ) = 0; +}; + +} // namespace r8b + +#endif // R8B_CDSPPROCESSOR_INCLUDED diff --git a/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPRealFFT.h b/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPRealFFT.h new file mode 100644 index 00000000..5d88fa33 --- /dev/null +++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPRealFFT.h @@ -0,0 +1,820 @@ +//$ nobt +//$ nocpp + +/** + * @file CDSPRealFFT.h + * + * @brief Real-valued FFT transform class. + * + * This file includes FFT object implementation. All created FFT objects are + * kept in a global list after use, for a future reusal. Such approach + * minimizes time necessary to initialize the FFT object of the required + * length. + * + * r8brain-free-src Copyright (c) 2013-2022 Aleksey Vaneev + * See the "LICENSE" file for license. + */ + +#ifndef R8B_CDSPREALFFT_INCLUDED +#define R8B_CDSPREALFFT_INCLUDED + +#include "r8bbase.h" + +#if !R8B_IPP && !R8B_PFFFT && !R8B_PFFFT_DOUBLE + #include "fft4g.h" +#endif // !R8B_IPP && !R8B_PFFFT && !R8B_PFFFT_DOUBLE + +#if R8B_PFFFT + #include "pffft.h" +#endif // R8B_PFFFT + +#if R8B_PFFFT_DOUBLE + #include "pffft_double/pffft_double.h" +#endif // R8B_PFFFT_DOUBLE + +namespace r8b { + +/** + * @brief Real-valued FFT transform class. + * + * Class implements a wrapper for real-valued discrete fast Fourier transform + * functions. The object of this class can only be obtained via the + * CDSPRealFFTKeeper class. + * + * Uses functions from the FFT package by: Copyright(C) 1996-2001 Takuya OOURA + * http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html + * + * Also uses Intel IPP library functions if available (if the R8B_IPP=1 macro + * was defined). Note that IPP library's FFT functions are 2-3 times more + * efficient on the modern Intel Core i7-3770K processor than Ooura's + * functions. It may be worthwhile investing in IPP. Note, that FFT functions + * take less than 20% of the overall sample rate conversion time. However, + * when the "power of 2" resampling is used the performance of FFT functions + * becomes "everything". + */ + +class CDSPRealFFT : public R8B_BASECLASS +{ + R8BNOCTOR( CDSPRealFFT ); + + friend class CDSPRealFFTKeeper; + +public: + /** + * @return A multiplication constant that should be used after inverse + * transform to obtain a correct value scale. + */ + + double getInvMulConst() const + { + return( InvMulConst ); + } + + /** + * @return The length (the number of real values in a transform) of *this + * FFT object, expressed as Nth power of 2. + */ + + int getLenBits() const + { + return( LenBits ); + } + + /** + * @return The length (the number of real values in a transform) of *this + * FFT object. + */ + + int getLen() const + { + return( Len ); + } + + /** + * Function performs in-place forward FFT. + * + * @param[in,out] p Pointer to data block to transform, length should be + * equal to *this object's getLen(). + */ + + void forward( double* const p ) const + { + #if R8B_FLOATFFT + + float* const op = (float*) p; + int i; + + for( i = 0; i < Len; i++ ) + { + op[ i ] = (float) p[ i ]; + } + + #endif // R8B_FLOATFFT + + #if R8B_IPP + + ippsFFTFwd_RToPerm_64f( p, p, SPtr, WorkBuffer ); + + #elif R8B_PFFFT + + pffft_transform_ordered( setup, op, op, work, PFFFT_FORWARD ); + + #elif R8B_PFFFT_DOUBLE + + pffftd_transform_ordered( setup, p, p, work, PFFFT_FORWARD ); + + #else // R8B_PFFFT_DOUBLE + + ooura_fft :: rdft( Len, 1, p, wi.getPtr(), wd.getPtr() ); + + #endif // R8B_IPP + } + + /** + * Function performs in-place inverse FFT. + * + * @param[in,out] p Pointer to data block to transform, length should be + * equal to *this object's getLen(). + */ + + void inverse( double* const p ) const + { + #if R8B_IPP + + ippsFFTInv_PermToR_64f( p, p, SPtr, WorkBuffer ); + + #elif R8B_PFFFT + + pffft_transform_ordered( setup, (float*) p, (float*) p, work, + PFFFT_BACKWARD ); + + #elif R8B_PFFFT_DOUBLE + + pffftd_transform_ordered( setup, p, p, work, PFFFT_BACKWARD ); + + #else // R8B_PFFFT_DOUBLE + + ooura_fft :: rdft( Len, -1, p, wi.getPtr(), wd.getPtr() ); + + #endif // R8B_IPP + + #if R8B_FLOATFFT + + const float* const ip = (const float*) p; + int i; + + for( i = Len - 1; i >= 0; i-- ) + { + p[ i ] = ip[ i ]; + } + + #endif // R8B_FLOATFFT + } + + /** + * Function multiplies two complex-valued data blocks and places result in + * a new data block. Length of all data blocks should be equal to *this + * object's block length. Input blocks should have been produced with the + * forward() function of *this object. + * + * @param aip1 Input data block 1. + * @param aip2 Input data block 2. + * @param[out] aop Output data block, should not be equal to aip1 nor + * aip2. + */ + + void multiplyBlocks( const double* const aip1, const double* const aip2, + double* const aop ) const + { + #if R8B_FLOATFFT + + const float* const ip1 = (const float*) aip1; + const float* const ip2 = (const float*) aip2; + float* const op = (float*) aop; + + #else // R8B_FLOATFFT + + const double* const ip1 = aip1; + const double* const ip2 = aip2; + double* const op = aop; + + #endif // R8B_FLOATFFT + + #if R8B_IPP + + ippsMulPerm_64f( (Ipp64f*) ip1, (Ipp64f*) ip2, (Ipp64f*) op, Len ); + + #else // R8B_IPP + + op[ 0 ] = ip1[ 0 ] * ip2[ 0 ]; + op[ 1 ] = ip1[ 1 ] * ip2[ 1 ]; + + int i = 2; + + while( i < Len ) + { + op[ i ] = ip1[ i ] * ip2[ i ] - ip1[ i + 1 ] * ip2[ i + 1 ]; + op[ i + 1 ] = ip1[ i ] * ip2[ i + 1 ] + ip1[ i + 1 ] * ip2[ i ]; + i += 2; + } + + #endif // R8B_IPP + } + + /** + * Function multiplies two complex-valued data blocks in-place. Length of + * both data blocks should be equal to *this object's block length. Blocks + * should have been produced with the forward() function of *this object. + * + * @param aip Input data block 1. + * @param[in,out] aop Output/input data block 2. + */ + + void multiplyBlocks( const double* const aip, double* const aop ) const + { + #if R8B_FLOATFFT + + const float* const ip = (const float*) aip; + float* const op = (float*) aop; + float t; + + #else // R8B_FLOATFFT + + const double* const ip = aip; + double* const op = aop; + + #if !R8B_IPP + double t; + #endif // !R8B_IPP + + #endif // R8B_FLOATFFT + + #if R8B_IPP + + ippsMulPerm_64f( (Ipp64f*) op, (Ipp64f*) ip, (Ipp64f*) op, Len ); + + #else // R8B_IPP + + op[ 0 ] *= ip[ 0 ]; + op[ 1 ] *= ip[ 1 ]; + + int i = 2; + + while( i < Len ) + { + t = op[ i ] * ip[ i ] - op[ i + 1 ] * ip[ i + 1 ]; + op[ i + 1 ] = op[ i ] * ip[ i + 1 ] + op[ i + 1 ] * ip[ i ]; + op[ i ] = t; + i += 2; + } + + #endif // R8B_IPP + } + + /** + * Function multiplies two complex-valued data blocks in-place, + * considering that the "ip" block contains "zero-phase" response. Length + * of both data blocks should be equal to *this object's block length. + * Blocks should have been produced with the forward() function of *this + * object. + * + * @param aip Input data block 1, "zero-phase" response. This block should + * be first transformed via the convertToZP() function. + * @param[in,out] aop Output/input data block 2. + */ + + void multiplyBlocksZP( const double* const aip, double* const aop ) const + { + #if R8B_FLOATFFT + + const float* const ip = (const float*) aip; + float* const op = (float*) aop; + + #else // R8B_FLOATFFT + + const double* ip = aip; + double* op = aop; + + #endif // R8B_FLOATFFT + + // SIMD implementations assume that pointers are address-aligned. + + #if !R8B_FLOATFFT && defined( R8B_SSE2 ) + + int c8 = Len >> 3; + + while( c8 != 0 ) + { + const __m128d iv1 = _mm_load_pd( ip ); + const __m128d iv2 = _mm_load_pd( ip + 2 ); + const __m128d ov1 = _mm_load_pd( op ); + const __m128d ov2 = _mm_load_pd( op + 2 ); + _mm_store_pd( op, _mm_mul_pd( iv1, ov1 )); + _mm_store_pd( op + 2, _mm_mul_pd( iv2, ov2 )); + + const __m128d iv3 = _mm_load_pd( ip + 4 ); + const __m128d ov3 = _mm_load_pd( op + 4 ); + const __m128d iv4 = _mm_load_pd( ip + 6 ); + const __m128d ov4 = _mm_load_pd( op + 6 ); + _mm_store_pd( op + 4, _mm_mul_pd( iv3, ov3 )); + _mm_store_pd( op + 6, _mm_mul_pd( iv4, ov4 )); + + ip += 8; + op += 8; + c8--; + } + + int c = Len & 7; + + while( c != 0 ) + { + *op *= *ip; + ip++; + op++; + c--; + } + + #elif !R8B_FLOATFFT && defined( R8B_NEON ) + + int c8 = Len >> 3; + + while( c8 != 0 ) + { + const float64x2_t iv1 = vld1q_f64( ip ); + const float64x2_t iv2 = vld1q_f64( ip + 2 ); + const float64x2_t ov1 = vld1q_f64( op ); + const float64x2_t ov2 = vld1q_f64( op + 2 ); + vst1q_f64( op, vmulq_f64( iv1, ov1 )); + vst1q_f64( op + 2, vmulq_f64( iv2, ov2 )); + + const float64x2_t iv3 = vld1q_f64( ip + 4 ); + const float64x2_t iv4 = vld1q_f64( ip + 6 ); + const float64x2_t ov3 = vld1q_f64( op + 4 ); + const float64x2_t ov4 = vld1q_f64( op + 6 ); + vst1q_f64( op + 4, vmulq_f64( iv3, ov3 )); + vst1q_f64( op + 6, vmulq_f64( iv4, ov4 )); + + ip += 8; + op += 8; + c8--; + } + + int c = Len & 7; + + while( c != 0 ) + { + *op *= *ip; + ip++; + op++; + c--; + } + + #else // SIMD + + int i; + + for( i = 0; i < Len; i++ ) + { + op[ i ] *= ip[ i ]; + } + + #endif // SIMD + } + + /** + * Function converts the specified forward-transformed block into + * "zero-phase" form suitable for use with the multiplyBlocksZ() function. + * + * @param[in,out] ap Block to transform. + */ + + void convertToZP( double* const ap ) const + { + #if R8B_FLOATFFT + + float* const p = (float*) ap; + + #else // R8B_FLOATFFT + + double* const p = ap; + + #endif // R8B_FLOATFFT + + int i = 2; + + while( i < Len ) + { + p[ i + 1 ] = p[ i ]; + i += 2; + } + } + +private: + int LenBits; ///< Length of FFT block (expressed as Nth power of 2). + ///< + int Len; ///< Length of FFT block (number of real values). + ///< + double InvMulConst; ///< Inverse FFT multiply constant. + ///< + CDSPRealFFT* Next; ///< Next object in a singly-linked list. + ///< + + #if R8B_IPP + IppsFFTSpec_R_64f* SPtr; ///< Pointer to initialized data buffer + ///< to be passed to IPP's FFT functions. + ///< + CFixedBuffer< unsigned char > SpecBuffer; ///< Working buffer. + ///< + CFixedBuffer< unsigned char > WorkBuffer; ///< Working buffer. + ///< + #elif R8B_PFFFT + PFFFT_Setup* setup; ///< PFFFT setup object. + ///< + CFixedBuffer< float > work; ///< Working buffer. + ///< + #elif R8B_PFFFT_DOUBLE + PFFFTD_Setup* setup; ///< PFFFTD setup object. + ///< + CFixedBuffer< double > work; ///< Working buffer. + ///< + #else // R8B_PFFFT_DOUBLE + CFixedBuffer< int > wi; ///< Working buffer (ints). + ///< + CFixedBuffer< double > wd; ///< Working buffer (doubles). + ///< + #endif // R8B_IPP + + /** + * A simple class that keeps the pointer to the object and deletes it + * automatically. + */ + + class CObjKeeper + { + R8BNOCTOR( CObjKeeper ); + + public: + CObjKeeper() + : Object( NULL ) + { + } + + ~CObjKeeper() + { + delete Object; + } + + CObjKeeper& operator = ( CDSPRealFFT* const aObject ) + { + Object = aObject; + return( *this ); + } + + operator CDSPRealFFT* () const + { + return( Object ); + } + + private: + CDSPRealFFT* Object; ///< FFT object being kept. + ///< + }; + + CDSPRealFFT() + { + } + + /** + * Constructor initializes FFT object. + * + * @param aLenBits The length of FFT block (Nth power of 2), specifies the + * number of real values in a block. Values from 1 to 30 inclusive are + * supported. + */ + + CDSPRealFFT( const int aLenBits ) + : LenBits( aLenBits ) + , Len( 1 << aLenBits ) + #if R8B_IPP + , InvMulConst( 1.0 / Len ) + #elif R8B_PFFFT + , InvMulConst( 1.0 / Len ) + #elif R8B_PFFFT_DOUBLE + , InvMulConst( 1.0 / Len ) + #else // R8B_PFFFT_DOUBLE + , InvMulConst( 2.0 / Len ) + #endif // R8B_IPP + { + #if R8B_IPP + + int SpecSize; + int SpecBufferSize; + int BufferSize; + + ippsFFTGetSize_R_64f( LenBits, IPP_FFT_NODIV_BY_ANY, + ippAlgHintFast, &SpecSize, &SpecBufferSize, &BufferSize ); + + CFixedBuffer< unsigned char > InitBuffer( SpecBufferSize ); + SpecBuffer.alloc( SpecSize ); + WorkBuffer.alloc( BufferSize ); + + ippsFFTInit_R_64f( &SPtr, LenBits, IPP_FFT_NODIV_BY_ANY, + ippAlgHintFast, SpecBuffer, InitBuffer ); + + #elif R8B_PFFFT + + setup = pffft_new_setup( Len, PFFFT_REAL ); + work.alloc( Len ); + + #elif R8B_PFFFT_DOUBLE + + setup = pffftd_new_setup( Len, PFFFT_REAL ); + work.alloc( Len ); + + #else // R8B_PFFFT_DOUBLE + + wi.alloc( (int) ceil( 2.0 + sqrt( (double) ( Len >> 1 )))); + wi[ 0 ] = 0; + wd.alloc( Len >> 1 ); + + #endif // R8B_IPP + } + + ~CDSPRealFFT() + { + #if R8B_PFFFT + pffft_destroy_setup( setup ); + #elif R8B_PFFFT_DOUBLE + pffftd_destroy_setup( setup ); + #endif // R8B_PFFFT_DOUBLE + + delete Next; + } +}; + +/** + * @brief A "keeper" class for real-valued FFT transform objects. + * + * Class implements "keeper" functionality for handling CDSPRealFFT objects. + * The allocated FFT objects are placed on the global static list of objects + * for future reuse instead of deallocation. + */ + +class CDSPRealFFTKeeper : public R8B_BASECLASS +{ + R8BNOCTOR( CDSPRealFFTKeeper ); + +public: + CDSPRealFFTKeeper() + : Object( NULL ) + { + } + + /** + * Function acquires FFT object with the specified block length. + * + * @param LenBits The length of FFT block (Nth power of 2), in the range + * [1; 30] inclusive, specifies the number of real values in a FFT block. + */ + + CDSPRealFFTKeeper( const int LenBits ) + { + Object = acquire( LenBits ); + } + + ~CDSPRealFFTKeeper() + { + if( Object != NULL ) + { + release( Object ); + } + } + + /** + * @return Pointer to the acquired FFT object. + */ + + const CDSPRealFFT* operator -> () const + { + R8BASSERT( Object != NULL ); + + return( Object ); + } + + /** + * Function acquires FFT object with the specified block length. This + * function can be called any number of times. + * + * @param LenBits The length of FFT block (Nth power of 2), in the range + * [1; 30] inclusive, specifies the number of real values in a FFT block. + */ + + void init( const int LenBits ) + { + if( Object != NULL ) + { + if( Object -> LenBits == LenBits ) + { + return; + } + + release( Object ); + } + + Object = acquire( LenBits ); + } + + /** + * Function releases a previously acquired FFT object. + */ + + void reset() + { + if( Object != NULL ) + { + release( Object ); + Object = NULL; + } + } + +private: + CDSPRealFFT* Object; ///< FFT object. + ///< + + static CSyncObject StateSync; ///< FFTObjects synchronizer. + ///< + static CDSPRealFFT :: CObjKeeper FFTObjects[]; ///< Pool of FFT objects of + ///< various lengths. + ///< + + /** + * Function acquires FFT object from the global pool. + * + * @param LenBits FFT block length (expressed as Nth power of 2). + */ + + CDSPRealFFT* acquire( const int LenBits ) + { + R8BASSERT( LenBits > 0 && LenBits <= 30 ); + + R8BSYNC( StateSync ); + + if( FFTObjects[ LenBits ] == NULL ) + { + return( new CDSPRealFFT( LenBits )); + } + + CDSPRealFFT* ffto = FFTObjects[ LenBits ]; + FFTObjects[ LenBits ] = ffto -> Next; + + return( ffto ); + } + + /** + * Function releases a previously acquired FFT object. + * + * @param ffto FFT object to release. + */ + + void release( CDSPRealFFT* const ffto ) + { + R8BSYNC( StateSync ); + + ffto -> Next = FFTObjects[ ffto -> LenBits ]; + FFTObjects[ ffto -> LenBits ] = ffto; + } +}; + +/** + * Function calculates the minimum-phase transform of the filter kernel, using + * a discrete Hilbert transform in cepstrum domain. + * + * For more details, see part III.B of + * http://www.hpl.hp.com/personal/Niranjan_Damera-Venkata/files/ComplexMinPhase.pdf + * + * @param[in,out] Kernel Filter kernel buffer. + * @param KernelLen Filter kernel's length, in samples. + * @param LenMult Kernel length multiplier. Used as a coefficient of the + * "oversampling" in the frequency domain. Such oversampling is needed to + * improve the precision of the minimum-phase transform. If the filter's + * attenuation is high, this multiplier should be increased or otherwise the + * required attenuation will not be reached due to "smoothing" effect of this + * transform. + * @param DoFinalMul "True" if the final multiplication after transform should + * be performed or not. Such multiplication returns the gain of the signal to + * its original value. This parameter can be set to "false" if normalization + * of the resulting filter kernel is planned to be used. + * @param[out] DCGroupDelay If not NULL, this variable receives group delay + * at DC offset, in samples (can be a non-integer value). + */ + +inline void calcMinPhaseTransform( double* const Kernel, const int KernelLen, + const int LenMult = 2, const bool DoFinalMul = true, + double* const DCGroupDelay = NULL ) +{ + R8BASSERT( KernelLen > 0 ); + R8BASSERT( LenMult >= 2 ); + + const int LenBits = getBitOccupancy(( KernelLen * LenMult ) - 1 ); + const int Len = 1 << LenBits; + const int Len2 = Len >> 1; + int i; + + CFixedBuffer< double > ip( Len ); + CFixedBuffer< double > ip2( Len2 + 1 ); + + memcpy( &ip[ 0 ], Kernel, KernelLen * sizeof( ip[ 0 ])); + memset( &ip[ KernelLen ], 0, ( Len - KernelLen ) * sizeof( ip[ 0 ])); + + CDSPRealFFTKeeper ffto( LenBits ); + ffto -> forward( ip ); + + // Create the "log |c|" spectrum while saving the original power spectrum + // in the "ip2" buffer. + + #if R8B_FLOATFFT + float* const aip = (float*) &ip[ 0 ]; + float* const aip2 = (float*) &ip2[ 0 ]; + const float nzbias = 1e-35; + #else // R8B_FLOATFFT + double* const aip = &ip[ 0 ]; + double* const aip2 = &ip2[ 0 ]; + const double nzbias = 1e-300; + #endif // R8B_FLOATFFT + + aip2[ 0 ] = aip[ 0 ]; + aip[ 0 ] = log( fabs( aip[ 0 ]) + nzbias ); + aip2[ Len2 ] = aip[ 1 ]; + aip[ 1 ] = log( fabs( aip[ 1 ]) + nzbias ); + + for( i = 1; i < Len2; i++ ) + { + aip2[ i ] = sqrt( aip[ i * 2 ] * aip[ i * 2 ] + + aip[ i * 2 + 1 ] * aip[ i * 2 + 1 ]); + + aip[ i * 2 ] = log( aip2[ i ] + nzbias ); + aip[ i * 2 + 1 ] = 0.0; + } + + // Convert to cepstrum and apply discrete Hilbert transform. + + ffto -> inverse( ip ); + + const double m1 = ffto -> getInvMulConst(); + const double m2 = -m1; + + ip[ 0 ] = 0.0; + + for( i = 1; i < Len2; i++ ) + { + ip[ i ] *= m1; + } + + ip[ Len2 ] = 0.0; + + for( i = Len2 + 1; i < Len; i++ ) + { + ip[ i ] *= m2; + } + + // Convert Hilbert-transformed cepstrum back to the "log |c|" spectrum and + // perform its exponentiation, multiplied by the power spectrum previously + // saved in the "ip2" buffer. + + ffto -> forward( ip ); + + aip[ 0 ] = aip2[ 0 ]; + aip[ 1 ] = aip2[ Len2 ]; + + for( i = 1; i < Len2; i++ ) + { + aip[ i * 2 + 0 ] = cos( aip[ i * 2 + 1 ]) * aip2[ i ]; + aip[ i * 2 + 1 ] = sin( aip[ i * 2 + 1 ]) * aip2[ i ]; + } + + ffto -> inverse( ip ); + + if( DoFinalMul ) + { + for( i = 0; i < KernelLen; i++ ) + { + Kernel[ i ] = ip[ i ] * m1; + } + } + else + { + memcpy( &Kernel[ 0 ], &ip[ 0 ], KernelLen * sizeof( Kernel[ 0 ])); + } + + if( DCGroupDelay != NULL ) + { + double tmp; + + calcFIRFilterResponseAndGroupDelay( Kernel, KernelLen, 0.0, + tmp, tmp, *DCGroupDelay ); + } +} + +} // namespace r8b + +#endif // VOX_CDSPREALFFT_INCLUDED diff --git a/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPResampler.h b/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPResampler.h new file mode 100644 index 00000000..9eb4a43d --- /dev/null +++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPResampler.h @@ -0,0 +1,769 @@ +//$ nobt +//$ nocpp + +/** + * @file CDSPResampler.h + * + * @brief The master sample rate converter (resampler) class. + * + * This file includes the master sample rate converter (resampler) class that + * combines all elements of this library into a single front-end class. + * + * r8brain-free-src Copyright (c) 2013-2022 Aleksey Vaneev + * See the "LICENSE" file for license. + */ + +#ifndef R8B_CDSPRESAMPLER_INCLUDED +#define R8B_CDSPRESAMPLER_INCLUDED + +#include "CDSPHBDownsampler.h" +#include "CDSPHBUpsampler.h" +#include "CDSPBlockConvolver.h" +#include "CDSPFracInterpolator.h" + +namespace r8b { + +/** + * @brief The master sample rate converter (resampler) class. + * + * This class can be considered the "master" sample rate converter (resampler) + * class since it combines all functionality of this library into a single + * front-end class to perform sample rate conversion to/from any sample rate, + * including non-integer sample rates. + * + * Note that objects of this class can be constructed on the stack as it has a + * small member data size. The default template parameters of this class are + * suited for 27-bit fixed point resampling. + * + * Use the CDSPResampler16 class for 16-bit resampling. + * + * Use the CDSPResampler16IR class for 16-bit impulse response resampling. + * + * Use the CDSPResampler24 class for 24-bit resampling (including 32-bit + * floating point resampling). + */ + +class CDSPResampler : public CDSPProcessor +{ +public: + /** + * Constructor initalizes the resampler object. + * + * Note that increasing the transition band and decreasing attenuation + * reduces the filter length, this in turn reduces the "input before + * output" delay. However, the filter length has only a minor influence on + * the overall resampling speed. + * + * It should be noted that the ReqAtten specifies the minimal difference + * between the loudest input signal component and the produced aliasing + * artifacts during resampling. For example, if ReqAtten=100 was specified + * when performing 2x upsampling, the analysis of the resulting signal may + * display high-frequency components which are quieter than the loudest + * part of the input signal by only 100 decibel meaning the high-frequency + * part did not become "magically" completely silent after resampling. You + * have to specify a higher ReqAtten value if you need a totally clean + * high-frequency content. On the other hand, it may not be reasonable to + * have a high-frequency content cleaner than the input signal itself: if + * the input signal is 16-bit, setting ReqAtten to 180 will make its + * high-frequency content 24-bit, but the original part of the signal will + * remain 16-bit. + * + * @param SrcSampleRate Source signal sample rate. Both sample rates can + * be specified as a ratio, e.g. SrcSampleRate = 1.0, DstSampleRate = 2.0. + * @param DstSampleRate Destination signal sample rate. The "power of 2" + * ratios between the source and destination sample rates force resampler + * to use several fast "power of 2" resampling steps, without using + * fractional interpolation at all. + * @param aMaxInLen The maximal planned length of the input buffer (in + * samples) that will be passed to the resampler. The resampler relies on + * this value as it allocates intermediate buffers. Input buffers longer + * than this value should never be supplied to the resampler. Note that + * upsampling produces more samples than was provided on input, so at + * higher upsampling ratios it is advisable to use smaller MaxInLen + * values to reduce memory footprint. When downsampling, a larger MaxInLen + * is suggested in order to increase downsampling performance. + * @param ReqTransBand Required transition band, in percent of the + * spectral space of the input signal (or the output signal if + * downsampling is performed) between filter's -3 dB point and the Nyquist + * frequency. The range is from CDSPFIRFilter::getLPMinTransBand() to + * CDSPFIRFilter::getLPMaxTransBand(), inclusive. When upsampling 88200 or + * 96000 audio to a higher sample rates the ReqTransBand can be + * considerably increased, up to 30. The selection of ReqTransBand depends + * on the level of desire to preserve the high-frequency content. While + * values 0.5 to 2 are extremely "greedy" settings, not necessary in most + * cases, values 2 to 3 can be used in most cases. Values 3 to 4 are + * relaxed settings, but they still offer a flat frequency response up to + * 21kHz with 44.1k source or destination sample rate. + * @param ReqAtten Required stop-band attenuation in decibel, in the + * range CDSPFIRFilter::getLPMinAtten() to CDSPFIRFilter::getLPMaxAtten(), + * inclusive. The actual attenuation may be 0.40-4.46 dB higher. The + * general formula for selecting the ReqAtten is 6.02 * Bits + 40, where + * "Bits" is the bit resolution (e.g. 16, 24), "40" is an added resolution + * for dynamic signals; this value can be decreased to 20 to 10 if the + * signal being resampled is non-dynamic (e.g., an impulse response or + * filter, with a non-steep frequency response). + * @param ReqPhase Required filter's phase response. Note that this + * setting does not affect interpolator's phase response which is always + * linear-phase. Also note that if the "power of 2" resampling was engaged + * by the resampler together with the minimum-phase response, the audio + * stream may become fractionally delayed, depending on the minimum-phase + * filter's actual fractional delay. Linear-phase filters do not have + * fractional delay. + * @see CDSPFIRFilterCache::getLPFilter() + */ + + CDSPResampler( const double SrcSampleRate, const double DstSampleRate, + const int aMaxInLen, const double ReqTransBand = 2.0, + const double ReqAtten = 206.91, + const EDSPFilterPhaseResponse ReqPhase = fprLinearPhase ) + : StepCapacity( 0 ) + , StepCount( 0 ) + , MaxInLen( aMaxInLen ) + , CurMaxOutLen( aMaxInLen ) + , LatencyFrac( 0.0 ) + { + R8BASSERT( SrcSampleRate > 0.0 ); + R8BASSERT( DstSampleRate > 0.0 ); + R8BASSERT( MaxInLen > 0 ); + + R8BCONSOLE( "* CDSPResampler: src=%.1f dst=%.1f len=%i tb=%.1f " + "att=%.2f ph=%i\n", SrcSampleRate, DstSampleRate, aMaxInLen, + ReqTransBand, ReqAtten, (int) ReqPhase ); + + if( SrcSampleRate == DstSampleRate ) + { + return; + } + + TmpBufCapacities[ 0 ] = 0; + TmpBufCapacities[ 1 ] = 0; + CurTmpBuf = 0; + + // Try some common efficient ratios requiring only a single step. + + const int CommonRatioCount = 5; + const int CommonRatios[ CommonRatioCount ][ 2 ] = { + { 1, 2 }, + { 1, 3 }, + { 2, 3 }, + { 3, 2 }, + { 3, 4 } + }; + + int i; + + for( i = 0; i < CommonRatioCount; i++ ) + { + const int num = CommonRatios[ i ][ 0 ]; + const int den = CommonRatios[ i ][ 1 ]; + + if( SrcSampleRate * num == DstSampleRate * den ) + { + addProcessor( new CDSPBlockConvolver( + CDSPFIRFilterCache :: getLPFilter( + 1.0 / ( num > den ? num : den ), ReqTransBand, + ReqAtten, ReqPhase, num ), num, den, LatencyFrac )); + + createTmpBuffers(); + return; + } + } + + // Try whole-number power-of-2 or 3*power-of-2 upsampling. + + for( i = 2; i <= 3; i++ ) + { + bool WasFound = false; + int c = 0; + + while( true ) + { + const double NewSR = SrcSampleRate * ( i << c ); + + if( NewSR == DstSampleRate ) + { + WasFound = true; + break; + } + + if( NewSR > DstSampleRate ) + { + break; + } + + c++; + } + + if( WasFound ) + { + addProcessor( new CDSPBlockConvolver( + CDSPFIRFilterCache :: getLPFilter( 1.0 / i, ReqTransBand, + ReqAtten, ReqPhase, i ), i, 1, LatencyFrac )); + + const bool IsThird = ( i == 3 ); + + for( i = 0; i < c; i++ ) + { + addProcessor( new CDSPHBUpsampler( ReqAtten, i, IsThird, + LatencyFrac )); + } + + createTmpBuffers(); + return; + } + } + + if( DstSampleRate * 2.0 > SrcSampleRate ) + { + // Upsampling or fractional downsampling down to 2X. + + const double NormFreq = ( DstSampleRate > SrcSampleRate ? 0.5 : + 0.5 * DstSampleRate / SrcSampleRate ); + + addProcessor( new CDSPBlockConvolver( + CDSPFIRFilterCache :: getLPFilter( NormFreq, ReqTransBand, + ReqAtten, ReqPhase, 2.0 ), 2, 1, LatencyFrac )); + + // Try intermediate interpolated resampling with subsequent 2X + // or 3X upsampling. + + const double tbw = 0.0175; // Intermediate filter's transition + // band extension coefficient. + const double ThreshSampleRate = SrcSampleRate / + ( 1.0 - tbw * ReqTransBand ); // Make sure intermediate + // filter's transition band is not steeper than ReqTransBand + // (this keeps the latency under control). + + int c = 0; + int div = 1; + + while( true ) + { + const int ndiv = div * 2; + + if( DstSampleRate < ThreshSampleRate * ndiv ) + { + break; + } + + div = ndiv; + c++; + } + + int c2 = 0; + int div2 = 1; + + while( true ) + { + const int ndiv = div * ( c2 == 0 ? 3 : 2 ); + + if( DstSampleRate < ThreshSampleRate * ndiv ) + { + break; + } + + div2 = ndiv; + c2++; + } + + const double SrcSampleRate2 = SrcSampleRate * 2.0; + int tmp1; + int tmp2; + + if( c == 1 && getWholeStepping( SrcSampleRate2, DstSampleRate, + tmp1, tmp2 )) + { + // Do not use intermediate interpolation if whole stepping is + // available as it performs very fast. + + c = 0; + } + + if( c > 0 ) + { + // Add steps using intermediate interpolation. + + int num; + + if( c2 > 0 && div2 > div ) + { + div = div2; + c = c2; + num = 3; + } + else + { + num = 2; + } + + addProcessor( new CDSPFracInterpolator( SrcSampleRate2 * div, + DstSampleRate, ReqAtten, false, LatencyFrac )); + + double tb = ( 1.0 - SrcSampleRate * div / DstSampleRate ) / + tbw; // Divide TransBand by a constant that assures a + // linear response in the pass-band. + + if( tb > CDSPFIRFilter :: getLPMaxTransBand() ) + { + tb = CDSPFIRFilter :: getLPMaxTransBand(); + } + + addProcessor( new CDSPBlockConvolver( + CDSPFIRFilterCache :: getLPFilter( 1.0 / num, tb, + ReqAtten, ReqPhase, num ), num, 1, LatencyFrac )); + + const bool IsThird = ( num == 3 ); + + for( i = 1; i < c; i++ ) + { + addProcessor( new CDSPHBUpsampler( ReqAtten, i - 1, + IsThird, LatencyFrac )); + } + } + else + { + addProcessor( new CDSPFracInterpolator( SrcSampleRate2, + DstSampleRate, ReqAtten, false, LatencyFrac )); + } + + createTmpBuffers(); + return; + } + + // Use downsampling steps, including power-of-2 downsampling. + + double CheckSR = DstSampleRate * 4.0; + int c = 0; + double FinGain = 1.0; + + while( CheckSR <= SrcSampleRate ) + { + c++; + CheckSR *= 2.0; + FinGain *= 0.5; + } + + const int SrcSRDiv = ( 1 << c ); + int downf; + double NormFreq = 0.5; + bool UseInterp = true; + bool IsThird = false; + + for( downf = 2; downf <= 3; downf++ ) + { + if( DstSampleRate * SrcSRDiv * downf == SrcSampleRate ) + { + NormFreq = 1.0 / downf; + UseInterp = false; + IsThird = ( downf == 3 ); + break; + } + } + + if( UseInterp ) + { + downf = 1; + NormFreq = DstSampleRate * SrcSRDiv / SrcSampleRate; + IsThird = ( NormFreq * 3.0 <= 1.0 ); + } + + for( i = 0; i < c; i++ ) + { + // Use a fixed very relaxed 2X downsampling filters, that at + // the final stage only guarantees stop-band between 0.75 and + // pi. 0.5-0.75 range will be aliased to 0.25-0.5 range which + // will then be filtered out by the final filter. + + addProcessor( new CDSPHBDownsampler( ReqAtten, c - 1 - i, IsThird, + LatencyFrac )); + } + + addProcessor( new CDSPBlockConvolver( + CDSPFIRFilterCache :: getLPFilter( NormFreq, ReqTransBand, + ReqAtten, ReqPhase, FinGain ), 1, downf, LatencyFrac )); + + if( UseInterp ) + { + addProcessor( new CDSPFracInterpolator( SrcSampleRate, + DstSampleRate * SrcSRDiv, ReqAtten, IsThird, LatencyFrac )); + } + + createTmpBuffers(); + } + + virtual ~CDSPResampler() + { + int i; + + for( i = 0; i < StepCount; i++ ) + { + delete Steps[ i ]; + } + } + + virtual int getLatency() const + { + return( 0 ); + } + + virtual double getLatencyFrac() const + { + return( LatencyFrac ); + } + + /** + * This function ignores the supplied parameter and returns the maximal + * output buffer length that depends on the MaxInLen supplied to the + * constructor. + */ + + virtual int getMaxOutLen( const int/* MaxInLen */ ) const + { + return( CurMaxOutLen ); + } + + /** + * Function clears (resets) the state of *this object and returns it to + * the state after construction. All input data accumulated in the + * internal buffer so far will be discarded. + * + * This function makes it possible to use *this object for converting + * separate streams from the same source sample rate to the same + * destination sample rate without reconstructing the object. It is more + * efficient to clear the state of the resampler object than to destroy it + * and create a new object. + */ + + virtual void clear() + { + int i; + + for( i = 0; i < StepCount; i++ ) + { + Steps[ i ] -> clear(); + } + } + + /** + * Function performs sample rate conversion. + * + * If the source and destination sample rates are equal, the resampler + * will do nothing and will simply return the input buffer unchanged. + * + * You do not need to allocate an intermediate output buffer for use with + * this function. If required, the resampler will allocate a suitable + * intermediate output buffer itself. + * + * @param ip0 Input buffer. This buffer is never used as output buffer by + * this function. This pointer may be returned in "op0" if no resampling + * is happening (source sample rate equals destination sample rate). + * @param l The number of samples available in the input buffer. Should + * not exceed the MaxInLen supplied in the constructor. + * @param[out] op0 This variable receives the pointer to the resampled + * data. On function's return, this pointer points to *this object's + * internal buffer. In real-time applications it is suggested to pass this + * pointer to the next output audio block and consume any data left from + * the previous output audio block first before calling the process() + * function again. The buffer pointed to by the "op0" on return is owned + * by the resampler, so it should not be freed by the caller. + * @return The number of samples available in the "op0" output buffer. If + * the data from the output buffer "op0" is going to be written to a + * bigger output buffer, it is suggested to check the returned number of + * samples so that no overflow of the bigger output buffer happens. + */ + + virtual int process( double* ip0, int l, double*& op0 ) + { + R8BASSERT( l >= 0 ); + + double* ip = ip0; + int i; + + for( i = 0; i < StepCount; i++ ) + { + double* op = TmpBufs[ i & 1 ]; + l = Steps[ i ] -> process( ip, l, op ); + ip = op; + } + + op0 = ip; + return( l ); + } + + /** + * Function performs resampling of an input sample buffer of the specified + * length in the "one-shot" mode. This function can be useful when impulse + * response resampling is required. + * + * @param ip Input buffer pointer. + * @param iplen Length of the input buffer in samples. + * @param[out] op Output buffer pointer. + * @param oplen Length of the output buffer in samples. + * @tparam Tin Input buffer's element type. + * @tparam Tout Output buffer's element type. + */ + + template< typename Tin, typename Tout > + void oneshot( const Tin* ip, int iplen, Tout* op, int oplen ) + { + CFixedBuffer< double > Buf( MaxInLen ); + bool IsZero = false; + + while( oplen > 0 ) + { + int rc; + double* p; + int i; + + if( iplen == 0 ) + { + rc = MaxInLen; + p = &Buf[ 0 ]; + + if( !IsZero ) + { + IsZero = true; + memset( p, 0, MaxInLen * sizeof( p[ 0 ])); + } + } + else + { + rc = min( iplen, MaxInLen ); + + if( sizeof( Tin ) == sizeof( double )) + { + p = (double*) ip; + } + else + { + p = &Buf[ 0 ]; + + for( i = 0; i < rc; i++ ) + { + p[ i ] = ip[ i ]; + } + } + + ip += rc; + iplen -= rc; + } + + double* op0; + int wc = process( p, rc, op0 ); + wc = min( oplen, wc ); + + for( i = 0; i < wc; i++ ) + { + op[ i ] = (Tout) op0[ i ]; + } + + op += wc; + oplen -= wc; + } + + clear(); + } + + /** + * Function obtains overall input sample count required to produce first + * output sample. Function works by iteratively passing 1 sample at a time + * until output begins. This is a relatively CPU-consuming operation. This + * function should be called after the clear() function call or after + * object's construction. The function itself calls the clear() function + * before return. + * + * Note that it is advisable to cache the value returned by this function, + * for each SrcSampleRate/DstSampleRate pair, if it is called frequently. + */ + + int getInLenBeforeOutStart() + { + int inc = 0; + + while( true ) + { + double ins = 0.0; + double* op; + + if( process( &ins, 1, op ) > 0 ) + { + clear(); + return( inc ); + } + + inc++; + } + } + +private: + CFixedBuffer< CDSPProcessor* > Steps; ///< Array of processing steps. + ///< + int StepCapacity; ///< The capacity of the Steps array. + ///< + int StepCount; ///< The number of created processing steps. + ///< + int MaxInLen; ///< Maximal input length. + ///< + CFixedBuffer< double > TmpBufAll; ///< Buffer containing both temporary + ///< buffers. + ///< + double* TmpBufs[ 2 ]; ///< Temporary output buffers. + ///< + int TmpBufCapacities[ 2 ]; ///< Capacities of temporary buffers, updated + ///< during processing steps building. + ///< + int CurTmpBuf; ///< Current temporary buffer. + ///< + int CurMaxOutLen; ///< Current maximal output length. + ///< + double LatencyFrac; ///< Current fractional latency. After object's + ///< construction, equals to the remaining fractional latency in the + ///< output. + ///< + + /** + * Function adds processor, updates MaxOutLen variable and adjusts length + * of temporary internal buffers. + * + * @param Proc Processor to add. This pointer is inherited and will be + * destroyed on *this object's destruction. + */ + + void addProcessor( CDSPProcessor* const Proc ) + { + if( StepCount == StepCapacity ) + { + // Reallocate and increase Steps array's capacity. + + const int NewCapacity = StepCapacity + 8; + Steps.realloc( StepCapacity, NewCapacity ); + StepCapacity = NewCapacity; + } + + LatencyFrac = Proc -> getLatencyFrac(); + CurMaxOutLen = Proc -> getMaxOutLen( CurMaxOutLen ); + + if( CurMaxOutLen > TmpBufCapacities[ CurTmpBuf ]) + { + TmpBufCapacities[ CurTmpBuf ] = CurMaxOutLen; + } + + CurTmpBuf ^= 1; + + Steps[ StepCount ] = Proc; + StepCount++; + } + + /** + * Function creates temporary buffers. + */ + + void createTmpBuffers() + { + const int ol = TmpBufCapacities[ 0 ] + TmpBufCapacities[ 1 ]; + + if( ol > 0 ) + { + TmpBufAll.alloc( ol ); + TmpBufs[ 0 ] = &TmpBufAll[ 0 ]; + TmpBufs[ 1 ] = &TmpBufAll[ TmpBufCapacities[ 0 ]]; + } + + R8BCONSOLE( "* CDSPResampler: init done\n" ); + } +}; + +/** + * @brief The resampler class for 16-bit resampling. + * + * This class defines resampling parameters suitable for 16-bit resampling, + * using linear-phase low-pass filter. See the r8b::CDSPResampler class for + * details. + */ + +class CDSPResampler16 : public CDSPResampler +{ +public: + /** + * Constructor initializes the 16-bit resampler. See the + * r8b::CDSPResampler class for details. + * + * @param SrcSampleRate Source signal sample rate. + * @param DstSampleRate Destination signal sample rate. + * @param aMaxInLen The maximal planned length of the input buffer (in + * samples) that will be passed to the resampler. + * @param ReqTransBand Required transition band, in percent. + */ + + CDSPResampler16( const double SrcSampleRate, const double DstSampleRate, + const int aMaxInLen, const double ReqTransBand = 2.0 ) + : CDSPResampler( SrcSampleRate, DstSampleRate, aMaxInLen, ReqTransBand, + 136.45, fprLinearPhase ) + { + } +}; + +/** + * @brief The resampler class for 16-bit impulse response resampling. + * + * This class defines resampling parameters suitable for 16-bit impulse + * response resampling, using linear-phase low-pass filter. Impulse responses + * are non-dynamic signals, and thus need resampler with a lesser SNR. See the + * r8b::CDSPResampler class for details. + */ + +class CDSPResampler16IR : public CDSPResampler +{ +public: + /** + * Constructor initializes the 16-bit impulse response resampler. See the + * r8b::CDSPResampler class for details. + * + * @param SrcSampleRate Source signal sample rate. + * @param DstSampleRate Destination signal sample rate. + * @param aMaxInLen The maximal planned length of the input buffer (in + * samples) that will be passed to the resampler. + * @param ReqTransBand Required transition band, in percent. + */ + + CDSPResampler16IR( const double SrcSampleRate, const double DstSampleRate, + const int aMaxInLen, const double ReqTransBand = 2.0 ) + : CDSPResampler( SrcSampleRate, DstSampleRate, aMaxInLen, ReqTransBand, + 109.56, fprLinearPhase ) + { + } +}; + +/** + * @brief The resampler class for 24-bit resampling. + * + * This class defines resampling parameters suitable for 24-bit resampling + * (including 32-bit floating point resampling), using linear-phase low-pass + * filter. See the r8b::CDSPResampler class for details. + */ + +class CDSPResampler24 : public CDSPResampler +{ +public: + /** + * Constructor initializes the 24-bit resampler (including 32-bit floating + * point). See the r8b::CDSPResampler class for details. + * + * @param SrcSampleRate Source signal sample rate. + * @param DstSampleRate Destination signal sample rate. + * @param aMaxInLen The maximal planned length of the input buffer (in + * samples) that will be passed to the resampler. + * @param ReqTransBand Required transition band, in percent. + */ + + CDSPResampler24( const double SrcSampleRate, const double DstSampleRate, + const int aMaxInLen, const double ReqTransBand = 2.0 ) + : CDSPResampler( SrcSampleRate, DstSampleRate, aMaxInLen, ReqTransBand, + 180.15, fprLinearPhase ) + { + } +}; + +} // namespace r8b + +#endif // R8B_CDSPRESAMPLER_INCLUDED diff --git a/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPSincFilterGen.h b/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPSincFilterGen.h new file mode 100644 index 00000000..32aa2e5a --- /dev/null +++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/CDSPSincFilterGen.h @@ -0,0 +1,687 @@ +//$ nobt +//$ nocpp + +/** + * @file CDSPSincFilterGen.h + * + * @brief Sinc function-based FIR filter generator class. + * + * This file includes the CDSPSincFilterGen class implementation that + * generates FIR filters. + * + * r8brain-free-src Copyright (c) 2013-2022 Aleksey Vaneev + * See the "LICENSE" file for license. + */ + +#ifndef R8B_CDSPSINCFILTERGEN_INCLUDED +#define R8B_CDSPSINCFILTERGEN_INCLUDED + +#include "r8bbase.h" + +namespace r8b { + +/** + * @brief Sinc function-based FIR filter generator class. + * + * Structure that holds state used to perform generation of sinc functions of + * various types, windowed by the Blackman window by default (but the window + * function can be changed if necessary). + */ + +class CDSPSincFilterGen +{ +public: + double Len2; ///< Required half filter kernel's length in samples (can be + ///< a fractional value). Final physical kernel length will be + ///< provided in the KernelLen variable. Len2 should be >= 2. + ///< + int KernelLen; ///< Resulting length of the filter kernel, this variable + ///< is set after the call to one of the "init" functions. + ///< + int fl2; ///< Internal "half kernel length" value. This value can be used + ///< as filter's latency in samples (taps), this variable is set after + ///< the call to one of the "init" functions. + ///< + + union + { + struct + { + double Freq1; ///< Required corner circular frequency 1 [0; pi]. + ///< Used only in the generateBand() function. + ///< + double Freq2; ///< Required corner circular frequency 2 [0; pi]. + ///< Used only in the generateBand() function. The range + ///< [Freq1; Freq2] defines a pass band for the generateBand() + ///< function. + ///< + }; + + struct + { + double FracDelay; ///< Fractional delay in the range [0; 1], used + ///< only in the generateFrac() function. Note that the + ///< FracDelay parameter is actually inversed. At 0.0 value it + ///< produces 1 sample delay (with the latency equal to fl2), + ///< at 1.0 value it produces 0 sample delay (with the latency + ///< equal to fl2 - 1). + ///< + }; + }; + + /** + * Window function type. + */ + + enum EWindowFunctionType + { + wftCosine, ///< Generalized cosine window function. No parameters + ///< required. The "Power" parameter is optional. + ///< + wftKaiser, ///< Kaiser window function. Requires the "Beta" parameter. + ///< The "Power" parameter is optional. + ///< + wftGaussian ///< Gaussian window function. Requires the "Sigma" + ///< parameter. The "Power" parameter is optional. + ///< + }; + + typedef double( CDSPSincFilterGen :: *CWindowFunc )(); ///< Window + ///< calculation function pointer type. + ///< + + /** + * Function initializes *this structure for generation of a window + * function, odd-sized. + * + * @param WinType Window function type. + * @param Params Window function's parameters. If NULL, the table values + * may be used. + * @param UsePower "True" if the power factor should be used to raise the + * window function. If "true", the power factor should be specified as the + * last value in the Params array. If Params is NULL, the table or default + * value of -1.0 (off) will be used. + */ + + void initWindow( const EWindowFunctionType WinType = wftCosine, + const double* const Params = NULL, const bool UsePower = false ) + { + R8BASSERT( Len2 >= 2.0 ); + + fl2 = (int) floor( Len2 ); + KernelLen = fl2 + fl2 + 1; + + setWindow( WinType, Params, UsePower, true ); + } + + /** + * Function initializes *this structure for generation of band-limited + * sinc filter kernel. The generateBand() function should be used to + * calculate the filter. + * + * @param WinType Window function type. + * @param Params Window function's parameters. If NULL, the table values + * may be used. + * @param UsePower "True" if the power factor should be used to raise the + * window function. If "true", the power factor should be specified as the + * last value in the Params array. If Params is NULL, the table or default + * value of -1.0 (off) will be used. + */ + + void initBand( const EWindowFunctionType WinType = wftCosine, + const double* const Params = NULL, const bool UsePower = false ) + { + R8BASSERT( Len2 >= 2.0 ); + + fl2 = (int) floor( Len2 ); + KernelLen = fl2 + fl2 + 1; + f1.init( Freq1, 0.0 ); + f2.init( Freq2, 0.0 ); + + setWindow( WinType, Params, UsePower, true ); + } + + /** + * Function initializes *this structure for Hilbert transformation filter + * calculation. Freq1 and Freq2 variables are not used. + * The generateHilbert() function should be used to calculate the filter. + * + * @param WinType Window function type. + * @param Params Window function's parameters. If NULL, the table values + * may be used. + * @param UsePower "True" if the power factor should be used to raise the + * window function. If "true", the power factor should be specified as the + * last value in the Params array. If Params is NULL, the table or default + * value of -1.0 (off) will be used. + */ + + void initHilbert( const EWindowFunctionType WinType = wftCosine, + const double* const Params = NULL, const bool UsePower = false ) + { + R8BASSERT( Len2 >= 2.0 ); + + fl2 = (int) floor( Len2 ); + KernelLen = fl2 + fl2 + 1; + + setWindow( WinType, Params, UsePower, true ); + } + + /** + * Function initializes *this structure for generation of full-bandwidth + * fractional delay sinc filter kernel. Freq1 and Freq2 variables are not + * used. The generateFrac() function should be used to calculate the + * filter. + * + * @param WinType Window function type. + * @param Params Window function's parameters. If NULL, the table values + * may be used. + * @param UsePower "True" if the power factor should be used to raise the + * window function. If "true", the power factor should be specified as the + * last value in the Params array. If Params is NULL, the table or default + * value of -1.0 (off) will be used. + */ + + void initFrac( const EWindowFunctionType WinType = wftCosine, + const double* const Params = NULL, const bool UsePower = false ) + { + R8BASSERT( Len2 >= 2.0 ); + + fl2 = (int) ceil( Len2 ); + KernelLen = fl2 + fl2; + + setWindow( WinType, Params, UsePower, false, FracDelay ); + } + + /** + * @return The next "Hann" window function coefficient. + */ + + double calcWindowHann() + { + return( 0.5 + 0.5 * w1.generate() ); + } + + /** + * @return The next "Hamming" window function coefficient. + */ + + double calcWindowHamming() + { + return( 0.54 + 0.46 * w1.generate() ); + } + + /** + * @return The next "Blackman" window function coefficient. + */ + + double calcWindowBlackman() + { + return( 0.42 + 0.5 * w1.generate() + 0.08 * w2.generate() ); + } + + /** + * @return The next "Nuttall" window function coefficient. + */ + + double calcWindowNuttall() + { + return( 0.355768 + 0.487396 * w1.generate() + + 0.144232 * w2.generate() + 0.012604 * w3.generate() ); + } + + /** + * @return The next "Blackman-Nuttall" window function coefficient. + */ + + double calcWindowBlackmanNuttall() + { + return( 0.3635819 + 0.4891775 * w1.generate() + + 0.1365995 * w2.generate() + 0.0106411 * w3.generate() ); + } + + /** + * @return The next "Kaiser" window function coefficient. + */ + + double calcWindowKaiser() + { + const double n = 1.0 - sqr( wn / Len2 + KaiserLen2Frac ); + wn++; + + if( n < 0.0 ) + { + return( 0.0 ); + } + + return( besselI0( KaiserBeta * sqrt( n )) / KaiserDiv ); + } + + /** + * @return The next "Gaussian" window function coefficient. + */ + + double calcWindowGaussian() + { + const double f = exp( -0.5 * sqr( wn / GaussianSigma + + GaussianSigmaFrac )); + + wn++; + + return( f ); + } + + /** + * Function calculates window function only. + * + * @param[out] op Output buffer, length = KernelLen. + * @param wfunc Window calculation function to use. + */ + + void generateWindow( double* op, + CWindowFunc wfunc = &CDSPSincFilterGen :: calcWindowBlackman ) + { + op += fl2; + double* op2 = op; + + int l = fl2; + + if( Power < 0.0 ) + { + *op = ( *this.*wfunc )(); + + while( l > 0 ) + { + const double v = ( *this.*wfunc )(); + + op++; + op2--; + *op = v; + *op2 = v; + l--; + } + } + else + { + *op = pows(( *this.*wfunc )(), Power ); + + while( l > 0 ) + { + const double v = pows(( *this.*wfunc )(), Power ); + + op++; + op2--; + *op = v; + *op2 = v; + l--; + } + } + } + + /** + * Function calculates band-limited windowed sinc function-based filter + * kernel. + * + * @param[out] op Output buffer, length = KernelLen. + * @param wfunc Window calculation function to use. + */ + + void generateBand( double* op, + CWindowFunc wfunc = &CDSPSincFilterGen :: calcWindowBlackman ) + { + op += fl2; + double* op2 = op; + f1.generate(); + f2.generate(); + int t = 1; + + if( Power < 0.0 ) + { + *op = ( Freq2 - Freq1 ) * ( *this.*wfunc )() / R8B_PI; + + while( t <= fl2 ) + { + const double v = ( f2.generate() - f1.generate() ) * + ( *this.*wfunc )() / ( t * R8B_PI ); + + op++; + op2--; + *op = v; + *op2 = v; + t++; + } + } + else + { + *op = ( Freq2 - Freq1 ) * pows(( *this.*wfunc )(), Power ) / + R8B_PI; + + while( t <= fl2 ) + { + const double v = ( f2.generate() - f1.generate() ) * + pows(( *this.*wfunc )(), Power ) / ( t * R8B_PI ); + + op++; + op2--; + *op = v; + *op2 = v; + t++; + } + } + } + + /** + * Function calculates windowed Hilbert transformer filter kernel. + * + * @param[out] op Output buffer, length = KernelLen. + * @param wfunc Window calculation function to use. + */ + + void generateHilbert( double* op, + CWindowFunc wfunc = &CDSPSincFilterGen :: calcWindowBlackman ) + { + static const double fvalues[ 2 ] = { 0.0, 2.0 / R8B_PI }; + op += fl2; + double* op2 = op; + + ( *this.*wfunc )(); + *op = 0.0; + + int t = 1; + + if( Power < 0.0 ) + { + while( t <= fl2 ) + { + const double v = fvalues[ t & 1 ] * ( *this.*wfunc )() / t; + op++; + op2--; + *op = v; + *op2 = -v; + t++; + } + } + else + { + while( t <= fl2 ) + { + const double v = fvalues[ t & 1 ] * + pows( ( *this.*wfunc )(), Power ) / t; + + op++; + op2--; + *op = v; + *op2 = -v; + t++; + } + } + } + + /** + * Function calculates windowed fractional delay filter kernel. + * + * @param[out] op Output buffer, length = KernelLen. + * @param wfunc Window calculation function to use. + * @param opinc Output buffer increment, in "op" elements. + */ + + void generateFrac( double* op, + CWindowFunc wfunc = &CDSPSincFilterGen :: calcWindowBlackman, + const int opinc = 1 ) + { + R8BASSERT( opinc != 0 ); + + double f[ 2 ]; + f[ 0 ] = sin( FracDelay * R8B_PI ) / R8B_PI; + f[ 1 ] = -f[ 0 ]; + + int t = -fl2; + + if( t + FracDelay < -Len2 ) + { + ( *this.*wfunc )(); + *op = 0.0; + op += opinc; + t++; + } + + int IsZeroX = ( fabs( FracDelay - 1.0 ) < 0x1p-42 ); + int mt = 0 - IsZeroX; + IsZeroX = ( IsZeroX || fabs( FracDelay ) < 0x1p-42 ); + + if( Power < 0.0 ) + { + while( t < mt ) + { + *op = f[ t & 1 ] * ( *this.*wfunc )() / ( t + FracDelay ); + op += opinc; + t++; + } + + if( IsZeroX ) // t+FracDelay==0 + { + *op = ( *this.*wfunc )(); + } + else + { + *op = f[ t & 1 ] * ( *this.*wfunc )() / FracDelay; // t==0 + } + + mt = fl2 - 2; + + while( t < mt ) + { + op += opinc; + t++; + *op = f[ t & 1 ] * ( *this.*wfunc )() / ( t + FracDelay ); + } + + op += opinc; + t++; + const double ut = t + FracDelay; + *op = ( ut > Len2 ? 0.0 : f[ t & 1 ] * ( *this.*wfunc )() / ut ); + } + else + { + while( t < mt ) + { + *op = f[ t & 1 ] * pows( ( *this.*wfunc )(), Power ) / + ( t + FracDelay ); + + op += opinc; + t++; + } + + if( IsZeroX ) // t+FracDelay==0 + { + *op = pows( ( *this.*wfunc )(), Power ); + } + else + { + *op = f[ t & 1 ] * pows( ( *this.*wfunc )(), Power ) / + FracDelay; // t==0 + } + + mt = fl2 - 2; + + while( t < mt ) + { + op += opinc; + t++; + *op = f[ t & 1 ] * pows( ( *this.*wfunc )(), Power ) / + ( t + FracDelay ); + } + + op += opinc; + t++; + const double ut = t + FracDelay; + *op = ( ut > Len2 ? 0.0 : f[ t & 1 ] * + pows( ( *this.*wfunc )(), Power ) / ut ); + } + } + +private: + double Power; ///< The power factor used to raise the window function. + ///< Equals a negative value if the power factor should not be used. + ///< + CSineGen f1; ///< Sine function 1. Used in the generateBand() function. + ///< + CSineGen f2; ///< Sine function 2. Used in the generateBand() function. + ///< + int wn; ///< Window function integer position. 0 - center of the window + ///< function. This variable may not be used by some window functions. + ///< + CSineGen w1; ///< Cosine wave 1 for window function. + ///< + CSineGen w2; ///< Cosine wave 2 for window function. + ///< + CSineGen w3; ///< Cosine wave 3 for window function. + ///< + + union + { + struct + { + double KaiserBeta; ///< Kaiser window function's "Beta" + ///< coefficient. + ///< + double KaiserDiv; ///< Kaiser window function's divisor. + ///< + double KaiserLen2Frac; ///< Equals FracDelay / Len2. + ///< + }; + + struct + { + double GaussianSigma; ///< Gaussian window function's "Sigma" + ///< coefficient. + ///< + double GaussianSigmaFrac; ///< Equals FracDelay / GaussianSigma. + ///< + }; + }; + + /** + * Function initializes Kaiser window function calculation. The FracDelay + * variable should be initialized when using this window function. + * + * @param Params Function parameters. If NULL, the default values will be + * used. If not NULL, the first parameter should specify the "Beta" value. + * @param UsePower "True" if the power factor should be used to raise the + * window function. + * @param IsCentered "True" if centered window should be used. This + * parameter usually equals to "false" for fractional delay filters only. + */ + + void setWindowKaiser( const double* Params, const bool UsePower, + const bool IsCentered ) + { + wn = ( IsCentered ? 0 : -fl2 ); + + if( Params == NULL ) + { + KaiserBeta = 9.5945013206755156; + Power = ( UsePower ? 1.9718457932433306 : -1.0 ); + } + else + { + KaiserBeta = clampr( Params[ 0 ], 1.0, 350.0 ); + Power = ( UsePower ? fabs( Params[ 1 ]) : -1.0 ); + } + + KaiserDiv = besselI0( KaiserBeta ); + KaiserLen2Frac = FracDelay / Len2; + } + + /** + * Function initializes Gaussian window function calculation. The FracDelay + * variable should be initialized when using this window function. + * + * @param Params Function parameters. If NULL, the table values will be + * used. If not NULL, the first parameter should specify the "Sigma" + * value. + * @param UsePower "True" if the power factor should be used to raise the + * window function. + * @param IsCentered "True" if centered window should be used. This + * parameter usually equals to "false" for fractional delay filters only. + */ + + void setWindowGaussian( const double* Params, const bool UsePower, + const bool IsCentered ) + { + wn = ( IsCentered ? 0 : -fl2 ); + + if( Params == NULL ) + { + GaussianSigma = 1.0; + Power = -1.0; + } + else + { + GaussianSigma = clampr( fabs( Params[ 0 ]), 1e-1, 100.0 ); + Power = ( UsePower ? fabs( Params[ 1 ]) : -1.0 ); + } + + GaussianSigma *= Len2; + GaussianSigmaFrac = FracDelay / GaussianSigma; + } + + /** + * Function initializes calculation of window function of the specified + * type. + * + * @param WinType Window function type. + * @param Params Window function's parameters. If NULL, the table values + * may be used. + * @param UsePower "True" if the power factor should be used to raise the + * window function. If "true", the power factor should be specified as the + * last value in the Params array. If Params is NULL, the table or default + * value of -1.0 (off) will be used. + * @param IsCentered "True" if centered window should be used. This + * parameter usually equals to "false" for fractional delay filters only. + * @param UseFracDelay Fractional delay to use. + */ + + void setWindow( const EWindowFunctionType WinType, + const double* const Params, const bool UsePower, + const bool IsCentered, const double UseFracDelay = 0.0 ) + { + FracDelay = UseFracDelay; + + if( WinType == wftCosine ) + { + if( IsCentered ) + { + w1.init( R8B_PI / Len2, R8B_PId2 ); + w2.init( R8B_2PI / Len2, R8B_PId2 ); + w3.init( R8B_3PI / Len2, R8B_PId2 ); + } + else + { + const double step1 = R8B_PI / Len2; + w1.init( step1, R8B_PId2 - step1 * fl2 + step1 * FracDelay ); + + const double step2 = R8B_2PI / Len2; + w2.init( step2, R8B_PId2 - step2 * fl2 + step2 * FracDelay ); + + const double step3 = R8B_3PI / Len2; + w3.init( step3, R8B_PId2 - step3 * fl2 + step3 * FracDelay ); + } + + Power = ( UsePower && Params != NULL ? Params[ 0 ] : -1.0 ); + } + else + if( WinType == wftKaiser ) + { + setWindowKaiser( Params, UsePower, IsCentered ); + } + else + if( WinType == wftGaussian ) + { + setWindowGaussian( Params, UsePower, IsCentered ); + } + } +}; + +} // namespace r8b + +#endif // R8B_CDSPSINCFILTERGEN_INCLUDED diff --git a/Src/external_dependencies/openmpt-trunk/include/r8brain/LICENSE b/Src/external_dependencies/openmpt-trunk/include/r8brain/LICENSE new file mode 100644 index 00000000..669a7839 --- /dev/null +++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2013-2022 Aleksey Vaneev + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/Src/external_dependencies/openmpt-trunk/include/r8brain/OpenMPT.txt b/Src/external_dependencies/openmpt-trunk/include/r8brain/OpenMPT.txt new file mode 100644 index 00000000..2fcb3b9d --- /dev/null +++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/OpenMPT.txt @@ -0,0 +1,6 @@ +r8brain-free resampling library from https://github.com/avaneev/r8brain-free-src
+version 5.6.
+The example program, DLL and "other" folder, PFFFT sources and logo file have been removed.
+No further local changes have been made.
+For building, premake is used to generate Visual Studio project files.
+See ../build/premake/ for details.
diff --git a/Src/external_dependencies/openmpt-trunk/include/r8brain/README.md b/Src/external_dependencies/openmpt-trunk/include/r8brain/README.md new file mode 100644 index 00000000..af0412c1 --- /dev/null +++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/README.md @@ -0,0 +1,408 @@ +# r8brain-free-src - High-Quality Resampler # + +## Introduction ## + +Open source (under the MIT license) high-quality professional audio sample +rate converter (SRC) / resampler C++ library. Features routines for SRC, +both up- and downsampling, to/from any sample rate, including non-integer +sample rates: it can be also used for conversion to/from SACD/DSD sample +rates, and even go beyond that. SRC routines were implemented in a +multi-platform C++ code, and have a high level of optimality. Also suitable +for fast general-purpose 1D time-series resampling / interpolation (with +relaxed filter parameters). + +The structure of this library's objects is such that they can be frequently +created and destroyed in large applications with a minimal performance impact +due to a high level of reusability of its most "initialization-expensive" +objects: the fast Fourier transform and FIR filter objects. + +The SRC algorithm at first produces 2X oversampled (relative to the source +sample rate, or the destination sample rate if the downsampling is performed) +signal, then performs interpolation using a bank of short (8 to 30 taps, +depending on the required precision) polynomial-interpolated sinc +function-based fractional delay filters. This puts the algorithm into the +league of the fastest among the most precise SRC algorithms. The more precise +alternative being only the whole number-factored SRC, which can be slower. + +P.S. Please credit the creator of this library in your documentation in the +following way: "Sample rate converter designed by Aleksey Vaneev of Voxengo". + +## Requirements ## + +C++ compiler and system with the "double" floating point type (53-bit +mantissa) support. No explicit code for the "float" type is present in this +library, because as practice has shown the "float"-based code performs +considerably slower on a modern processor, at least in this library. This +library does not have dependencies beside the standard C library, the +"windows.h" on Windows and the "pthread.h" on macOS and Linux. + +## Links ## + +* [Documentation](https://www.voxengo.com/public/r8brain-free-src/Documentation/) +* [Discussion](https://www.kvraudio.com/forum/viewtopic.php?t=389711) + +## Usage Information ## + +The sample rate converter (resampler) is represented by the +**r8b::CDSPResampler** class, which is a single front-end class for the +whole library. You do not basically need to use nor understand any other +classes beside this class. Several derived classes that have varying levels +of precision are also available (for full-resolution 16-bit and 24-bit +resampling). + +The code of the library resides in the "r8b" C++ namespace, effectively +isolating it from all other code. The code is thread-safe. A separate +resampler object should be created for each audio channel or stream being +processed concurrently. + +Note that you will need to compile the "r8bbase.cpp" source file and include +the resulting object file into your application build. This source file +includes definitions of several global static objects used by the library. +You may also need to include to your project: the "Kernel32" library (on +Windows) and the "pthread" library on macOS and Linux. + +The library is able to process signal of any scale and loudness: it is not +limited to just a "usual" -1.0 to 1.0 range. + +By defining the `R8B_IPP` configuration macro it is possible to enable Intel +IPP back-end for FFT functions, instead of the default Ooura FFT. IPP FFT +makes sample rate conversion faster by 23% on average. + + #define R8B_IPP 1 + +If a larger initial processing delay and a very minor sample timing error are +not an issue, for the most efficiency you can define these macros at +the beginning of the `r8bconf.h` file, or during compilation: + + #define R8B_IPP 1 + #define R8B_FASTTIMING 1 + #define R8B_EXTFFT 1 + +If you do not have access to the Intel IPP then you may consider enabling the +PFFFT which is only slightly slower than Intel IPP FFT in performance. There +are two macros available: `R8B_PFFFT` and `R8B_PFFFT_DOUBLE`. The first macro +enables PFFFT that works in single-precision resolution, thus limiting the +overall resampler's precision to 24-bit sample rate conversions (for +mission-critical professional audio applications, using the `R8B_PFFFT` macro +is not recommended as its peak error is quite large). The second macro +enables PFFFT implementation that works in double-precision resolution, making +use of SSE2, AVX, and NEON intrinsics, yielding precision that is equal to +both Intel IPP and Ooura FFT implementations. + +To use the PFFFT, define the `R8B_PFFFT` or `R8B_PFFFT_DOUBLE` macro, compile +and include the supplied `pffft.cpp` or `pffft_double/pffft_double.c` file to +your project build. + + #define R8B_PFFFT 1 + + or + + #define R8B_PFFFT_DOUBLE 1 + +The code of this library was commented in the [Doxygen](http://www.doxygen.org/) +style. To generate the documentation locally you may run the +`doxygen ./other/r8bdoxy.txt` command from the library's folder. + +Preliminary tests show that the r8b::CDSPResampler24 resampler class achieves +`31.8*n_cores` Mrops (`44*n_cores` for Intel IPP FFT) when converting 1 +channel of 24-bit audio from 44100 to 96000 sample rate (2% transition band), +on a Ryzen 3700X processor-based 64-bit system. This approximately translates +to a real-time resampling of `720*n_cores` (`1000*n_cores`) audio streams, at +100% CPU load. Speed performance when converting to other sample rates may +vary greatly. When comparing performance of this resampler library to another +library make sure that the competing library is also tuned to produce a fully +linear-phase response, has similar stop-band characteristics and similar +sample timing precision. + +## Dynamic Link Library ## + +The functions of this SRC library are also accessible in simplified form via +the DLL file on Windows, requiring a processor with SSE2 support (Win64 +version includes AVX2 auto-dispatch code). Delphi Pascal interface unit file +for the DLL file is available. DLL and C LIB files are distributed in the DLL +folder on the project's homepage. On non-Windows systems it is preferrable +to use the C++ library directly. Note that the DLL was compiled with the +Intel IPP enabled. + +## Real-Time Applications ## + +The resampler class of this library was designed as an asynchronous processor: +it may produce any number of output samples, depending on the input sample +data length and the resampling parameters. The resampler must be fed with the +input sample data until enough output sample data were produced, with any +excess output samples used before feeding the resampler with more input data. +A "relief" factor here is that the resampler removes the initial processing +latency automatically, and that after initial moments of processing the output +becomes steady, with only minor output sample data length fluctuations. + +So, while for an off-line resampling a "push" method can be used, +demonstrated in the `example.cpp` file, for a real-time resampling a "pull" +method should be used which calls the resampling process until the output +buffer is filled. + +## Notes ## + +When using the **r8b::CDSPResampler** class directly, you may select the +transition band/steepness of the low-pass (reconstruction) filter, expressed +as a percentage of the full spectral bandwidth of the input signal (or the +output signal if the downsampling is performed), and the desired stop-band +attenuation in decibel. + +The transition band is specified as the normalized spectral space of the input +signal (or the output signal if the downsampling is performed) between the +low-pass filter's -3 dB point and the Nyquist frequency, and ranges from 0.5% +to 45%. Stop-band attenuation can be specified in the range from 49 to 218 +decibel. Both the transition band and stop-band attenuation affect +resampler's overall speed performance and initial output delay. For your +information, transition frequency range spans 175% of the specified transition +band, which means that for 2% transition band, frequency response below +0.965\*Nyquist is linear. + +This SRC library also implements a much faster "power of 2" resampling (e.g. +2X, 4X, 8X, 16X, 3X, 3\*2X, 3\*4X, 3\*8X, etc. upsampling and downsampling), +which is engaged automatically if the resampling parameters permit. + +This library was tested for compatibility with [GNU C++](https://gcc.gnu.org/), +[Microsoft Visual C++](https://visualstudio.microsoft.com/), +[LLVM](https://llvm.org/) and [Intel C++](https://software.intel.com/en-us/c-compilers) +compilers, on 32- and 64-bit Windows, macOS, and CentOS Linux. + +Most code is "inline", without the need to compile many source files. The +memory footprint is quite modest. + +## Acknowledgements ## + +r8brain-free-src is bundled with the following code: + +* FFT routines Copyright (c) 1996-2001 Takuya OOURA. +[Homepage](http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html) +* PFFFT Copyright (c) 2013 Julien Pommier. +[Homepage](https://bitbucket.org/jpommier/pffft) +* PFFFT DOUBLE Copyright (c) 2020 Hayati Ayguen, Dario Mambro. +[Homepage](https://github.com/unevens/pffft) + +## Users ## + +This library is used by: + +* [REAPER](https://reaper.fm/) +* [AUDIRVANA](https://audirvana.com/) +* [Red Dead Redemption 2](https://www.rockstargames.com/reddeadredemption2/credits) +* [Mini Piano Lite](https://play.google.com/store/apps/details?id=umito.android.minipiano) +* [OpenMPT](https://openmpt.org/) +* [Boogex Guitar Amp audio plugin](https://www.voxengo.com/product/boogex/) +* [r8brain free sample rate converter](https://www.voxengo.com/product/r8brain/) +* [Voice Aloud Reader](https://play.google.com/store/apps/details?id=com.hyperionics.avarLic) +* [Zynewave Podium](https://zynewave.com/podium/) +* [Phonometrica](http://www.phonometrica-ling.org/index.html) +* [Ripcord](https://cancel.fm/ripcord/) +* [TensorVox](https://github.com/ZDisket/TensorVox) +* [Curvessor](https://github.com/unevens/Curvessor) + +Please send me a note via aleksey.vaneev@gmail.com and I will include a link +to your software product to this list of users. This list is important in +maintaining confidence in this library among the interested parties. The +inclusion into this list is not mandatory. + +## Change Log ## + +Version 5.6: + +* Added SSE and NEON implementations to `CDSPHBUpsampler` yielding 15% +performance improvement of power-of-2 upsampling. +* Added SSE and NEON implementations to the `CDSPRealFFT::multiplyBlocksZP` +function, for 2-3% performance improvement. +* Added intermediate interpolator's transition band limitation, for logical +hardness (not practically needed). +* Added the `aDoConsumeLatency` parameter to `CDSPHBUpsampler` constructor, +for "inline" DSP uses of the class. +* Made various minor changes across the codebase. + +Version 5.5: + +* Hardened positional logic of fractional filter calculation, removed +redundant multiplications. +* Removed unnecessary function templating from the `CDSPSincFilterGen` class. +* Added the `__ARM_NEON` macro to NEON availability detection. + +Version 5.4: + +* Added compiler specializations to previously optimized inner loops. +"Shuffled" SIMD interpolation code is not efficient on Apple M1. Intel C++ +Compiler vectorizes "whole stepping" interpolation as good as a +manually-written SSE. +* Reorganized SIMD instructions for a slightly better performance. +* Changed internal buffer sizes of half-band resamplers (1-2% performance +boost). +* Fixed compiler warnings in PFFFT code. +* Added several asserts to the code. + +Version 5.3: + +* Optimized inner loops of the fractional interpolator, added SSE2 and NEON +intrinsics, resulting in a measurable (8-25%) performance gain. +* Optimized filter calculation functions: changed some divisions by a constant +to multiplications. +* Renamed M_PI macros to R8B_PI, to avoid macro collisions. +* Removed redundant code and macros. + +Version 5.2: + +* Modified `PFFFT` and `PFFFT DOUBLE` conditional pre-processor directives to +always enable NEON on `aarch64`/`arm64` (this includes code built for +Apple M1). + +Version 5.1: + +* Changed alignment in the `CFixedBuffer` class to 64 bytes. This improves AVX +performance of the `PFFFT DOUBLE` implementation by a few percent. +* Removed redundant files from the `pffft_double` folder, integrated the +`pffft_common.c` file into the `pffft_double.c` file. + +Version 5.0: + +* Removed a long-outdated macros from the `r8bconf.h` file. +* Changed a conditional pre-processor directive in the `pf_sse2_double.h` file +as per PFFFT DOUBLE author's suggestion, to allow SSE2 intrinsics in most +compilers. +* Fixed "License.txt" misnaming in the source files to "LICENSE". + +Version 4.10: + +* Added the `PFFFT DOUBLE` implementation support. Now available via the +`R8B_PFFFT_DOUBLE` definition macro. + +Version 4.9: + +* Reoptimized half-band and fractional interpolation filters with a stricter +frequency response linearity constraints. This did not impact the average +speed performance. + +Version 4.8: + +* Added a limit to the intermediate filter's transition band, to keep the +latency under control at any resampling ratio. + +Version 4.7: + +* Added `#ifndef _USE_MATH_DEFINES` to `pffft.cpp`. +* Moved `#include "pffft.h"` to `CDSPRealFFT.h`. + +Version 4.6: + +* Removed the `MaxInLen` parameter from the `oneshot()` function. +* Decreased intermediate low-pass filter's transition band slightly, for more +stable quality. + +Version 4.5: + +* Fixed VS2017 compiler warnings. + +Version 4.4: + +* Fixed the "Declaration hides a member" Intel C++ compiler warnings. + +Version 4.3: + +* Added //$ markers for internal debugging purposes. + +Version 4.2: + +* Backed-off max transition band to 45 and MinAtten to 49. +* Implemented Wave64 and AIFF file input in the `r8bfreesrc` bench tool. The +tool is now compiled with the `R8B_IPP 1` and `R8B_EXTFFT 1` macros to +demonstrate the maximal achievable performance. + +Version 4.1: + +* Updated allowed ReqAtten range to 52-218, ReqTransBand 0.5-56. It is +possible to specify filter parameters slightly beyond these values, but the +resulting filter will be slightly out of specification as well. +* Optimized static filter banks allocation. + +Version 4.0: + +* A major overhaul of interpolation classes: now templated parameters are not +used, all required parameters are calculated at runtime. Static filter bank +object is not used, it is created when necessary, and then cached. +* Implemented one-third interpolation filters, however, this did not +measurably increase resampler's speed. + +Version 3.7: + +* Used ippsMul_64f_I() in the CDSPRealFFT::multiplyBlockZ() function for a +minor conversion speed increase in Intel IPP mode. + +Version 3.6: + +* Added memory alignment to allocated buffers which boosts performance by 1.5% +when Intel IPP FFT is in use. +* Implemented PFFFT support. + +Version 3.5: + +* Improved resampling speed very slightly. +* Updated the `r8bfreesrc` benchmark tool to support RF64 WAV files. + +Version 3.4: + +* Added a more efficient half-band filters for >= 256 resampling ratios. + +Version 3.3: + +* Made minor fix to downsampling for some use cases of CDSPBlockConvolver, +did not affect resampler. +* Converted CDSPHBUpsampler and CDSPHBDownsampler's inner functions to +static functions, which boosted high-ratio resampling performance measurably. + +Version 3.2: + +* Minor fix to the latency consumption mechanism. + +Version 3.1: + +* Reoptimized fractional delay filter's windowing function. + +Version 3.0: + +* Implemented a new variant of the getInLenBeforeOutStart() function. +* Reimplemented oneshot() function to support `float` buffer types. +* Considerably improved downsampling performance at high resampling ratios. +* Implemented intermediate interpolation technique which boosted upsampling +performance for most resampling ratios considerably. +* Removed the ConvCount constant - now resampler supports virtually any +resampling ratios. +* Removed the UsePower2 parameter from the resampler constructor. +* Now resampler's process() function always returns pointer to the internal +buffer, input buffer is returned only if no resampling happens. +* Resampler's getMaxOutLen() function can now be used to obtain the maximal +output length that can be produced by the resampler in a single call. +* Added a more efficient "one-third" filters to half-band upsampler and +downsampler. + +Version 2.1: + +* Optimized 2X half-band downsampler. + +Version 2.0: + +* Optimized power-of-2 upsampling. + +Version 1.9: + +* Optimized half-band downsampling filter. +* Implemented whole-number stepping resampling. +* Added `R8B_EXTFFT` configuration option. +* Fixed initial sub-sample offseting on downsampling. + +Version 1.8: + +* Added `R8B_FASTTIMING` configuration option. + +Version 1.7: + +* Improved sample timing precision. +* Increased CDSPResampler :: ConvCountMax to 28 to support a lot wider +resampling ratios. +* Added `bench` tools. +* Removed getInLenBeforeOutStart() due to incorrect calculation. diff --git a/Src/external_dependencies/openmpt-trunk/include/r8brain/fft4g.h b/Src/external_dependencies/openmpt-trunk/include/r8brain/fft4g.h new file mode 100644 index 00000000..f6f2bc2a --- /dev/null +++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/fft4g.h @@ -0,0 +1,1316 @@ +//$ nobt +//$ nocpp + +/** + * @file fft4g.h + * + * @brief Wrapper class for Takuya OOURA's FFT functions. + * + * Functions from the FFT package by: Copyright(C) 1996-2001 Takuya OOURA + * http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html + * + * Modified and used with permission granted by the license. + * + * Here, the original "fft4g.c" file was wrapped into the "ooura_fft" class. + */ + +#ifndef R8B_FFT4G_INCLUDED +#define R8B_FFT4G_INCLUDED + +/* +Fast Fourier/Cosine/Sine Transform + dimension :one + data length :power of 2 + decimation :frequency + radix :4, 2 + data :inplace + table :use +functions + cdft: Complex Discrete Fourier Transform + rdft: Real Discrete Fourier Transform + ddct: Discrete Cosine Transform + ddst: Discrete Sine Transform + dfct: Cosine Transform of RDFT (Real Symmetric DFT) + dfst: Sine Transform of RDFT (Real Anti-symmetric DFT) +function prototypes + void cdft(int, int, FPType *, int *, FPType *); + void rdft(int, int, FPType *, int *, FPType *); + void ddct(int, int, FPType *, int *, FPType *); + void ddst(int, int, FPType *, int *, FPType *); + void dfct(int, FPType *, FPType *, int *, FPType *); + void dfst(int, FPType *, FPType *, int *, FPType *); + + +-------- Complex DFT (Discrete Fourier Transform) -------- + [definition] + <case1> + X[k] = sum_j=0^n-1 x[j]*exp(2*pi*i*j*k/n), 0<=k<n + <case2> + X[k] = sum_j=0^n-1 x[j]*exp(-2*pi*i*j*k/n), 0<=k<n + (notes: sum_j=0^n-1 is a summation from j=0 to n-1) + [usage] + <case1> + ip[0] = 0; // first time only + cdft(2*n, 1, a, ip, w); + <case2> + ip[0] = 0; // first time only + cdft(2*n, -1, a, ip, w); + [parameters] + 2*n :data length (int) + n >= 1, n = power of 2 + a[0...2*n-1] :input/output data (FPType *) + input data + a[2*j] = Re(x[j]), + a[2*j+1] = Im(x[j]), 0<=j<n + output data + a[2*k] = Re(X[k]), + a[2*k+1] = Im(X[k]), 0<=k<n + ip[0...*] :work area for bit reversal (int *) + length of ip >= 2+sqrt(n) + strictly, + length of ip >= + 2+(1<<(int)(log(n+0.5)/log(2))/2). + ip[0],ip[1] are pointers of the cos/sin table. + w[0...n/2-1] :cos/sin table (FPType *) + w[],ip[] are initialized if ip[0] == 0. + [remark] + Inverse of + cdft(2*n, -1, a, ip, w); + is + cdft(2*n, 1, a, ip, w); + for (j = 0; j <= 2 * n - 1; j++) { + a[j] *= 1.0 / n; + } + . + + +-------- Real DFT / Inverse of Real DFT -------- + [definition] + <case1> RDFT + R[k] = sum_j=0^n-1 a[j]*cos(2*pi*j*k/n), 0<=k<=n/2 + I[k] = sum_j=0^n-1 a[j]*sin(2*pi*j*k/n), 0<k<n/2 + <case2> IRDFT (excluding scale) + a[k] = (R[0] + R[n/2]*cos(pi*k))/2 + + sum_j=1^n/2-1 R[j]*cos(2*pi*j*k/n) + + sum_j=1^n/2-1 I[j]*sin(2*pi*j*k/n), 0<=k<n + [usage] + <case1> + ip[0] = 0; // first time only + rdft(n, 1, a, ip, w); + <case2> + ip[0] = 0; // first time only + rdft(n, -1, a, ip, w); + [parameters] + n :data length (int) + n >= 2, n = power of 2 + a[0...n-1] :input/output data (FPType *) + <case1> + output data + a[2*k] = R[k], 0<=k<n/2 + a[2*k+1] = I[k], 0<k<n/2 + a[1] = R[n/2] + <case2> + input data + a[2*j] = R[j], 0<=j<n/2 + a[2*j+1] = I[j], 0<j<n/2 + a[1] = R[n/2] + ip[0...*] :work area for bit reversal (int *) + length of ip >= 2+sqrt(n/2) + strictly, + length of ip >= + 2+(1<<(int)(log(n/2+0.5)/log(2))/2). + ip[0],ip[1] are pointers of the cos/sin table. + w[0...n/2-1] :cos/sin table (FPType *) + w[],ip[] are initialized if ip[0] == 0. + [remark] + Inverse of + rdft(n, 1, a, ip, w); + is + rdft(n, -1, a, ip, w); + for (j = 0; j <= n - 1; j++) { + a[j] *= 2.0 / n; + } + . + + +-------- DCT (Discrete Cosine Transform) / Inverse of DCT -------- + [definition] + <case1> IDCT (excluding scale) + C[k] = sum_j=0^n-1 a[j]*cos(pi*j*(k+1/2)/n), 0<=k<n + <case2> DCT + C[k] = sum_j=0^n-1 a[j]*cos(pi*(j+1/2)*k/n), 0<=k<n + [usage] + <case1> + ip[0] = 0; // first time only + ddct(n, 1, a, ip, w); + <case2> + ip[0] = 0; // first time only + ddct(n, -1, a, ip, w); + [parameters] + n :data length (int) + n >= 2, n = power of 2 + a[0...n-1] :input/output data (FPType *) + output data + a[k] = C[k], 0<=k<n + ip[0...*] :work area for bit reversal (int *) + length of ip >= 2+sqrt(n/2) + strictly, + length of ip >= + 2+(1<<(int)(log(n/2+0.5)/log(2))/2). + ip[0],ip[1] are pointers of the cos/sin table. + w[0...n*5/4-1] :cos/sin table (FPType *) + w[],ip[] are initialized if ip[0] == 0. + [remark] + Inverse of + ddct(n, -1, a, ip, w); + is + a[0] *= 0.5; + ddct(n, 1, a, ip, w); + for (j = 0; j <= n - 1; j++) { + a[j] *= 2.0 / n; + } + . + + +-------- DST (Discrete Sine Transform) / Inverse of DST -------- + [definition] + <case1> IDST (excluding scale) + S[k] = sum_j=1^n A[j]*sin(pi*j*(k+1/2)/n), 0<=k<n + <case2> DST + S[k] = sum_j=0^n-1 a[j]*sin(pi*(j+1/2)*k/n), 0<k<=n + [usage] + <case1> + ip[0] = 0; // first time only + ddst(n, 1, a, ip, w); + <case2> + ip[0] = 0; // first time only + ddst(n, -1, a, ip, w); + [parameters] + n :data length (int) + n >= 2, n = power of 2 + a[0...n-1] :input/output data (FPType *) + <case1> + input data + a[j] = A[j], 0<j<n + a[0] = A[n] + output data + a[k] = S[k], 0<=k<n + <case2> + output data + a[k] = S[k], 0<k<n + a[0] = S[n] + ip[0...*] :work area for bit reversal (int *) + length of ip >= 2+sqrt(n/2) + strictly, + length of ip >= + 2+(1<<(int)(log(n/2+0.5)/log(2))/2). + ip[0],ip[1] are pointers of the cos/sin table. + w[0...n*5/4-1] :cos/sin table (FPType *) + w[],ip[] are initialized if ip[0] == 0. + [remark] + Inverse of + ddst(n, -1, a, ip, w); + is + a[0] *= 0.5; + ddst(n, 1, a, ip, w); + for (j = 0; j <= n - 1; j++) { + a[j] *= 2.0 / n; + } + . + + +-------- Cosine Transform of RDFT (Real Symmetric DFT) -------- + [definition] + C[k] = sum_j=0^n a[j]*cos(pi*j*k/n), 0<=k<=n + [usage] + ip[0] = 0; // first time only + dfct(n, a, t, ip, w); + [parameters] + n :data length - 1 (int) + n >= 2, n = power of 2 + a[0...n] :input/output data (FPType *) + output data + a[k] = C[k], 0<=k<=n + t[0...n/2] :work area (FPType *) + ip[0...*] :work area for bit reversal (int *) + length of ip >= 2+sqrt(n/4) + strictly, + length of ip >= + 2+(1<<(int)(log(n/4+0.5)/log(2))/2). + ip[0],ip[1] are pointers of the cos/sin table. + w[0...n*5/8-1] :cos/sin table (FPType *) + w[],ip[] are initialized if ip[0] == 0. + [remark] + Inverse of + a[0] *= 0.5; + a[n] *= 0.5; + dfct(n, a, t, ip, w); + is + a[0] *= 0.5; + a[n] *= 0.5; + dfct(n, a, t, ip, w); + for (j = 0; j <= n; j++) { + a[j] *= 2.0 / n; + } + . + + +-------- Sine Transform of RDFT (Real Anti-symmetric DFT) -------- + [definition] + S[k] = sum_j=1^n-1 a[j]*sin(pi*j*k/n), 0<k<n + [usage] + ip[0] = 0; // first time only + dfst(n, a, t, ip, w); + [parameters] + n :data length + 1 (int) + n >= 2, n = power of 2 + a[0...n-1] :input/output data (FPType *) + output data + a[k] = S[k], 0<k<n + (a[0] is used for work area) + t[0...n/2-1] :work area (FPType *) + ip[0...*] :work area for bit reversal (int *) + length of ip >= 2+sqrt(n/4) + strictly, + length of ip >= + 2+(1<<(int)(log(n/4+0.5)/log(2))/2). + ip[0],ip[1] are pointers of the cos/sin table. + w[0...n*5/8-1] :cos/sin table (FPType *) + w[],ip[] are initialized if ip[0] == 0. + [remark] + Inverse of + dfst(n, a, t, ip, w); + is + dfst(n, a, t, ip, w); + for (j = 1; j <= n - 1; j++) { + a[j] *= 2.0 / n; + } + . + + +Appendix : + The cos/sin table is recalculated when the larger table required. + w[] and ip[] are compatible with all routines. +*/ + +namespace r8b { + +/** + * @brief A wrapper class around Takuya OOURA's FFT functions. + * + * A wrapper class around fft4g.c file's FFT functions by Takuya OOURA. + * Provides static private functions for use by the CDSPRealFFT class. + */ + +class ooura_fft +{ +friend class CDSPRealFFT; + +private: +typedef double FPType; + +static void cdft(int n, int isgn, FPType *a, int *ip, FPType *w) +{ + if (n > (ip[0] << 2)) { + makewt(n >> 2, ip, w); + } + if (n > 4) { + if (isgn >= 0) { + bitrv2(n, ip + 2, a); + cftfsub(n, a, w); + } else { + bitrv2conj(n, ip + 2, a); + cftbsub(n, a, w); + } + } else if (n == 4) { + cftfsub(n, a, w); + } +} + +static void rdft(int n, int isgn, FPType *a, int *ip, FPType *w) +{ + int nw, nc; + double xi; + + nw = ip[0]; + if (n > (nw << 2)) { + nw = n >> 2; + makewt(nw, ip, w); + } + nc = ip[1]; + if (n > (nc << 2)) { + nc = n >> 2; + makect(nc, ip, w + nw); + } + if (isgn >= 0) { + if (n > 4) { + bitrv2(n, ip + 2, a); + cftfsub(n, a, w); + rftfsub(n, a, nc, w + nw); + } else if (n == 4) { + cftfsub(n, a, w); + } + xi = a[0] - a[1]; + a[0] += a[1]; + a[1] = xi; + } else { + a[1] = 0.5 * (a[0] - a[1]); + a[0] -= a[1]; + if (n > 4) { + rftbsub(n, a, nc, w + nw); + bitrv2(n, ip + 2, a); + cftbsub(n, a, w); + } else if (n == 4) { + cftfsub(n, a, w); + } + } +} + +static void ddct(int n, int isgn, FPType *a, int *ip, FPType *w) +{ + int j, nw, nc; + double xr; + + nw = ip[0]; + if (n > (nw << 2)) { + nw = n >> 2; + makewt(nw, ip, w); + } + nc = ip[1]; + if (n > nc) { + nc = n; + makect(nc, ip, w + nw); + } + if (isgn < 0) { + xr = a[n - 1]; + for (j = n - 2; j >= 2; j -= 2) { + a[j + 1] = a[j] - a[j - 1]; + a[j] += a[j - 1]; + } + a[1] = a[0] - xr; + a[0] += xr; + if (n > 4) { + rftbsub(n, a, nc, w + nw); + bitrv2(n, ip + 2, a); + cftbsub(n, a, w); + } else if (n == 4) { + cftfsub(n, a, w); + } + } + dctsub(n, a, nc, w + nw); + if (isgn >= 0) { + if (n > 4) { + bitrv2(n, ip + 2, a); + cftfsub(n, a, w); + rftfsub(n, a, nc, w + nw); + } else if (n == 4) { + cftfsub(n, a, w); + } + xr = a[0] - a[1]; + a[0] += a[1]; + for (j = 2; j < n; j += 2) { + a[j - 1] = a[j] - a[j + 1]; + a[j] += a[j + 1]; + } + a[n - 1] = xr; + } +} + +static void ddst(int n, int isgn, FPType *a, int *ip, FPType *w) +{ + int j, nw, nc; + double xr; + + nw = ip[0]; + if (n > (nw << 2)) { + nw = n >> 2; + makewt(nw, ip, w); + } + nc = ip[1]; + if (n > nc) { + nc = n; + makect(nc, ip, w + nw); + } + if (isgn < 0) { + xr = a[n - 1]; + for (j = n - 2; j >= 2; j -= 2) { + a[j + 1] = -a[j] - a[j - 1]; + a[j] -= a[j - 1]; + } + a[1] = a[0] + xr; + a[0] -= xr; + if (n > 4) { + rftbsub(n, a, nc, w + nw); + bitrv2(n, ip + 2, a); + cftbsub(n, a, w); + } else if (n == 4) { + cftfsub(n, a, w); + } + } + dstsub(n, a, nc, w + nw); + if (isgn >= 0) { + if (n > 4) { + bitrv2(n, ip + 2, a); + cftfsub(n, a, w); + rftfsub(n, a, nc, w + nw); + } else if (n == 4) { + cftfsub(n, a, w); + } + xr = a[0] - a[1]; + a[0] += a[1]; + for (j = 2; j < n; j += 2) { + a[j - 1] = -a[j] - a[j + 1]; + a[j] -= a[j + 1]; + } + a[n - 1] = -xr; + } +} + +static void dfct(int n, FPType *a, FPType *t, int *ip, FPType *w) +{ + int j, k, l, m, mh, nw, nc; + double xr, xi, yr, yi; + + nw = ip[0]; + if (n > (nw << 3)) { + nw = n >> 3; + makewt(nw, ip, w); + } + nc = ip[1]; + if (n > (nc << 1)) { + nc = n >> 1; + makect(nc, ip, w + nw); + } + m = n >> 1; + yi = a[m]; + xi = a[0] + a[n]; + a[0] -= a[n]; + t[0] = xi - yi; + t[m] = xi + yi; + if (n > 2) { + mh = m >> 1; + for (j = 1; j < mh; j++) { + k = m - j; + xr = a[j] - a[n - j]; + xi = a[j] + a[n - j]; + yr = a[k] - a[n - k]; + yi = a[k] + a[n - k]; + a[j] = xr; + a[k] = yr; + t[j] = xi - yi; + t[k] = xi + yi; + } + t[mh] = a[mh] + a[n - mh]; + a[mh] -= a[n - mh]; + dctsub(m, a, nc, w + nw); + if (m > 4) { + bitrv2(m, ip + 2, a); + cftfsub(m, a, w); + rftfsub(m, a, nc, w + nw); + } else if (m == 4) { + cftfsub(m, a, w); + } + a[n - 1] = a[0] - a[1]; + a[1] = a[0] + a[1]; + for (j = m - 2; j >= 2; j -= 2) { + a[2 * j + 1] = a[j] + a[j + 1]; + a[2 * j - 1] = a[j] - a[j + 1]; + } + l = 2; + m = mh; + while (m >= 2) { + dctsub(m, t, nc, w + nw); + if (m > 4) { + bitrv2(m, ip + 2, t); + cftfsub(m, t, w); + rftfsub(m, t, nc, w + nw); + } else if (m == 4) { + cftfsub(m, t, w); + } + a[n - l] = t[0] - t[1]; + a[l] = t[0] + t[1]; + k = 0; + for (j = 2; j < m; j += 2) { + k += l << 2; + a[k - l] = t[j] - t[j + 1]; + a[k + l] = t[j] + t[j + 1]; + } + l <<= 1; + mh = m >> 1; + for (j = 0; j < mh; j++) { + k = m - j; + t[j] = t[m + k] - t[m + j]; + t[k] = t[m + k] + t[m + j]; + } + t[mh] = t[m + mh]; + m = mh; + } + a[l] = t[0]; + a[n] = t[2] - t[1]; + a[0] = t[2] + t[1]; + } else { + a[1] = a[0]; + a[2] = t[0]; + a[0] = t[1]; + } +} + +static void dfst(int n, FPType *a, FPType *t, int *ip, FPType *w) +{ + int j, k, l, m, mh, nw, nc; + double xr, xi, yr, yi; + + nw = ip[0]; + if (n > (nw << 3)) { + nw = n >> 3; + makewt(nw, ip, w); + } + nc = ip[1]; + if (n > (nc << 1)) { + nc = n >> 1; + makect(nc, ip, w + nw); + } + if (n > 2) { + m = n >> 1; + mh = m >> 1; + for (j = 1; j < mh; j++) { + k = m - j; + xr = a[j] + a[n - j]; + xi = a[j] - a[n - j]; + yr = a[k] + a[n - k]; + yi = a[k] - a[n - k]; + a[j] = xr; + a[k] = yr; + t[j] = xi + yi; + t[k] = xi - yi; + } + t[0] = a[mh] - a[n - mh]; + a[mh] += a[n - mh]; + a[0] = a[m]; + dstsub(m, a, nc, w + nw); + if (m > 4) { + bitrv2(m, ip + 2, a); + cftfsub(m, a, w); + rftfsub(m, a, nc, w + nw); + } else if (m == 4) { + cftfsub(m, a, w); + } + a[n - 1] = a[1] - a[0]; + a[1] = a[0] + a[1]; + for (j = m - 2; j >= 2; j -= 2) { + a[2 * j + 1] = a[j] - a[j + 1]; + a[2 * j - 1] = -a[j] - a[j + 1]; + } + l = 2; + m = mh; + while (m >= 2) { + dstsub(m, t, nc, w + nw); + if (m > 4) { + bitrv2(m, ip + 2, t); + cftfsub(m, t, w); + rftfsub(m, t, nc, w + nw); + } else if (m == 4) { + cftfsub(m, t, w); + } + a[n - l] = t[1] - t[0]; + a[l] = t[0] + t[1]; + k = 0; + for (j = 2; j < m; j += 2) { + k += l << 2; + a[k - l] = -t[j] - t[j + 1]; + a[k + l] = t[j] - t[j + 1]; + } + l <<= 1; + mh = m >> 1; + for (j = 1; j < mh; j++) { + k = m - j; + t[j] = t[m + k] + t[m + j]; + t[k] = t[m + k] - t[m + j]; + } + t[0] = t[m + mh]; + m = mh; + } + a[l] = t[0]; + } + a[0] = 0; +} + +/* -------- initializing routines -------- */ + +static void makewt(int nw, int *ip, FPType *w) +{ + int j, nwh; + double delta, x, y; + + ip[0] = nw; + ip[1] = 1; + if (nw > 2) { + nwh = nw >> 1; + delta = atan(1.0) / nwh; + w[0] = 1; + w[1] = 0; + w[nwh] = cos(delta * nwh); + w[nwh + 1] = w[nwh]; + if (nwh > 2) { + for (j = 2; j < nwh; j += 2) { + x = cos(delta * j); + y = sin(delta * j); + w[j] = x; + w[j + 1] = y; + w[nw - j] = y; + w[nw - j + 1] = x; + } + bitrv2(nw, ip + 2, w); + } + } +} + +static void makect(int nc, int *ip, FPType *c) +{ + int j, nch; + double delta; + + ip[1] = nc; + if (nc > 1) { + nch = nc >> 1; + delta = atan(1.0) / nch; + c[0] = cos(delta * nch); + c[nch] = 0.5 * c[0]; + for (j = 1; j < nch; j++) { + c[j] = 0.5 * cos(delta * j); + c[nc - j] = 0.5 * sin(delta * j); + } + } +} + +/* -------- child routines -------- */ + +static void bitrv2(int n, int *ip, FPType *a) +{ + int j, j1, k, k1, l, m, m2; + double xr, xi, yr, yi; + + ip[0] = 0; + l = n; + m = 1; + while ((m << 3) < l) { + l >>= 1; + for (j = 0; j < m; j++) { + ip[m + j] = ip[j] + l; + } + m <<= 1; + } + m2 = 2 * m; + if ((m << 3) == l) { + for (k = 0; k < m; k++) { + for (j = 0; j < k; j++) { + j1 = 2 * j + ip[k]; + k1 = 2 * k + ip[j]; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += m2; + k1 += 2 * m2; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += m2; + k1 -= m2; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += m2; + k1 += 2 * m2; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + } + j1 = 2 * k + m2 + ip[k]; + k1 = j1 + m2; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + } + } else { + for (k = 1; k < m; k++) { + for (j = 0; j < k; j++) { + j1 = 2 * j + ip[k]; + k1 = 2 * k + ip[j]; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += m2; + k1 += m2; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + } + } + } +} + +static void bitrv2conj(int n, int *ip, FPType *a) +{ + int j, j1, k, k1, l, m, m2; + double xr, xi, yr, yi; + + ip[0] = 0; + l = n; + m = 1; + while ((m << 3) < l) { + l >>= 1; + for (j = 0; j < m; j++) { + ip[m + j] = ip[j] + l; + } + m <<= 1; + } + m2 = 2 * m; + if ((m << 3) == l) { + for (k = 0; k < m; k++) { + for (j = 0; j < k; j++) { + j1 = 2 * j + ip[k]; + k1 = 2 * k + ip[j]; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += m2; + k1 += 2 * m2; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += m2; + k1 -= m2; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += m2; + k1 += 2 * m2; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + } + k1 = 2 * k + ip[k]; + a[k1 + 1] = -a[k1 + 1]; + j1 = k1 + m2; + k1 = j1 + m2; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + k1 += m2; + a[k1 + 1] = -a[k1 + 1]; + } + } else { + a[1] = -a[1]; + a[m2 + 1] = -a[m2 + 1]; + for (k = 1; k < m; k++) { + for (j = 0; j < k; j++) { + j1 = 2 * j + ip[k]; + k1 = 2 * k + ip[j]; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += m2; + k1 += m2; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + } + k1 = 2 * k + ip[k]; + a[k1 + 1] = -a[k1 + 1]; + a[k1 + m2 + 1] = -a[k1 + m2 + 1]; + } + } +} + +static void cftfsub(int n, FPType *a, const FPType *w) +{ + int j, j1, j2, j3, l; + double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; + + l = 2; + if (n > 8) { + cft1st(n, a, w); + l = 8; + while ((l << 2) < n) { + cftmdl(n, l, a, w); + l <<= 2; + } + } + if ((l << 2) == n) { + for (j = 0; j < l; j += 2) { + j1 = j + l; + j2 = j1 + l; + j3 = j2 + l; + x0r = a[j] + a[j1]; + x0i = a[j + 1] + a[j1 + 1]; + x1r = a[j] - a[j1]; + x1i = a[j + 1] - a[j1 + 1]; + x2r = a[j2] + a[j3]; + x2i = a[j2 + 1] + a[j3 + 1]; + x3r = a[j2] - a[j3]; + x3i = a[j2 + 1] - a[j3 + 1]; + a[j] = x0r + x2r; + a[j + 1] = x0i + x2i; + a[j2] = x0r - x2r; + a[j2 + 1] = x0i - x2i; + a[j1] = x1r - x3i; + a[j1 + 1] = x1i + x3r; + a[j3] = x1r + x3i; + a[j3 + 1] = x1i - x3r; + } + } else { + for (j = 0; j < l; j += 2) { + j1 = j + l; + x0r = a[j] - a[j1]; + x0i = a[j + 1] - a[j1 + 1]; + a[j] += a[j1]; + a[j + 1] += a[j1 + 1]; + a[j1] = x0r; + a[j1 + 1] = x0i; + } + } +} + +static void cftbsub(int n, FPType *a, const FPType *w) +{ + int j, j1, j2, j3, l; + double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; + + l = 2; + if (n > 8) { + cft1st(n, a, w); + l = 8; + while ((l << 2) < n) { + cftmdl(n, l, a, w); + l <<= 2; + } + } + if ((l << 2) == n) { + for (j = 0; j < l; j += 2) { + j1 = j + l; + j2 = j1 + l; + j3 = j2 + l; + x0r = a[j] + a[j1]; + x0i = -a[j + 1] - a[j1 + 1]; + x1r = a[j] - a[j1]; + x1i = -a[j + 1] + a[j1 + 1]; + x2r = a[j2] + a[j3]; + x2i = a[j2 + 1] + a[j3 + 1]; + x3r = a[j2] - a[j3]; + x3i = a[j2 + 1] - a[j3 + 1]; + a[j] = x0r + x2r; + a[j + 1] = x0i - x2i; + a[j2] = x0r - x2r; + a[j2 + 1] = x0i + x2i; + a[j1] = x1r - x3i; + a[j1 + 1] = x1i - x3r; + a[j3] = x1r + x3i; + a[j3 + 1] = x1i + x3r; + } + } else { + for (j = 0; j < l; j += 2) { + j1 = j + l; + x0r = a[j] - a[j1]; + x0i = -a[j + 1] + a[j1 + 1]; + a[j] += a[j1]; + a[j + 1] = -a[j + 1] - a[j1 + 1]; + a[j1] = x0r; + a[j1 + 1] = x0i; + } + } +} + +static void cft1st(int n, FPType *a, const FPType *w) +{ + int j, k1, k2; + double wk1r, wk1i, wk2r, wk2i, wk3r, wk3i; + double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; + + x0r = a[0] + a[2]; + x0i = a[1] + a[3]; + x1r = a[0] - a[2]; + x1i = a[1] - a[3]; + x2r = a[4] + a[6]; + x2i = a[5] + a[7]; + x3r = a[4] - a[6]; + x3i = a[5] - a[7]; + a[0] = x0r + x2r; + a[1] = x0i + x2i; + a[4] = x0r - x2r; + a[5] = x0i - x2i; + a[2] = x1r - x3i; + a[3] = x1i + x3r; + a[6] = x1r + x3i; + a[7] = x1i - x3r; + wk1r = w[2]; + x0r = a[8] + a[10]; + x0i = a[9] + a[11]; + x1r = a[8] - a[10]; + x1i = a[9] - a[11]; + x2r = a[12] + a[14]; + x2i = a[13] + a[15]; + x3r = a[12] - a[14]; + x3i = a[13] - a[15]; + a[8] = x0r + x2r; + a[9] = x0i + x2i; + a[12] = x2i - x0i; + a[13] = x0r - x2r; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[10] = wk1r * (x0r - x0i); + a[11] = wk1r * (x0r + x0i); + x0r = x3i + x1r; + x0i = x3r - x1i; + a[14] = wk1r * (x0i - x0r); + a[15] = wk1r * (x0i + x0r); + k1 = 0; + for (j = 16; j < n; j += 16) { + k1 += 2; + k2 = 2 * k1; + wk2r = w[k1]; + wk2i = w[k1 + 1]; + wk1r = w[k2]; + wk1i = w[k2 + 1]; + wk3r = wk1r - 2 * wk2i * wk1i; + wk3i = 2 * wk2i * wk1r - wk1i; + x0r = a[j] + a[j + 2]; + x0i = a[j + 1] + a[j + 3]; + x1r = a[j] - a[j + 2]; + x1i = a[j + 1] - a[j + 3]; + x2r = a[j + 4] + a[j + 6]; + x2i = a[j + 5] + a[j + 7]; + x3r = a[j + 4] - a[j + 6]; + x3i = a[j + 5] - a[j + 7]; + a[j] = x0r + x2r; + a[j + 1] = x0i + x2i; + x0r -= x2r; + x0i -= x2i; + a[j + 4] = wk2r * x0r - wk2i * x0i; + a[j + 5] = wk2r * x0i + wk2i * x0r; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[j + 2] = wk1r * x0r - wk1i * x0i; + a[j + 3] = wk1r * x0i + wk1i * x0r; + x0r = x1r + x3i; + x0i = x1i - x3r; + a[j + 6] = wk3r * x0r - wk3i * x0i; + a[j + 7] = wk3r * x0i + wk3i * x0r; + wk1r = w[k2 + 2]; + wk1i = w[k2 + 3]; + wk3r = wk1r - 2 * wk2r * wk1i; + wk3i = 2 * wk2r * wk1r - wk1i; + x0r = a[j + 8] + a[j + 10]; + x0i = a[j + 9] + a[j + 11]; + x1r = a[j + 8] - a[j + 10]; + x1i = a[j + 9] - a[j + 11]; + x2r = a[j + 12] + a[j + 14]; + x2i = a[j + 13] + a[j + 15]; + x3r = a[j + 12] - a[j + 14]; + x3i = a[j + 13] - a[j + 15]; + a[j + 8] = x0r + x2r; + a[j + 9] = x0i + x2i; + x0r -= x2r; + x0i -= x2i; + a[j + 12] = -wk2i * x0r - wk2r * x0i; + a[j + 13] = -wk2i * x0i + wk2r * x0r; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[j + 10] = wk1r * x0r - wk1i * x0i; + a[j + 11] = wk1r * x0i + wk1i * x0r; + x0r = x1r + x3i; + x0i = x1i - x3r; + a[j + 14] = wk3r * x0r - wk3i * x0i; + a[j + 15] = wk3r * x0i + wk3i * x0r; + } +} + +static void cftmdl(int n, int l, FPType *a, const FPType *w) +{ + int j, j1, j2, j3, k, k1, k2, m, m2; + double wk1r, wk1i, wk2r, wk2i, wk3r, wk3i; + double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; + + m = l << 2; + for (j = 0; j < l; j += 2) { + j1 = j + l; + j2 = j1 + l; + j3 = j2 + l; + x0r = a[j] + a[j1]; + x0i = a[j + 1] + a[j1 + 1]; + x1r = a[j] - a[j1]; + x1i = a[j + 1] - a[j1 + 1]; + x2r = a[j2] + a[j3]; + x2i = a[j2 + 1] + a[j3 + 1]; + x3r = a[j2] - a[j3]; + x3i = a[j2 + 1] - a[j3 + 1]; + a[j] = x0r + x2r; + a[j + 1] = x0i + x2i; + a[j2] = x0r - x2r; + a[j2 + 1] = x0i - x2i; + a[j1] = x1r - x3i; + a[j1 + 1] = x1i + x3r; + a[j3] = x1r + x3i; + a[j3 + 1] = x1i - x3r; + } + wk1r = w[2]; + for (j = m; j < l + m; j += 2) { + j1 = j + l; + j2 = j1 + l; + j3 = j2 + l; + x0r = a[j] + a[j1]; + x0i = a[j + 1] + a[j1 + 1]; + x1r = a[j] - a[j1]; + x1i = a[j + 1] - a[j1 + 1]; + x2r = a[j2] + a[j3]; + x2i = a[j2 + 1] + a[j3 + 1]; + x3r = a[j2] - a[j3]; + x3i = a[j2 + 1] - a[j3 + 1]; + a[j] = x0r + x2r; + a[j + 1] = x0i + x2i; + a[j2] = x2i - x0i; + a[j2 + 1] = x0r - x2r; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[j1] = wk1r * (x0r - x0i); + a[j1 + 1] = wk1r * (x0r + x0i); + x0r = x3i + x1r; + x0i = x3r - x1i; + a[j3] = wk1r * (x0i - x0r); + a[j3 + 1] = wk1r * (x0i + x0r); + } + k1 = 0; + m2 = 2 * m; + for (k = m2; k < n; k += m2) { + k1 += 2; + k2 = 2 * k1; + wk2r = w[k1]; + wk2i = w[k1 + 1]; + wk1r = w[k2]; + wk1i = w[k2 + 1]; + wk3r = wk1r - 2 * wk2i * wk1i; + wk3i = 2 * wk2i * wk1r - wk1i; + for (j = k; j < l + k; j += 2) { + j1 = j + l; + j2 = j1 + l; + j3 = j2 + l; + x0r = a[j] + a[j1]; + x0i = a[j + 1] + a[j1 + 1]; + x1r = a[j] - a[j1]; + x1i = a[j + 1] - a[j1 + 1]; + x2r = a[j2] + a[j3]; + x2i = a[j2 + 1] + a[j3 + 1]; + x3r = a[j2] - a[j3]; + x3i = a[j2 + 1] - a[j3 + 1]; + a[j] = x0r + x2r; + a[j + 1] = x0i + x2i; + x0r -= x2r; + x0i -= x2i; + a[j2] = wk2r * x0r - wk2i * x0i; + a[j2 + 1] = wk2r * x0i + wk2i * x0r; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[j1] = wk1r * x0r - wk1i * x0i; + a[j1 + 1] = wk1r * x0i + wk1i * x0r; + x0r = x1r + x3i; + x0i = x1i - x3r; + a[j3] = wk3r * x0r - wk3i * x0i; + a[j3 + 1] = wk3r * x0i + wk3i * x0r; + } + wk1r = w[k2 + 2]; + wk1i = w[k2 + 3]; + wk3r = wk1r - 2 * wk2r * wk1i; + wk3i = 2 * wk2r * wk1r - wk1i; + for (j = k + m; j < l + (k + m); j += 2) { + j1 = j + l; + j2 = j1 + l; + j3 = j2 + l; + x0r = a[j] + a[j1]; + x0i = a[j + 1] + a[j1 + 1]; + x1r = a[j] - a[j1]; + x1i = a[j + 1] - a[j1 + 1]; + x2r = a[j2] + a[j3]; + x2i = a[j2 + 1] + a[j3 + 1]; + x3r = a[j2] - a[j3]; + x3i = a[j2 + 1] - a[j3 + 1]; + a[j] = x0r + x2r; + a[j + 1] = x0i + x2i; + x0r -= x2r; + x0i -= x2i; + a[j2] = -wk2i * x0r - wk2r * x0i; + a[j2 + 1] = -wk2i * x0i + wk2r * x0r; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[j1] = wk1r * x0r - wk1i * x0i; + a[j1 + 1] = wk1r * x0i + wk1i * x0r; + x0r = x1r + x3i; + x0i = x1i - x3r; + a[j3] = wk3r * x0r - wk3i * x0i; + a[j3 + 1] = wk3r * x0i + wk3i * x0r; + } + } +} + +static void rftfsub(int n, FPType *a, int nc, const FPType *c) +{ + int j, k, kk, ks, m; + double wkr, wki, xr, xi, yr, yi; + + m = n >> 1; + ks = 2 * nc / m; + kk = 0; + for (j = 2; j < m; j += 2) { + k = n - j; + kk += ks; + wkr = 0.5 - c[nc - kk]; + wki = c[kk]; + xr = a[j] - a[k]; + xi = a[j + 1] + a[k + 1]; + yr = wkr * xr - wki * xi; + yi = wkr * xi + wki * xr; + a[j] -= yr; + a[j + 1] -= yi; + a[k] += yr; + a[k + 1] -= yi; + } +} + +static void rftbsub(int n, FPType *a, int nc, const FPType *c) +{ + int j, k, kk, ks, m; + double wkr, wki, xr, xi, yr, yi; + + a[1] = -a[1]; + m = n >> 1; + ks = 2 * nc / m; + kk = 0; + for (j = 2; j < m; j += 2) { + k = n - j; + kk += ks; + wkr = 0.5 - c[nc - kk]; + wki = c[kk]; + xr = a[j] - a[k]; + xi = a[j + 1] + a[k + 1]; + yr = wkr * xr + wki * xi; + yi = wkr * xi - wki * xr; + a[j] -= yr; + a[j + 1] = yi - a[j + 1]; + a[k] += yr; + a[k + 1] = yi - a[k + 1]; + } + a[m + 1] = -a[m + 1]; +} + +static void dctsub(int n, FPType *a, int nc, const FPType *c) +{ + int j, k, kk, ks, m; + double wkr, wki, xr; + + m = n >> 1; + ks = nc / n; + kk = 0; + for (j = 1; j < m; j++) { + k = n - j; + kk += ks; + wkr = c[kk] - c[nc - kk]; + wki = c[kk] + c[nc - kk]; + xr = wki * a[j] - wkr * a[k]; + a[j] = wkr * a[j] + wki * a[k]; + a[k] = xr; + } + a[m] *= c[0]; +} + +static void dstsub(int n, FPType *a, int nc, const FPType *c) +{ + int j, k, kk, ks, m; + double wkr, wki, xr; + + m = n >> 1; + ks = nc / n; + kk = 0; + for (j = 1; j < m; j++) { + k = n - j; + kk += ks; + wkr = c[kk] - c[nc - kk]; + wki = c[kk] + c[nc - kk]; + xr = wki * a[k] - wkr * a[j]; + a[k] = wkr * a[k] + wki * a[j]; + a[j] = xr; + } + a[m] *= c[0]; +} +}; + +} // namespace r8b + +#endif // R8B_FFT4G_INCLUDED diff --git a/Src/external_dependencies/openmpt-trunk/include/r8brain/r8bbase.cpp b/Src/external_dependencies/openmpt-trunk/include/r8brain/r8bbase.cpp new file mode 100644 index 00000000..f877862e --- /dev/null +++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/r8bbase.cpp @@ -0,0 +1,38 @@ +/** + * @file r8bbase.cpp + * + * @brief C++ file that should be compiled and included into your application. + * + * This is a single library file that should be compiled and included into the + * project that uses the "r8brain-free-src" sample rate converter. This file + * defines several global static objects used by the library. + * + * You may also need to include to your project: the "Kernel32" library + * (on Windows) and the "pthread" library on macOS and Linux. + * + * r8brain-free-src Copyright (c) 2013-2021 Aleksey Vaneev + * See the "LICENSE" file for license. + */ + +#include "CDSPFIRFilter.h" +#include "CDSPFracInterpolator.h" + +namespace r8b { + +#if R8B_FLTTEST + int InterpFilterFracs = -1; +#endif // R8B_FLTTEST + +CSyncObject CDSPRealFFTKeeper :: StateSync; +CDSPRealFFT :: CObjKeeper CDSPRealFFTKeeper :: FFTObjects[ 31 ]; + +CSyncObject CDSPFIRFilterCache :: StateSync; +CPtrKeeper< CDSPFIRFilter* > CDSPFIRFilterCache :: Objects; +int CDSPFIRFilterCache :: ObjCount = 0; + +CSyncObject CDSPFracDelayFilterBankCache :: StateSync; +CPtrKeeper< CDSPFracDelayFilterBank* > CDSPFracDelayFilterBankCache :: Objects; +CPtrKeeper< CDSPFracDelayFilterBank* > CDSPFracDelayFilterBankCache :: StaticObjects; +int CDSPFracDelayFilterBankCache :: ObjCount = 0; + +} // namespace r8b diff --git a/Src/external_dependencies/openmpt-trunk/include/r8brain/r8bbase.h b/Src/external_dependencies/openmpt-trunk/include/r8brain/r8bbase.h new file mode 100644 index 00000000..32054965 --- /dev/null +++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/r8bbase.h @@ -0,0 +1,1206 @@ +//$ nobt + +/** + * @file r8bbase.h + * + * @brief The "base" inclusion file with basic classes and functions. + * + * This is the "base" inclusion file for the "r8brain-free-src" sample rate + * converter. This inclusion file contains implementations of several small + * utility classes and functions used by the library. + * + * @mainpage + * + * @section intro_sec Introduction + * + * Open source (under the MIT license) high-quality professional audio sample + * rate converter (SRC) (resampling) library. Features routines for SRC, both + * up- and downsampling, to/from any sample rate, including non-integer sample + * rates: it can be also used for conversion to/from SACD sample rate and even + * go beyond that. SRC routines were implemented in multi-platform C++ code, + * and have a high level of optimality. + * + * For more information, please visit + * https://github.com/avaneev/r8brain-free-src + * + * @section license License + * + * The MIT License (MIT) + * + * r8brain-free-src Copyright (c) 2013-2022 Aleksey Vaneev + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Please credit the creator of this library in your documentation in the + * following way: "Sample rate converter designed by Aleksey Vaneev of + * Voxengo" + * + * @version 5.6 + */ + +#ifndef R8BBASE_INCLUDED +#define R8BBASE_INCLUDED + +#include <stdlib.h> +#include <stdint.h> +#include <string.h> +#include <math.h> +#include "r8bconf.h" + +#if defined( _WIN32 ) + #include <windows.h> +#else // defined( _WIN32 ) + #include <pthread.h> +#endif // defined( _WIN32 ) + +#if defined( __SSE4_2__ ) || defined( __SSE4_1__ ) || \ + defined( __SSSE3__ ) || defined( __SSE3__ ) || defined( __SSE2__ ) || \ + defined( __x86_64__ ) || defined( _M_AMD64 ) || defined( _M_X64 ) || \ + defined( __amd64 ) + + #include <immintrin.h> + + #define R8B_SSE2 + #define R8B_SIMD_ISH + +#elif defined( __aarch64__ ) || defined( __arm64__ ) || defined( __ARM_NEON ) + + #include <arm_neon.h> + + #define R8B_NEON + + #if !defined( __APPLE__ ) + #define R8B_SIMD_ISH // Shuffled interpolation is inefficient on M1. + #endif // !defined( __APPLE__ ) + +#endif // ARM64 + +/** + * @brief The "r8brain-free-src" library namespace. + * + * The "r8brain-free-src" sample rate converter library namespace. + */ + +namespace r8b { + +/** + * Macro defines r8brain-free-src version string. + */ + +#define R8B_VERSION "5.6" + +/** + * The macro equals to "pi" constant, fits 53-bit floating point mantissa. + */ + +#define R8B_PI 3.14159265358979324 + +/** + * The R8B_2PI macro equals to "2 * pi" constant, fits 53-bit floating point + * mantissa. + */ + +#define R8B_2PI 6.28318530717958648 + +/** + * The R8B_3PI macro equals to "3 * pi" constant, fits 53-bit floating point + * mantissa. + */ + +#define R8B_3PI 9.42477796076937972 + +/** + * The R8B_PId2 macro equals to "pi divided by 2" constant, fits 53-bit + * floating point mantissa. + */ + +#define R8B_PId2 1.57079632679489662 + +/** + * A special macro that defines empty copy-constructor and copy operator with + * the "private:" prefix. This macro should be used in classes that cannot be + * copied in a standard C++ way. + * + * This macro does not need to be defined in classes derived from a class + * where such macro was already used. + * + * @param ClassName The name of the class which uses this macro. + */ + +#define R8BNOCTOR( ClassName ) \ + private: \ + ClassName( const ClassName& ) { } \ + ClassName& operator = ( const ClassName& ) { return( *this ); } + +/** + * @brief The default base class for objects created on heap. + * + * Class that implements "new" and "delete" operators that use standard + * malloc() and free() functions. + */ + +class CStdClassAllocator +{ +public: + /** + * @param n The size of the object, in bytes. + * @param p Pointer to object's pre-allocated memory block. + * @return Pointer to object. + */ + + void* operator new( size_t, void* p ) + { + return( p ); + } + + /** + * @param n The size of the object, in bytes. + * @return Pointer to the allocated memory block for the object. + */ + + void* operator new( size_t n ) + { + return( :: malloc( n )); + } + + /** + * @param n The size of the object, in bytes. + * @return Pointer to the allocated memory block for the object. + */ + + void* operator new[]( size_t n ) + { + return( :: malloc( n )); + } + + /** + * Operator frees a previously allocated memory block for the object. + * + * @param p Pointer to the allocated memory block for the object. + */ + + void operator delete( void* p ) + { + :: free( p ); + } + + /** + * Operator frees a previously allocated memory block for the object. + * + * @param p Pointer to the allocated memory block for the object. + */ + + void operator delete[]( void* p ) + { + :: free( p ); + } +}; + +/** + * @brief The default base class for objects that allocate blocks of memory. + * + * Memory buffer allocator that uses "stdlib" standard memory functions. + */ + +class CStdMemAllocator : public CStdClassAllocator +{ +public: + /** + * Function allocates memory block. + * + * @param Size The size of the block, in bytes. + * @result The pointer to the allocated block. + */ + + static void* allocmem( const size_t Size ) + { + return( :: malloc( Size )); + } + + /** + * Function reallocates a previously allocated memory block. + * + * @param p Pointer to the allocated block, can be NULL. + * @param Size The new size of the block, in bytes. + * @result The pointer to the (re)allocated block. + */ + + static void* reallocmem( void* p, const size_t Size ) + { + return( :: realloc( p, Size )); + } + + /** + * Function frees a previously allocated memory block. + * + * @param p Pointer to the allocated block, can be NULL. + */ + + static void freemem( void* p ) + { + :: free( p ); + } +}; + +/** + * This function forces the provided "ptr" pointer to be aligned to + * "align" bytes. Works with power-of-2 alignments only. + * + * @param ptr Pointer to align. + * @param align Alignment, in bytes, power-of-2. + * @tparam T Pointer's element type. + */ + +template< typename T > +inline T* alignptr( T* const ptr, const uintptr_t align ) +{ + return( (T*) (( (uintptr_t) ptr + align - 1 ) & ~( align - 1 ))); +} + +/** + * @brief Templated memory buffer class for element buffers of fixed capacity. + * + * Fixed memory buffer object. Supports allocation of a fixed amount of + * memory. Does not store buffer's capacity - the user should know the actual + * capacity of the buffer. Does not feature "internal" storage, memory is + * always allocated via the R8B_MEMALLOCCLASS class's functions. Thus the + * object of this class can be moved in memory. + * + * This class manages memory space only - it does not perform element class + * construction nor destruction operations. + * + * This class applies 64-byte memory address alignment to the allocated data + * block. + * + * @tparam T The type of the stored elements (e.g. "double"). + */ + +template< typename T > +class CFixedBuffer : public R8B_MEMALLOCCLASS +{ + R8BNOCTOR( CFixedBuffer ); + +public: + CFixedBuffer() + : Data0( NULL ) + , Data( NULL ) + { + } + + /** + * Constructor allocates memory so that the specified number of elements + * of type T can be stored in *this buffer object. + * + * @param Capacity Storage for this number of elements to allocate. + */ + + CFixedBuffer( const int Capacity ) + { + R8BASSERT( Capacity > 0 || Capacity == 0 ); + + Data0 = allocmem( Capacity * sizeof( T ) + Alignment ); + Data = (T*) alignptr( Data0, Alignment ); + + R8BASSERT( Data0 != NULL || Capacity == 0 ); + } + + ~CFixedBuffer() + { + freemem( Data0 ); + } + + /** + * Function allocates memory so that the specified number of elements of + * type T can be stored in *this buffer object. + * + * @param Capacity Storage for this number of elements to allocate. + */ + + void alloc( const int Capacity ) + { + R8BASSERT( Capacity > 0 || Capacity == 0 ); + + freemem( Data0 ); + Data0 = allocmem( Capacity * sizeof( T ) + Alignment ); + Data = (T*) alignptr( Data0, Alignment ); + + R8BASSERT( Data0 != NULL || Capacity == 0 ); + } + + /** + * Function reallocates memory so that the specified number of elements of + * type T can be stored in *this buffer object. Previously allocated data + * is copied to the new memory buffer. + * + * @param PrevCapacity Previous capacity of *this buffer. + * @param NewCapacity Storage for this number of elements to allocate. + */ + + void realloc( const int PrevCapacity, const int NewCapacity ) + { + R8BASSERT( PrevCapacity >= 0 ); + R8BASSERT( NewCapacity >= 0 ); + + void* const NewData0 = allocmem( NewCapacity * sizeof( T ) + + Alignment ); + + T* const NewData = (T*) alignptr( NewData0, Alignment ); + const size_t CopySize = ( PrevCapacity > NewCapacity ? + NewCapacity : PrevCapacity ) * sizeof( T ); + + if( CopySize > 0 ) + { + memcpy( NewData, Data, CopySize ); + } + + freemem( Data0 ); + Data0 = NewData0; + Data = NewData; + + R8BASSERT( Data0 != NULL || NewCapacity == 0 ); + } + + /** + * Function deallocates a previously allocated buffer. + */ + + void free() + { + freemem( Data0 ); + Data0 = NULL; + Data = NULL; + } + + /** + * @return Pointer to the first element of the allocated buffer, NULL if + * not allocated. + */ + + T* getPtr() const + { + return( Data ); + } + + /** + * @return Pointer to the first element of the allocated buffer, NULL if + * not allocated. + */ + + operator T* () const + { + return( Data ); + } + +private: + static const size_t Alignment = 64; ///< Buffer address alignment, in + ///< bytes. + ///< + void* Data0; ///< Buffer pointer, original unaligned. + ///< + T* Data; ///< Element buffer pointer, aligned. + ///< +}; + +/** + * @brief Pointer-to-object "keeper" class with automatic deletion. + * + * An auxiliary class that can be used for keeping a pointer to object that + * should be deleted together with the "keeper" by calling object's "delete" + * operator. + * + * @tparam T Pointer type to operate with, must include the asterisk (e.g. + * "CDSPFIRFilter*"). + */ + +template< class T > +class CPtrKeeper +{ + R8BNOCTOR( CPtrKeeper ); + +public: + CPtrKeeper() + : Object( NULL ) + { + } + + /** + * Constructor assigns a pointer to object to *this keeper. + * + * @param aObject Pointer to object to keep, can be NULL. + * @tparam T2 Object's pointer type. + */ + + template< class T2 > + CPtrKeeper( T2 const aObject ) + : Object( aObject ) + { + } + + ~CPtrKeeper() + { + delete Object; + } + + /** + * Function assigns a pointer to object to *this keeper. A previously + * keeped pointer will be reset and object deleted. + * + * @param aObject Pointer to object to keep, can be NULL. + * @tparam T2 Object's pointer type. + */ + + template< class T2 > + void operator = ( T2 const aObject ) + { + reset(); + Object = aObject; + } + + /** + * @return Pointer to keeped object, NULL if no object is being kept. + */ + + T operator -> () const + { + return( Object ); + } + + /** + * @return Pointer to keeped object, NULL if no object is being kept. + */ + + operator T () const + { + return( Object ); + } + + /** + * Function resets the keeped pointer and deletes the keeped object. + */ + + void reset() + { + T DelObj = Object; + Object = NULL; + delete DelObj; + } + + /** + * @return Function returns the keeped pointer and resets it in *this + * keeper without object deletion. + */ + + T unkeep() + { + T ResObject = Object; + Object = NULL; + return( ResObject ); + } + +private: + T Object; ///< Pointer to keeped object. + ///< +}; + +/** + * @brief Multi-threaded synchronization object class. + * + * This class uses standard OS thread-locking (mutex) mechanism which is + * fairly efficient in most cases. + * + * The acquire() function can be called recursively, in the same thread, for + * this kind of thread-locking mechanism. This will not produce a dead-lock. + */ + +class CSyncObject +{ + R8BNOCTOR( CSyncObject ); + +public: + CSyncObject() + { + #if defined( _WIN32 ) + InitializeCriticalSectionAndSpinCount( &CritSec, 4000 ); + #else // defined( _WIN32 ) + pthread_mutexattr_t MutexAttrs; + pthread_mutexattr_init( &MutexAttrs ); + pthread_mutexattr_settype( &MutexAttrs, PTHREAD_MUTEX_RECURSIVE ); + pthread_mutex_init( &Mutex, &MutexAttrs ); + pthread_mutexattr_destroy( &MutexAttrs ); + #endif // defined( _WIN32 ) + } + + ~CSyncObject() + { + #if defined( _WIN32 ) + DeleteCriticalSection( &CritSec ); + #else // defined( _WIN32 ) + pthread_mutex_destroy( &Mutex ); + #endif // defined( _WIN32 ) + } + + /** + * Function "acquires" *this thread synchronizer object immediately or + * waits until another thread releases it. + */ + + void acquire() + { + #if defined( _WIN32 ) + EnterCriticalSection( &CritSec ); + #else // defined( _WIN32 ) + pthread_mutex_lock( &Mutex ); + #endif // defined( _WIN32 ) + } + + /** + * Function "releases" *this previously acquired thread synchronizer + * object. + */ + + void release() + { + #if defined( _WIN32 ) + LeaveCriticalSection( &CritSec ); + #else // defined( _WIN32 ) + pthread_mutex_unlock( &Mutex ); + #endif // defined( _WIN32 ) + } + +private: + #if defined( _WIN32 ) + CRITICAL_SECTION CritSec; ///< Standard Windows critical section + ///< structure. + ///< + #else // defined( _WIN32 ) + pthread_mutex_t Mutex; ///< pthread.h mutex object. + ///< + #endif // defined( _WIN32 ) +}; + +/** + * @brief A "keeper" class for CSyncObject-based synchronization. + * + * Sync keeper class. The object of this class can be used as auto-init and + * auto-deinit object for calling the acquire() and release() functions of an + * object of the CSyncObject class. This "keeper" object is best used in + * functions as an "automatic" object allocated on the stack, possibly via the + * R8BSYNC() macro. + */ + +class CSyncKeeper +{ + R8BNOCTOR( CSyncKeeper ); + +public: + CSyncKeeper() + : SyncObj( NULL ) + { + } + + /** + * @param aSyncObj Pointer to the sync object which should be used for + * sync'ing, can be NULL. + */ + + CSyncKeeper( CSyncObject* const aSyncObj ) + : SyncObj( aSyncObj ) + { + if( SyncObj != NULL ) + { + SyncObj -> acquire(); + } + } + + /** + * @param aSyncObj Reference to the sync object which should be used for + * sync'ing. + */ + + CSyncKeeper( CSyncObject& aSyncObj ) + : SyncObj( &aSyncObj ) + { + SyncObj -> acquire(); + } + + ~CSyncKeeper() + { + if( SyncObj != NULL ) + { + SyncObj -> release(); + } + } + +protected: + CSyncObject* SyncObj; ///< Sync object in use (can be NULL). + ///< +}; + +/** + * The synchronization macro. The R8BSYNC( obj ) macro, which creates and + * object of the r8b::CSyncKeeper class on stack, should be put before + * sections of the code that may potentially change data asynchronously with + * other threads at the same time. The R8BSYNC( obj ) macro "acquires" the + * synchronization object thus blocking execution of other threads that also + * use the same R8BSYNC( obj ) macro. The blocked section begins with the + * R8BSYNC( obj ) macro and finishes at the end of the current C++ code block. + * Multiple R8BSYNC() macros may be defined from within the same code block. + * + * @param SyncObject An object of the CSyncObject type that is used for + * synchronization. + */ + +#define R8BSYNC( SyncObject ) R8BSYNC_( SyncObject, __LINE__ ) +#define R8BSYNC_( SyncObject, id ) R8BSYNC__( SyncObject, id ) +#define R8BSYNC__( SyncObject, id ) CSyncKeeper SyncKeeper##id( SyncObject ) + +/** + * @brief Sine signal generator class. + * + * Class implements sine signal generator without biasing. + */ + +class CSineGen +{ +public: + CSineGen() + { + } + + /** + * Constructor initializes *this sine signal generator, with unity gain + * output. + * + * @param si Sine function increment, in radians. + * @param ph Starting phase, in radians. Add R8B_PId2 for cosine function. + */ + + CSineGen( const double si, const double ph ) + : svalue1( sin( ph )) + , svalue2( sin( ph - si )) + , sincr( 2.0 * cos( si )) + { + } + + /** + * Constructor initializes *this sine signal generator. + * + * @param si Sine function increment, in radians. + * @param ph Starting phase, in radians. Add R8B_PId2 for cosine function. + * @param g The overall gain factor, 1.0 for unity gain (-1.0 to 1.0 + * amplitude). + */ + + CSineGen( const double si, const double ph, const double g ) + : svalue1( sin( ph ) * g ) + , svalue2( sin( ph - si ) * g ) + , sincr( 2.0 * cos( si )) + { + } + + /** + * Function initializes *this sine signal generator, with unity gain + * output. + * + * @param si Sine function increment, in radians. + * @param ph Starting phase, in radians. Add R8B_PId2 for cosine function. + */ + + void init( const double si, const double ph ) + { + svalue1 = sin( ph ); + svalue2 = sin( ph - si ); + sincr = 2.0 * cos( si ); + } + + /** + * Function initializes *this sine signal generator. + * + * @param si Sine function increment, in radians. + * @param ph Starting phase, in radians. Add R8B_PId2 for cosine function. + * @param g The overall gain factor, 1.0 for unity gain (-1.0 to 1.0 + * amplitude). + */ + + void init( const double si, const double ph, const double g ) + { + svalue1 = sin( ph ) * g; + svalue2 = sin( ph - si ) * g; + sincr = 2.0 * cos( si ); + } + + /** + * @return Next value of the sine function, without biasing. + */ + + double generate() + { + const double res = svalue1; + + svalue1 = sincr * res - svalue2; + svalue2 = res; + + return( res ); + } + +private: + double svalue1; ///< Current sine value. + ///< + double svalue2; ///< Previous sine value. + ///< + double sincr; ///< Sine value increment. + ///< +}; + +/** + * @param v Input value. + * @return Calculated bit occupancy of the specified input value. Bit + * occupancy means how many significant lower bits are necessary to store a + * specified value. Function treats the input value as unsigned. + */ + +inline int getBitOccupancy( const int v ) +{ + static const uint8_t OccupancyTable[] = + { + 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 + }; + + const int tt = v >> 16; + + if( tt != 0 ) + { + const int t = v >> 24; + + return( t != 0 ? 24 + OccupancyTable[ t & 0xFF ] : + 16 + OccupancyTable[ tt ]); + } + else + { + const int t = v >> 8; + + return( t != 0 ? 8 + OccupancyTable[ t ] : OccupancyTable[ v ]); + } +} + +/** + * Function calculates frequency response of the specified FIR filter at the + * specified circular frequency. Phase can be calculated as atan2( im, re ). + * + * @param flt FIR filter's coefficients. + * @param fltlen Number of coefficients (taps) in the filter. + * @param th Circular frequency [0; pi]. + * @param[out] re0 Resulting real part of the complex frequency response. + * @param[out] im0 Resulting imaginary part of the complex frequency response. + * @param fltlat Filter's latency, in samples. + */ + +inline void calcFIRFilterResponse( const double* flt, int fltlen, + const double th, double& re0, double& im0, const int fltlat = 0 ) +{ + const double sincr = 2.0 * cos( th ); + double cvalue1; + double svalue1; + + if( fltlat == 0 ) + { + cvalue1 = 1.0; + svalue1 = 0.0; + } + else + { + cvalue1 = cos( -fltlat * th ); + svalue1 = sin( -fltlat * th ); + } + + double cvalue2 = cos( -( fltlat + 1 ) * th ); + double svalue2 = sin( -( fltlat + 1 ) * th ); + + double re = 0.0; + double im = 0.0; + + while( fltlen > 0 ) + { + re += cvalue1 * flt[ 0 ]; + im += svalue1 * flt[ 0 ]; + flt++; + fltlen--; + + double tmp = cvalue1; + cvalue1 = sincr * cvalue1 - cvalue2; + cvalue2 = tmp; + + tmp = svalue1; + svalue1 = sincr * svalue1 - svalue2; + svalue2 = tmp; + } + + re0 = re; + im0 = im; +} + +/** + * Function calculates frequency response and group delay of the specified FIR + * filter at the specified circular frequency. The group delay is calculated + * by evaluating the filter's response at close side-band frequencies of "th". + * + * @param flt FIR filter's coefficients. + * @param fltlen Number of coefficients (taps) in the filter. + * @param th Circular frequency [0; pi]. + * @param[out] re Resulting real part of the complex frequency response. + * @param[out] im Resulting imaginary part of the complex frequency response. + * @param[out] gd Resulting group delay at the specified frequency, in + * samples. + */ + +inline void calcFIRFilterResponseAndGroupDelay( const double* const flt, + const int fltlen, const double th, double& re, double& im, double& gd ) +{ + // Calculate response at "th". + + calcFIRFilterResponse( flt, fltlen, th, re, im ); + + // Calculate response at close sideband frequencies. + + const int Count = 2; + const double thd2 = 1e-9; + double ths[ Count ] = { th - thd2, th + thd2 }; + + if( ths[ 0 ] < 0.0 ) + { + ths[ 0 ] = 0.0; + } + + if( ths[ 1 ] > R8B_PI ) + { + ths[ 1 ] = R8B_PI; + } + + double ph1[ Count ]; + int i; + + for( i = 0; i < Count; i++ ) + { + double re1; + double im1; + + calcFIRFilterResponse( flt, fltlen, ths[ i ], re1, im1 ); + ph1[ i ] = atan2( im1, re1 ); + } + + if( fabs( ph1[ 1 ] - ph1[ 0 ]) > R8B_PI ) + { + if( ph1[ 1 ] > ph1[ 0 ]) + { + ph1[ 1 ] -= R8B_2PI; + } + else + { + ph1[ 1 ] += R8B_2PI; + } + } + + const double thd = ths[ 1 ] - ths[ 0 ]; + gd = ( ph1[ 1 ] - ph1[ 0 ]) / thd; +} + +/** + * Function normalizes FIR filter so that its frequency response at DC is + * equal to DCGain. + * + * @param[in,out] p Filter coefficients. + * @param l Filter length. + * @param DCGain Filter's gain at DC (linear, non-decibel value). + * @param pstep "p" array step. + */ + +inline void normalizeFIRFilter( double* const p, const int l, + const double DCGain, const int pstep = 1 ) +{ + R8BASSERT( l > 0 ); + R8BASSERT( pstep != 0 ); + + double s = 0.0; + double* pp = p; + int i = l; + + while( i > 0 ) + { + s += *pp; + pp += pstep; + i--; + } + + s = DCGain / s; + pp = p; + i = l; + + while( i > 0 ) + { + *pp *= s; + pp += pstep; + i--; + } +} + +/** + * Function calculates coefficients used to calculate 3rd order spline + * (polynomial) on the equidistant lattice, using 8 points. + * + * @param[out] c Output coefficients buffer, length = 4. + * @param xm3 Point at x-3 position. + * @param xm2 Point at x-2 position. + * @param xm1 Point at x-1 position. + * @param x0 Point at x position. + * @param x1 Point at x+1 position. + * @param x2 Point at x+2 position. + * @param x3 Point at x+3 position. + * @param x4 Point at x+4 position. + */ + +inline void calcSpline3p8Coeffs( double* const c, const double xm3, + const double xm2, const double xm1, const double x0, const double x1, + const double x2, const double x3, const double x4 ) +{ + c[ 0 ] = x0; + c[ 1 ] = ( 61.0 * ( x1 - xm1 ) + 16.0 * ( xm2 - x2 ) + + 3.0 * ( x3 - xm3 )) / 76.0; + + c[ 2 ] = ( 106.0 * ( xm1 + x1 ) + 10.0 * x3 + 6.0 * xm3 - 3.0 * x4 - + 29.0 * ( xm2 + x2 ) - 167.0 * x0 ) / 76.0; + + c[ 3 ] = ( 91.0 * ( x0 - x1 ) + 45.0 * ( x2 - xm1 ) + + 13.0 * ( xm2 - x3 ) + 3.0 * ( x4 - xm3 )) / 76.0; +} + +/** + * Function calculates coefficients used to calculate 2rd order spline + * (polynomial) on the equidistant lattice, using 8 points. This function is + * based on the calcSpline3p8Coeffs() function, but without the 3rd order + * coefficient. + * + * @param[out] c Output coefficients buffer, length = 3. + * @param xm3 Point at x-3 position. + * @param xm2 Point at x-2 position. + * @param xm1 Point at x-1 position. + * @param x0 Point at x position. + * @param x1 Point at x+1 position. + * @param x2 Point at x+2 position. + * @param x3 Point at x+3 position. + * @param x4 Point at x+4 position. + */ + +inline void calcSpline2p8Coeffs( double* const c, const double xm3, + const double xm2, const double xm1, const double x0, const double x1, + const double x2, const double x3, const double x4 ) +{ + c[ 0 ] = x0; + c[ 1 ] = ( 61.0 * ( x1 - xm1 ) + 16.0 * ( xm2 - x2 ) + + 3.0 * ( x3 - xm3 )) / 76.0; + + c[ 2 ] = ( 106.0 * ( xm1 + x1 ) + 10.0 * x3 + 6.0 * xm3 - 3.0 * x4 - + 29.0 * ( xm2 + x2 ) - 167.0 * x0 ) / 76.0; +} + +/** + * Function calculates coefficients used to calculate 3rd order segment + * interpolation polynomial on the equidistant lattice, using 4 points. + * + * @param[out] c Output coefficients buffer, length = 4. + * @param[in] y Equidistant point values. Value at offset 1 corresponds to + * x=0 point. + */ + +inline void calcSpline3p4Coeffs( double* const c, const double* const y ) +{ + c[ 0 ] = y[ 1 ]; + c[ 1 ] = 0.5 * ( y[ 2 ] - y[ 0 ]); + c[ 2 ] = y[ 0 ] - 2.5 * y[ 1 ] + y[ 2 ] + y[ 2 ] - 0.5 * y[ 3 ]; + c[ 3 ] = 0.5 * ( y[ 3 ] - y[ 0 ] ) + 1.5 * ( y[ 1 ] - y[ 2 ]); +} + +/** + * Function calculates coefficients used to calculate 3rd order segment + * interpolation polynomial on the equidistant lattice, using 6 points. + * + * @param[out] c Output coefficients buffer, length = 4. + * @param[in] y Equidistant point values. Value at offset 2 corresponds to + * x=0 point. + */ + +inline void calcSpline3p6Coeffs( double* const c, const double* const y ) +{ + c[ 0 ] = y[ 2 ]; + c[ 1 ] = ( 11.0 * ( y[ 3 ] - y[ 1 ]) + 2.0 * ( y[ 0 ] - y[ 4 ])) / 14.0; + c[ 2 ] = ( 20.0 * ( y[ 1 ] + y[ 3 ]) + 2.0 * y[ 5 ] - 4.0 * y[ 0 ] - + 7.0 * y[ 4 ] - 31.0 * y[ 2 ]) / 14.0; + + c[ 3 ] = ( 17.0 * ( y[ 2 ] - y[ 3 ]) + 9.0 * ( y[ 4 ] - y[ 1 ]) + + 2.0 * ( y[ 0 ] - y[ 5 ])) / 14.0; +} + +#if !defined( min ) + +/** + * @param v1 Value 1. + * @param v2 Value 2. + * @tparam T Values' type. + * @return The minimum of 2 values. + */ + +template< typename T > +inline T min( const T& v1, const T& v2 ) +{ + return( v1 < v2 ? v1 : v2 ); +} + +#endif // min + +#if !defined( max ) + +/** + * @param v1 Value 1. + * @param v2 Value 2. + * @tparam T Values' type. + * @return The maximum of 2 values. + */ + +template< typename T > +inline T max( const T& v1, const T& v2 ) +{ + return( v1 > v2 ? v1 : v2 ); +} + +#endif // max + +/** + * Function "clamps" (clips) the specified value so that it is not lesser than + * "minv", and not greater than "maxv". + * + * @param Value Value to clamp. + * @param minv Minimal allowed value. + * @param maxv Maximal allowed value. + * @return "Clamped" value. + */ + +inline double clampr( const double Value, const double minv, + const double maxv ) +{ + if( Value < minv ) + { + return( minv ); + } + else + if( Value > maxv ) + { + return( maxv ); + } + else + { + return( Value ); + } +} + +/** + * @param x Value to square. + * @return Squared value of the argument. + */ + +inline double sqr( const double x ) +{ + return( x * x ); +} + +/** + * @param v Input value. + * @param p Power factor. + * @return Returns pow() function's value with input value's sign check. + */ + +inline double pows( const double v, const double p ) +{ + return( v < 0.0 ? -pow( -v, p ) : pow( v, p )); +} + +/** + * @param v Input value. + * @return Calculated single-argument Gaussian function of the input value. + */ + +inline double gauss( const double v ) +{ + return( exp( -( v * v ))); +} + +/** + * @param v Input value. + * @return Calculated inverse hyperbolic sine of the input value. + */ + +inline double asinh( const double v ) +{ + return( log( v + sqrt( v * v + 1.0 ))); +} + +/** + * @param x Input value. + * @return Calculated zero-th order modified Bessel function of the first kind + * of the input value. Approximate value. + */ + +inline double besselI0( const double x ) +{ + const double ax = fabs( x ); + double y; + + if( ax < 3.75 ) + { + y = x / 3.75; + y *= y; + + return( 1.0 + y * ( 3.5156229 + y * ( 3.0899424 + y * ( 1.2067492 + + y * ( 0.2659732 + y * ( 0.360768e-1 + y * 0.45813e-2 )))))); + } + + y = 3.75 / ax; + + return( exp( ax ) / sqrt( ax ) * ( 0.39894228 + y * ( 0.1328592e-1 + + y * ( 0.225319e-2 + y * ( -0.157565e-2 + y * ( 0.916281e-2 + + y * ( -0.2057706e-1 + y * ( 0.2635537e-1 + y * ( -0.1647633e-1 + + y * 0.392377e-2 ))))))))); +} + +} // namespace r8b + +#endif // R8BBASE_INCLUDED diff --git a/Src/external_dependencies/openmpt-trunk/include/r8brain/r8bconf.h b/Src/external_dependencies/openmpt-trunk/include/r8brain/r8bconf.h new file mode 100644 index 00000000..ef08b16b --- /dev/null +++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/r8bconf.h @@ -0,0 +1,176 @@ +//$ nobt +//$ nocpp + +/** + * @file r8bconf.h + * + * @brief The "configuration" inclusion file you can modify. + * + * This is the "configuration" inclusion file for the "r8brain-free-src" + * sample rate converter. You may redefine the macros here as you see fit. + * + * r8brain-free-src Copyright (c) 2013-2021 Aleksey Vaneev + * See the "LICENSE" file for license. + */ + +#ifndef R8BCONF_INCLUDED +#define R8BCONF_INCLUDED + +#if !defined( R8B_IPP ) + /** + * Set the R8B_IPP macro definition to 1 to enable the use of Intel IPP's + * fast Fourier transform functions. Also uncomment and correct the IPP + * header inclusion macros. + * + * Do not forget to call the ippInit() function at the start of the + * application, before using this library's functions. + */ + + #define R8B_IPP 0 + +// #include <ippcore.h> +// #include <ipps.h> +#endif // !defined( R8B_IPP ) + +#if !defined( R8BASSERT ) + /** + * Assertion macro used to check for certain run-time conditions. By + * default no action is taken if assertion fails. + * + * @param e Expression to check. + */ + + #define R8BASSERT( e ) +#endif // !defined( R8BASSERT ) + +#if !defined( R8BCONSOLE ) + /** + * Console output macro, used to output various resampler status strings, + * including filter design parameters, convolver parameters. + * + * @param e Expression to send to the console, usually consists of a + * standard "printf" format string followed by several parameters + * (__VA_ARGS__). + */ + + #define R8BCONSOLE( ... ) +#endif // !defined( R8BCONSOLE ) + +#if !defined( R8B_BASECLASS ) + /** + * Macro defines the name of the class from which all classes that are + * designed to be created on heap are derived. The default + * r8b::CStdClassAllocator class uses "stdlib" memory allocation + * functions. + * + * The classes that are best placed on stack or as class members are not + * derived from any class. + */ + + #define R8B_BASECLASS :: r8b :: CStdClassAllocator +#endif // !defined( R8B_BASECLASS ) + +#if !defined( R8B_MEMALLOCCLASS ) + /** + * Macro defines the name of the class that implements raw memory + * allocation functions, see the r8b::CStdMemAllocator class for details. + */ + + #define R8B_MEMALLOCCLASS :: r8b :: CStdMemAllocator +#endif // !defined( R8B_MEMALLOCCLASS ) + +#if !defined( R8B_FILTER_CACHE_MAX ) + /** + * This macro specifies the number of filters kept in the cache at most. + * The actual number can be higher if many different filters are in use at + * the same time. + */ + + #define R8B_FILTER_CACHE_MAX 96 +#endif // !defined( R8B_FILTER_CACHE_MAX ) + +#if !defined( R8B_FRACBANK_CACHE_MAX ) + /** + * This macro specifies the number of whole-number stepping fractional + * delay filter banks kept in the cache at most. The actual number can be + * higher if many different filter banks are in use at the same time. As + * filter banks are usually big objects, it is advisable to keep this + * cache size small. + */ + + #define R8B_FRACBANK_CACHE_MAX 12 +#endif // !defined( R8B_FRACBANK_CACHE_MAX ) + +#if !defined( R8B_FLTTEST ) + /** + * This macro, when equal to 1, enables fractional delay filter bank + * testing: in this mode the filter bank becomes a dynamic member of the + * CDSPFracInterpolator object instead of being a global static object. + */ + + #define R8B_FLTTEST 0 +#endif // !defined( R8B_FLTTEST ) + +#if !defined( R8B_FASTTIMING ) + /** + * This macro, when equal to 1, enables a fast interpolation sample + * timing technique. This technique improves interpolation performance + * (by around 10%) at the expense of a minor sample timing drift which is + * on the order of 1e-6 samples per 10 billion output samples. This + * setting does not apply to whole-number stepping, if it is in use, as + * this stepping provides zero timing error without performance impact. + * Also does not apply to the cases when a whole-numbered (2X, 3X, etc.) + * resampling is in the actual use. + */ + + #define R8B_FASTTIMING 0 +#endif // !defined( R8B_FASTTIMING ) + +#if !defined( R8B_EXTFFT ) + /** + * This macro, when equal to 1, extends length of low-pass filters' FFT + * block by a factor of 2, by zero-padding it. This usually improves the + * overall time performance of the resampler at the expense of a higher + * overall latency (initial processing delay). If such delay is not an + * issue, setting this macro to 1 is preferrable. This macro can only have + * a value of 0 or 1. + */ + + #define R8B_EXTFFT 0 +#endif // !defined( R8B_EXTFFT ) + +#if !defined( R8B_PFFFT ) + /** + * When defined as 1, enables PFFFT routines which are fast, but which + * are limited to 24-bit precision. May be a good choice for time-series + * interpolation, when stop-band attenuation higher than 120 dB is not + * required. + */ + + #define R8B_PFFFT 0 +#endif // !defined( R8B_PFFFT ) + +#if R8B_PFFFT + #define R8B_FLOATFFT 1 +#endif // R8B_PFFFT + +#if !defined( R8B_PFFFT_DOUBLE ) + /** + * When defined as 1, enables PFFFT "double" routines which are fast, and + * which provide the highest precision. + */ + + #define R8B_PFFFT_DOUBLE 0 +#endif // !defined( R8B_PFFFT_DOUBLE ) + +#if !defined( R8B_FLOATFFT ) + /** + * The R8B_FLOATFFT definition enables double-to-float buffer conversions + * for FFT operations, for algorithms that work with "float" values. This + * macro should not be changed from the default "0" here. + */ + + #define R8B_FLOATFFT 0 +#endif // !defined( R8B_FLOATFFT ) + +#endif // R8BCONF_INCLUDED diff --git a/Src/external_dependencies/openmpt-trunk/include/r8brain/r8butil.h b/Src/external_dependencies/openmpt-trunk/include/r8brain/r8butil.h new file mode 100644 index 00000000..2f967b4b --- /dev/null +++ b/Src/external_dependencies/openmpt-trunk/include/r8brain/r8butil.h @@ -0,0 +1,306 @@ +//$ nobt +//$ nocpp + +/** + * @file r8butil.h + * + * @brief The inclusion file with several utility functions. + * + * This file includes several utility functions used by various utility + * programs like "calcErrorTable.cpp". + * + * r8brain-free-src Copyright (c) 2013-2021 Aleksey Vaneev + * See the "LICENSE" file for license. + */ + +#ifndef R8BUTIL_INCLUDED +#define R8BUTIL_INCLUDED + +#include "r8bbase.h" + +namespace r8b { + +/** + * @param re Real part of the frequency response. + * @param im Imaginary part of the frequency response. + * @return A magnitude response value converted from the linear scale to the + * logarithmic scale. + */ + +inline double convertResponseToLog( const double re, const double im ) +{ + return( 4.34294481903251828 * log( re * re + im * im + 1e-100 )); +} + +/** + * An utility function that performs frequency response scanning step update + * based on the current magnitude response's slope. + * + * @param[in,out] step The current scanning step. Will be updated on + * function's return. Must be a positive value. + * @param curg Squared magnitude response at the current frequency point. + * @param[in,out] prevg_log Previous magnitude response, log scale. Will be + * updated on function's return. + * @param prec Precision multiplier, affects the size of the step. + * @param maxstep The maximal allowed step. + * @param minstep The minimal allowed step. + */ + +inline void updateScanStep( double& step, const double curg, + double& prevg_log, const double prec, const double maxstep, + const double minstep = 1e-11 ) +{ + double curg_log = 4.34294481903251828 * log( curg + 1e-100 ); + curg_log += ( prevg_log - curg_log ) * 0.7; + + const double slope = fabs( curg_log - prevg_log ); + prevg_log = curg_log; + + if( slope > 0.0 ) + { + step /= prec * slope; + step = max( min( step, maxstep ), minstep ); + } +} + +/** + * Function locates normalized frequency at which the minimum filter gain + * is reached. The scanning is performed from lower (left) to higher + * (right) frequencies, the whole range is scanned. + * + * Function expects that the magnitude response is always reducing from lower + * to high frequencies, starting at "minth". + * + * @param flt Filter response. + * @param fltlen Filter response's length in samples (taps). + * @param[out] ming The current minimal gain (squared). On function's return + * will contain the minimal gain value found (squared). + * @param[out] minth The normalized frequency where the minimal gain is + * currently at. On function's return will point to the normalized frequency + * where the new minimum was found. + * @param thend The ending frequency, inclusive. + */ + +inline void findFIRFilterResponseMinLtoR( const double* const flt, + const int fltlen, double& ming, double& minth, const double thend ) +{ + const double maxstep = minth * 2e-3; + double curth = minth; + double re; + double im; + calcFIRFilterResponse( flt, fltlen, R8B_PI * curth, re, im ); + double prevg_log = convertResponseToLog( re, im ); + double step = 1e-11; + + while( true ) + { + curth += step; + + if( curth > thend ) + { + break; + } + + calcFIRFilterResponse( flt, fltlen, R8B_PI * curth, re, im ); + const double curg = re * re + im * im; + + if( curg > ming ) + { + ming = curg; + minth = curth; + break; + } + + ming = curg; + minth = curth; + + updateScanStep( step, curg, prevg_log, 0.31, maxstep ); + } +} + +/** + * Function locates normalized frequency at which the maximal filter gain + * is reached. The scanning is performed from lower (left) to higher + * (right) frequencies, the whole range is scanned. + * + * Note: this function may "stall" in very rare cases if the magnitude + * response happens to be "saw-tooth" like, requiring a very small stepping to + * be used. If this happens, it may take dozens of seconds to complete. + * + * @param flt Filter response. + * @param fltlen Filter response's length in samples (taps). + * @param[out] maxg The current maximal gain (squared). On function's return + * will contain the maximal gain value (squared). + * @param[out] maxth The normalized frequency where the maximal gain is + * currently at. On function's return will point to the normalized frequency + * where the maximum was reached. + * @param thend The ending frequency, inclusive. + */ + +inline void findFIRFilterResponseMaxLtoR( const double* const flt, + const int fltlen, double& maxg, double& maxth, const double thend ) +{ + const double maxstep = maxth * 1e-4; + double premaxth = maxth; + double premaxg = maxg; + double postmaxth = maxth; + double postmaxg = maxg; + + double prevth = maxth; + double prevg = maxg; + double curth = maxth; + double re; + double im; + calcFIRFilterResponse( flt, fltlen, R8B_PI * curth, re, im ); + double prevg_log = convertResponseToLog( re, im ); + double step = 1e-11; + + bool WasPeak = false; + int AfterPeakCount = 0; + + while( true ) + { + curth += step; + + if( curth > thend ) + { + break; + } + + calcFIRFilterResponse( flt, fltlen, R8B_PI * curth, re, im ); + const double curg = re * re + im * im; + + if( curg > maxg ) + { + premaxth = prevth; + premaxg = prevg; + maxg = curg; + maxth = curth; + WasPeak = true; + AfterPeakCount = 0; + } + else + if( WasPeak ) + { + if( AfterPeakCount == 0 ) + { + postmaxth = curth; + postmaxg = curg; + } + + if( AfterPeakCount == 5 ) + { + // Perform 2 approximate binary searches. + + int k; + + for( k = 0; k < 2; k++ ) + { + double l = ( k == 0 ? premaxth : maxth ); + double curgl = ( k == 0 ? premaxg : maxg ); + double r = ( k == 0 ? maxth : postmaxth ); + double curgr = ( k == 0 ? maxg : postmaxg ); + + while( true ) + { + const double c = ( l + r ) * 0.5; + calcFIRFilterResponse( flt, fltlen, R8B_PI * c, + re, im ); + + const double curg = re * re + im * im; + + if( curgl > curgr ) + { + r = c; + curgr = curg; + } + else + { + l = c; + curgl = curg; + } + + if( r - l < 1e-11 ) + { + if( curgl > curgr ) + { + maxth = l; + maxg = curgl; + } + else + { + maxth = r; + maxg = curgr; + } + + break; + } + } + } + + break; + } + + AfterPeakCount++; + } + + prevth = curth; + prevg = curg; + + updateScanStep( step, curg, prevg_log, 1.0, maxstep ); + } +} + +/** + * Function locates normalized frequency at which the specified maximum + * filter gain is reached. The scanning is performed from higher (right) + * to lower (left) frequencies, scanning stops when the required gain + * value was crossed. Function uses an extremely efficient binary search and + * thus expects that the magnitude response has the "main lobe" form produced + * by windowing, with a minimal pass-band ripple. + * + * @param flt Filter response. + * @param fltlen Filter response's length in samples (taps). + * @param maxg Maximal gain (squared). + * @param[out] th The current normalized frequency. On function's return will + * point to the normalized frequency where "maxg" is reached. + * @param thend The leftmost frequency to scan, inclusive. + */ + +inline void findFIRFilterResponseLevelRtoL( const double* const flt, + const int fltlen, const double maxg, double& th, const double thend ) +{ + // Perform exact binary search. + + double l = thend; + double r = th; + + while( true ) + { + const double c = ( l + r ) * 0.5; + + if( r - l < 1e-14 ) + { + th = c; + break; + } + + double re; + double im; + calcFIRFilterResponse( flt, fltlen, R8B_PI * c, re, im ); + const double curg = re * re + im * im; + + if( curg > maxg ) + { + l = c; + } + else + { + r = c; + } + } +} + +} // namespace r8b + +#endif // R8BUTIL_INCLUDED |