diff options
Diffstat (limited to 'Src/h264dec/ldecod/inc/global.h')
-rw-r--r-- | Src/h264dec/ldecod/inc/global.h | 1230 |
1 files changed, 1230 insertions, 0 deletions
// The helpers below clear memory through MMX register mm0 using MSVC inline
// assembly.  None of them executes EMMS, which is the condition MSVC warning
// C4799 reports, so that warning is disabled here.  memset_fast_end() issues
// _mm_empty(); presumably it is meant to be called once after a run of
// memzero* calls, before any x87 floating-point code executes
// (TODO confirm against the callers).
#pragma warning(disable: 4799)

/*!
 * Zero memory at dst in 32-byte steps using ordinary (cached) movq stores.
 *
 * \param dst  start of the region to clear
 * \param i    byte count
 *
 * Each pass stores 32 bytes and subtracts 32 from i, looping while the result
 * is still greater than zero (signed compare, jg).  The first 32 bytes are
 * therefore always written, and a count that is not a multiple of 32 is
 * effectively rounded up, i.e. stores continue past dst + i.  The note that
 * used to sit here said "must be a multiple of 16"; as written the loop needs
 * a positive multiple of 32.
 */
static inline void memzero_cache32(void *dst, unsigned long i)
{

	__asm {
		pxor mm0, mm0
		mov edi, dst

loopwrite:

		movq 0[edi], mm0
		movq 8[edi], mm0
		movq 16[edi], mm0
		movq 24[edi], mm0
		lea edi, [edi+32]
		sub i, 32
		jg loopwrite

	}
}

/*!
 * Same loop as memzero_cache32(), but the stores are movntq (the
 * non-temporal / streaming store described further down in this file).
 *
 * \param dst  start of the region to clear
 * \param i    byte count; same "positive multiple of 32" requirement as
 *             memzero_cache32()
 *
 * No sfence is issued by this function.
 */
static inline void memzero_fast32(void *dst, unsigned long i)
{

	__asm {
		pxor mm0, mm0
		mov edi, dst

loopwrite:

		movntq 0[edi], mm0
		movntq 8[edi], mm0
		movntq 16[edi], mm0
		movntq 24[edi], mm0

		lea edi, [edi+32]
		sub i, 32
		jg loopwrite

	}
}

//! Zero exactly 64 bytes at dst (eight unrolled 8-byte movq stores).
static inline void memzero64(void *dst)
{
	__asm {
		pxor mm0, mm0
		mov edi, dst

		movq 0[edi], mm0
		movq 8[edi], mm0
		movq 16[edi], mm0
		movq 24[edi], mm0
		movq 32[edi], mm0
		movq 40[edi], mm0
		movq 48[edi], mm0
		movq 56[edi], mm0
	}
}

//! Zero exactly 128 bytes at dst (sixteen unrolled 8-byte movq stores).
static inline void memzero128(void *dst)
{
	__asm {
		pxor mm0, mm0
		mov edi, dst

		movq 0[edi], mm0
		movq 8[edi], mm0
		movq 16[edi], mm0
		movq 24[edi], mm0
		movq 32[edi], mm0
		movq 40[edi], mm0
		movq 48[edi], mm0
		movq 56[edi], mm0
		movq 64[edi], mm0
		movq 72[edi], mm0
		movq 80[edi], mm0
		movq 88[edi], mm0
		movq 96[edi], mm0
		movq 104[edi], mm0
		movq 112[edi], mm0
		movq 120[edi], mm0
	}
}

//! Zero exactly 24 bytes at dst (three 8-byte movq stores).
static inline void memzero24(void *dst)
{
	__asm {
		pxor mm0, mm0
		mov edi, dst

		movq 0[edi], mm0
		movq 8[edi], mm0
		movq 16[edi], mm0
	}
}

//! Zero exactly 48 bytes at dst (six 8-byte movq stores).
static inline void memzero48(void *dst)
{
	__asm {
		pxor mm0, mm0
		mov edi, dst

		movq 0[edi], mm0
		movq 8[edi], mm0
		movq 16[edi], mm0
		movq 24[edi], mm0
		movq 32[edi], mm0
		movq 40[edi], mm0
	}
}

//! Zero exactly 16 bytes at dst (two 8-byte movq stores).
static inline void memzero16(void *dst)
{
	__asm {
		pxor mm0, mm0
		mov edi, dst

		movq 0[edi], mm0
		movq 8[edi], mm0
	}
}

//! Zero exactly 8 bytes at dst (a single movq store).
static inline void memzero8(void *dst)
{
	__asm {
		pxor mm0, mm0
		mov edi, dst

		movq 0[edi], mm0
	}
}

//! Empties the MMX state via _mm_empty() after the MMX helpers above were used.
static inline void memset_fast_end()
{
	_mm_empty();
}
// Size thresholds (in bytes) that memcpy_amd() compares the byte count against
// to choose one of its copy strategies.
//
// NOTE(review): the expansions are bare token sequences without parentheses.
// The visible uses divide them by 64 (IN_CACHE_COPY/64, UNCACHED_COPY/64),
// which expands to e.g. 64 * 1024/64; that gives the intended value only
// because * and / are evaluated left to right.  Any other surrounding operator
// would silently change the result.

#define TINY_BLOCK_COPY 64 // upper limit for movsd type copy
// The smallest copy uses the X86 "movsd" instruction, in an optimized
// form which is an "unrolled loop".

#define IN_CACHE_COPY 64 * 1024 // upper limit for movq/movq copy w/SW prefetch
// Next is a copy that uses the MMX registers to copy 8 bytes at a time,
// also using the "unrolled loop" optimization. This code uses
// the software prefetch instruction to get the data into the cache.

#define UNCACHED_COPY 197 * 1024 // upper limit for movq/movntq w/SW prefetch
// For larger blocks, which will spill beyond the cache, it's faster to
// use the Streaming Store instruction MOVNTQ. This write instruction
// bypasses the cache and writes straight to main memory. This code also
// uses the software prefetch instruction to pre-read the data.
// USE 64 * 1024 FOR THIS VALUE IF YOU'RE ALWAYS FILLING A "CLEAN CACHE"

// NOTE(review): "infinity" is not a C or assembler constant; this macro is not
// referenced anywhere in the visible code and appears to be documentation only.
#define BLOCK_PREFETCH_COPY infinity // no limit for movq/movntq w/block prefetch
// NOTE(review): 80h is assembler hex notation (128), not a C literal, so
// CACHEBLOCK can only be used inside the __asm block of memcpy_amd().
#define CACHEBLOCK 80h // number of 64-byte blocks (cache lines) for block prefetch
// For the largest size blocks, a special technique called Block Prefetch
// can be used to accelerate the read operations. Block Prefetch reads
// one address per cache line, for a series of cache lines, in a short loop.
// This is faster than using software prefetch. The technique is great for
// getting maximum read bandwidth, especially in DDR memory systems.
skip mmx copy + + cmp ecx, 32*1024 // dont align between 32k-64k because + jbe $memcpy_do_align // it appears to be slower + cmp ecx, 64*1024 + jbe $memcpy_align_done +$memcpy_do_align: + mov ecx, 8 // a trick thats faster than rep movsb... + sub ecx, edi // align destination to qword + and ecx, 111b // get the low bits + sub ebx, ecx // update copy count + neg ecx // set up to jump into the array + add ecx, offset $memcpy_align_done + jmp ecx // jump to array of movsbs + +align 4 + movsb + movsb + movsb + movsb + movsb + movsb + movsb + movsb + +$memcpy_align_done: // destination is dword aligned + mov ecx, ebx // number of bytes left to copy + shr ecx, 6 // get 64-byte block count + jz $memcpy_ic_2 // finish the last few bytes + + cmp ecx, IN_CACHE_COPY/64 // too big 4 cache? use uncached copy + jae $memcpy_uc_test + +// This is small block copy that uses the MMX registers to copy 8 bytes +// at a time. It uses the "unrolled loop" optimization, and also uses +// the software prefetch instruction to get the data into the cache. +align 16 +$memcpy_ic_1: // 64-byte block copies, in-cache copy + + prefetchnta [esi + (200*64/34+192)] // start reading ahead + + movq mm0, [esi+0] // read 64 bits + movq mm1, [esi+8] + movq [edi+0], mm0 // write 64 bits + movq [edi+8], mm1 // note: the normal movq writes the + movq mm2, [esi+16] // data to cache// a cache line will be + movq mm3, [esi+24] // allocated as needed, to store the data + movq [edi+16], mm2 + movq [edi+24], mm3 + movq mm0, [esi+32] + movq mm1, [esi+40] + movq [edi+32], mm0 + movq [edi+40], mm1 + movq mm2, [esi+48] + movq mm3, [esi+56] + movq [edi+48], mm2 + movq [edi+56], mm3 + + add esi, 64 // update source pointer + add edi, 64 // update destination pointer + dec ecx // count down + jnz $memcpy_ic_1 // last 64-byte block? 
+ +$memcpy_ic_2: + mov ecx, ebx // has valid low 6 bits of the byte count +$memcpy_ic_3: + shr ecx, 2 // dword count + and ecx, 1111b // only look at the "remainder" bits + neg ecx // set up to jump into the array + add ecx, offset $memcpy_last_few + jmp ecx // jump to array of movsds + +$memcpy_uc_test: + cmp ecx, UNCACHED_COPY/64 // big enough? use block prefetch copy + jae $memcpy_bp_1 + +$memcpy_64_test: + or ecx, ecx // _tail end of block prefetch will jump here + jz $memcpy_ic_2 // no more 64-byte blocks left + +// For larger blocks, which will spill beyond the cache, its faster to +// use the Streaming Store instruction MOVNTQ. This write instruction +// bypasses the cache and writes straight to main memory. This code also +// uses the software prefetch instruction to pre-read the data. +align 16 +$memcpy_uc_1: // 64-byte blocks, uncached copy + + prefetchnta [esi + (200*64/34+192)] // start reading ahead + + movq mm0,[esi+0] // read 64 bits + add edi,64 // update destination pointer + movq mm1,[esi+8] + add esi,64 // update source pointer + movq mm2,[esi-48] + movntq [edi-64], mm0 // write 64 bits, bypassing the cache + movq mm0,[esi-40] // note: movntq also prevents the CPU + movntq [edi-56], mm1 // from READING the destination address + movq mm1,[esi-32] // into the cache, only to be over-written + movntq [edi-48], mm2 // so that also helps performance + movq mm2,[esi-24] + movntq [edi-40], mm0 + movq mm0,[esi-16] + movntq [edi-32], mm1 + movq mm1,[esi-8] + movntq [edi-24], mm2 + movntq [edi-16], mm0 + dec ecx + movntq [edi-8], mm1 + jnz $memcpy_uc_1 // last 64-byte block? + + jmp $memcpy_ic_2 // almost done + +// For the largest size blocks, a special technique called Block Prefetch +// can be used to accelerate the read operations. Block Prefetch reads +// one address per cache line, for a series of cache lines, in a short loop. +// This is faster than using software prefetch, in this case. 
+// The technique is great for getting maximum read bandwidth, +// especially in DDR memory systems. +$memcpy_bp_1: // large blocks, block prefetch copy + + cmp ecx, CACHEBLOCK // big enough to run another prefetch loop? + jl $memcpy_64_test // no, back to regular uncached copy + + mov eax, CACHEBLOCK / 2 // block prefetch loop, unrolled 2X + add esi, CACHEBLOCK * 64 // move to the top of the block +align 16 +$memcpy_bp_2: + mov edx, [esi-64] // grab one address per cache line + mov edx, [esi-128] // grab one address per cache line + sub esi, 128 // go reverse order + dec eax // count down the cache lines + jnz $memcpy_bp_2 // keep grabbing more lines into cache + + mov eax, CACHEBLOCK // now that its in cache, do the copy +align 16 +$memcpy_bp_3: + movq mm0, [esi ] // read 64 bits + movq mm1, [esi+ 8] + movq mm2, [esi+16] + movq mm3, [esi+24] + movq mm4, [esi+32] + movq mm5, [esi+40] + movq mm6, [esi+48] + movq mm7, [esi+56] + add esi, 64 // update source pointer + movntq [edi ], mm0 // write 64 bits, bypassing cache + movntq [edi+ 8], mm1 // note: movntq also prevents the CPU + movntq [edi+16], mm2 // from READING the destination address + movntq [edi+24], mm3 // into the cache, only to be over-written, + movntq [edi+32], mm4 // so that also helps performance + movntq [edi+40], mm5 + movntq [edi+48], mm6 + movntq [edi+56], mm7 + add edi, 64 // update dest pointer + + dec eax // count down + + jnz $memcpy_bp_3 // keep copying + sub ecx, CACHEBLOCK // update the 64-byte block count + jmp $memcpy_bp_1 // keep processing chunks + +// The smallest copy uses the X86 "movsd" instruction, in an optimized +// form which is an "unrolled loop". Then it handles the last few bytes. 
/*
 * Portable versions of the zeroing helpers, used on targets that do not get
 * the MMX inline-assembly implementations.
 *
 * The previous loops indexed an int32_t pointer but advanced the index by 4,
 * as if it were a byte offset.  memzero24() therefore wrote words at byte
 * offsets 0, 16, 32, 48, 64 and 80: only 4 of the 24 requested bytes were
 * cleared and up to 60 bytes beyond the buffer were overwritten.  Both
 * helpers now defer to memset(), which also removes the alignment assumption
 * on dst and the signed/unsigned comparison in memzero_fast16().
 */

/*!
 * Zero the first i bytes at dst.
 *
 * \param dst  start of the region to clear
 * \param i    number of bytes to clear; the name suggests callers pass a
 *             multiple of 16, but any count (including 0) is handled
 */
static inline void memzero_fast16(void *dst, unsigned long i)
{
  memset(dst, 0, (size_t)i);
}

/*!
 * Zero exactly 24 bytes at dst, matching the three 8-byte stores of the MMX
 * implementation.
 *
 * \param dst  start of a region of at least 24 bytes
 */
static inline void memzero24(void *dst)
{
  memset(dst, 0, 24);
}

//! Nothing to clean up when no MMX registers were used.
static inline void memset_fast_end() {}
//! struct for context management
//! One adaptive binary context of the arithmetic decoder: the current
//! probability state plus the value of the most probable symbol.
typedef struct
{
  uint16_t state; // index into state-table CP
  unsigned char MPS; // Most Probable Symbol 0/1 CP (the old comment said "Least", contradicting the field name)
  unsigned char dummy; // for alignment (explicit padding byte after the 2+1 bytes above)
} BiContextType;

typedef BiContextType *BiContextTypePtr;
/*! Buffer structure for decoded reference picture marking commands */
/* Each node holds one command; nodes are chained through Next into a singly
 * linked list (img_par keeps the head in dec_ref_pic_marking_buffer).
 * The integer members are presumably the like-named dec_ref_pic_marking()
 * syntax elements of the H.264 slice header -- TODO confirm against the
 * parsing code. */
typedef struct DecRefPicMarking_s
{
  int memory_management_control_operation;
  int difference_of_pic_nums_minus1;
  int long_term_pic_num;
  int long_term_frame_idx;
  int max_long_term_frame_idx_plus1;
  struct DecRefPicMarking_s *Next; //!< next command in the list
} DecRefPicMarking_t;
Macroblock +typedef struct macroblock +{ + struct slice *p_Slice; //!< pointer to the current slice + struct img_par *p_Vid; //!< pointer to VideoParameters + struct inp_par *p_Inp; + int mbAddrX; //!< current MB address + int mb_x; + int mb_y; + int block_x; + int block_y; + int block_y_aff; + int pix_x; + int pix_y; + int pix_c_x; + int pix_c_y; + + int subblock_x; + int subblock_y; + + int qp; //!< QP luma + int qpc[2]; //!< QP chroma + int qp_scaled[MAX_PLANE]; //!< QP scaled for all comps. + Boolean is_lossless; + Boolean is_intra_block; + Boolean is_v_block; + + short slice_nr; + short delta_quant; //!< for rate control + + struct macroblock *mb_up; //!< pointer to neighboring MB (CABAC) + struct macroblock *mb_left; //!< pointer to neighboring MB (CABAC) + + // some storage of macroblock syntax elements for global access + int mb_type; + short mvd[2][BLOCK_MULTIPLE][BLOCK_MULTIPLE][2]; //!< indices correspond to [forw,backw][block_y][block_x][x,y] + int cbp; + int64 cbp_blk [3]; + int64 cbp_bits [3]; + int64 cbp_bits_8x8[3]; + + int i16mode; + char b8mode[4]; + char b8pdir[4]; + char ei_flag; //!< error indicator flag that enables concealment + char dpl_flag; //!< error indicator flag that signals a missing data partition + char ipmode_DPCM; + + short DFDisableIdc; + short DFAlphaC0Offset; + short DFBetaOffset; + + char c_ipred_mode; //!< chroma intra prediction mode + Boolean mb_field; + + int skip_flag; + + int mb_addr_left, mb_addr_up, mb_addr_upper_right, mb_addr_upper_left; + Boolean mb_avail_left, mb_avail_up, mb_avail_upper_right, mb_avail_upper_left; + + Boolean luma_transform_size_8x8_flag; + Boolean NoMbPartLessThan8x8Flag; + + void (*itrans_8x8)(struct macroblock *currMB, ColorPlane pl, int ioff, int joff); + + void (*GetMVPredictor) (struct macroblock *currMB, PixelPos *block, + short pmv[2], short ref_frame, struct pic_motion **motion, int mb_x, int mb_y, int blockshape_x, int blockshape_y); + + int (*read_and_store_CBP_block_bit) (struct 
//! Bitstream
//! Read state for one code buffer.  The CABAC and CAVLC paths keep separate
//! position/length members over the same streamBuffer.
typedef struct
{
  // CABAC Decoding
  int read_len; //!< actual position in the codebuffer, CABAC only
  int code_len; //!< overall codebuffer length, CABAC only
  // CAVLC Decoding
  int frame_bitoffset; //!< actual position in the codebuffer, bit-oriented, CAVLC only
  int bitstream_length; //!< overall codebuffer length, byte oriented, CAVLC only

  byte *streamBuffer; //!< actual codebuffer for read bytes
} Bitstream;
+typedef int h264_int_macroblock_row_t[MB_BLOCK_SIZE]; +typedef h264_int_macroblock_row_t h264_int_macroblock_t[MB_BLOCK_SIZE]; +// 16 bit precision +typedef int16_t h264_short_macroblock_row_t[MB_BLOCK_SIZE]; +typedef h264_short_macroblock_row_t h264_short_macroblock_t[MB_BLOCK_SIZE]; +// 8 bit precision +typedef imgpel h264_imgpel_macroblock_row_t[MB_BLOCK_SIZE]; +typedef h264_imgpel_macroblock_row_t h264_imgpel_macroblock_t[MB_BLOCK_SIZE]; + + + + +typedef int h264_pic_position[2]; +typedef byte h264_4x4_byte[BLOCK_SIZE][BLOCK_SIZE]; +typedef h264_4x4_byte h264_nz_coefficient[3]; + +//! DataPartition +typedef struct datapartition +{ + + Bitstream *bitstream; + DecodingEnvironment de_cabac; + +} DataPartition; + +//! Slice +typedef struct slice +{ + struct img_par *p_Vid; + struct inp_par *p_Inp; + pic_parameter_set_rbsp_t *active_pps; + seq_parameter_set_rbsp_t *active_sps; + + struct colocated_params *p_colocated; + struct colocated_params *Co_located_JV[MAX_PLANE]; //!< p_colocated to be used during 4:4:4 independent mode decoding + + int mb_aff_frame_flag; + int direct_spatial_mv_pred_flag; //!< Indicator for direct mode type (1 for Spatial, 0 for Temporal) + int num_ref_idx_l0_active; //!< number of available list 0 references + int num_ref_idx_l1_active; //!< number of available list 1 references + + int qp; + int slice_qp_delta; + int qs; + int slice_qs_delta; + int slice_type; //!< slice type + int model_number; //!< cabac model number + PictureStructure structure; //!< Identify picture structure type + int start_mb_nr; //!< MUST be set by NAL even in case of ei_flag == 1 + int max_part_nr; + int dp_mode; //!< data partitioning mode + int last_dquant; + + // int last_mb_nr; //!< only valid when entropy coding == CABAC + DataPartition *partArr; //!< array of partitions + MotionInfoContexts *mot_ctx; //!< pointer to struct of context models for use in CABAC + TextureInfoContexts *tex_ctx; //!< pointer to struct of context models for use in CABAC + + int 
mvscale[6][MAX_REFERENCE_PICTURES]; + + int ref_pic_list_reordering_flag_l0; + int *reordering_of_pic_nums_idc_l0; + int *abs_diff_pic_num_minus1_l0; + int *long_term_pic_idx_l0; + int ref_pic_list_reordering_flag_l1; + int *reordering_of_pic_nums_idc_l1; + int *abs_diff_pic_num_minus1_l1; + int *long_term_pic_idx_l1; + + + short DFDisableIdc; //!< Disable deblocking filter on slice + short DFAlphaC0Offset; //!< Alpha and C0 offset for filtering slice + short DFBetaOffset; //!< Beta offset for filtering slice + + int pic_parameter_set_id; //!<the ID of the picture parameter set the slice is reffering to + + int dpB_NotPresent; //!< non-zero, if data partition B is lost + int dpC_NotPresent; //!< non-zero, if data partition C is lost + + + __declspec(align(32)) h264_imgpel_macroblock_t mb_pred[MAX_PLANE]; + __declspec(align(32)) h264_imgpel_macroblock_t mb_rec[MAX_PLANE]; + __declspec(align(32)) union + { + __declspec(align(32)) h264_short_8x8block_t mb_rres8[MAX_PLANE][4]; + __declspec(align(32)) h264_short_macroblock_t cof[MAX_PLANE]; + __declspec(align(32)) h264_short_block_t cof4[MAX_PLANE][16]; // TODO: get this to work, one of these days + __declspec(align(32)) h264_short_macroblock_t ipcm[MAX_PLANE]; + }; + + int cofu[16]; + + // Scaling matrix info + int InvLevelScale4x4_Intra[3][6][4][4]; + int InvLevelScale4x4_Inter[3][6][4][4]; + int InvLevelScale8x8_Intra[3][6][64]; + int InvLevelScale8x8_Inter[3][6][64]; + + int *qmatrix[12]; + + // Cabac + // TODO: we could optimize coefficient reading by storing the levels/runs instead of coefficients + int16_t coeff[64]; // one more for EOB + int coeff_ctr; + int pos; + + //weighted prediction + unsigned int apply_weights; + unsigned int luma_log2_weight_denom; + unsigned int chroma_log2_weight_denom; + int wp_weight[2][MAX_REFERENCE_PICTURES][3]; // weight in [list][index][component] order + int wp_offset[6][MAX_REFERENCE_PICTURES][3]; // offset in [list][index][component] order + int 
wbp_weight[6][MAX_REFERENCE_PICTURES][MAX_REFERENCE_PICTURES][3]; //weight in [list][fw_index][bw_index][component] order + int wp_round_luma; + int wp_round_chroma; + + void (*read_CBP_and_coeffs_from_NAL) (Macroblock *currMB); + int (*decode_one_component ) (Macroblock *currMB, ColorPlane curr_plane, struct video_image *image, struct storable_picture *dec_picture); + int (*readSlice ) (struct img_par *, struct inp_par *); + int (*nal_startcode_follows ) (struct slice*, int ); + void (*read_motion_info_from_NAL) (Macroblock *currMB); + void (*read_one_macroblock ) (Macroblock *currMB); + void (*interpret_mb_mode ) (Macroblock *currMB); + void (*compute_colocated ) (struct slice *currSlice, struct colocated_params *p, struct storable_picture **listX[6]); + + void (*linfo_cbp_intra) (int len,int info,int *cbp, int *dummy); + void (*linfo_cbp_inter) (int len,int info,int *cbp, int *dummy); +} Slice; + +//****************************** ~DM *********************************** + +// image parameters +typedef struct img_par +{ + struct inp_par *p_Inp; + pic_parameter_set_rbsp_t *active_pps; + seq_parameter_set_rbsp_t *active_sps; + seq_parameter_set_rbsp_t SeqParSet[MAXSPS]; + pic_parameter_set_rbsp_t PicParSet[MAXPPS]; + + struct sei_params *p_SEI; + + struct old_slice_par *old_slice; + int number; //!< frame number + unsigned int current_mb_nr; // bitstream order + unsigned int num_dec_mb; + short current_slice_nr; + int *intra_block; + + int qp; //!< quant for the current frame + + int sp_switch; //!< 1 for switching sp, 0 for normal sp + int type; //!< image type INTER/INTRA + int width; + int height; + int width_cr; //!< width chroma + int height_cr; //!< height chroma + int mb_x; + int mb_y; + int block_x; + int block_y; + int pix_c_x; + int pix_c_y; + + int allrefzero; + + byte **ipredmode; //!< prediction type [90][74] + h264_nz_coefficient *nz_coeff; + int **siblock; + int cod_counter; //!< Current count of number of skipped macroblocks in a row + + int 
structure; //!< Identify picture structure type + + Slice *currentSlice; //!< pointer to current Slice data struct + Macroblock *mb_data; //!< array containing all MBs of a whole frame + Macroblock *mb_data_JV[MAX_PLANE]; //!< mb_data to be used for 4:4:4 independent mode + int colour_plane_id; //!< colour_plane_id of the current coded slice + int ChromaArrayType; + + // For MB level frame/field coding + int mb_aff_frame_flag; + + // for signalling to the neighbour logic that this is a deblocker call + int DeblockCall; + byte mixedModeEdgeFlag; + + // picture error concealment + // concealment_head points to first node in list, concealment_end points to + // last node in list. Initialize both to NULL, meaning no nodes in list yet + struct concealment_node *concealment_head; + struct concealment_node *concealment_end; + + DecRefPicMarking_t *dec_ref_pic_marking_buffer; //!< stores the memory management control operations + + int num_ref_idx_l0_active; //!< number of forward reference + int num_ref_idx_l1_active; //!< number of backward reference + + int slice_group_change_cycle; + + int redundant_pic_cnt; + + unsigned int pre_frame_num; //!< store the frame_num in the last decoded slice. For detecting gap in frame_num. + int non_conforming_stream; + + // End JVT-D101 + // POC200301: from unsigned int to int + int toppoc; //poc for this top field // POC200301 + int bottompoc; //poc of bottom field of frame + int framepoc; //poc of this frame // POC200301 + unsigned int frame_num; //frame_num for this frame + unsigned int field_pic_flag; + byte bottom_field_flag; + + //the following is for slice header syntax elements of poc + // for poc mode 0. + unsigned int pic_order_cnt_lsb; + int delta_pic_order_cnt_bottom; + // for poc mode 1. 
+ int delta_pic_order_cnt[3]; + + // //////////////////////// + // for POC mode 0: + signed int PrevPicOrderCntMsb; + unsigned int PrevPicOrderCntLsb; + signed int PicOrderCntMsb; + + // for POC mode 1: + unsigned int AbsFrameNum; + signed int ExpectedPicOrderCnt, PicOrderCntCycleCnt, FrameNumInPicOrderCntCycle; + unsigned int PreviousFrameNum, FrameNumOffset; + int ExpectedDeltaPerPicOrderCntCycle; + int PreviousPOC, ThisPOC; + int PreviousFrameNumOffset; + // ///////////////////////// + + int idr_flag; + int nal_reference_idc; //!< nal_reference_idc from NAL unit + + int idr_pic_id; + + int MaxFrameNum; + + unsigned int PicWidthInMbs; + unsigned int PicHeightInMapUnits; + unsigned int FrameHeightInMbs; + unsigned int PicHeightInMbs; + unsigned int PicSizeInMbs; + unsigned int FrameSizeInMbs; + unsigned int oldFrameSizeInMbs; + + int no_output_of_prior_pics_flag; + int long_term_reference_flag; + int adaptive_ref_pic_buffering_flag; + + int last_has_mmco_5; + int last_pic_bottom_field; + + // Fidelity Range Extensions Stuff + short bitdepth_luma; + short bitdepth_chroma; + int bitdepth_scale[2]; + int bitdepth_luma_qp_scale; + int bitdepth_chroma_qp_scale; + unsigned int dc_pred_value_comp[MAX_PLANE]; //!< component value for DC prediction (depends on component pel bit depth) + int max_pel_value_comp[MAX_PLANE]; //!< max value that one picture element (pixel) can take (depends on pic_unit_bitdepth) + int Transform8x8Mode; + int profile_idc; + int yuv_format; + int lossless_qpprime_flag; + int num_blk8x8_uv; + int num_uv_blocks; + int num_cdc_coeff; + int mb_cr_size_x; + int mb_cr_size_y; + int mb_cr_size_x_blk; + int mb_cr_size_y_blk; + int mb_size[3][2]; //!< component macroblock dimensions + int mb_size_blk[3][2]; //!< component macroblock dimensions + int mb_size_shift[3][2]; + int subpel_x; + int subpel_y; + int shiftpel_x; + int shiftpel_y; + + int max_vmv_r; //!< maximum vertical motion vector range in luma quarter frame pixel units for the current level_idc 
+ int max_mb_vmv_r; //!< maximum vertical motion vector range in luma quarter pixel units for the current level_idc + + // picture error concealment + int last_ref_pic_poc; + int ref_poc_gap; + int poc_gap; + int earlier_missing_poc; + unsigned int frame_to_conceal; + int IDR_concealment_flag; + int conceal_slice_type; + + // random access point decoding + int recovery_point; + int recovery_point_found; + int recovery_frame_cnt; + int recovery_frame_num; + int recovery_poc; + + int separate_colour_plane_flag; + + int frame_number; + int init_bl_done; + + // Redundant slices. Should be moved to another structure and allocated only if extended profile + unsigned int previous_frame_num; //!< frame number of previous slice + int ref_flag[17]; //!< 0: i-th previous frame is incorrect + //!< non-zero: i-th previous frame is correct + int Is_primary_correct; //!< if primary frame is correct, 0: incorrect + int Is_redundant_correct; //!< if redundant frame is correct, 0:incorrect + int redundant_slice_ref_idx; //!< reference index of redundant slice + + //FILE *p_log; //!< SNR file + int LastAccessUnitExists; + int NALUCount; + + Boolean global_init_done; + + int *qp_per_matrix; + int *qp_rem_matrix; + + struct frame_store *last_out_fs; + int pocs_in_dpb[100]; + + + struct storable_picture *dec_picture; + struct storable_picture *dec_picture_JV[MAX_PLANE]; //!< dec_picture to be used during 4:4:4 independent mode decoding + struct storable_picture *no_reference_picture; //!< dummy storable picture for recovery point + struct storable_picture **listX[6]; + + // Error parameters + struct object_buffer *erc_object_list; + struct ercVariables_s *erc_errorVar; + + int erc_mvperMB; + struct img_par *erc_img; + int ec_flag[SE_MAX_ELEMENTS]; //!< array to set errorconcealment + + struct memory_input_struct *mem_input; + + struct frame_store *out_buffer; + + struct storable_picture *pending_output; + int pending_output_state; + int recovery_flag; + + // dpb + struct 
decoded_picture_buffer *p_Dpb; + + char listXsize[6]; + // report + char cslice_type[9]; + // FMO + int *MbToSliceGroupMap; + int *MapUnitToSliceGroupMap; + int NumberOfSliceGroups; // the number of slice groups -1 (0 == scan order, 7 == maximum) + +#if (ENABLE_OUTPUT_TONEMAPPING) + struct tone_mapping_struct_s *seiToneMapping; +#endif + + // benski> buffer of storablge pictures ready for output. + // might be able to optimize a tad by making a ringbuffer, but i doubt it matters + struct storable_picture **out_pictures; + size_t size_out_pictures; + size_t num_out_pictures; + + ImageCache image_cache[2]; // [0] is luma [1] is chroma (shared for both planes) + MotionCache motion_cache; + + h264_pic_position *PicPos; //! Helper array to access macroblock positions. + + NALU_t *nalu; // a cache so we don't re-alloc every time + + void (*getNeighbour) (const Macroblock *currMB, int xN, int yN, const int mb_size[2], PixelPos *pix); + void (*getNeighbourPX_NoPos)(const Macroblock *currMB, int xN, int yN, const int mb_size[2], PixelPos *pix); + void (*getNeighbourXP_NoPos)(const Macroblock *currMB, int xN, int yN, const int mb_size[2], PixelPos *pix); + void (*getNeighbourLuma) (const Macroblock *currMB, int xN, int yN, PixelPos *pix); + void (*getNeighbourPXLuma) (const Macroblock *currMB, int xN, int yN, PixelPos *pix); + void (*getNeighbourXPLuma) (const Macroblock *currMB, int xN, int yN, PixelPos *pix); + void (*getNeighbourLeftLuma)(const Macroblock *currMB, PixelPos *pix); + void (*getNeighbourNXLuma) (const Macroblock *currMB, int yN, PixelPos *pix); // xN<0, yN full range + void (*getNeighbourLeft) (const Macroblock *currMB, const int mb_size[2], PixelPos *pix); // xN<0, yN=0 + void (*getNeighbourUp) (const Macroblock *currMB, const int mb_size[2], PixelPos *pix); // xN=0, yN<0 + void (*getNeighbourNX) (const Macroblock *currMB, int yN, const int mb_size[2], PixelPos *pix); // xN<0, yN full range + void (*getNeighbourNP) (const Macroblock *currMB, int yN, const 
int mb_size[2], PixelPos *pix); // xN<0, yN>=0 + void (*getNeighbourNPChromaNB)(const Macroblock *currMB, int yN, const int mb_size[2], PixelPos *pix); // xN<0, yN>=0 + void (*getNeighbour0X) (const Macroblock *currMB, int yN, const int mb_size[2], PixelPos *pix); // xN=0, yN full range + void (*getNeighbour0XLuma) (const Macroblock *currMB, int yN, PixelPos *pix); // xN=0, yN full range + void (*getNeighbourX0) (const Macroblock *currMB, int xN, const int mb_size[2], PixelPos *pix); // xN full range, yN = 0 + void (*getNeighbourUpLuma) (const Macroblock *currMB, PixelPos *pix); // xN=0, yN<0 + void (*getNeighbourNPLumaNB)(const Macroblock *currMB, int yN, PixelPos *pix); + void (*getNeighbourPXLumaNB) (const Macroblock *currMB, int xN, int yN, PixelPos *pix); + void (*getNeighbourPXLumaNB_NoPos)(const Macroblock *currMB, int yN, PixelPos *pix); + void (*getNeighbourPPLumaNB) (const Macroblock *currMB, int xN, int yN, PixelPos *pix); + void (*getNeighbourXPLumaNB) (const Macroblock *currMB, int xN, int yN, PixelPos *pix); + void (*getNeighbourXPLumaNB_NoPos)(const Macroblock *currMB, int xN, int yN, PixelPos *pix); + void (*get_mb_block_pos) (const h264_pic_position *PicPos, int mb_addr, short *x, short *y); + void (*GetStrength) (byte Strength[16], Macroblock *MbQ, int dir,int edge, int mvlimit, struct storable_picture *p); + void (*EdgeLoopLuma) (ColorPlane pl, struct video_image *image, const byte Strength[16], Macroblock *MbQ, int dir, int edge, struct storable_picture *p); + void (*EdgeLoopChroma) (struct video_image *image, byte Strength[16], Macroblock *MbQ, int dir, int edge, int uv, struct storable_picture *p); +} VideoParameters; + +// input parameters from configuration file +typedef struct inp_par +{ + int intra_profile_deblocking; //!< Loop filter usage determined by flags and parameters in bitstream + + // Output sequence format related variables + FrameFormat output; //!< output related information + +#ifdef _LEAKYBUCKET_ + unsigned long R_decoder; 
//!< Decoder Rate in HRD Model + unsigned long B_decoder; //!< Decoder Buffer size in HRD model + unsigned long F_decoder; //!< Decoder Initial buffer fullness in HRD model + char LeakyBucketParamFile[FILE_NAME_SIZE]; //!< LeakyBucketParamFile +#endif + + // picture error concealment + int ref_poc_gap; + int poc_gap; +} InputParameters; + +typedef struct old_slice_par +{ + unsigned field_pic_flag; + unsigned frame_num; + int nal_ref_idc; + unsigned pic_oder_cnt_lsb; + int delta_pic_oder_cnt_bottom; + int delta_pic_order_cnt[2]; + byte bottom_field_flag; + byte idr_flag; + int idr_pic_id; + int pps_id; +} OldSliceParams; + +typedef struct decoder_params +{ + InputParameters *p_Inp; //!< Input Parameters + VideoParameters *p_Vid; //!< Image Parameters + +} DecoderParams; + +#ifdef TRACE +extern FILE *p_trace; //!< Trace file +extern int bitcounter; +#endif + +// prototypes + +extern void error(char *text, int code); + +// dynamic mem allocation +extern int init_global_buffers(VideoParameters *p_Vid); +extern void free_global_buffers(VideoParameters *p_Vid); + +extern int RBSPtoSODB(byte *streamBuffer, int last_byte_pos); +extern int EBSPtoRBSP(byte *streamBuffer, int end_bytepos); + +void FreePartition (DataPartition *dp, int n); +DataPartition *AllocPartition(int n); + +void tracebits(const char *trace_str, int len, int info,int value1); +void tracebits2(const char *trace_str, int len, int info); + +unsigned CeilLog2 ( unsigned uiVal); +unsigned CeilLog2_sf( unsigned uiVal); + +// For 4:4:4 independent mode +extern void change_plane_JV( VideoParameters *p_Vid, int nplane ); +extern void make_frame_picture_JV(VideoParameters *p_Vid); + + +#endif + + |